From 21577ff948b342b83e100ef83171ee38998cf406 Mon Sep 17 00:00:00 2001 From: Natalie Weires Date: Thu, 18 Jan 2024 17:10:23 +0000 Subject: [PATCH 1/7] Update GCP docs --- buildstockbatch/gcp/README.md | 23 ++++++++++++++ buildstockbatch/gcp/arch.svg | 1 + buildstockbatch/gcp/gcp.py | 15 ++-------- buildstockbatch/gcp/main.tf | 4 +-- docs/installation.rst | 56 ++++++++++++++++++++++++++--------- docs/project_defn.rst | 28 +++++++++--------- docs/run_sims.rst | 23 +++++++++++--- 7 files changed, 104 insertions(+), 46 deletions(-) create mode 100644 buildstockbatch/gcp/README.md create mode 100644 buildstockbatch/gcp/arch.svg diff --git a/buildstockbatch/gcp/README.md b/buildstockbatch/gcp/README.md new file mode 100644 index 00000000..10ecc644 --- /dev/null +++ b/buildstockbatch/gcp/README.md @@ -0,0 +1,23 @@ +# Buildstock Batch on GCP + +![Architecture diagram](/buildstockbatch/gcp/arch.svg) + +Buildstock Batch runs on GCP in a few steps: + + * Locally + - Build a Docker image that includes OpenStudio and BuildStock Batch. + - Push the Docker image to GCP Artifact Registry. + - Run sampling, and split the generated buildings + upgrades into batches. + - Collect all the required input files (including downloading weather files) + and upload them to a Cloud Storage bucket. + - Kick off the Batch and Cloud Run jobs (described below), and wait for them to finish. + + * In GCP Batch + - Run a job on GCP Batch where each task runs one batch of simulations. + GCP Batch uses the Docker image to run OpenStudio on Compute Engine VMs. + - Raw output files are written to the bucket in Cloud Storage. + + * In Cloud Run + - Create and start a Cloud Run job for post-processing steps. + Also uses the Docker image. + - Aggregated output files are written to the bucket in Cloud Storage. diff --git a/buildstockbatch/gcp/arch.svg b/buildstockbatch/gcp/arch.svg new file mode 100644 index 00000000..d7bdcd9a --- /dev/null +++ b/buildstockbatch/gcp/arch.svg @@ -0,0 +1 @@ + diff --git a/buildstockbatch/gcp/gcp.py b/buildstockbatch/gcp/gcp.py index 0dda94e5..65605f10 100644 --- a/buildstockbatch/gcp/gcp.py +++ b/buildstockbatch/gcp/gcp.py @@ -5,16 +5,7 @@ ~~~~~~~~~~~~~~~ This class contains the object & methods that allow for usage of the library with GCP Batch. -Architecture overview (these steps are split between GcpBatch and DockerBatchBase): - - Build a Docker image that includes OpenStudio and BuildStock Batch. - - Push the Docker image to GCP Artifact Registry. - - Run sampling, and split the generated buildings into batches. - - Collect all the required input files (including downloading weather files) - and upload them to Cloud Storage. - - Run a job on GCP Batch where each task runs one batch of simulations. - Uses the Docker image to run OpenStudio on Compute Engine VMs. - - Run a Cloud Run job for post-processing steps. Also uses the Docker image. - - Output files are written to a bucket in Cloud Storage. +See the README for an overview of the architecture. 
:author: Robert LaThanh, Natalie Weires
:copyright: (c) 2023 by The Alliance for Sustainable Energy
@@ -468,7 +459,7 @@ def show_jobs(self):
         """
         # GCP Batch job that runs the simulations
         if job := self.get_existing_batch_job():
-            logger.info("Batch job")
+            logger.info("--------------- Batch job ---------------")
             logger.info(f"  Name: {job.name}")
             logger.info(f"  UID: {job.uid}")
             logger.info(f"  Status: {job.status.state.name}")
@@ -490,7 +481,7 @@ def show_jobs(self):
             status = "Running"
             if last_execution.completion_time:
                 status = "Completed"
-        logger.info("Post-processing Cloud Run job")
+        logger.info("----- Post-processing Cloud Run job -----")
         logger.info(f"  Name: {job.name}")
         logger.info(f"  Status of latest run ({last_execution.name}): {status}")
         logger.debug(f"Full job info:\n{job}")
diff --git a/buildstockbatch/gcp/main.tf b/buildstockbatch/gcp/main.tf
index 98e12e90..677856d9 100644
--- a/buildstockbatch/gcp/main.tf
+++ b/buildstockbatch/gcp/main.tf
@@ -4,10 +4,10 @@
 #   terraform init
 #
 # To see what changes will be applied:
-#   terraform plan
+#   terraform plan -var="gcp_project=myproject"
 #
 # To apply those changes:
-#   terraform apply
+#   terraform apply -var="gcp_project=myproject"
 #
 # Optionally set variables:
 #   terraform apply -var="gcp_project=myproject" -var="bucket_name=mybucket" -var="region=us-east1"
diff --git a/docs/installation.rst b/docs/installation.rst
index ee82f39b..d108210b 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -246,14 +246,28 @@ Google Cloud Platform
 Shared, one-time GCP setup
 ..........................

-One-time GCP setup shared by all users.
+One-time GCP setup that can be shared by multiple users.

 1. If needed, create a GCP Project. The following steps will occur in that project.
-2. `Create a repository`_ in Artifact Registry (to store Docker images).
-3. `Create a Google Cloud Storage Bucket`_ (that will store simulation and postprocessing output).
-   Alternatively, each user can create and use their own bucket.
-4. Create a Service Account. Alternatively, each user can create their own service account, or each
-   user can install the `gcloud CLI`_. The following documentation will assume use of a Service
+2. Set up the following resources in your GCP project. You can either do this manually or
+   with Terraform.
+   a. Option 1: Manual setup
+      * `Create a Google Cloud Storage Bucket`_ (that will store simulation and postprocessing output).
+        Alternatively, each user can create and use their own bucket.
+      * `Create a repository`_ in Artifact Registry (to store Docker images).
+        * This is expected to be in the same region as the storage bucket.
+   b. Option 2: Use Terraform
+      * From the buildstockbatch/gcp/ directory, run the following with your chosen GCP project and region.
+        You can optionally specify the names of the storage bucket and artifact registry repository. See
+        main.tf for more details.
+
+        ::
+
+          terraform init
+          terraform apply -var="gcp_project=PROJECT" -var="region=REGION"
+
+3. Optionally, create a shared Service Account. Alternatively, each user can create their own service account,
+   or each user can install the `gcloud CLI`_. The following documentation will assume use of a Service
    Account.

 .. _Create a repository:
 .. _Create a Google Cloud Storage Bucket:
    https://cloud.google.com/storage/docs/creating-buckets
 .. _gcloud CLI: https://cloud.google.com/sdk/docs/install

-Per-developer setup
+Per-user setup
 ...................
-One-time setup that each developer needs to do on the workstation from which they'll launch and
+One-time setup that each user needs to do on the workstation from which they'll launch and
 manage BuildStockBatch runs.

 1. `Install Docker`_. This is needed by the script to manage Docker images (pull, push, etc.).
 2. Get BuildStockBatch and set up a Python environment for it using the :ref:`python` instructions
    above (i.e., create a Python virtual environment, activate the venv, and install buildstockbatch
    to it).
 3. Download/Clone ResStock or ComStock.
-4. Create and download a `Service Account Key`_ for GCP authentication.
+4. GCP Authentication
+   a. Option 1: Create and download a `Service Account Key`_ for GCP authentication.
+
+      * Add the location of the key file as an environment variable; e.g.,
+        ``export GOOGLE_APPLICATION_CREDENTIALS="~/path/to/service-account-key.json"``. This can be
+        done at the command line (in which case it will need to be done for every shell session that
+        will run BuildStockBatch, and it will be in effect for only that session), or added to a
+        shell startup script (in which case it will be available to all shell sessions).
+
+   b. Option 2: Install the `Google Cloud CLI`_ and run the following:
+
+      ::
+
+        gcloud config set project PROJECT
+        gcloud auth application-default login
+
+        gcloud auth login
+        gcloud auth configure-docker REGION-docker.pkg.dev
+
-
-  * Add the location of the key file as an environment variable; e.g.,
-    ``export GOOGLE_APPLICATION_CREDENTIALS="~/path/to/service-account-key.json"``. This can be
-    done at the command line (in which case it will need to be done for every shell session that
-    will run BuildStockBatch, and it will be in effect for only that session), or added to a
-    shell startup script (in which case it will be available to all shell sessions).

 .. _Install Docker: https://www.docker.com/get-started/
 .. _Service Account Key: https://cloud.google.com/iam/docs/keys-create-delete
+.. _Google Cloud CLI: https://cloud.google.com/sdk/docs/install-sdk
diff --git a/docs/project_defn.rst b/docs/project_defn.rst
index ae419ae4..f235a222 100644
--- a/docs/project_defn.rst
+++ b/docs/project_defn.rst
@@ -270,11 +270,10 @@ using `GCP Batch `_ and `Cloud Run `_ and `Cloud Run `_ and `Cloud Run
 8). Default: None (which should result in a 30 GB boot disk according to the docs linked above).
-  * ``machine_type``: GCP Compute Engine machine type to use. If omitted, GCP Batch will
+  * ``machine_type``: Optional. GCP Compute Engine machine type to use. If omitted, GCP Batch will
     choose a machine type based on the requested vCPUs and memory. If set, the machine type
     should have at least as many resources as requested for each simulation above. If it is
     large enough, multiple simulations will be run in parallel on the same machine.
     Usually safe to leave unset.
-  * ``use_spot``: true or false. This tells the project whether to use
-    `Spot VMs `_ for data simulations, which can reduce
-    costs by up to 91%. Default: false
+  * ``use_spot``: Optional. Whether to use `Spot VMs `_
+    for data simulations, which can reduce costs by up to 91%. Default: false
 * ``postprocessing_environment``: Optional. Specifies the Cloud Run computing environment for
   postprocessing.
-  * ``cpus``: `Number of CPUs`_ to use. Default: 2.
-  * ``memory_mib``: `Amount of RAM`_ needed in MiB. 2048 MiB per CPU is recommended. Default:
-    4096.
+  * ``cpus``: Optional. `Number of CPUs`_ to use. Default: 2.
+  * ``memory_mib``: Optional. `Amount of RAM`_ needed in MiB. 2048 MiB per CPU is recommended.
+    Default: 4096.

 ..
_GCP's default behavior: https://cloud.google.com/python/docs/reference/batch/latest/google.cloud.batch_v1.types.TaskGroup
.. _job limits: https://cloud.google.com/batch/quotas
diff --git a/docs/run_sims.rst b/docs/run_sims.rst
index f4c81b60..028c21eb 100644
--- a/docs/run_sims.rst
+++ b/docs/run_sims.rst
@@ -126,7 +126,7 @@ tool.
 The first time you run ``buildstock_gcp`` it may take several minutes, especially over a slower
 internet connection as it is downloading and building a docker image.

-GCP Specific Project configuration
+GCP specific project configuration
 ..................................

 For the project to run on GCP, you will need to add a ``gcp`` section to your config
 file, something like this:

   gcp:
     job_identifier: national01
     project: myorg_project
     region: us-central1
     artifact_registry:
-      repository: buildstockbatch
+      repository: buildstockbatch-docker
     gcs:
-      bucket: mybucket
+      bucket: buildstockbatch
       prefix: national01_run01
     use_spot: true
     batch_array_size: 10000
 ...

You can optionally override the ``job_identifier`` from the command line to
quickly assign a new ID with each run without updating the config file.

-List existing jobs
+Show existing jobs
 ..................

 Run ``buildstock_gcp your_project_file.yml [job_identifier] --show_jobs`` to see the existing
 jobs matching the project specified. This can show you whether a previously-started job
 has completed, is still running, or has already been cleaned up.

+Post-processing only
+.....................
+
+If ``buildstock_gcp`` is interrupted after the simulations are kicked off (i.e., the Batch job is
+running), the simulations will finish, but post-processing will not be started. You can run only
+the post-processing steps later with the ``--postprocessonly`` flag.
+
 Cleaning up after yourself
 ..........................

 When the simulations and postprocessing are complete, run ``buildstock_gcp
 your_project_file.yml [job_identifier] --clean``. This will clean up all the GCP resources that
 were created to run the specified project, other than files in Cloud Storage. If the project is
 still running, it will be cancelled. Your output files will still be available in GCS.
+
+You can clean up files in Cloud Storage from the `GCP Console`_.
+
+If you make changes to the package between runs, you may also want to clean up the docker images
+created each time, with ``docker image prune``.
+
+..
_GCP Console: https://console.cloud.google.com/storage/browser
From 23362bf823b9333dc80c03a1657d55aae82ea43d Mon Sep 17 00:00:00 2001
From: Natalie Weires
Date: Thu, 18 Jan 2024 22:13:06 +0000
Subject: [PATCH 2/7] Crop svg
---
 buildstockbatch/gcp/arch.svg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/buildstockbatch/gcp/arch.svg b/buildstockbatch/gcp/arch.svg
index d7bdcd9a..8c550b44 100644
--- a/buildstockbatch/gcp/arch.svg
+++ b/buildstockbatch/gcp/arch.svg
@@ -1 +1 @@
-
+
From 310a9d73ed236214e1c2c7857984ad292c547836 Mon Sep 17 00:00:00 2001
From: Natalie Weires
Date: Thu, 18 Jan 2024 22:20:54 +0000
Subject: [PATCH 3/7] Fix crop
---
 buildstockbatch/gcp/arch.svg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/buildstockbatch/gcp/arch.svg b/buildstockbatch/gcp/arch.svg
index 8c550b44..a586e0fd 100644
--- a/buildstockbatch/gcp/arch.svg
+++ b/buildstockbatch/gcp/arch.svg
@@ -1 +1 @@
-
+
From 57a0793b138d27c868cb06ef6a03efc5f88716ad Mon Sep 17 00:00:00 2001
From: Natalie Weires
Date: Fri, 19 Jan 2024 15:41:48 +0000
Subject: [PATCH 4/7] More doc updates
---
 buildstockbatch/gcp/README.md |  9 ++++----
 docs/installation.rst         | 40 +++++++++++++++++++++--------------
 docs/run_sims.rst             |  8 +++----
 3 files changed, 32 insertions(+), 25 deletions(-)
diff --git a/buildstockbatch/gcp/README.md b/buildstockbatch/gcp/README.md
index 10ecc644..3c9aad8a 100644
--- a/buildstockbatch/gcp/README.md
+++ b/buildstockbatch/gcp/README.md
@@ -2,22 +2,21 @@
 ![Architecture diagram](/buildstockbatch/gcp/arch.svg)

-Buildstock Batch runs on GCP in a few steps:
+Buildstock Batch runs on GCP in a few phases:

  * Locally
    - Build a Docker image that includes OpenStudio and BuildStock Batch.
    - Push the Docker image to GCP Artifact Registry.
-   - Run sampling, and split the generated buildings + upgrades into batches.
+   - Run sampling and split the generated buildings + upgrades into batches.
    - Collect all the required input files (including downloading weather files)
      and upload them to a Cloud Storage bucket.
    - Kick off the Batch and Cloud Run jobs (described below), and wait for them to finish.

  * In GCP Batch
-   - Run a job on GCP Batch where each task runs one batch of simulations.
+   - Run a job where each task runs one batch of simulations.
      GCP Batch uses the Docker image to run OpenStudio on Compute Engine VMs.
    - Raw output files are written to the bucket in Cloud Storage.

  * In Cloud Run
-   - Create and start a Cloud Run job for post-processing steps.
-     Also uses the Docker image.
+   - Run a job for post-processing steps. Also uses the Docker image.
    - Aggregated output files are written to the bucket in Cloud Storage.
diff --git a/docs/installation.rst b/docs/installation.rst
index d108210b..80c5b58c 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -251,15 +251,20 @@ One-time GCP setup that can be shared by multiple users.
 1. If needed, create a GCP Project. The following steps will occur in that project.
 2. Set up the following resources in your GCP project. You can either do this manually or
    with Terraform.
-   a. Option 1: Manual setup
+
+   * **Option 1**: Manual setup
+
     * `Create a Google Cloud Storage Bucket`_ (that will store simulation and postprocessing output).
       Alternatively, each user can create and use their own bucket.
     * `Create a repository`_ in Artifact Registry (to store Docker images).
       This is expected to be in the same region as the storage bucket.
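+
+     As a convenience, the manual setup can also be done from the command line. This is
+     a minimal sketch using the `gcloud CLI`_; the bucket and repository names are
+     hypothetical placeholders, so substitute your own names and region:
+
+     ::
+
+       # Assumes the gcloud CLI is installed and authenticated.
+       # Names below are illustrative, not defaults.
+       gcloud storage buckets create gs://my-buildstockbatch-bucket --location=us-central1
+       gcloud artifacts repositories create buildstockbatch-docker \
+           --repository-format=docker --location=us-central1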
+
+   * **Option 2**: Terraform
+
+     * Install `Terraform`_
+     * From the buildstockbatch/gcp/ directory, run the following with your chosen GCP project and region.
       You can optionally specify the names of the storage bucket and artifact registry repository. See
-      main.tf for more details.
+      `main.tf` for more details.

       ::

         terraform init
         terraform apply -var="gcp_project=PROJECT" -var="region=REGION"

@@ -275,29 +280,32 @@ One-time GCP setup that can be shared by multiple users.
 .. _Create a Google Cloud Storage Bucket: https://cloud.google.com/storage/docs/creating-buckets
 .. _gcloud CLI: https://cloud.google.com/sdk/docs/install
+.. _Terraform: https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli
+
 Per-user setup
-...................
+..............

 One-time setup that each user needs to do on the workstation from which they'll launch and
 manage BuildStockBatch runs.

-1. `Install Docker`_. This is needed by the script to manage Docker images (pull, push, etc.).
+1. Install `Docker`_. This is needed by the script to manage Docker images (pull, push, etc.).
 2. Get BuildStockBatch and set up a Python environment for it using the :ref:`python` instructions
    above (i.e., create a Python virtual environment, activate the venv, and install buildstockbatch
    to it).
 3. Download/Clone ResStock or ComStock.
-4. GCP Authentication
-   a. Option 1: Create and download a `Service Account Key`_ for GCP authentication.
+4. Set up GCP authentication
+
+   * **Option 1**: Create and download a `Service Account Key`_.

-      * Add the location of the key file as an environment variable; e.g.,
-        ``export GOOGLE_APPLICATION_CREDENTIALS="~/path/to/service-account-key.json"``. This can be
-        done at the command line (in which case it will need to be done for every shell session that
-        will run BuildStockBatch, and it will be in effect for only that session), or added to a
-        shell startup script (in which case it will be available to all shell sessions).
+     * Add the location of the key file as an environment variable; e.g.,
+       ``export GOOGLE_APPLICATION_CREDENTIALS="~/path/to/service-account-key.json"``. This can be
+       done at the command line (in which case it will need to be done for every shell session that
+       will run BuildStockBatch, and it will be in effect for only that session), or added to a
+       shell startup script (in which case it will be available to all shell sessions).

-   b. Option 2: Install the `Google Cloud CLI`_ and run the following:
+   * **Option 2**: Install the `Google Cloud CLI`_ and run the following:

-      ::
+     ::

       gcloud config set project PROJECT
       gcloud auth application-default login
+
       gcloud auth login
       gcloud auth configure-docker REGION-docker.pkg.dev

-.. _Install Docker: https://www.docker.com/get-started/
+.. _Docker: https://www.docker.com/get-started/
 .. _Service Account Key: https://cloud.google.com/iam/docs/keys-create-delete
 .. _Google Cloud CLI: https://cloud.google.com/sdk/docs/install-sdk
diff --git a/docs/run_sims.rst b/docs/run_sims.rst
index 028c21eb..6ede2810 100644
--- a/docs/run_sims.rst
+++ b/docs/run_sims.rst
@@ -117,8 +117,8 @@ on S3 and queryable in Athena.
 Google Cloud Platform
 ~~~~~~~~~~~~~~~~~~~~~

-Running a batch on GCP is done by calling the ``buildstock_gcp`` command line
-tool.
+Run a project on GCP by calling the ``buildstock_gcp`` command line tool.

 .. command-output:: buildstock_gcp --help
    :ellipsis: 0,8
@@ -136,6 +135,7 @@ file, something like this:
 gcp:
   job_identifier: national01
+  # The project, Artifact Registry repo, and GCS bucket must already exist.
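+  # (The values below are examples, not defaults; substitute the project,
+  # repository, and bucket created during the one-time GCP setup.)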
project: myorg_project region: us-central1 artifact_registry: @@ -180,7 +180,7 @@ still running, it will be cancelled. Your output files will still be available i You can clean up files in Cloud Storage from the `GCP Console`_. -If you make changes to the package between runs, you may also want to clean up the docker images -created each time, with ``docker image prune``. +If you make code changes between runs, you may want to occasionally clean up the docker +images created for each run with ``docker image prune``. .. _GCP Console: https://console.cloud.google.com/storage/browser From 51f4e0d3c839c5efce1bbf6edb592465b081dfd8 Mon Sep 17 00:00:00 2001 From: Natalie Weires Date: Fri, 19 Jan 2024 16:05:33 +0000 Subject: [PATCH 5/7] Fix indentation --- docs/project_defn.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/project_defn.rst b/docs/project_defn.rst index f235a222..f52b7611 100644 --- a/docs/project_defn.rst +++ b/docs/project_defn.rst @@ -314,7 +314,7 @@ using `GCP Batch `_ and `Cloud Run `_ - for data simulations, which can reduce costs by up to 91%. Default: false + for data simulations, which can reduce costs by up to 91%. Default: false * ``postprocessing_environment``: Optional. Specifies the Cloud Run computing environment for postprocessing. From e4e2d7ce4818e1767e33701db5e6c3be7673c0c8 Mon Sep 17 00:00:00 2001 From: Natalie Weires Date: Mon, 22 Jan 2024 16:24:58 +0000 Subject: [PATCH 6/7] Small edits --- buildstockbatch/gcp/README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/buildstockbatch/gcp/README.md b/buildstockbatch/gcp/README.md index 3c9aad8a..88f4aeec 100644 --- a/buildstockbatch/gcp/README.md +++ b/buildstockbatch/gcp/README.md @@ -10,13 +10,18 @@ Buildstock Batch runs on GCP in a few phases: - Run sampling and split the generated buildings + upgrades into batches. - Collect all the required input files (including downloading weather files) and upload them to a Cloud Storage bucket. - - Kick off the Batch and Cloud Run jobs (described below), and wait for them to finish. + - Create and start the Batch and Cloud Run jobs (described below), + and wait for them to finish. * In GCP Batch - - Run a job where each task runs one batch of simulations. + - Run a batch job where each task runs a small group of simulations. GCP Batch uses the Docker image to run OpenStudio on Compute Engine VMs. - Raw output files are written to the bucket in Cloud Storage. * In Cloud Run - Run a job for post-processing steps. Also uses the Docker image. - Aggregated output files are written to the bucket in Cloud Storage. + + +`gcp.py` also supports validating a project file, cleaning up old projects, +and viewing the state of existing jobs. From 400accc9106dd1622156fe2ca1ebac5470cc40b9 Mon Sep 17 00:00:00 2001 From: Natalie Weires Date: Wed, 24 Jan 2024 21:51:58 +0000 Subject: [PATCH 7/7] PR comment updates --- docs/project_defn.rst | 9 +++++---- docs/run_sims.rst | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/project_defn.rst b/docs/project_defn.rst index f52b7611..25e21acc 100644 --- a/docs/project_defn.rst +++ b/docs/project_defn.rst @@ -270,7 +270,7 @@ using `GCP Batch `_ and `Cloud Run `_ and `Cloud Run `_ and `Cloud Run `_ and `Cloud Run