From bfce8714dc4b86743e414f576647dd4fb811dfca Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 30 Oct 2024 17:56:59 +0000 Subject: [PATCH 1/3] update open_data README to reflect Makefile steps --- open_data/README.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/open_data/README.md b/open_data/README.md index c2106a3b5..600fb3baf 100644 --- a/open_data/README.md +++ b/open_data/README.md @@ -27,15 +27,21 @@ Traffic Ops had a request for all transit routes and transit stops to be publish * Download the zipped shapefiles from the Hub to your local filesystem. 1. If there are new datasets to add or changes to make, make them in `metadata.yml` and/or `data_dictionary.yml`. * If there are changes to make in `metadata.yml`, make them. Afterwards, in terminal, run: `python supplement_meta.py` +1. If there are changes to be made to metadata.yml (adding new datasets, changing descriptions, change contact information, etc), make them. This is infrequent. An updated analysis date is already automated and does not have to be updated here. +1. In terminal: `python supplement_meta.py` +1. In terminal: `python update_data_dict.py`. + * Check the log results, which tells you if there are columns missing from `data_dictionary.yml`. These columns and their descriptions need to be added. Every column in the ESRI layer must have a definition, and where there's an external data dictionary website to cite, provide a definition source. +1. In terminal: `python update_fields_fgdc.py`. This populates fields with `data_dictionary.yml` values. + * Only run if `update_data_dict` had changes to incorporate 1. Run [arcgis_pro_script](./arcgis_pro_script.py) to create XML files. * Open a notebook in Hub and find the `ARCGIS_PATH` * Hardcode that path for `arcpy.env.workspace = ARCGIS_PATH` * The exported XML metadata will be in file gdb directory. * Upload the XML metadata into Hub in `open_data/xml/`. -1. 
If there are new datasets added, open `open_data.py` and modify the script. -1. In terminal: `python open_data.py`. +1. If there are new datasets added, open `update_vars.py` and modify the script. +1. In terminal: `python metadata_update_pro.py`. * Change into the `open_data` directory: `cd open_data/`. - * The overwritten XML is stored in `open_data/metadata_xml/run_in_esri/`. + * The overwritten XML is stored in `open_data/xml/run_in_esri/`. * Download the overwritten XML files locally to run in ArcGIS. 1. Run [arcgis_pro_script](./arcgis_pro_script.py) after import the updated XML metadata for each feature class. * There are steps to create FGDC templates for each datasets to store field information. @@ -45,7 +51,7 @@ Traffic Ops had a request for all transit routes and transit stops to be publish ### Metadata * [Metadata](./metadata.yml) * [Data dictionary](./data_dictionary.yml) -* [update_vars](./update_vars.py) and [publish_utils](./publish_utils.py) contain a lot of the variables that would frequently get updated in the publishing process. +* [update_vars](./update_vars.py) contains a lot of the variables that would frequently get updated in the publishing process. * Apply standardized column names across published datasets, even they differ from internal keys (`org_id` in favor of `gtfs_dataset_key`, `agency` in favor of `organization_name`). * Since we do not save multiple versions of published datasets, the columns are renamed prior to exporting the geoparquet as a zipped shapefile. 
From f65c81afc6b83f50b1db0c769d4775a63e50b7f5 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 30 Oct 2024 18:10:00 +0000 Subject: [PATCH 2/3] oct hqta metadata update --- open_data/Makefile | 3 ++- open_data/update_vars.py | 12 ++++++------ open_data/xml/ca_hq_transit_areas.xml | 4 ++-- open_data/xml/ca_hq_transit_stops.xml | 4 ++-- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/open_data/Makefile b/open_data/Makefile index 3d68717e5..1d305e5ca 100644 --- a/open_data/Makefile +++ b/open_data/Makefile @@ -12,7 +12,8 @@ compile_open_data_portal: python update_fields_fgdc.py # populate fields with data dictionary yml values, run if update_data_dict had changes to incorporate # Download the zipped shapefiles and metadata.yml and move to local ESRI directory #python arcgis_script_pro.py #(in ESRI!) - python metadata_update_pro.py # go back into ESRI and update xml + # Bring the ESRI rewritten XML files into Hub and drop into xml/ and allow overwrite(s) + python metadata_update_pro.py # (in Hub) # Download the overwritten XML files in xml/run_in_esri/ and move to local ESRI directory. #python arcgis_script_pro.py #(in ESRI!) 
python cleanup.py # run after ESRI work done \ No newline at end of file diff --git a/open_data/update_vars.py b/open_data/update_vars.py index 5569d44f4..7a3d71eb9 100644 --- a/open_data/update_vars.py +++ b/open_data/update_vars.py @@ -19,10 +19,10 @@ DATA_DICT_YML = Path("data_dictionary.yml") RUN_ME = [ - #"ca_hq_transit_areas", - #"ca_hq_transit_stops", - "ca_transit_routes", - "ca_transit_stops", - "speeds_by_stop_segments", - "speeds_by_route_time_of_day", + "ca_hq_transit_areas", + "ca_hq_transit_stops", + #"ca_transit_routes", + #"ca_transit_stops", + #"speeds_by_stop_segments", + #"speeds_by_route_time_of_day", ] \ No newline at end of file diff --git a/open_data/xml/ca_hq_transit_areas.xml b/open_data/xml/ca_hq_transit_areas.xml index d46a0f366..78d8a8dda 100644 --- a/open_data/xml/ca_hq_transit_areas.xml +++ b/open_data/xml/ca_hq_transit_areas.xml @@ -20,7 +20,7 @@ - 2024-10-08 + 2024-10-30 ISO 19139 Geographic Information - Metadata - Implementation Specification @@ -85,7 +85,7 @@ - 2024-09-18 + 2024-10-16 diff --git a/open_data/xml/ca_hq_transit_stops.xml b/open_data/xml/ca_hq_transit_stops.xml index c753d5fd1..e171b5047 100644 --- a/open_data/xml/ca_hq_transit_stops.xml +++ b/open_data/xml/ca_hq_transit_stops.xml @@ -20,7 +20,7 @@ - 2024-10-08 + 2024-10-30 ISO 19139 Geographic Information - Metadata - Implementation Specification @@ -85,7 +85,7 @@ - 2024-09-18 + 2024-10-16 From 8a4be84fdfc33e2701811eddf660dca88761c278 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 30 Oct 2024 18:14:26 +0000 Subject: [PATCH 3/3] more instructions in README --- open_data/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/open_data/README.md b/open_data/README.md index 600fb3baf..1de42e1b2 100644 --- a/open_data/README.md +++ b/open_data/README.md @@ -47,6 +47,7 @@ Traffic Ops had a request for all transit routes and transit stops to be publish * There are steps to create FGDC templates for each datasets to store field information. 
* This only needs to be done once when a new dataset is created. 1. In terminal: `python cleanup.py` to clean up old XML files and remove zipped shapefiles. + * The YAML and XML files that were created or changed get checked into GitHub. ### Metadata * [Metadata](./metadata.yml)