From 78d57c4020aac00ce81e38ce4abc85369aa2c66e Mon Sep 17 00:00:00 2001 From: Andrey Fedorov Date: Mon, 25 Nov 2024 12:56:18 -0500 Subject: [PATCH] BUG: ignore manifest lines that do not contain s3 URLs This came up while trying to use manifests generated using Looker Studio --- idc_index/index.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/idc_index/index.py b/idc_index/index.py index 00f619d..3c055c7 100644 --- a/idc_index/index.py +++ b/idc_index/index.py @@ -836,7 +836,9 @@ def _validate_update_manifest_and_get_download_size( REGEXP_EXTRACT(manifest_cp_cmd, '(?:.*?\\/){{3}}([^\\/?#]+)', 1) AS manifest_crdc_series_uuid, REGEXP_EXTRACT(manifest_cp_cmd, 's3://\\S+') AS s3_url, FROM - manifest_df ) + manifest_df + WHERE + REGEXP_EXTRACT(manifest_cp_cmd, 's3://\\S+') IS NOT NULL) SELECT seriesInstanceuid, index_crdc_series_uuid,