From d5059bc8dd37ffe450b86a6ad1d3662dc3b774e9 Mon Sep 17 00:00:00 2001 From: Joseph Shearer Date: Mon, 12 Sep 2022 15:03:51 -0400 Subject: [PATCH 1/4] refactor: Remove opengraph from database schema, update seed sql scripts --- .../opengraph_changes_1.sql => diff.sql | 85 +++-- scripts/seed_connectors.sql | 301 +++++++++--------- ...rs_once_connector_metadata_is_writable.sql | 196 ------------ supabase/migrations/06_connectors.sql | 74 +---- supabase/migrations/10_spec_ext.sql | 1 - supabase/seed.sql | 18 +- ...ed_once_connector_metadata_is_writable.sql | 104 ------ 7 files changed, 198 insertions(+), 581 deletions(-) rename supabase/pending/opengraph_changes_1.sql => diff.sql (55%) delete mode 100644 scripts/seed_connectors_once_connector_metadata_is_writable.sql delete mode 100644 supabase/seed_once_connector_metadata_is_writable.sql diff --git a/supabase/pending/opengraph_changes_1.sql b/diff.sql similarity index 55% rename from supabase/pending/opengraph_changes_1.sql rename to diff.sql index 79b3373..52c9754 100644 --- a/supabase/pending/opengraph_changes_1.sql +++ b/diff.sql @@ -1,68 +1,29 @@ -- This script was generated by the Schema Diff utility in pgAdmin 4 -- For the circular dependencies, the order in which Schema Diff writes the objects is not very sophisticated -- and may require manual changes to the script to ensure changes are applied in the correct order. -- Please report an issue for any failure with the reproduction steps. 
--- DOMAIN: public.jsonb_internationalized_value --- DROP DOMAIN IF EXISTS public.jsonb_internationalized_value; +alter table public.connectors alter column title drop expression, alter column title drop default; +alter table public.connectors alter column short_description drop expression, alter column short_description drop default; +alter table public.connectors alter column logo_url drop expression, alter column logo_url drop default; -CREATE DOMAIN public.jsonb_internationalized_value - AS jsonb; - -ALTER DOMAIN public.jsonb_internationalized_value OWNER TO postgres; - -ALTER DOMAIN public.jsonb_internationalized_value - ADD CONSTRAINT jsonb_internationalized_value_check CHECK (VALUE IS NULL OR jsonb_typeof(VALUE) = 'object'::text AND (VALUE -> 'en-US'::text) IS NOT NULL); - -COMMENT ON DOMAIN public.jsonb_internationalized_value - IS 'jsonb_internationalized_value is JSONB object which is required to at least have en-US internationalized values'; -CREATE OR REPLACE FUNCTION public.generate_opengraph_value( - opengraph_raw jsonb, - opengraph_patch jsonb, - field text) - RETURNS jsonb_internationalized_value - LANGUAGE 'plpgsql' - COST 100 - IMMUTABLE PARALLEL UNSAFE -AS $BODY$ -BEGIN - RETURN json_build_object('en-US',internal.jsonb_merge_patch(opengraph_raw, opengraph_patch) #>> ('{"en-US", "'|| field ||'"}')::text[]); -END -$BODY$; - -alter function public.generate_opengraph_value(jsonb, jsonb, text) owner to authenticated; - - -ALTER TABLE IF EXISTS public.connectors - ADD COLUMN short_description jsonb_internationalized_value GENERATED ALWAYS AS (generate_opengraph_value((open_graph_raw)::jsonb, (open_graph_patch)::jsonb, 'description'::text)) STORED; - -COMMENT ON COLUMN public.connectors.short_description - IS 'A short description of this connector, at most a few sentences. 
Represented as a json object with IETF language tags as keys (https://en.wikipedia.org/wiki/IETF_language_tag), and the description string as values'; - -ALTER TABLE IF EXISTS public.connectors - ADD COLUMN title jsonb_internationalized_value GENERATED ALWAYS AS (generate_opengraph_value((open_graph_raw)::jsonb, (open_graph_patch)::jsonb, 'title'::text)) STORED; COMMENT ON COLUMN public.connectors.title IS 'The title of this connector. Represented as a json object with IETF language tags as keys (https://en.wikipedia.org/wiki/IETF_language_tag), and the title string as values'; -ALTER TABLE IF EXISTS public.connectors - ADD COLUMN logo_url jsonb_internationalized_value GENERATED ALWAYS AS (generate_opengraph_value((open_graph_raw)::jsonb, (open_graph_patch)::jsonb, 'image'::text)) STORED; +COMMENT ON COLUMN public.connectors.short_description + IS 'A short description of this connector, at most a few sentences. Represented as a json object with IETF language tags as keys (https://en.wikipedia.org/wiki/IETF_language_tag), and the description string as values'; COMMENT ON COLUMN public.connectors.logo_url IS 'The url for this connector''s logo image. Represented as a json object with IETF language tags as keys (https://en.wikipedia.org/wiki/IETF_language_tag), and urls as values'; -ALTER TABLE IF EXISTS public.connectors - ADD COLUMN recommended boolean NOT NULL GENERATED ALWAYS AS ( -CASE - WHEN (((internal.jsonb_merge_patch((open_graph_raw)::jsonb, (open_graph_patch)::jsonb) -> 'en-US'::text) ->> 'recommended'::text) = 'True'::text) THEN true - ELSE false -END) STORED; -- Changing the columns in a view requires dropping and re-creating the view. -- This may fail if other objects are dependent upon this view, -- or may cause procedural functions to fail if they are not modified to -- take account of the changes. 
+drop view public.draft_specs_ext; DROP VIEW public.live_specs_ext; CREATE OR REPLACE VIEW public.live_specs_ext AS @@ -81,7 +43,6 @@ CREATE OR REPLACE VIEW public.live_specs_ext l.writes_to, c.external_url AS connector_external_url, c.id AS connector_id, - c.open_graph AS connector_open_graph, c.title AS connector_title, c.short_description AS connector_short_description, c.logo_url AS connector_logo_url, @@ -100,7 +61,32 @@ CREATE OR REPLACE VIEW public.live_specs_ext LATERAL view_user_profile(p.user_id) u(user_id, email, full_name, avatar_url); COMMENT ON VIEW public.live_specs_ext IS 'View of `live_specs` extended with metadata of its last publication'; - -alter view public.live_specs_ext owner to authenticated; - - +alter view live_specs_ext owner to authenticated; + +-- Extended view of user draft specifications. +create view draft_specs_ext as +select + d.*, + l.last_pub_detail, + l.last_pub_id, + l.last_pub_user_id, + l.last_pub_user_avatar_url, + l.last_pub_user_email, + l.last_pub_user_full_name, + l.spec as live_spec, + l.spec_type as live_spec_type +from draft_specs d +left outer join live_specs_ext l + on d.catalog_name = l.catalog_name; +alter view draft_specs_ext owner to authenticated; + +comment on view draft_specs_ext is + 'View of `draft_specs` extended with metadata of its live specification'; + +ALTER TABLE IF EXISTS public.connectors DROP COLUMN IF EXISTS open_graph; + +ALTER TABLE IF EXISTS public.connectors DROP COLUMN IF EXISTS open_graph_raw; + +ALTER TABLE IF EXISTS public.connectors DROP COLUMN IF EXISTS open_graph_patch; + +DROP FUNCTION IF EXISTS public.generate_opengraph_value(opengraph_raw jsonb, opengraph_patch jsonb, field text); diff --git a/scripts/seed_connectors.sql b/scripts/seed_connectors.sql index c2750d1..d92806b 100644 --- a/scripts/seed_connectors.sql +++ b/scripts/seed_connectors.sql @@ -14,181 +14,170 @@ declare connector_id flowid; begin - insert into connectors (image_name, detail, external_url) values ( - 
'ghcr.io/estuary/source-hello-world', - 'A flood of greetings', - 'https://github.com/estuary/connectors/tree/main/source-hello-world' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( + 'ghcr.io/estuary/source-http-file', + json_build_object('en-US','HTTP File'), + json_build_object('en-US',''), + json_build_object('en-US','https://www.estuary.dev/wp-content/uploads/2022/07/Group-22372-5-300x300.png'), + false, + 'https://go.estuary.dev/source-http-file' +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - insert into connectors (image_name, detail, external_url) values ( - 'ghcr.io/estuary/source-postgres', - 'Capture PostgreSQL tables into collections', - 'https://postgresql.org' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( + 'ghcr.io/estuary/source-s3', + json_build_object('en-US','Amazon S3'), + json_build_object('en-US',''), + json_build_object('en-US','https://www.estuary.dev/wp-content/uploads/2021/09/Amazon-S3.png'), + false, + 'https://aws.amazon.com/s3/' +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - insert into connectors (image_name, detail, external_url) values ( - 'ghcr.io/estuary/materialize-postgres', - 'Materialize collections into PostgreSQL', +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( + 'ghcr.io/estuary/source-postgres', + json_build_object('en-US','PostgreSQL'), + json_build_object('en-US','The world''s most advanced open source database.'), + 
json_build_object('en-US','https://www.postgresql.org/media/img/about/press/elephant.png'), + true, 'https://postgresql.org' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - insert into connectors (image_name, detail, external_url) values ( - 'ghcr.io/estuary/materialize-rockset', - 'Materialize collections into Rockset', - 'https://rockset.com/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, detail, external_url) values ( - 'ghcr.io/estuary/materialize-firebolt', - 'Materialize collections into Firebolt', - 'https://www.firebolt.io/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, detail, external_url) values ( +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( 'ghcr.io/estuary/source-mysql', - 'Capture MySQL tables into collections', + json_build_object('en-US','MySQL'), + json_build_object('en-US',''), + json_build_object('en-US','https://www.estuary.dev/wp-content/uploads/2022/05/mysql-300x295.png'), + true, 'https://www.mysql.com/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - insert into connectors (image_name, detail, external_url) values ( - 'ghcr.io/estuary/source-s3', - 'Capture S3 files into collections', +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( + 
'ghcr.io/estuary/source-kafka', + json_build_object('en-US','Apache Kafka'), + json_build_object('en-US','Apache Kafka: A Distributed Streaming Platform.'), + json_build_object('en-US','https://www.estuary.dev/wp-content/uploads/2022/05/kafka-300x300.png'), + false, + 'https://kafka.apache.org/' +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); + +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( + 'ghcr.io/estuary/materialize-bigquery', + json_build_object('en-US','Bigquery'), + json_build_object('en-US','BigQuery is a serverless, cost-effective and multicloud data warehouse designed to help you turn big data into valuable business insights. Start free.'), + json_build_object('en-US','https://www.estuary.dev/wp-content/uploads/2022/05/image-12513891-2-300x300.png'), + false, + 'https://cloud.google.com/bigquery' +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); + +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( + 'ghcr.io/estuary/materialize-s3-parquet', + json_build_object('en-US','Amazon S3 Parquet'), + json_build_object('en-US',''), + json_build_object('en-US','https://www.estuary.dev/wp-content/uploads/2021/09/Parquet.png'), + false, 'https://aws.amazon.com/s3/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - insert into connectors (image_name, detail, external_url) values ( +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( 'ghcr.io/estuary/source-gcs', - 'Capture Google Cloud Storage files into collections', + 
json_build_object('en-US','Google Cloud Storage'), + json_build_object('en-US','Object storage for companies of all sizes. Secure, durable, and with low latency. Store any amount of data. Retrieve it as often as you’d like.'), + json_build_object('en-US','https://www.estuary.dev/wp-content/uploads/2022/05/image-12513891-300x300.png'), + false, 'https://cloud.google.com/storage' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, detail, external_url) values ( - 'ghcr.io/estuary/source-kinesis', - 'Capture Kinesis topics into collections', - 'https://aws.amazon.com/kinesis/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - insert into connectors (image_name, detail, external_url) values ( - 'ghcr.io/estuary/source-kafka', - 'Capture Kafka topics into collections', - 'https://kafka.apache.org/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( + 'ghcr.io/estuary/materialize-postgres', + json_build_object('en-US','PostgreSQL'), + json_build_object('en-US','The world''s most advanced open source database.'), + json_build_object('en-US','https://www.postgresql.org/media/img/about/press/elephant.png'), + false, + 'https://postgresql.org' +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - insert into connectors (image_name, detail, external_url) values ( - 'ghcr.io/estuary/materialize-bigquery', - 'Materialize collections into BigQuery', - 'https://cloud.google.com/bigquery' - ) - 
returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( + 'ghcr.io/estuary/materialize-firebolt', + json_build_object('en-US','Firebolt'), + json_build_object('en-US','Firebolt is a complete redesign of the cloud data warehouse for the era of cloud and data lakes. Data warehousing with extreme speed & elasticity at scale.'), + json_build_object('en-US','https://www.estuary.dev/wp-content/uploads/2022/05/Bitmap-300x300.png'), + false, + 'https://www.firebolt.io/' +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - insert into connectors (image_name, detail, external_url) values ( +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( 'ghcr.io/estuary/materialize-snowflake', - 'Materialize collections into Snowflake', - 'https://www.snowflake.com/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); + json_build_object('en-US','Snowflake Data Cloud'), + json_build_object('en-US',''), + json_build_object('en-US','https://www.estuary.dev/wp-content/uploads/2021/10/Snowflake.png'), + false, + 'https://www.snowflake.com/' +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); + +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( + 'ghcr.io/estuary/source-hello-world', + json_build_object('en-US','Hello World'), + json_build_object('en-US','Connectors for capturing data from external data sources - connectors/source-hello-world at main · estuary/connectors'), + json_build_object('en-US','https://www.estuary.dev/wp-content/uploads/2022/05/Group-4-300x300.png'), + false, + 
'https://github.com/estuary/connectors/tree/main/source-hello-world' +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - insert into connectors (image_name, detail, external_url) values ( - 'ghcr.io/estuary/materialize-s3-parquet', - 'Materialize collections into S3 using Parquet', - 'https://aws.amazon.com/s3/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( + 'ghcr.io/estuary/materialize-rockset', + json_build_object('en-US','Rockset'), + json_build_object('en-US','Rockset is a real-time analytics database for serving fast analytics at scale, enabling developers to build modern data apps in record time.'), + json_build_object('en-US','https://www.estuary.dev/wp-content/uploads/2022/05/rockset-150x150.png'), + false, + 'https://rockset.com/' +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - insert into connectors (image_name, detail, external_url) values ( +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( + 'ghcr.io/estuary/source-kinesis', + json_build_object('en-US','Amazon Kinesis'), + json_build_object('en-US',''), + json_build_object('en-US','https://www.estuary.dev/wp-content/uploads/2022/05/Group-22372-2-300x300.png'), + false, + 'https://aws.amazon.com/kinesis/' +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); + +insert into connectors (image_name, title, short_description, logo_url, recommended, external_url) values ( 'ghcr.io/estuary/materialize-elasticsearch', - 'Materialize collections into Elastic', + 
json_build_object('en-US','Elasticsearch is the leading distributed, RESTful, free and open search and analytics engine designed for speed, horizontal scalability, reliability, and easy management. Get started for free.'), + json_build_object('en-US','https://www.estuary.dev/wp-content/uploads/2022/05/Elastic-300x300.png'), + false, 'https://www.elastic.co/elasticsearch/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, detail, external_url) values ( - 'airbyte/source-exchange-rates', - 'Capture exchange rates into collections', - 'https://exchangeratesapi.io/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':0.2.5'); - - insert into connectors (image_name, detail, external_url) values ( - 'airbyte/source-hubspot', - 'Capture from Hubspot into collections', - 'https://www.hubspot.com/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':0.1.10'); - - insert into connectors (image_name, detail, external_url) values ( - 'airbyte/source-facebook-marketing', - 'Capture from Facebook Marketing into collections', - 'https://www.facebook.com/business/marketing/facebook' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':0.2.14'); - - insert into connectors (image_name, detail, external_url) values ( - 'airbyte/source-google-sheets', - 'Capture from Google Sheets into collections', - 'https://www.google.com/sheets/about/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':0.2.4'); - - insert into connectors (image_name, detail, external_url) values ( - 'airbyte/source-google-ads', - 'Capture from Google Ads into collections', - 'https://ads.google.com/' - ) - 
returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':0.1.3'); - - insert into connectors (image_name, detail, external_url) values ( - 'airbyte/source-github', - 'Capture Github Events into collections', - 'https://github.com/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':0.1.6'); - - insert into connectors (image_name, detail, external_url) values ( - 'airbyte/source-google-analytics-v4', - 'Capture from Google Analytics into collections', - 'https://marketingplatform.google.com/about/analytics/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':0.1.0'); - - insert into connectors (image_name, detail, external_url) values ( - 'ghcr.io/estuary/source-http-file', - 'Capture from any single file', - 'https://go.estuary.dev/source-http-file' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); +) +returning id strict into connector_id; +insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); end; $$ language plpgsql; diff --git a/scripts/seed_connectors_once_connector_metadata_is_writable.sql b/scripts/seed_connectors_once_connector_metadata_is_writable.sql deleted file mode 100644 index db5c74a..0000000 --- a/scripts/seed_connectors_once_connector_metadata_is_writable.sql +++ /dev/null @@ -1,196 +0,0 @@ - --- This script is used to drop and re-create recommended connectors, --- along with their descriptions and tags. 
Run as: --- --- psql ${DATABASE_URL} --file scripts/seed_connectors.sql - -begin; - -delete from connector_tags; -delete from connectors; - -do $$ -declare - connector_id flowid; -begin - - insert into connectors (image_name, short_description, external_url) values ( - 'ghcr.io/estuary/source-hello-world', - json_build_object('en-US','A flood of greetings'), - 'https://github.com/estuary/connectors/tree/main/source-hello-world' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, short_description, external_url) values ( - 'ghcr.io/estuary/source-postgres', - json_build_object('en-US','Capture PostgreSQL tables into collections'), - 'https://postgresql.org' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, short_description, external_url) values ( - 'ghcr.io/estuary/materialize-postgres', - json_build_object('en-US','Materialize collections into PostgreSQL'), - 'https://postgresql.org' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, short_description, external_url) values ( - 'ghcr.io/estuary/materialize-rockset', - json_build_object('en-US','Materialize collections into Rockset'), - 'https://rockset.com/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, short_description, external_url) values ( - 'ghcr.io/estuary/materialize-firebolt', - json_build_object('en-US','Materialize collections into Firebolt'), - 'https://www.firebolt.io/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors 
(image_name, short_description, external_url) values ( - 'ghcr.io/estuary/source-mysql', - json_build_object('en-US','Capture MySQL tables into collections'), - 'https://www.mysql.com/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, short_description, external_url) values ( - 'ghcr.io/estuary/source-s3', - json_build_object('en-US','Capture S3 files into collections'), - 'https://aws.amazon.com/s3/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, short_description, external_url) values ( - 'ghcr.io/estuary/source-gcs', - json_build_object('en-US','Capture Google Cloud Storage files into collections'), - 'https://cloud.google.com/storage' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, short_description, external_url) values ( - 'ghcr.io/estuary/source-kinesis', - json_build_object('en-US','Capture Kinesis topics into collections'), - 'https://aws.amazon.com/kinesis/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, short_description, external_url) values ( - 'ghcr.io/estuary/source-kafka', - json_build_object('en-US','Capture Kafka topics into collections'), - 'https://kafka.apache.org/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, short_description, external_url) values ( - 'ghcr.io/estuary/materialize-bigquery', - json_build_object('en-US','Materialize collections into BigQuery'), - 'https://cloud.google.com/bigquery' - ) - returning id strict into 
connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, short_description, external_url) values ( - 'ghcr.io/estuary/materialize-snowflake', - json_build_object('en-US','Materialize collections into Snowflake'), - 'https://www.snowflake.com/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, short_description, external_url) values ( - 'ghcr.io/estuary/materialize-s3-parquet', - json_build_object('en-US','Materialize collections into S3 using Parquet'), - 'https://aws.amazon.com/s3/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, short_description, external_url) values ( - 'ghcr.io/estuary/materialize-elasticsearch', - json_build_object('en-US','Materialize collections into Elastic'), - 'https://www.elastic.co/elasticsearch/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - - insert into connectors (image_name, short_description, external_url) values ( - 'airbyte/source-exchange-rates', - json_build_object('en-US','Capture exchange rates into collections'), - 'https://exchangeratesapi.io/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':0.2.5'); - - insert into connectors (image_name, short_description, external_url) values ( - 'airbyte/source-hubspot', - json_build_object('en-US','Capture from Hubspot into collections'), - 'https://www.hubspot.com/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':0.1.10'); - - insert into connectors (image_name, short_description, external_url) values ( - 
'airbyte/source-facebook-marketing', - json_build_object('en-US','Capture from Facebook Marketing into collections'), - 'https://www.facebook.com/business/marketing/facebook' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':0.2.14'); - - insert into connectors (image_name, short_description, external_url) values ( - 'airbyte/source-google-sheets', - json_build_object('en-US','Capture from Google Sheets into collections'), - 'https://www.google.com/sheets/about/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':0.2.4'); - - insert into connectors (image_name, short_description, external_url) values ( - 'airbyte/source-google-ads', - json_build_object('en-US','Capture from Google Ads into collections'), - 'https://ads.google.com/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':0.1.3'); - - insert into connectors (image_name, short_description, external_url) values ( - 'airbyte/source-github', - json_build_object('en-US','Capture Github Events into collections'), - 'https://github.com/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':0.1.6'); - - insert into connectors (image_name, short_description, external_url) values ( - 'airbyte/source-google-analytics-v4', - json_build_object('en-US','Capture from Google Analytics into collections'), - 'https://marketingplatform.google.com/about/analytics/' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':0.1.0'); - - insert into connectors (image_name, short_description, external_url) values ( - 'ghcr.io/estuary/source-http-file', - json_build_object('en-US','Capture from any single file'), - 'https://go.estuary.dev/source-http-file' - ) - returning id 
strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':dev'); - -end; -$$ language plpgsql; - -commit; diff --git a/supabase/migrations/06_connectors.sql b/supabase/migrations/06_connectors.sql index 8017f32..5d17c8f 100644 --- a/supabase/migrations/06_connectors.sql +++ b/supabase/migrations/06_connectors.sql @@ -6,32 +6,16 @@ create domain jsonb_internationalized_value as jsonb check ( comment on domain jsonb_internationalized_value is 'jsonb_internationalized_value is JSONB object which is required to at least have en-US internationalized values'; -CREATE OR REPLACE FUNCTION -generate_opengraph_value( opengraph_raw jsonb, opengraph_patch jsonb, field text ) -RETURNS jsonb_internationalized_value -AS $CODE$ -BEGIN - RETURN json_build_object('en-US',internal.jsonb_merge_patch(opengraph_raw, opengraph_patch) #>> ('{"en-US", "'|| field ||'"}')::text[]); -END -$CODE$ -LANGUAGE plpgsql IMMUTABLE; - -- Known connectors. create table connectors ( like internal._model including all, external_url text not null, image_name text unique not null, - -- To be deleted in a future change -- - open_graph jsonb_obj - generated always as (internal.jsonb_merge_patch(open_graph_raw, open_graph_patch)) stored, - open_graph_raw jsonb_obj, - open_graph_patch jsonb_obj, - -- End to be deleted -- - title jsonb_internationalized_value generated always as (generate_opengraph_value(open_graph_raw, open_graph_patch,'title')) stored, - short_description jsonb_internationalized_value generated always as (generate_opengraph_value(open_graph_raw, open_graph_patch,'description')) stored, - logo_url jsonb_internationalized_value generated always as (generate_opengraph_value(open_graph_raw, open_graph_patch,'image')) stored, - recommended boolean not null generated always as (case when internal.jsonb_merge_patch(open_graph_raw, open_graph_patch)->'en-US'->>'recommended'::text = 'True' then TRUE else FALSE end) stored, + title 
jsonb_internationalized_value not null, + short_description jsonb_internationalized_value not null, + logo_url jsonb_internationalized_value not null, + recommended boolean not null default false, oauth2_client_id text, oauth2_client_secret text, oauth2_injected_values jsonb_obj, @@ -51,12 +35,6 @@ comment on column connectors.external_url is 'External URL which provides more information about the endpoint'; comment on column connectors.image_name is 'Name of the connector''s container (Docker) image, for example "ghcr.io/estuary/source-postgres"'; -comment on column connectors.open_graph is - 'Open-graph metadata for the connector, such as title, description, and image'; -comment on column connectors.open_graph_raw is - 'Open-graph metadata as returned by the external_url'; -comment on column connectors.open_graph_patch is - 'Patches to open-graph metadata, as a JSON merge patch'; comment on column connectors.oauth2_client_id is 'oauth client id'; comment on column connectors.oauth2_client_secret is @@ -72,49 +50,9 @@ comment on column public.connectors.title is comment on column public.connectors.short_description is 'A short description of this connector, at most a few sentences. Represented as a json object with IETF language tags as keys (https://en.wikipedia.org/wiki/IETF_language_tag), and the description string as values'; --- don't expose details of open_graph raw responses & patching and oauth2 secret +-- don't expose details of oauth2 secret -- authenticated may select other columns for all connectors connectors. -grant select(id, detail, updated_at, created_at, image_name, external_url, open_graph, title, short_description, logo_url, recommended, oauth2_client_id) on table connectors to authenticated; - - --- TODO(johnny): Here's the plan for open graph: --- For any given connector, we need to identify a suitable URL which is typically --- just it's website, like https://postgresql.org or https://hubspot.com. 
--- We can fetch Open Graph responses from these URL as an administrative scripted task. --- We can shell out for this, and this tool seems to do a pretty good job of it: --- go install github.com/johnreutersward/opengraph/cmd/opengraph@latest --- --- Example: --- ~/go/bin/opengraph -json https://postgresql.org | jq 'map( { (.Property|tostring): .Content } ) | add' --- { --- "url": "https://www.postgresql.org/", --- "type": "article", --- "image": "https://www.postgresql.org/media/img/about/press/elephant.png", --- "title": "PostgreSQL", --- "description": "The world's most advanced open source database.", --- "site_name": "PostgreSQL" --- } --- --- We'll store these responses verbatim in `open_graph_raw`. --- Payloads almost always include `title`, `image`, `description`, `url`, sometimes `site_name`, --- and sometimes other things. Often the responses are directly suitable for inclusion --- in user-facing UI components. A few sites don't support any scrapping at all --- (a notable example is Google analytics), and others return fields which aren't quite --- right or suited for direct display within our UI. --- --- So, we'll need to tweak many of them, and we'll do this by maintaining minimal --- patches of open-graph responses in the `open_graph_patch`. These can be dynamically --- edited via Supabase as needed, as an administrative function, and are applied --- via JSON merge patch to the raw responses, with the merged object stored in the --- user-facing `open_graph` column. Keeping patches in the database allows non-technical --- folks to use Supabase, Retool, or similar to edit this stuff without getting --- an engineer involved. --- --- We can, for example, specify '{"title":"A better title"}' within the connector patch, --- which will update the `open_graph` response while leaving all other fields (say, the --- `description` or `image`) as they are in the raw response. 
This is important because --- it gives us an easy means to periodically update connector logos, text copy, etc. - +grant select(id, detail, updated_at, created_at, image_name, external_url, title, short_description, logo_url, recommended, oauth2_client_id) on table connectors to authenticated; create table connector_tags ( like internal._model_async including all, diff --git a/supabase/migrations/10_spec_ext.sql b/supabase/migrations/10_spec_ext.sql index 09dbd09..2dcb751 100644 --- a/supabase/migrations/10_spec_ext.sql +++ b/supabase/migrations/10_spec_ext.sql @@ -81,7 +81,6 @@ select l.*, c.external_url as connector_external_url, c.id as connector_id, - c.open_graph as connector_open_graph, -- To be removed c.title as connector_title, c.short_description as connector_short_description, c.logo_url as connector_logo_url, diff --git a/supabase/seed.sql b/supabase/seed.sql index b5075e6..7b513ab 100644 --- a/supabase/seed.sql +++ b/supabase/seed.sql @@ -71,25 +71,31 @@ declare connector_id flowid; begin - insert into connectors (image_name, detail, external_url) values ( + insert into connectors (image_name, title, short_description, logo_url, external_url) values ( 'ghcr.io/estuary/source-hello-world', - 'A flood of greetings', + json_build_object('en-US','Hello World'), + json_build_object('en-US','A flood of greetings'), + json_build_object('en-US','https://www.estuary.dev/wp-content/uploads/2022/05/Group-4-300x300.png'), 'https://estuary.dev' ) returning id strict into connector_id; insert into connector_tags (connector_id, image_tag) values (connector_id, ':v1'); - insert into connectors (image_name, detail, external_url) values ( + insert into connectors (image_name, title, short_description, logo_url, external_url) values ( 'ghcr.io/estuary/source-postgres', - 'Capture PostgreSQL tables into collections', + json_build_object('en-US','PostgreSQL'), + json_build_object('en-US','Capture PostgreSQL tables into collections'), + 
json_build_object('en-US','https://www.postgresql.org/media/img/about/press/elephant.png'), 'https://postgresql.org' ) returning id strict into connector_id; insert into connector_tags (connector_id, image_tag) values (connector_id, ':v1'); - insert into connectors (image_name, detail, external_url) values ( + insert into connectors (image_name, title, short_description, logo_url, external_url) values ( 'ghcr.io/estuary/materialize-postgres', - 'Materialize collections into PostgreSQL', + json_build_object('en-US','PostgreSQL'), + json_build_object('en-US','Materialize collections into PostgreSQL'), + json_build_object('en-US','https://www.postgresql.org/media/img/about/press/elephant.png'), 'https://postgresql.org' ) returning id strict into connector_id; diff --git a/supabase/seed_once_connector_metadata_is_writable.sql b/supabase/seed_once_connector_metadata_is_writable.sql deleted file mode 100644 index cb7c3d1..0000000 --- a/supabase/seed_once_connector_metadata_is_writable.sql +++ /dev/null @@ -1,104 +0,0 @@ - -begin; - -insert into auth.users (id, email) values - ('11111111-1111-1111-1111-111111111111', 'alice@example.com'), - ('22222222-2222-2222-2222-222222222222', 'bob@example.com'), - ('33333333-3333-3333-3333-333333333333', 'carol@example.com') -; - --- Tweak auth.users to conform with what a local Supabase install creates --- if you perform the email "Sign Up" flow. 
In development mode it --- doesn't actually send an email, and immediately creates a record like this: -update auth.users set - "role" = 'authenticated', - aud = 'authenticated', - confirmation_token = '', - created_at = now(), - email_change = '', - email_change_confirm_status = 0, - email_change_token_new = '', - email_confirmed_at = now(), - encrypted_password = '$2a$10$vQCyRoGamfEBXOR05iNgseK.ukEUPV52W1B95Qt6Tb3kN4N32odji', -- "password" - instance_id = '00000000-0000-0000-0000-000000000000', - is_super_admin = false, - last_sign_in_at = now(), - raw_app_meta_data = '{"provider": "email", "providers": ["email"]}', - raw_user_meta_data = '{}', - recovery_token = '', - updated_at = now() -; - -insert into auth.identities (id, user_id, identity_data, provider, last_sign_in_at, created_at, updated_at) -select id, id, json_build_object('sub', id), 'email', now(), now(), now() from auth.users; - -insert into user_grants (user_id, object_role, capability) values - ('11111111-1111-1111-1111-111111111111', 'aliceCo/', 'admin'), - ('22222222-2222-2222-2222-222222222222', 'bobCo/', 'admin'), - ('33333333-3333-3333-3333-333333333333', 'carolCo/', 'admin') -; - --- Also grant other namespaces commonly used while testing. --- aliceCo, bobCo, and carolCo are distinct owned namespaces, --- but all are also able to admin examples/ -insert into role_grants (subject_role, object_role, capability) values - ('aliceCo/', 'aliceCo/', 'write'), - ('aliceCo/', 'examples/', 'admin'), - ('aliceCo/', 'ops/aliceCo/', 'read'), - ('bobCo/', 'bobCo/', 'write'), - ('bobCo/', 'examples/', 'admin'), - ('bobCo/', 'ops/bobCo/', 'read'), - ('carolCo/', 'carolCo/', 'write'), - ('carolCo/', 'examples/', 'admin'), - ('carolCo/', 'ops/carolCo/', 'read'), - ('examples/', 'examples/', 'write'), - ('examples/', 'ops/examples/', 'read') -; - --- Create corresponding storage mappings. 
-insert into storage_mappings (catalog_prefix, spec) values - ('aliceCo/', '{"stores":[{"provider":"S3","bucket":"a-bucket"}]}'), - ('bobCo/', '{"stores":[{"provider":"S3","bucket":"a-bucket"}]}'), - ('carolCo/', '{"stores":[{"provider":"S3","bucket":"a-bucket"}]}'), - ('examples/', '{"stores":[{"provider":"S3","bucket":"a-bucket"}]}'), - ('ops/', '{"stores":[{"provider":"S3","bucket":"a-bucket"}]}'), - ('recovery/', '{"stores":[{"provider":"S3","bucket":"a-bucket"}]}'); - --- Seed a small number of connectors. This is a small list, separate from our --- production connectors, because each is pulled onto your dev machine. -do $$ -declare - connector_id flowid; -begin - - insert into connectors (image_name, title, short_description, external_url) values ( - 'ghcr.io/estuary/source-hello-world', - json_build_object('en-US','Hello World'), - json_build_object('en-US','A flood of greetings'), - 'https://estuary.dev' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':v1'); - - insert into connectors (image_name, title, short_description, external_url) values ( - 'ghcr.io/estuary/source-postgres', - json_build_object('en-US','PostgreSQL'), - json_build_object('en-US','Capture PostgreSQL tables into collections'), - 'https://postgresql.org' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':v1'); - - insert into connectors (image_name, title, short_description, external_url) values ( - 'ghcr.io/estuary/materialize-postgres', - json_build_object('en-US','PostgreSQL'), - json_build_object('en-US','Materialize collections into PostgreSQL'), - 'https://postgresql.org' - ) - returning id strict into connector_id; - insert into connector_tags (connector_id, image_tag) values (connector_id, ':v1'); - -end; -$$ language plpgsql; - -commit; From 1b26ab12ea7fc1377f9b7770eac8c1ed19bbf671 Mon Sep 17 00:00:00 2001 From: Joseph Shearer Date: 
Mon, 12 Sep 2022 15:06:39 -0400 Subject: [PATCH 2/4] refactor: Remove all references to opengraph --- Dockerfile | 3 - crates/agent-sql/src/connector_tags.rs | 21 ---- crates/agent/src/connector_tags.rs | 23 ----- fetch-open-graph/go.mod | 5 - fetch-open-graph/go.sum | 6 -- fetch-open-graph/main.go | 133 ------------------------- local/start-component.sh | 8 +- readme.md | 11 +- scripts/entrypoint.sh | 1 - scripts/healthcheck.sh | 3 +- 10 files changed, 5 insertions(+), 209 deletions(-) delete mode 100644 fetch-open-graph/go.mod delete mode 100644 fetch-open-graph/go.sum delete mode 100644 fetch-open-graph/main.go diff --git a/Dockerfile b/Dockerfile index c751343..8b341dc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -100,12 +100,9 @@ RUN locale-gen ${LOCALE} COPY . /animated-carnival WORKDIR /animated-carnival -RUN cd fetch-open-graph && \ - go build -o /usr/local/bin/ RUN cargo build --release FROM ubuntu:20.04 COPY --from=builder /animated-carnival/target/release/agent /usr/local/bin -COPY --from=builder /usr/local/bin/fetch-open-graph /usr/local/bin/fetch-open-graph RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends \ ca-certificates \ diff --git a/crates/agent-sql/src/connector_tags.rs b/crates/agent-sql/src/connector_tags.rs index 3914d5e..cb3751f 100644 --- a/crates/agent-sql/src/connector_tags.rs +++ b/crates/agent-sql/src/connector_tags.rs @@ -66,27 +66,6 @@ where Ok(()) } -pub async fn update_open_graph_raw( - connector_id: Id, - open_graph_raw: Box, - txn: &mut sqlx::Transaction<'_, sqlx::Postgres>, -) -> sqlx::Result<()> { - sqlx::query!( - r#"update connectors set - open_graph_raw = $2, - updated_at = clock_timestamp() - where id = $1 - returning 1 as "must_exist"; - "#, - connector_id as Id, - Json(open_graph_raw) as Json>, - ) - .fetch_one(txn) - .await?; - - Ok(()) -} - pub async fn update_oauth2_spec( connector_id: Id, oauth2_spec: Box, diff --git a/crates/agent/src/connector_tags.rs 
b/crates/agent/src/connector_tags.rs index c5e2ade..bd1572a 100644 --- a/crates/agent/src/connector_tags.rs +++ b/crates/agent/src/connector_tags.rs @@ -112,29 +112,6 @@ impl TagHandler { return Ok((row.tag_id, JobStatus::SpecFailed)); } - let fetch_open_graph = - tokio::process::Command::new(format!("{}/fetch-open-graph", &self.bindir)) - .kill_on_drop(true) - .arg("-url") - .arg(&row.external_url) - .output() - .await - .context("fetching open graph metadata")?; - - if !fetch_open_graph.status.success() { - return Ok(( - row.tag_id, - JobStatus::OpenGraphFailed { - error: String::from_utf8_lossy(&fetch_open_graph.stderr).into(), - }, - )); - } - let open_graph_raw: Box = serde_json::from_slice(&fetch_open_graph.stdout) - .context("parsing open graph response")?; - - agent_sql::connector_tags::update_open_graph_raw(row.connector_id, open_graph_raw, txn) - .await?; - /// Spec is the output shape of the `flowctl api spec` command. #[derive(Deserialize)] #[serde(rename_all = "camelCase")] diff --git a/fetch-open-graph/go.mod b/fetch-open-graph/go.mod deleted file mode 100644 index 6237c71..0000000 --- a/fetch-open-graph/go.mod +++ /dev/null @@ -1,5 +0,0 @@ -module github.com/estuary/animated-carnival/fetch-open-graph - -go 1.18 - -require golang.org/x/net v0.0.0-20201224014010-6772e930b67b diff --git a/fetch-open-graph/go.sum b/fetch-open-graph/go.sum deleted file mode 100644 index b8f64b7..0000000 --- a/fetch-open-graph/go.sum +++ /dev/null @@ -1,6 +0,0 @@ -golang.org/x/net v0.0.0-20201224014010-6772e930b67b h1:iFwSg7t5GZmB/Q5TjiEAsdoLDrdJRC1RiF2WhuV29Qw= -golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 
-golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/fetch-open-graph/main.go b/fetch-open-graph/main.go deleted file mode 100644 index 9986b03..0000000 --- a/fetch-open-graph/main.go +++ /dev/null @@ -1,133 +0,0 @@ -package main - -import ( - "encoding/json" - "flag" - "io" - "log" - "net/http" - "os" - "strings" - - "golang.org/x/net/html" -) - -func main() { - var ( - url = flag.String("url", "", "URL to fetch") - ) - flag.Parse() - - if *url == "" { - flag.Usage() - os.Exit(0) - } - - // TODO(johnny): Request multiple languages. - const language = "en-US" - - req, err := http.NewRequest("GET", *url, nil) - if err != nil { - log.Fatal(err) - } - req.Header.Add("Accept-Language", language) - - resp, err := http.DefaultClient.Do(req) - if err != nil { - log.Fatal(err) - } - meta := extract(resp.Body) - - var enc = json.NewEncoder(os.Stdout) - enc.SetIndent(" ", " ") - - if err := enc.Encode(map[string]interface{}{ - language: meta, - }); err != nil { - log.Fatal(err) - } - - os.Exit(0) -} - -// Credit to https://gist.github.com/inotnako/c4a82f6723f6ccea5d83c5d3689373dd -type HTMLMeta struct { - Description string `json:"description,omitempty"` - Image string `json:"image,omitempty"` - ImageHeight string `json:"image_height,omitempty"` - ImageWidth string `json:"image_width,omitempty"` - SiteName string `json:"site_name,omitempty"` - Title string `json:"title,omitempty"` -} - -func extract(resp io.Reader) *HTMLMeta { - z := html.NewTokenizer(resp) - - titleFound := false - - hm := new(HTMLMeta) - - for { - tt := z.Next() - switch tt { - case html.ErrorToken: - return hm - case html.StartTagToken, html.SelfClosingTagToken: - t := z.Token() - if t.Data == `body` { - return hm - } - if t.Data == "title" { - titleFound = true - } - if t.Data == "meta" { - desc, ok := extractMetaProperty(t, "description") - if ok { - hm.Description = desc - } - ogDesc, ok := extractMetaProperty(t, "og:description") - 
if ok { - hm.Description = ogDesc - } - ogImage, ok := extractMetaProperty(t, "og:image") - if ok { - hm.Image = ogImage - } - ogImageWidth, ok := extractMetaProperty(t, "og:image:width") - if ok { - hm.ImageWidth = ogImageWidth - } - ogImageHeight, ok := extractMetaProperty(t, "og:image:height") - if ok { - hm.ImageHeight = ogImageHeight - } - ogSiteName, ok := extractMetaProperty(t, "og:site_name") - if ok { - hm.SiteName = ogSiteName - } - ogTitle, ok := extractMetaProperty(t, "og:title") - if ok { - hm.Title = ogTitle - } - } - case html.TextToken: - if titleFound { - t := z.Token() - hm.Title = strings.TrimSpace(t.Data) - titleFound = false - } - } - } -} - -func extractMetaProperty(t html.Token, prop string) (content string, ok bool) { - for _, attr := range t.Attr { - if attr.Key == "property" && attr.Val == prop { - ok = true - } - if attr.Key == "content" { - content = attr.Val - } - } - return -} diff --git a/local/start-component.sh b/local/start-component.sh index 3e4c405..5219aeb 100755 --- a/local/start-component.sh +++ b/local/start-component.sh @@ -33,11 +33,11 @@ function must_run() { function wait_until_listening() { local port="$1" local desc="$2" - log Waiting for $desc to be listening on port $port - while ! nc -z localhost $port; do + log Waiting for "$desc" to be listening on port "$port" + while ! 
nc -z localhost "$port"; do sleep 1 done - log $desc is now listening on port $port + log "$desc" is now listening on port "$port" } SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}"; )" &> /dev/null && pwd 2> /dev/null; )"; @@ -90,8 +90,6 @@ function start_control_plane() { function start_control_plane_agent() { local flow_bin_dir="$(project_dir 'flow')/.build/package/bin" - cd "$(project_dir 'animated-carnival')/fetch-open-graph" - go build -o "$flow_bin_dir" cd "$(project_dir 'animated-carnival')" # Start building immediately, since it could take a while diff --git a/readme.md b/readme.md index 2d87747..639b06b 100644 --- a/readme.md +++ b/readme.md @@ -29,7 +29,7 @@ The agent is a non-user-facing component which lives under [crates/agent/](crate Today this includes: -* Fetching connector details, such as open-graph metadata and endpoint / resource JSON-schemas. +* Fetching connector details, such as endpoint / resource JSON-schemas. * Running connector discovery operations to produce proposed catalog specifications. * Publishing catalog drafts by testing and then activating them into the data-plane. @@ -129,15 +129,6 @@ data-plane-gateway _Note: The gateway allows for configuring the port, the Flow service ports, the signing secret, and the CORS settings. The defaults should work out of the box._ -### Build `fetch-open-graph`: - -Build the fetch-open-graph helper to the same location where the flow binaries live. 
This is the same path that will be provided to the agent using `--bin--dir` argument: - -```console -cd fetch-open-graph/ -go build -o ~/estuary/flow/.build/package/bin/ -``` - ### Start the `agent`: Again from within your checkout of this repo: diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index a4fba6d..6e53969 100755 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -4,7 +4,6 @@ cd /home/agent wget https://github.com/estuary/flow/releases/download/dev/flow-x86-linux.tar.gz mkdir -p /home/agent/.bin tar -xvf flow-x86-linux.tar.gz -C /home/agent/.bin -cp /usr/local/bin/fetch-open-graph /home/agent/.bin/fetch-open-graph if [[ $1 ]]; then eval "$@" diff --git a/scripts/healthcheck.sh b/scripts/healthcheck.sh index dc74820..36509b3 100755 --- a/scripts/healthcheck.sh +++ b/scripts/healthcheck.sh @@ -5,8 +5,7 @@ paths=( /usr/local/bin/flowctl-go /usr/bin/gsutil /usr/local/bin/sops - /usr/bin/jq - /usr/local/bin/fetch-open-graph) + /usr/bin/jq) for i in "${paths[@]}" do From 996c27a8b15eb81f03699b2658acf95f0f70ec27 Mon Sep 17 00:00:00 2001 From: Joseph Shearer Date: Thu, 15 Sep 2022 10:43:52 -0400 Subject: [PATCH 3/4] fix: Remove unneccesary file --- diff.sql | 92 -------------------------------------------------------- 1 file changed, 92 deletions(-) delete mode 100644 diff.sql diff --git a/diff.sql b/diff.sql deleted file mode 100644 index 52c9754..0000000 --- a/diff.sql +++ /dev/null @@ -1,92 +0,0 @@ -NOTE: Configuring authentication for DESKTOP mode. --- This script was generated by the Schema Diff utility in pgAdmin 4 --- For the circular dependencies, the order in which Schema Diff writes the objects is not very sophisticated --- and may require manual changes to the script to ensure changes are applied in the correct order. --- Please report an issue for any failure with the reproduction steps. 
- - -alter table public.connectors alter column title drop expression drop default; -alter table public.connectors alter column short_description drop expression drop default; -alter table public.connectors alter column logo_url drop expression drop default; - - -COMMENT ON COLUMN public.connectors.title - IS 'The title of this connector. Represented as a json object with IETF language tags as keys (https://en.wikipedia.org/wiki/IETF_language_tag), and the title string as values'; - -COMMENT ON COLUMN public.connectors.short_description - IS 'A short description of this connector, at most a few sentences. Represented as a json object with IETF language tags as keys (https://en.wikipedia.org/wiki/IETF_language_tag), and the description string as values'; - -COMMENT ON COLUMN public.connectors.logo_url - IS 'The url for this connector''s logo image. Represented as a json object with IETF language tags as keys (https://en.wikipedia.org/wiki/IETF_language_tag), and urls as values'; - - --- Changing the columns in a view requires dropping and re-creating the view. --- This may fail if other objects are dependent upon this view, --- or may cause procedural functions to fail if they are not modified to --- take account of the changes. 
-drop view public.draft_specs_ext; -DROP VIEW public.live_specs_ext; -CREATE OR REPLACE VIEW public.live_specs_ext - AS - SELECT l.created_at, - l.detail, - l.id, - l.updated_at, - l.catalog_name, - l.connector_image_name, - l.connector_image_tag, - l.last_build_id, - l.last_pub_id, - l.reads_from, - l.spec, - l.spec_type, - l.writes_to, - c.external_url AS connector_external_url, - c.id AS connector_id, - c.title AS connector_title, - c.short_description AS connector_short_description, - c.logo_url AS connector_logo_url, - c.recommended AS connector_recommended, - t.id AS connector_tag_id, - t.documentation_url AS connector_tag_documentation_url, - p.detail AS last_pub_detail, - p.user_id AS last_pub_user_id, - u.avatar_url AS last_pub_user_avatar_url, - u.email AS last_pub_user_email, - u.full_name AS last_pub_user_full_name - FROM live_specs l - LEFT JOIN publication_specs p ON l.id::macaddr8 = p.live_spec_id::macaddr8 AND l.last_pub_id::macaddr8 = p.pub_id::macaddr8 - LEFT JOIN connectors c ON c.image_name = l.connector_image_name - LEFT JOIN connector_tags t ON c.id::macaddr8 = t.connector_id::macaddr8 AND l.connector_image_tag = t.image_tag, - LATERAL view_user_profile(p.user_id) u(user_id, email, full_name, avatar_url); -COMMENT ON VIEW public.live_specs_ext - IS 'View of `live_specs` extended with metadata of its last publication'; -alter view live_specs_ext owner to authenticated; - --- Extended view of user draft specifications. 
-create view draft_specs_ext as -select - d.*, - l.last_pub_detail, - l.last_pub_id, - l.last_pub_user_id, - l.last_pub_user_avatar_url, - l.last_pub_user_email, - l.last_pub_user_full_name, - l.spec as live_spec, - l.spec_type as live_spec_type -from draft_specs d -left outer join live_specs_ext l - on d.catalog_name = l.catalog_name; -alter view draft_specs_ext owner to authenticated; - -comment on view draft_specs_ext is - 'View of `draft_specs` extended with metadata of its live specification'; - -ALTER TABLE IF EXISTS public.connectors DROP COLUMN IF EXISTS open_graph; - -ALTER TABLE IF EXISTS public.connectors DROP COLUMN IF EXISTS open_graph_raw; - -ALTER TABLE IF EXISTS public.connectors DROP COLUMN IF EXISTS open_graph_patch; - -DROP FUNCTION IF EXISTS public.generate_opengraph_value(opengraph_raw jsonb, opengraph_patch jsonb, field text); From c8b75546c4e2400fbeb5ce5da1f8e390227522c6 Mon Sep 17 00:00:00 2001 From: Joseph Shearer Date: Thu, 15 Sep 2022 10:45:26 -0400 Subject: [PATCH 4/4] fix: Add back migration --- supabase/pending/remove_opengraph.sql | 92 +++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 supabase/pending/remove_opengraph.sql diff --git a/supabase/pending/remove_opengraph.sql b/supabase/pending/remove_opengraph.sql new file mode 100644 index 0000000..52c9754 --- /dev/null +++ b/supabase/pending/remove_opengraph.sql @@ -0,0 +1,92 @@ +NOTE: Configuring authentication for DESKTOP mode. +-- This script was generated by the Schema Diff utility in pgAdmin 4 +-- For the circular dependencies, the order in which Schema Diff writes the objects is not very sophisticated +-- and may require manual changes to the script to ensure changes are applied in the correct order. +-- Please report an issue for any failure with the reproduction steps. 
+ + +alter table public.connectors alter column title drop expression drop default; +alter table public.connectors alter column short_description drop expression drop default; +alter table public.connectors alter column logo_url drop expression drop default; + + +COMMENT ON COLUMN public.connectors.title + IS 'The title of this connector. Represented as a json object with IETF language tags as keys (https://en.wikipedia.org/wiki/IETF_language_tag), and the title string as values'; + +COMMENT ON COLUMN public.connectors.short_description + IS 'A short description of this connector, at most a few sentences. Represented as a json object with IETF language tags as keys (https://en.wikipedia.org/wiki/IETF_language_tag), and the description string as values'; + +COMMENT ON COLUMN public.connectors.logo_url + IS 'The url for this connector''s logo image. Represented as a json object with IETF language tags as keys (https://en.wikipedia.org/wiki/IETF_language_tag), and urls as values'; + + +-- Changing the columns in a view requires dropping and re-creating the view. +-- This may fail if other objects are dependent upon this view, +-- or may cause procedural functions to fail if they are not modified to +-- take account of the changes. 
+drop view public.draft_specs_ext; +DROP VIEW public.live_specs_ext; +CREATE OR REPLACE VIEW public.live_specs_ext + AS + SELECT l.created_at, + l.detail, + l.id, + l.updated_at, + l.catalog_name, + l.connector_image_name, + l.connector_image_tag, + l.last_build_id, + l.last_pub_id, + l.reads_from, + l.spec, + l.spec_type, + l.writes_to, + c.external_url AS connector_external_url, + c.id AS connector_id, + c.title AS connector_title, + c.short_description AS connector_short_description, + c.logo_url AS connector_logo_url, + c.recommended AS connector_recommended, + t.id AS connector_tag_id, + t.documentation_url AS connector_tag_documentation_url, + p.detail AS last_pub_detail, + p.user_id AS last_pub_user_id, + u.avatar_url AS last_pub_user_avatar_url, + u.email AS last_pub_user_email, + u.full_name AS last_pub_user_full_name + FROM live_specs l + LEFT JOIN publication_specs p ON l.id::macaddr8 = p.live_spec_id::macaddr8 AND l.last_pub_id::macaddr8 = p.pub_id::macaddr8 + LEFT JOIN connectors c ON c.image_name = l.connector_image_name + LEFT JOIN connector_tags t ON c.id::macaddr8 = t.connector_id::macaddr8 AND l.connector_image_tag = t.image_tag, + LATERAL view_user_profile(p.user_id) u(user_id, email, full_name, avatar_url); +COMMENT ON VIEW public.live_specs_ext + IS 'View of `live_specs` extended with metadata of its last publication'; +alter view live_specs_ext owner to authenticated; + +-- Extended view of user draft specifications. 
+create view draft_specs_ext as +select + d.*, + l.last_pub_detail, + l.last_pub_id, + l.last_pub_user_id, + l.last_pub_user_avatar_url, + l.last_pub_user_email, + l.last_pub_user_full_name, + l.spec as live_spec, + l.spec_type as live_spec_type +from draft_specs d +left outer join live_specs_ext l + on d.catalog_name = l.catalog_name; +alter view draft_specs_ext owner to authenticated; + +comment on view draft_specs_ext is + 'View of `draft_specs` extended with metadata of its live specification'; + +ALTER TABLE IF EXISTS public.connectors DROP COLUMN IF EXISTS open_graph; + +ALTER TABLE IF EXISTS public.connectors DROP COLUMN IF EXISTS open_graph_raw; + +ALTER TABLE IF EXISTS public.connectors DROP COLUMN IF EXISTS open_graph_patch; + +DROP FUNCTION IF EXISTS public.generate_opengraph_value(opengraph_raw jsonb, opengraph_patch jsonb, field text);