diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b074626be4..bfd8a5f024 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -174,6 +174,7 @@ jobs: "materialize-dynamodb", "materialize-elasticsearch", "source-dynamodb", + "source-kafka", "source-kinesis", "source-mysql", "source-postgres", @@ -232,11 +233,6 @@ jobs: cd ..; pytest ${{matrix.connector}}/tests; - - name: Start Dockerized test infrastructure - if: matrix.connector == 'source-kafka' - run: | - docker compose --file infra/docker-compose.yaml up --wait - - name: Source connector ${{ matrix.connector }} integration tests if: | contains(fromJson('[ diff --git a/infra/docker-compose.yaml b/infra/docker-compose.yaml deleted file mode 100644 index 40dba549b7..0000000000 --- a/infra/docker-compose.yaml +++ /dev/null @@ -1,65 +0,0 @@ -version: "3" -services: - # Secured Kafka with SCRAM enabled - kafka: - image: 'bitnami/kafka:3.3' - container_name: infra-kafka-1 - networks: - - flow-test - ports: - - '9092:9092' - volumes: - - kafka_data:/bitnami/kafka - # TODO: I don't believe we *should* need to create this file ourselves. - # The Bitnami image attempts to build this configuration file from the - # environment variables provided, but I believe there is a bug (many - # bugs?) in their 800 line bash script. Check back in on this in the - # future so we don't need to maintain this file ourselves. - - ./kafka_jaas.conf:/bitnami/kafka/config/kafka_jaas.conf - depends_on: - - zookeeper - environment: - - KAFKA_ENABLE_KRAFT=no - - - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181 - - KAFKA_ZOOKEEPER_PROTOCOL=SASL - - KAFKA_ZOOKEEPER_USER=zoo - - KAFKA_ZOOKEEPER_PASSWORD=keeper - - # We'll use SASL/SCRAM for our external clients, and SASL/PLAIN for the - # internal broker communication. - - KAFKA_CFG_SASL_ENABLED_MECHANISMS=PLAIN,SCRAM-SHA-256 - - KAFKA_CFG_SASL_MECHANISM_INTER_BROKER_PROTOCOL=PLAIN - - - KAFKA_INTER_BROKER_LISTENER_NAME=INTERNAL - - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=INTERNAL:SASL_PLAINTEXT,EXTERNAL:SASL_PLAINTEXT - - KAFKA_CFG_LISTENERS=INTERNAL://:29092,EXTERNAL://infra-kafka-1.flow-test:9092 - - KAFKA_CFG_ADVERTISED_LISTENERS=INTERNAL://kafka:29092,EXTERNAL://infra-kafka-1.flow-test:9092 - - # These values are used by the Docker entrypoint script to create - # users/passwords stored in Zookeeper. If we change the kafka_jaas.conf, - # we may need to adjust these values. Be careful, since things may - # continue to work locally between reboots of Kafka because settings are - # stored in Zookeeper. 
- - KAFKA_CLIENT_USERS=alice,bob - - KAFKA_CLIENT_PASSWORDS=alice-pass,bob-pass - - zookeeper: - image: 'bitnami/zookeeper:3.7' - networks: - - flow-test - volumes: - - zookeeper_data:/bitnami/zookeeper - environment: - - ZOO_ENABLE_AUTH=yes - - ZOO_SERVER_USERS=zoo - - ZOO_SERVER_PASSWORDS=keeper - -networks: - flow-test: - name: flow-test - external: true - -volumes: - kafka_data: - zookeeper_data: diff --git a/infra/kafka_jaas.conf b/infra/kafka_jaas.conf deleted file mode 100644 index 288d0a3c71..0000000000 --- a/infra/kafka_jaas.conf +++ /dev/null @@ -1,21 +0,0 @@ -KafkaClient { - org.apache.kafka.common.security.scram.ScramLoginModule required - username="alice" - password="alice-pass" - user_alice="alice-pass"; -}; - -KafkaServer { - org.apache.kafka.common.security.scram.ScramLoginModule required; - org.apache.kafka.common.security.plain.PlainLoginModule required - username="bob" - password="bob-pass" - user_bob="bob-pass"; -}; - -Client { - org.apache.kafka.common.security.plain.PlainLoginModule required - username="zoo" - password="keeper" - user_zoo="keeper"; -}; diff --git a/source-kafka/Cargo.lock b/source-kafka/Cargo.lock index 71175c6bcf..17050cea4a 100644 --- a/source-kafka/Cargo.lock +++ b/source-kafka/Cargo.lock @@ -2,30 +2,75 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "addr" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a93b8a41dbe230ad5087cc721f8d41611de654542180586b315d9f4cf6b72bef" +dependencies = [ + "psl", + "psl-types", +] + [[package]] name = "addr2line" -version = "0.21.0" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" dependencies = [ "gimli", ] [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "adler32" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" + +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + +[[package]] +name = "ahash" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] [[package]] name = "aho-corasick" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -42,42 +87,63 @@ dependencies = [ ] 
[[package]] -name = "ansi_term" -version = "0.12.1" +name = "anyhow" +version = "1.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" -dependencies = [ - "winapi", -] +checksum = "37bf3594c4c988a53154954629820791dde498571819ae4ca50ca811e060cc95" [[package]] -name = "anyhow" -version = "1.0.75" +name = "apache-avro" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +checksum = "1aef82843a0ec9f8b19567445ad2421ceeb1d711514384bdd3d49fe37102ee13" +dependencies = [ + "bigdecimal 0.4.6", + "digest", + "libflate", + "log", + "num-bigint", + "quad-rand", + "rand", + "regex-lite", + "serde", + "serde_bytes", + "serde_json", + "strum", + "strum_macros", + "thiserror", + "typed-builder", + "uuid", +] [[package]] -name = "atty" -version = "0.2.14" +name = "async-trait" +version = "0.1.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ - "hermit-abi", - "libc", - "winapi", + "proc-macro2", + "quote", + "syn 2.0.79", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" -version = "1.1.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-credential-types" -version = "1.1.1" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a1629320d319dc715c6189b172349186557e209d2a7b893ff3d14efd33a47c" +checksum = "60e8f6b615cb5fc60a98132268508ad104310f0cfb25a1c22eee76efdf9154da" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -85,51 +151,38 @@ dependencies = [ "zeroize", ] -[[package]] -name = "aws-http" -version = "0.60.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e4199d5d62ab09be6a64650c06cc5c4aa45806fed4c74bc4a5c8eaf039a6fa" -dependencies = [ - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "http", - "http-body", - "pin-project-lite", - "tracing", -] - [[package]] name = "aws-runtime" -version = "1.1.1" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87116d357c905b53f1828d15366363fd27b330a0393cbef349e653f686d36bad" +checksum = "a10d5c055aa540164d9561a0e2e74ad30f0dcf7393c3a92f6733ddf9c5762468" dependencies = [ "aws-credential-types", - "aws-http", "aws-sigv4", "aws-smithy-async", "aws-smithy-http", + "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", + "bytes", "fastrand", - "http", + "http 0.2.12", + "http-body 0.4.6", + "once_cell", "percent-encoding", + "pin-project-lite", "tracing", "uuid", ] [[package]] name = "aws-sdk-iam" -version = "1.7.0" +version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439c3078bf22ea17ef5c5e49116baa7abcd7c53cb2b9a6dd8b0c4f9d856adba6" +checksum = "be31397b48f632a5d3e78f96c9f23ac7559a507de035c4472ec728ade2efa8df" dependencies = [ 
"aws-credential-types", - "aws-http", "aws-runtime", "aws-smithy-async", "aws-smithy-http", @@ -140,7 +193,7 @@ dependencies = [ "aws-smithy-types", "aws-smithy-xml", "aws-types", - "http", + "http 0.2.12", "once_cell", "regex-lite", "tracing", @@ -148,9 +201,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.1.1" +version = "1.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d222297ca90209dc62245f0a490355795f29de362eb5c19caea4f7f55fe69078" +checksum = "cc8db6904450bafe7473c6ca9123f88cc11089e41a025408f992db4e22d3be68" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -160,7 +213,8 @@ dependencies = [ "form_urlencoded", "hex", "hmac", - "http", + "http 0.2.12", + "http 1.1.0", "once_cell", "percent-encoding", "sha2", @@ -170,9 +224,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.1.1" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9f65000917e3aa94c259d67fe01fa9e4cd456187d026067d642436e6311a81" +checksum = "62220bc6e97f946ddd51b5f1361f78996e704677afc518a4ff66b7a72ea1378c" dependencies = [ "futures-util", "pin-project-lite", @@ -181,17 +235,17 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.60.1" +version = "0.60.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4e816425a6b9caea4929ac97d0cb33674849bd5f0086418abc0d02c63f7a1bf" +checksum = "5c8bc3e8fdc6b8d07d976e301c02fe553f72a39b7a9fea820e023268467d7ab6" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", "bytes-utils", "futures-core", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.6", "once_cell", "percent-encoding", "pin-project-lite", @@ -201,18 +255,18 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.60.1" +version = "0.60.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ab3f6d49e08df2f8d05e1bb5b68998e1e67b76054d3c43e7b954becb9a5e9ac" +checksum = "4683df9469ef09468dad3473d129960119a0d3593617542b7d52086c8486f2d6" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-query" -version = "0.60.1" +version = "0.60.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f94a7a3aa509ff9e8b8d80749851d04e5eee0954c43f2e7d6396c4740028737" +checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" dependencies = [ "aws-smithy-types", "urlencoding", @@ -220,9 +274,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.1.1" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da5b0a3617390e769576321816112f711c13d7e1114685e022505cf51fe5e48" +checksum = "a065c0fe6fdbdf9f11817eb68582b2ab4aff9e9c39e986ae48f7ec576c6322db" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -230,46 +284,53 @@ dependencies = [ "aws-smithy-types", "bytes", "fastrand", - "h2", - "http", - "http-body", - "hyper", - "hyper-rustls", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", + "http-body 1.0.1", + "httparse", + "hyper 0.14.31", + "hyper-rustls 0.24.2", "once_cell", "pin-project-lite", "pin-utils", - "rustls", + "rustls 0.21.12", "tokio", "tracing", ] [[package]] name = "aws-smithy-runtime-api" -version = "1.1.1" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2404c9eb08bfe9af255945254d9afc69a367b7ee008b8db75c05e3bca485fc65" +checksum = "e086682a53d3aa241192aa110fa8dfce98f2f5ac2ead0de84d41582c7e8fdb96" dependencies 
= [ "aws-smithy-async", "aws-smithy-types", "bytes", - "http", + "http 0.2.12", + "http 1.1.0", "pin-project-lite", "tokio", "tracing", + "zeroize", ] [[package]] name = "aws-smithy-types" -version = "1.1.1" +version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aba8136605d14ac88f57dc3a693a9f8a4eab4a3f52bc03ff13746f0cd704e97" +checksum = "147100a7bea70fa20ef224a6bad700358305f5dc0f84649c53769761395b355b" dependencies = [ "base64-simd", "bytes", "bytes-utils", "futures-core", - "http", - "http-body", + "http 0.2.12", + "http 1.1.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", "itoa", "num-integer", "pin-project-lite", @@ -283,48 +344,59 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.1" +version = "0.60.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e8f03926587fc881b12b102048bb04305bf7fb8c83e776f0ccc51eaa2378263" +checksum = "ab0b0166827aa700d3dc519f72f8b3a91c35d0b8d042dc5d643a91e6f80648fc" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "1.1.1" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e5d5ee29077e0fcd5ddd0c227b521a33aaf02434b7cdba1c55eec5c1f18ac47" +checksum = "5221b91b3e441e6675310829fd8984801b772cb1546ef6c0e54dec9f1ac13fef" dependencies = [ "aws-credential-types", "aws-smithy-async", "aws-smithy-runtime-api", "aws-smithy-types", - "http", "rustc_version", "tracing", ] [[package]] name = "backtrace" -version = "0.3.69" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", "miniz_oxide", "object", "rustc-demangle", + "windows-targets", ] [[package]] name = "base64" -version = "0.21.5" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "base64-simd" @@ -337,16 +409,74 @@ dependencies = [ ] [[package]] -name = "bitflags" -version = "1.3.2" +name = "bigdecimal" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6773ddc0eafc0e509fb60e48dff7f450f8e674a0686ae8605e8d9901bd5eefa" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "bigdecimal" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f850665a0385e070b64c38d2354e6c104c8479c59868d1e48a0c13ee2c7a1c1" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", + "serde", +] + +[[package]] +name = "bit-set" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" 
+dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" [[package]] name = "bitflags" -version = "2.4.1" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "bitvec" +version = "0.19.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +checksum = "55f93d0ef3363c364d5976646a38f04cf67cfe1d4c8d160cdea02cab2c116b33" +dependencies = [ + "funty 1.1.0", + "radium 0.5.3", + "tap", + "wyz 0.2.0", +] + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty 2.0.0", + "radium 0.7.0", + "tap", + "wyz 0.5.1", +] [[package]] name = "block-buffer" @@ -359,15 +489,43 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.14.0" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.7.1" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" +checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" [[package]] name = "bytes-utils" @@ -381,11 +539,13 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.83" +version = "1.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "b16803a61b81d9eabb7eae2588776c4c1e584b738ede45fdbb4c972cec1e9945" dependencies = [ + "jobserver", "libc", + "shlex", ] [[package]] @@ -396,53 +556,35 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.31" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" dependencies = [ "android-tzdata", "iana-time-zone", - "js-sys", "num-traits", - "serde", - "wasm-bindgen", - "windows-targets 0.48.5", -] - -[[package]] -name = "clap" -version = "2.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" -dependencies = [ - "ansi_term", - "atty", - "bitflags 1.3.2", - "strsim 0.8.0", - "textwrap", - "unicode-width", - "vec_map", + "windows-targets", ] [[package]] name = "cmake" -version = "0.1.50" +version = "0.1.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" dependencies = [ "cc", ] [[package]] name = "console" -version = "0.15.7" +version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" dependencies = [ "encode_unicode", "lazy_static", "libc", - "windows-sys 0.45.0", + "windows-sys 0.52.0", ] [[package]] @@ -457,71 +599,81 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] -name = "cpufeatures" -version = "0.2.11" +name = "core2" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" dependencies = [ - "libc", + "memchr", ] [[package]] -name = "crypto-common" -version = "0.1.6" +name = "cpufeatures" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" dependencies = [ - "generic-array", - "typenum", + "libc", ] [[package]] -name = "darling" -version = "0.13.4" +name = "crc32fast" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" dependencies = [ - "darling_core", - "darling_macro", + "cfg-if", ] [[package]] -name = "darling_core" -version = "0.13.4" +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crypto-common" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim 0.10.0", - "syn 1.0.109", + "generic-array", + "typenum", ] [[package]] -name = "darling_macro" -version = "0.13.4" +name = "dary_heap" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ - "darling_core", - "quote", - "syn 1.0.109", + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", ] [[package]] name = "deranged" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eb30d70a07a3b04884d2677f06bec33509dc67ca60d92949e5535352d3191dc" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" dependencies = [ "powerfmt", + "serde", ] [[package]] @@ -535,11 +687,46 @@ dependencies = [ "subtle", ] +[[package]] +name = "doc" +version = "0.0.0" +source = "git+https://github.com/estuary/flow#60e47f697826a8128b533cc5f4600acb5d80cdbc" +dependencies = [ + "base64 0.13.1", + "bigdecimal 0.3.1", + "bumpalo", + "bytes", + "fancy-regex 0.10.0", + "futures", + "fxhash", + "itertools 0.10.5", + "json", + "lz4", + "proto-gazette", + "rkyv", + "schemars", + "serde", + "serde_json", + "tempfile", + "thiserror", + "time", + "tracing", + "tuple", + "url", + "uuid", +] + +[[package]] +name = "downcast-rs" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" + [[package]] name = "duct" -version = "0.13.6" +version = "0.13.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37ae3fc31835f74c2a7ceda3aeede378b0ae2e74c8f1c36559fcc9ae2a4e7d3e" +checksum = "e4ab5718d1224b63252cd0c6f74f6480f9ffeb117438a2e0f5cf6d9a4798929c" dependencies = [ "libc", "once_cell", @@ -549,15 +736,15 @@ dependencies = [ [[package]] name = "dyn-clone" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "545b22097d44f8a9581187cdf93de7a71e4722bf51200cfaba810865b49a495d" +checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" [[package]] name = "either" -version = "1.9.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "encode_unicode" @@ -565,37 +752,65 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + [[package]] name = "equivalent" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "erased-serde" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c138974f9d5e7fe373eb04df7cae98833802ae4b11c24ac7039a21d5af4b26c" +dependencies = [ + "serde", +] + [[package]] name = "errno" -version = "0.3.5" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" dependencies = [ "libc", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] -name = "eyre" -version = "0.6.8" +name = 
"fancy-regex" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c2b6b5a29c02cdc822728b7d7b8ae1bab3e3b05d44522770ddd49722eeac7eb" +checksum = "0678ab2d46fa5195aaf59ad034c083d351377d4af57f3e073c074d0da3e3c766" dependencies = [ - "indenter", - "once_cell", + "bit-set", + "regex", +] + +[[package]] +name = "fancy-regex" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2" +dependencies = [ + "bit-set", + "regex", ] [[package]] name = "fastrand" -version = "2.0.1" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "fixedbitset" @@ -609,6 +824,21 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -618,43 +848,114 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "funty" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" -version = "0.3.29" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", + "futures-sink", ] [[package]] name = "futures-core" -version = "0.3.29" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] [[package]] name = "futures-sink" -version = "0.3.29" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.29" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-util" -version = "0.3.29" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ + "futures-channel", "futures-core", + "futures-io", + "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "pin-utils", + "slab", +] + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", ] [[package]] @@ -669,9 +970,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.11" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", @@ -680,9 +981,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.1" +version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "h2" @@ -695,7 +996,26 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.1.0", "indexmap", "slab", "tokio", @@ -705,24 +1025,28 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.2" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] [[package]] -name = "heck" -version = "0.3.3" +name = "hashbrown" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ - "unicode-segmentation", + "ahash 
0.8.11", + "allocator-api2", ] [[package]] -name = "heck" -version = "0.4.1" +name = "hashbrown" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" [[package]] name = "heck" @@ -732,12 +1056,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hermit-abi" -version = "0.1.19" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "hex" @@ -747,9 +1068,9 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "highway" -version = "0.6.4" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3461b968f695ca312b968503261f5a345de0f02a39dbaa3021f20d53b426395d" +checksum = "c706f1711006204c2ba8fb1a7bd55f689bbf7feca9ff40325206b5e140cff6df" [[package]] name = "hmac" @@ -762,9 +1083,20 @@ dependencies = [ [[package]] name = "http" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8947b1a6fad4393052c7ba1f4cd97bed3e953a95c79c92ad9b051a04611d9fbb" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" dependencies = [ "bytes", "fnv", @@ -778,15 +1110,38 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", - "http", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.1.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", "pin-project-lite", ] [[package]] name = "httparse" -version = "1.8.0" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" +checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" [[package]] name = "httpdate" @@ -796,28 +1151,48 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "0.14.27" +version = "0.14.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" +checksum = "8c08302e8fa335b151b788c775ff56e7a03ae64ff85c548ee820fecb70356e85" dependencies = [ "bytes", "futures-channel", "futures-core", "futures-util", - "h2", - "http", - "http-body", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", "httparse", "httpdate", "itoa", 
"pin-project-lite", - "socket2 0.4.10", + "socket2", "tokio", "tower-service", "tracing", "want", ] +[[package]] +name = "hyper" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "h2 0.4.6", + "http 1.1.0", + "http-body 1.0.1", + "httparse", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + [[package]] name = "hyper-rustls" version = "0.24.2" @@ -825,20 +1200,72 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", - "http", - "hyper", + "http 0.2.12", + "hyper 0.14.31", "log", - "rustls", + "rustls 0.21.12", "rustls-native-certs", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.1", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" +dependencies = [ + "futures-util", + "http 1.1.0", + "hyper 1.5.0", + "hyper-util", + "rustls 0.23.15", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.0", + "tower-service", +] + +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper 1.5.0", + "hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", + "hyper 1.5.0", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", ] [[package]] name = "iana-time-zone" -version = "0.1.58" +version = "0.1.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -858,48 +1285,59 @@ dependencies = [ ] [[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - -[[package]] -name = "indenter" -version = "0.3.3" +name = "idna" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] [[package]] name = "indexmap" -version = "2.1.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.15.0", ] [[package]] name = "insta" -version = "1.34.0" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5d64600be34b2fcfc267740a243fa7744441bb4947a619ac4e5bb6507f35fbfc" +checksum = "6593a41c7a73841868772495db7dc1e8ecab43bb5c0b6da2059246c4b506ab60" dependencies = [ "console", "lazy_static", "linked-hash-map", - "pest", - "pest_derive", "serde", "similar", - "yaml-rust", +] + +[[package]] +name = "ipnet" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" + +[[package]] +name = "iri-string" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0586ad318a04c73acdbad33f67969519b5452c80770c4c72059a686da48a7e" +dependencies = [ + "memchr", + "serde", ] [[package]] name = "itertools" -version = "0.11.0" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] @@ -915,36 +1353,117 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.9" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "jobserver" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] [[package]] name = "js-sys" -version = "0.3.65" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54c0c35952f67de54bb584e9fd912b3023117cbafc0a77d8f3dee1fb5f572fe8" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" dependencies = [ "wasm-bindgen", ] +[[package]] +name = "json" +version = "0.0.0" +source = "git+https://github.com/estuary/flow#60e47f697826a8128b533cc5f4600acb5d80cdbc" +dependencies = [ + "addr", + "bigdecimal 0.3.1", + "bitvec 0.19.6", + "fancy-regex 0.10.0", + "fxhash", + "iri-string", + "itertools 0.10.5", + "lazy_static", + "percent-encoding", + "serde", + "serde_json", + "thiserror", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "json-pointer" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fe841b94e719a482213cee19dd04927cf412f26d8dc84c5a446c081e49c2997" +dependencies = [ + "serde_json", +] + +[[package]] +name = "jsonway" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "effcb749443c905fbaef49d214f8b1049c240e0adb7af9baa0e201e625e4f9de" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.150" +version = "0.2.161" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" + +[[package]] +name = "libflate" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45d9dfdc14ea4ef0900c1cddbc8dcd553fbaacd8a4a282cf4018ae9dd04fb21e" +dependencies = [ + "adler32", + 
"core2", + "crc32fast", + "dary_heap", + "libflate_lz77", +] + +[[package]] +name = "libflate_lz77" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e0d73b369f386f1c44abd9c570d5318f55ccde816ff4b562fa452e5182863d" +dependencies = [ + "core2", + "hashbrown 0.14.5", + "rle-decode-fast", +] + +[[package]] +name = "libm" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libz-sys" -version = "1.1.12" +version = "1.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" +checksum = "d2d16453e800a8cf6dd2fc3eb4bc99b786a9b90c663b8559a5b1a041bf89e472" dependencies = [ "cc", "libc", @@ -960,15 +1479,44 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" [[package]] name = "linux-raw-sys" -version = "0.4.10" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] [[package]] name = "log" -version = "0.4.20" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "lz4" +version = "1.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d1febb2b4a79ddd1980eede06a8f7902197960aa0383ffcfdd62fe723036725" +dependencies = [ + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.11.1+lz4-1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" +dependencies = [ + "cc", + "libc", +] [[package]] name = "matchers" @@ -981,61 +1529,101 @@ dependencies = [ [[package]] name = "memchr" -version = "2.6.4" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "mime" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" dependencies = [ - "adler", + "adler2", ] [[package]] name = "mio" -version = "0.8.11" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ + 
"hermit-abi", "libc", "wasi", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "multimap" -version = "0.8.3" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" + +[[package]] +name = "native-tls" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] [[package]] name = "nu-ansi-term" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ - "overload", - "winapi", + "num-integer", + "num-traits", + "serde", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-integer" -version = "0.1.45" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" dependencies = [ - "autocfg", "num-traits", ] [[package]] name = "num-traits" -version = "0.2.17" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] @@ -1063,18 +1651,44 @@ dependencies = [ [[package]] name = "object" -version = "0.32.1" +version = "0.36.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.18.0" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "openssl" +version = "0.10.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" +dependencies = [ + "bitflags", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] [[package]] name = "openssl-probe" @@ -1084,9 +1698,9 @@ checksum = 
"ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.97" +version = "0.9.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3eaad34cdd97d81de97964fc7f29e2d104f483840d906ef56daa1912338460b" +checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" dependencies = [ "cc", "libc", @@ -1096,12 +1710,12 @@ dependencies = [ [[package]] name = "os_pipe" -version = "1.1.4" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ae859aa07428ca9a929b936690f8b12dc5f11dd8c6992a18ca93919f28bc177" +checksum = "5ffd2b0a5634335b135d5728d84c5e0fd726954b87111f7506a61c502280d982" dependencies = [ "libc", - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] @@ -1116,13 +1730,36 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + [[package]] name = "pbjson" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7e6349fa080353f4a597daffd05cb81572a9c031a6d4fff7e504947496fcc68" dependencies = [ - "base64", + "base64 0.21.7", "serde", ] @@ -1132,7 +1769,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" dependencies = [ - "heck 0.5.0", + "heck", "itertools 0.13.0", "prost", "prost-types", @@ -1160,65 +1797,58 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] -name = "pest" -version = "2.7.5" +name = "petgraph" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae9cee2a55a544be8b89dc6848072af97a20f2422603c10865be2a42b580fff5" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ - "memchr", - "thiserror", - "ucd-trie", + "fixedbitset", + "indexmap", ] [[package]] -name = "pest_derive" -version = "2.7.5" +name = "phf" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81d78524685f5ef2a3b3bd1cafbc9fcabb036253d9b1463e726a91cd16e2dfc2" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" dependencies = [ - "pest", - "pest_generator", + "phf_shared", ] [[package]] -name = "pest_generator" -version = "2.7.5" +name = "phf_codegen" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68bd1206e71118b5356dae5ddc61c8b11e28b09ef6a31acbd15ea48a28e0c227" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn 2.0.39", + "phf_generator", + "phf_shared", ] [[package]] -name = "pest_meta" -version = "2.7.5" +name = "phf_generator" +version = 
"0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c747191d4ad9e4a4ab9c8798f1e82a39affe7ef9648390b7e5548d18e099de6" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" dependencies = [ - "once_cell", - "pest", - "sha2", + "phf_shared", + "rand", ] [[package]] -name = "petgraph" -version = "0.6.4" +name = "phf_shared" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" dependencies = [ - "fixedbitset", - "indexmap", + "siphasher", ] [[package]] name = "pin-project-lite" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" [[package]] name = "pin-utils" @@ -1228,9 +1858,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.27" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "powerfmt" @@ -1238,14 +1868,23 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + [[package]] name = "prettyplease" -version = "0.2.15" +version = "0.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" +checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" dependencies = [ "proc-macro2", - "syn 2.0.39", + "syn 2.0.79", ] [[package]] @@ -1258,44 +1897,20 @@ dependencies = [ "toml_edit", ] -[[package]] -name = "proc-macro-error" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" -dependencies = [ - "proc-macro2", - "quote", - "version_check", -] - [[package]] name = "proc-macro2" -version = "1.0.69" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" +checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" dependencies = [ "unicode-ident", ] [[package]] name = "prost" -version = "0.13.1" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13db3d3fde688c61e2446b4d843bc27a7e8af269a69440c0308021dc92333cc" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" dependencies = [ "bytes", 
"prost-derive", @@ -1303,13 +1918,13 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.1" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb182580f71dd070f88d01ce3de9f4da5021db7115d2e1c3605a754153b77c1" +checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" dependencies = [ "bytes", - "heck 0.4.1", - "itertools 0.11.0", + "heck", + "itertools 0.13.0", "log", "multimap", "once_cell", @@ -1318,28 +1933,28 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.39", + "syn 2.0.79", "tempfile", ] [[package]] name = "prost-derive" -version = "0.13.1" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18bec9b0adc4eba778b33684b7ba3e7137789434769ee3ce3930463ef904cfca" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.79", ] [[package]] name = "prost-types" -version = "0.13.1" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cee5168b05f49d4b0ca581206eb14a7b22fafd963efe729ac48eb03266e25cc2" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" dependencies = [ "prost", ] @@ -1347,7 +1962,7 @@ dependencies = [ [[package]] name = "proto-flow" version = "0.0.0" -source = "git+https://github.com/estuary/flow#60536f4c560dde8a6832dcafe29a50dd75e8066c" +source = "git+https://github.com/estuary/flow#60e47f697826a8128b533cc5f4600acb5d80cdbc" dependencies = [ "bytes", "pbjson", @@ -1362,7 +1977,7 @@ dependencies = [ [[package]] name = "proto-gazette" version = "0.0.0" -source = "git+https://github.com/estuary/flow#60536f4c560dde8a6832dcafe29a50dd75e8066c" +source = "git+https://github.com/estuary/flow#60e47f697826a8128b533cc5f4600acb5d80cdbc" dependencies = [ "bytes", "pbjson", @@ -1373,20 +1988,103 @@ dependencies = [ "uuid", ] +[[package]] +name = "psl" +version = "2.1.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce9398ad066421139b2e3afe16ea46772ffda30bd9ba57554dc035df5e26edc8" +dependencies = [ + "psl-types", +] + +[[package]] +name = "psl-types" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac" + +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "quad-rand" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b76f1009795ca44bb5aaae8fd3f18953e209259c33d9b059b1f53d58ab7511db" + [[package]] name = "quote" -version = "1.0.33" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.5.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8" + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + [[package]] name = "rdkafka" -version = "0.36.0" +version = "0.36.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54f02a5a40220f8a2dfa47ddb38ba9064475a5807a69504b6f91711df2eea63" +checksum = "1beea247b9a7600a81d4cc33f659ce1a77e1988323d7d2809c7ed1c21f4c316d" dependencies = [ "futures-channel", "futures-util", @@ -1397,6 +2095,7 @@ dependencies = [ "serde_derive", "serde_json", "slab", + "tokio", ] [[package]] @@ -1412,27 +2111,28 @@ dependencies = [ "openssl-sys", "pkg-config", "sasl2-sys", + "zstd-sys", ] [[package]] name = "redox_syscall" -version = "0.4.1" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ - "bitflags 1.3.2", + "bitflags", ] [[package]] name = "regex" -version = "1.10.2" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.3", - "regex-syntax 0.8.2", + "regex-automata 0.4.8", + "regex-syntax 0.8.5", ] [[package]] @@ -1446,20 +2146,20 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.2", + "regex-syntax 0.8.5", ] [[package]] name = "regex-lite" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" [[package]] name = "regex-syntax" @@ -1469,64 +2169,165 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rend" +version = "0.4.2" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "reqwest" +version = "0.12.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" +dependencies = [ + "base64 0.22.1", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2 0.4.6", + "http 1.1.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.5.0", + "hyper-rustls 0.27.3", + "hyper-tls", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls-pemfile 2.2.0", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "system-configuration", + "tokio", + "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "windows-registry", +] [[package]] name = "ring" -version = "0.17.7" +version = "0.17.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" dependencies = [ "cc", + "cfg-if", "getrandom", "libc", "spin", "untrusted", - "windows-sys 0.48.0", + "windows-sys 0.52.0", +] + +[[package]] +name = "rkyv" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" +dependencies = [ + "bitvec 1.0.1", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", ] +[[package]] +name = "rle-decode-fast" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" + [[package]] name = "rustc-demangle" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] [[package]] name = "rustix" -version = "0.38.21" +version = "0.38.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b426b0506e5d50a7d8dafcf2e81471400deb602392c7dd110815afb4eaf02a3" +checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" dependencies = [ - "bitflags 2.4.1", + "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "rustls" -version = "0.21.11" +version = "0.21.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7fecbfb7b1444f477b345853b1fce097a2c6fb637b2bfb87e6bc5db0f043fae4" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" dependencies = [ "log", "ring", - "rustls-webpki", + "rustls-webpki 0.101.7", "sct", ] +[[package]] +name = "rustls" +version = "0.23.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fbb44d7acc4e873d613422379f69f237a1b141928c02f6bc6ccfddddc2d7993" +dependencies = [ + "once_cell", + "rustls-pki-types", + "rustls-webpki 0.102.8", + "subtle", + "zeroize", +] + [[package]] name = "rustls-native-certs" version = "0.6.3" @@ -1534,7 +2335,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" dependencies = [ "openssl-probe", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "schannel", "security-framework", ] @@ -1545,9 +2346,24 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ - "base64", + "base64 0.21.7", +] + +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", ] +[[package]] +name = "rustls-pki-types" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" + [[package]] name = "rustls-webpki" version = "0.101.7" @@ -1558,17 +2374,34 @@ dependencies = [ "untrusted", ] +[[package]] +name = "rustls-webpki" +version = "0.102.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" + [[package]] name = "ryu" -version = "1.0.15" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "sasl2-sys" -version = "0.1.20+2.1.28" +version = "0.1.22+2.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e645bd98535fc8fd251c43ba7c7c1f9be1e0369c99b6a5ea719052a773e655c" +checksum = "05f2a7f7efd9fc98b3a9033272df10709f5ee3fa0eabbd61a527a3a1ed6bd3c6" dependencies = [ "cc", "duct", @@ -1578,18 +2411,35 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.22" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "schema_registry_converter" +version = "4.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88" +checksum = "bcc3cf40651cf503827a34bcd7efbbd4750a7e3adc6768bb8089977e4d07303b" dependencies = [ - "windows-sys 0.48.0", + "apache-avro", + "byteorder", + "dashmap", + "futures", + "reqwest", + "serde", + "serde_json", + "url", + "valico", ] [[package]] name 
= "schemars" -version = "0.8.15" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f7b0ce13155372a76ee2e1c5ffba1fe61ede73fbea5630d61eee6fac4929c0c" +checksum = "09c024468a378b7e36765cd36702b7a90cc3cba11654f6685c8f233408e89e92" dependencies = [ "dyn-clone", "schemars_derive", @@ -1599,16 +2449,22 @@ dependencies = [ [[package]] name = "schemars_derive" -version = "0.8.15" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e85e2a16b12bdb763244c69ab79363d71db2b4b918a2def53f80b02e0574b13c" +checksum = "b1eee588578aff73f856ab961cd2f79e36bc45d7ded33a7562adba4667aecc0e" dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 1.0.109", + "syn 2.0.79", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "sct" version = "0.7.1" @@ -1619,13 +2475,19 @@ dependencies = [ "untrusted", ] +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "security-framework" -version = "2.9.2" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 1.3.2", + "bitflags", "core-foundation", "core-foundation-sys", "libc", @@ -1634,9 +2496,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.9.1" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" +checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" dependencies = [ "core-foundation-sys", "libc", @@ -1644,73 +2506,72 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.20" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.191" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a834c4821019838224821468552240d4d95d14e751986442c816572d39a080c9" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] +[[package]] +name = "serde_bytes" +version = "0.11.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "387cc504cb06bb40a96c8e04e951fe01854cf6bc921053c954e4a606d9675c6a" +dependencies = [ + "serde", +] + [[package]] name = "serde_derive" -version = "1.0.191" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46fa52d5646bce91b680189fe5b1c049d2ea38dabb4e2e7c8d00ca12cfbfbcfd" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.79", ] [[package]] name = "serde_derive_internals" -version = "0.26.0" +version = "0.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"85bf8229e7920a9f636479437026331ce11aa132b4dde37d121944a44d6e5f3c" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.79", ] [[package]] name = "serde_json" -version = "1.0.108" +version = "1.0.129" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" +checksum = "6dbcf9b78a125ee667ae19388837dd12294b858d101fdd393cb9d5501ef09eb2" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] [[package]] -name = "serde_with" -version = "1.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678b5a069e50bf00ecd22d0cd8ddf7c236f68581b03db652061ed5eb13a312ff" -dependencies = [ - "hex", - "serde", - "serde_with_macros", -] - -[[package]] -name = "serde_with_macros" -version = "1.5.2" +name = "serde_urlencoded" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e182d6ec6f05393cc0e5ed1bf81ad6db3a8feedf8ee515ecdd369809bcce8082" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" dependencies = [ - "darling", - "proc-macro2", - "quote", - "syn 1.0.109", + "form_urlencoded", + "itoa", + "ryu", + "serde", ] [[package]] @@ -1735,19 +2596,46 @@ dependencies = [ [[package]] name = "shared_child" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0d94659ad3c2137fef23ae75b03d5241d633f8acded53d672decfa0e6e0caef" +checksum = "09fa9338aed9a1df411814a5b2252f7cd206c55ae9bf2fa763f8de84603aa60c" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" dependencies = [ "libc", - "winapi", ] +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "similar" -version = "2.3.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aeaf503862c419d66959f5d7ca015337d864e9c49485d771b732e2a20453597" +checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e" + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" [[package]] name = "slab" @@ -1760,53 +2648,51 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" - -[[package]] -name = "socket2" -version = "0.4.10" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" -dependencies = [ - "libc", - "winapi", -] +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "socket2" -version = "0.5.5" +version = "0.5.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" dependencies = [ "libc", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "source-kafka" version = "0.1.0" dependencies = [ + "anyhow", + "apache-avro", + "async-trait", "aws-sdk-iam", "aws-sigv4", - "aws-smithy-runtime-api", - "base64", - "chrono", - "eyre", + "base64 0.22.1", + "bigdecimal 0.4.6", + "doc", + "futures", + "hex", "highway", - "http", + "http 0.2.12", "insta", + "json", + "lazy_static", "proto-flow", "rdkafka", + "reqwest", + "schema_registry_converter", "schemars", "serde", "serde_json", - "serde_with", - "structopt", - "thiserror", + "time", + "tokio", "tracing", "tracing-subscriber", + "uuid", ] [[package]] @@ -1816,46 +2702,29 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] -name = "strsim" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" - -[[package]] -name = "strsim" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" - -[[package]] -name = "structopt" -version = "0.3.26" +name = "strum" +version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" -dependencies = [ - "clap", - "lazy_static", - "structopt-derive", -] +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" [[package]] -name = "structopt-derive" -version = "0.4.18" +name = "strum_macros" +version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ - "heck 0.3.3", - "proc-macro-error", + "heck", "proc-macro2", "quote", - "syn 1.0.109", + "rustversion", + "syn 2.0.79", ] [[package]] name = "subtle" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" @@ -1870,9 +2739,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.39" +version = "2.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" dependencies = [ "proc-macro2", "quote", @@ -1880,52 +2749,79 @@ dependencies = [ ] [[package]] -name = "tempfile" -version = "3.8.1" +name = "sync_wrapper" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" +checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" dependencies = [ - "cfg-if", - "fastrand", - "redox_syscall", - "rustix", - "windows-sys 0.48.0", + "futures-core", ] [[package]] -name = "textwrap" -version = "0.11.0" +name = "system-configuration" +version = "0.6.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +dependencies = [ + "bitflags", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "tempfile" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" dependencies = [ - "unicode-width", + "cfg-if", + "fastrand", + "once_cell", + "rustix", + "windows-sys 0.59.0", ] [[package]] name = "thiserror" -version = "1.0.50" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" +checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.50" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" +checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.79", ] [[package]] name = "thread_local" -version = "1.1.7" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" dependencies = [ "cfg-if", "once_cell", @@ -1933,11 +2829,13 @@ dependencies = [ [[package]] name = "time" -version = "0.3.30" +version = "0.3.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" dependencies = [ "deranged", + "itoa", + "num-conv", "powerfmt", "serde", "time-core", @@ -1952,26 +2850,66 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.15" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" dependencies = [ + "num-conv", "time-core", ] +[[package]] +name = "tinyvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tokio" -version = "1.35.0" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "841d45b238a16291a4e1584e61820b8ae57d696cc5015c459c229ccc6990cc1c" +checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" dependencies = [ "backtrace", "bytes", "libc", "mio", + "parking_lot", "pin-project-lite", - "socket2 0.5.5", - "windows-sys 0.48.0", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.52.0", +] + +[[package]] +name = "tokio-macros" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", ] [[package]] @@ -1980,29 +2918,39 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls", + "rustls 0.21.12", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +dependencies = [ + "rustls 0.23.15", + "rustls-pki-types", "tokio", ] [[package]] name = "tokio-util" -version = "0.7.10" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ "bytes", "futures-core", "futures-sink", "pin-project-lite", "tokio", - "tracing", ] [[package]] name = "toml_datetime" -version = "0.6.5" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" [[package]] name = "toml_edit" @@ -2017,9 +2965,9 @@ dependencies = [ [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" @@ -2040,7 +2988,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.79", ] [[package]] @@ -2064,6 +3012,16 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-serde" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1" +dependencies = [ + "serde", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.18" @@ -2074,12 +3032,16 @@ dependencies = [ "nu-ansi-term", "once_cell", "regex", + "serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", + "time", "tracing", "tracing-core", "tracing-log", + "tracing-serde", ] [[package]] @@ -2088,6 +3050,35 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "tuple" +version = "0.0.0" +source = 
"git+https://github.com/estuary/flow#60e47f697826a8128b533cc5f4600acb5d80cdbc" +dependencies = [ + "memchr", + "serde_json", +] + +[[package]] +name = "typed-builder" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06fbd5b8de54c5f7c91f6fe4cebb949be2125d7758e630bb58b1d831dbce600" +dependencies = [ + "typed-builder-macro", +] + +[[package]] +name = "typed-builder-macro" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] + [[package]] name = "typenum" version = "1.17.0" @@ -2095,28 +3086,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] -name = "ucd-trie" -version = "0.1.6" +name = "unicode-bidi" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" +checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] -name = "unicode-segmentation" -version = "1.10.1" +name = "unicode-normalization" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" - -[[package]] -name = "unicode-width" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +dependencies = [ + "tinyvec", +] [[package]] name = "untrusted" @@ -2124,6 +3112,27 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "uritemplate-next" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcde98d1fc3f528255b1ecb22fb688ee0d23deb672a8c57127df10b98b4bd18c" +dependencies = [ + "regex", +] + +[[package]] +name = "url" +version = "2.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + [[package]] name = "urlencoding" version = "2.1.3" @@ -2132,14 +3141,38 @@ checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" [[package]] name = "uuid" -version = "1.6.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" dependencies = [ "getrandom", "serde", ] +[[package]] +name = "valico" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8a0a4df97f827fcbcbe69c65364acddddf3a4bb50e6507f63361177a7ea7a4" +dependencies = [ + "addr", + "base64 0.21.7", + "chrono", + "downcast-rs", + 
"erased-serde", + "fancy-regex 0.11.0", + "json-pointer", + "jsonway", + "percent-encoding", + "phf", + "phf_codegen", + "serde", + "serde_json", + "uritemplate-next", + "url", + "uuid", +] + [[package]] name = "valuable" version = "0.1.0" @@ -2152,17 +3185,11 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "vsimd" @@ -2187,34 +3214,47 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.88" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7daec296f25a1bae309c0cd5c29c4b260e510e6d813c286b19eaadf409d40fce" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.88" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e397f4664c0e4e428e8313a469aaa58310d302159845980fd23b0f22a847f217" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.79", "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" -version = "0.2.88" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5961017b3b08ad5f3fe39f1e79877f8ee7c23c5e5fd5eb80de95abc41f1f16b2" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2222,22 +3262,32 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.88" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5353b8dab669f5e10f5bd76df26a9360c748f054f862ff5f3f8aae0c7fb3907" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.79", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.88" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" + +[[package]] +name = "web-sys" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d046c5d029ba91a1ed14da14dca44b68bf2f124cfbaf741c54151fdb3e0750b" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +dependencies = [ + "js-sys", + "wasm-bindgen", +] [[package]] name = "winapi" @@ 
-2263,152 +3313,147 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-core" -version = "0.51.1" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.48.5", + "windows-targets", ] [[package]] -name = "windows-sys" -version = "0.45.0" +name = "windows-registry" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" dependencies = [ - "windows-targets 0.42.2", + "windows-result", + "windows-strings", + "windows-targets", ] [[package]] -name = "windows-sys" -version = "0.48.0" +name = "windows-result" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" dependencies = [ - "windows-targets 0.48.5", + "windows-targets", ] [[package]] -name = "windows-targets" -version = "0.42.2" +name = "windows-strings" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", + "windows-result", + "windows-targets", ] [[package]] -name = "windows-targets" -version = "0.48.5" +name = "windows-sys" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", + "windows-targets", ] [[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" +name = "windows-sys" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] [[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" +name = "windows-targets" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] [[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" +name = "windows_aarch64_gnullvm" +version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" -version = "0.48.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] -name = "windows_i686_msvc" -version = "0.42.2" +name = "windows_i686_gnullvm" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" -version = "0.48.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.2" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" +name = "windows_x86_64_msvc" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" +name = "winnow" +version = "0.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] [[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" +name = "wyz" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +checksum = "85e60b0d1b5f99db2556934e21937020776a5d31520bf169e851ac44e6420214" [[package]] -name = "winnow" -version = "0.5.19" +name = 
"wyz" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "829846f3e3db426d4cee4510841b71a8e58aa2a76b1132579487ae430ccd9c7b" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" dependencies = [ - "memchr", + "tap", ] [[package]] @@ -2418,16 +3463,38 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" [[package]] -name = "yaml-rust" -version = "0.4.5" +name = "zerocopy" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ - "linked-hash-map", + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", ] [[package]] name = "zeroize" -version = "1.6.0" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + +[[package]] +name = "zstd-sys" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/source-kafka/Cargo.toml b/source-kafka/Cargo.toml index d7bc80d2e9..fca2a15e4f 100644 --- a/source-kafka/Cargo.toml +++ b/source-kafka/Cargo.toml @@ -3,33 +3,45 @@ name = "source-kafka" version = "0.1.0" edition = "2021" -[lib] -doctest = false - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -proto-flow = { git = "https://github.com/estuary/flow", version = "0.0.0" } +doc = { git = "https://github.com/estuary/flow" } +json = { git = "https://github.com/estuary/flow" } +proto-flow = { git = "https://github.com/estuary/flow" } -chrono = { version = "0.4.19", features = ["serde"] } -eyre = "0.6.5" -base64 = "0.21.5" -highway = "0.6.4" -http = "*" -aws-sdk-iam = "1.7.0" -aws-smithy-runtime-api = "1.1.1" -aws-sigv4 = { version = "1.1.1", features = ["sign-http", "http0-compat"] } -rdkafka = { version = "0.36", features = ["cmake-build", "gssapi", "libz", "sasl", "ssl"], default-features = false } -#rdkafka = { version = "0.36", features = ["dynamic-linking", "gssapi", "libz", "sasl", "ssl"], default-features = false } -#sasl2-sys = { version = "0.1.14", features = ["vendored" ] } -schemars = "0.8.3" -serde = { version = "1.0.96", features = ["derive"] } -serde_json = "1.0.96" -serde_with = { version = "1.9.4", features = [ "hex", "macros" ] } -structopt = "0.3.22" -thiserror = "1.0.26" +anyhow = "1.0" +aws-sdk-iam = "1.48.0" +aws-sigv4 = { version = "1.2.4", features = ["sign-http", "http0-compat"] } +base64 = "0.22" +hex = "0.4" +highway = "1.2" +http = "0.2" # 0.2x version is needed for MSK token signing via aws-sigv4 +lazy_static = "1.5" +rdkafka = { version = "0.36", features = [ + "cmake-build", + "sasl", + "ssl", + "zstd", +] } +schemars = "0.8" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +tokio = { version = "1", features = ["full"] } tracing 
= "0.1" -tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tracing-subscriber = { version = "0.3", features = [ + "json", + "env-filter", + "time", +] } +reqwest = { version = "0.12", features = ["json"] } +futures = "0.3" +apache-avro = "0.17" +bigdecimal = "0.4" +time = "0.3" + [dev-dependencies] -insta = { version = "1.8.0", features = ["redactions", "yaml"] } +insta = { version = "1", features = ["json", "serde"] } +schema_registry_converter = { version = "4.2.0", features = ["avro", "json"] } +uuid = "1.11" +async-trait = "0.1" diff --git a/source-kafka/Dockerfile b/source-kafka/Dockerfile index 68b064bb2b..1d0fdd1f77 100644 --- a/source-kafka/Dockerfile +++ b/source-kafka/Dockerfile @@ -2,37 +2,37 @@ ARG BASE_IMAGE=ghcr.io/estuary/base-image:v1 # Build Stage ################################################################################ -FROM rust:1.70-slim-bullseye as builder - -RUN rustup component add clippy +FROM golang:1.22-bullseye as builder RUN apt-get update \ - && apt-get install -y ca-certificates pkg-config cmake g++ libssl-dev libsasl2-dev \ + && apt-get install -y curl ca-certificates pkg-config cmake g++ libssl-dev libsasl2-dev \ && rm -rf /var/lib/apt/lists/* -WORKDIR /connector - -ENV CARGO_HOME /connector/cargo -ENV CARGO_TARGET_DIR /connector/target +RUN curl https://sh.rustup.rs -sSf | bash -s -- -y --default-toolchain 1.80 +ENV PATH="/root/.cargo/bin:${PATH}" +RUN rustup component add clippy -RUN mkdir /connector/src/ -RUN touch /connector/src/lib.rs -RUN echo "fn main() {}" > /connector/src/main.rs +WORKDIR /connector COPY source-kafka/Cargo.* /connector/ -RUN cargo build --release \ - && cargo test --release - -RUN rm -r /connector/src +# Avoid having to install/build all dependencies by copying the Cargo files and +# making a dummy src/main.rs and empty lib.rs files. +RUN mkdir src \ + && echo "fn main() {}" > src/main.rs \ + && touch src/lib.rs \ + && cargo test --locked \ + && cargo build --release --locked \ + && rm -r src -COPY source-kafka/src /connector/src +COPY source-kafka/src ./src -RUN touch /connector/src/main.rs \ - && touch /connector/src/lib.rs \ - && cargo build --offline --release \ - && cargo test --release \ - && cargo clippy --release +# This touch prevents Docker from using a cached empty main.rs file. +RUN touch src/main.rs \ + && touch src/lib.rs \ + && cargo test --release --locked --offline \ + && cargo clippy --locked --offline --no-deps \ + && cargo build --release --locked --offline # Runtime Stage ################################################################################ @@ -51,7 +51,7 @@ COPY --from=builder /usr/lib/x86_64-linux-gnu/libssl.so.1* /usr/lib/x86_64-linux COPY --from=builder /usr/lib/x86_64-linux-gnu/libcrypto.so.1* /usr/lib/x86_64-linux-gnu/ # Copy in the connector artifact. -COPY --from=builder /connector/target/release/source-kafka ./source-kafka +COPY --from=builder /connector/target/release/source-kafka ./ # Avoid running the connector as root. 
USER nonroot:nonroot @@ -59,5 +59,5 @@ USER nonroot:nonroot ENTRYPOINT ["/connector/source-kafka"] LABEL FLOW_RUNTIME_PROTOCOL=capture -LABEL CONNECTOR_PROTOCOL=flow-capture LABEL FLOW_RUNTIME_CODEC=json +LABEL CONNECTOR_PROTOCOL=flow-capture diff --git a/source-kafka/Makefile b/source-kafka/Makefile deleted file mode 100644 index 7da5ac2269..0000000000 --- a/source-kafka/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -# Test Cluster Management Tasks: - -topic=todo-list -partitions=4 - -.PHONY: test_create -test_create: - kafkactl create topic ${topic} --partitions ${partitions} - -.PHONY: test_destroy -test_destroy: - kafkactl delete topic ${topic} - -.PHONY: test_seed -test_seed: -# TODO: Figure out why looping does not want to work. - cat ./tests/fixtures/${topic}.json | jq -c "map(select(.userId % 4 == 0)) | .[]" | kafkactl produce ${topic} -p 0 - cat ./tests/fixtures/${topic}.json | jq -c "map(select(.userId % 4 == 1)) | .[]" | kafkactl produce ${topic} -p 1 - cat ./tests/fixtures/${topic}.json | jq -c "map(select(.userId % 4 == 2)) | .[]" | kafkactl produce ${topic} -p 2 - cat ./tests/fixtures/${topic}.json | jq -c "map(select(.userId % 4 == 3)) | .[]" | kafkactl produce ${topic} -p 3 - -.PHONY: test_setup -test_setup: test_create test_seed diff --git a/source-kafka/docker-compose.yaml b/source-kafka/docker-compose.yaml new file mode 100644 index 0000000000..da3451da33 --- /dev/null +++ b/source-kafka/docker-compose.yaml @@ -0,0 +1,50 @@ +services: + db: + image: confluentinc/confluent-local:7.7.1 + environment: + KAFKA_NODE_ID: 1 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: 'CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT' + KAFKA_ADVERTISED_LISTENERS: 'PLAINTEXT://db:29092,PLAINTEXT_HOST://${LISTENER_HOST:-localhost}:9092' + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 + KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 + KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 + KAFKA_PROCESS_ROLES: 'broker,controller' + KAFKA_CONTROLLER_QUORUM_VOTERS: '1@db:29093' + KAFKA_LISTENERS: 'PLAINTEXT://db:29092,CONTROLLER://db:29093,PLAINTEXT_HOST://0.0.0.0:9092' + KAFKA_INTER_BROKER_LISTENER_NAME: 'PLAINTEXT' + KAFKA_CONTROLLER_LISTENER_NAMES: 'CONTROLLER' + KAFKA_LOG_DIRS: '/tmp/kraft-combined-logs' + KAFKA_REST_HOST_NAME: rest-proxy + KAFKA_REST_BOOTSTRAP_SERVERS: 'db:29092' + KAFKA_REST_LISTENERS: "http://0.0.0.0:8082" + CLUSTER_ID: '4L6g3nShT-eMCtK--X86sw' + healthcheck: + test: /bin/kafka-cluster cluster-id --bootstrap-server localhost:9092 + interval: 1s + timeout: 1s + retries: 60 + ports: + - 9092:9092 + networks: + - flow-test + + schema-registry: + image: confluentinc/cp-schema-registry:7.7.1 + hostname: schema-registry + container_name: schema-registry + depends_on: + - db + ports: + - "8081:8081" + environment: + SCHEMA_REGISTRY_HOST_NAME: schema-registry + SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'db:29092' + SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081 + networks: + - flow-test + +networks: + flow-test: + name: flow-test + external: true diff --git a/source-kafka/src/catalog.rs b/source-kafka/src/catalog.rs deleted file mode 100644 index 7f4d1dc2bd..0000000000 --- a/source-kafka/src/catalog.rs +++ /dev/null @@ -1,70 +0,0 @@ -use highway::{HighwayHash, HighwayHasher}; -use proto_flow::flow::RangeSpec; -use serde::{Deserialize, Serialize}; - -#[derive(Debug, thiserror::Error)] -pub enum Error { - #[error("failed to read the catalog file")] - File(#[from] std::io::Error), - - #[error("failed to validate connector catalog")] - 
Format(#[from] serde_json::Error), - - #[error("cannot subscribe to non-existent stream: {0}")] - MissingStream(String), -} - -pub fn responsible_for_shard(range: &RangeSpec, key: impl Into<ShardKey>) -> bool { - let hash = key.into().hash(); - range.key_begin <= hash && range.key_end >= hash -} - -#[derive(Serialize, Deserialize)] -pub struct Resource { - pub stream: String, -} - -#[derive(Default)] -pub struct ShardKey(HighwayHasher); - -impl ShardKey { - pub fn add_int(mut self, n: impl Into<i64>) -> Self { - self.0.append(&n.into().to_be_bytes()); - self - } - - pub fn add_str(mut self, s: &str) -> Self { - self.0.append(s.as_bytes()); - self - } - - fn hash(self) -> u32 { - self.0.finalize64() as u32 - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn shard_assignment_test() { - let range = RangeSpec { - key_begin: 0, - key_end: 0x7fffffff, - r_clock_begin: 0, - r_clock_end: 0xffffffff, - }; - - let shards = (0..10).map(|n| ShardKey::default().add_int(n)); - let shards_covered = 2; - - assert_eq!( - shards_covered, - shards - .map(|n| responsible_for_shard(&range, n)) - .filter(|b| *b) - .count() - ); - } -} diff --git a/source-kafka/src/configuration.rs b/source-kafka/src/configuration.rs index 603d8e5719..e2f900f2f1 100644 --- a/source-kafka/src/configuration.rs +++ b/source-kafka/src/configuration.rs @@ -1,61 +1,90 @@ -use std::fmt::Display; - -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -use crate::connector; - -#[derive(Debug, thiserror::Error)] -pub enum Error { - #[error("failed to read the configuration file")] - File(#[from] std::io::Error), +use anyhow::Result; +use rdkafka::client::{ClientContext, OAuthToken}; +use rdkafka::consumer::{BaseConsumer, ConsumerContext}; +use rdkafka::ClientConfig; +use schemars::{schema::RootSchema, JsonSchema}; +use serde::{de, Deserialize, Deserializer, Serialize}; + +#[derive(Serialize, Deserialize)] +pub struct EndpointConfig { + bootstrap_servers: String, + credentials: Option<Credentials>, + tls: Option<TlsSettings>, + pub schema_registry: SchemaRegistryConfig, +} - #[error("failed to parse the file as valid json")] - Parsing(#[from] serde_json::Error), +#[derive(Serialize, Deserialize, Clone)] +#[serde(tag = "auth_type")] +#[serde(rename_all = "snake_case")] +pub enum Credentials { + UserPassword { + mechanism: SaslMechanism, + username: String, + password: String, + }, + #[serde(rename = "AWS")] + AWS { + aws_access_key_id: String, + aws_secret_access_key: String, + region: String, + }, +} - #[error("bootstrap servers are required to make the initial connection")] - NoBootstrapServersGiven, +#[derive(Serialize, Deserialize, Clone)] +#[serde(rename_all = "SCREAMING-KEBAB-CASE")] +pub enum SaslMechanism { + Plain, + ScramSha256, + ScramSha512, } -/// # Kafka Source Configuration -#[derive(Deserialize, Default, Debug, Serialize)] -pub struct Configuration { - /// # Bootstrap Servers - /// - /// The initial servers in the Kafka cluster to initially connect to. The Kafka - /// client will be informed of the rest of the cluster nodes by connecting to - /// one of these nodes. - pub bootstrap_servers: String, +impl std::fmt::Display for SaslMechanism { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SaslMechanism::Plain => write!(f, "PLAIN"), + SaslMechanism::ScramSha256 => write!(f, "SCRAM-SHA-256"), + SaslMechanism::ScramSha512 => write!(f, "SCRAM-SHA-512"), + } + } +} - /// # Credentials - /// - /// The connection details for authenticating a client connection to Kafka via SASL.
- /// When not provided, the client connection will attempt to use PLAINTEXT - /// (insecure) protocol. This must only be used in dev/test environments. - pub credentials: Option, +#[derive(Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum TlsSettings { + SystemCertificates, +} - /// # TLS connection settings. - pub tls: Option, +#[derive(Serialize, Deserialize)] +#[serde(tag = "schema_registry_type")] +#[serde(rename_all = "snake_case")] +pub enum SchemaRegistryConfig { + ConfluentSchemaRegistry { + endpoint: String, + username: String, + password: String, + }, + NoSchemaRegistry { + #[serde(deserialize_with = "validate_json_only_true")] + enable_json_only: bool, + }, } -impl Configuration { - pub fn brokers(&self) -> String { - self.bootstrap_servers.clone() - } - - pub fn security_protocol(&self) -> &'static str { - match (&self.credentials, &self.tls) { - (None, Some(TlsSettings::SystemCertificates)) => "SSL", - (None, None) => "PLAINTEXT", - (Some(_), Some(TlsSettings::SystemCertificates)) => "SASL_SSL", - (Some(_), None) => "SASL_PLAINTEXT", - } +fn validate_json_only_true<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + if bool::deserialize(deserializer)? { + Ok(true) + } else { + Err(de::Error::custom( + "'enable_json_only' must be set to true when no schema registry is configured", + )) } } -impl JsonSchema for Configuration { +impl JsonSchema for EndpointConfig { fn schema_name() -> String { - "Configuration".to_owned() + "EndpointConfig".to_owned() } fn json_schema(_gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema { @@ -65,9 +94,16 @@ impl JsonSchema for Configuration { "type": "object", "required": [ "bootstrap_servers", - "credentials" + "credentials", + "schema_registry" ], "properties": { + "bootstrap_servers": { + "title": "Bootstrap Servers", + "description": "The initial servers in the Kafka cluster to initially connect to, separated by commas. The Kafka client will be informed of the rest of the cluster nodes by connecting to one of these nodes.", + "type": "string", + "order": 0 + }, "credentials": { "title": "Credentials", "description": "The connection details for authenticating a client connection to Kafka via SASL. When not provided, the client connection will attempt to use PLAINTEXT (insecure) protocol. 
This must only be used in dev/test environments.", @@ -81,8 +117,9 @@ impl JsonSchema for Configuration { "properties": { "auth_type": { "type": "string", - "default": "UserPassword", - "const": "UserPassword" + "default": "user_password", + "const": "user_password", + "order": 0 }, "mechanism": { "description": "The SASL Mechanism describes how to exchange and authenticate clients/servers.", @@ -93,18 +130,18 @@ impl JsonSchema for Configuration { ], "title": "SASL Mechanism", "type": "string", - "order": 0 + "default": "PLAIN", + "order": 1 }, - "password": { + "username": { "order": 2, - "secret": true, - "title": "Password", + "title": "Username", "type": "string" }, - "username": { - "order": 1, + "password": { + "order": 3, "secret": true, - "title": "Username", + "title": "Password", "type": "string" } }, @@ -120,21 +157,22 @@ impl JsonSchema for Configuration { "auth_type": { "type": "string", "default": "AWS", - "const": "AWS" + "const": "AWS", + "order": 0 }, "aws_access_key_id": { "title": "AWS Access Key ID", "type": "string", - "order": 0 + "order": 1 }, "aws_secret_access_key": { - "order": 1, + "order": 2, "secret": true, "title": "AWS Secret Access Key", "type": "string" }, "region": { - "order": 2, + "order": 3, "title": "AWS Region", "type": "string" } @@ -147,12 +185,6 @@ impl JsonSchema for Configuration { ] }] }, - "bootstrap_servers": { - "title": "Bootstrap Servers", - "description": "The initial servers in the Kafka cluster to initially connect to, separated by commas. The Kafka client will be informed of the rest of the cluster nodes by connecting to one of these nodes.", - "type": "string", - "order": 0 - }, "tls": { "default": "system_certificates", "description": "Controls how should TLS certificates be found or used.", @@ -162,6 +194,69 @@ impl JsonSchema for Configuration { "title": "TLS Settings", "type": "string", "order": 2 + }, + "schema_registry": { + "title": "Schema Registry", + "description": "Connection details for interacting with a schema registry.", + "type": "object", + "order": 3, + "discriminator": { + "propertyName": "schema_registry_type" + }, + "oneOf": [{ + "title": "Confluent Schema Registry", + "properties": { + "schema_registry_type": { + "type": "string", + "default": "confluent_schema_registry", + "const": "confluent_schema_registry", + "order": 0 + }, + "endpoint": { + "type": "string", + "title": "Schema Registry Endpoint", + "description": "Schema registry API endpoint. For example: https://registry-id.us-east-2.aws.confluent.cloud", + "order": 1 + }, + "username": { + "type": "string", + "title": "Schema Registry Username", + "description": "Schema registry username to use for authentication. If you are using Confluent Cloud, this will be the 'Key' from your schema registry API key.", + "order": 2 + }, + "password": { + "type": "string", + "title": "Schema Registry Password", + "description": "Schema registry password to use for authentication. 
If you are using Confluent Cloud, this will be the 'Secret' from your schema registry API key.", + "order": 3, + "secret": true + } + }, + "required": [ + "endpoint", + "username", + "password" + ], + }, { + "title": "No Schema Registry", + "properties": { + "schema_registry_type": { + "type": "string", + "default": "no_schema_registry", + "const": "no_schema_registry", + "order": 0 + }, + "enable_json_only": { + "type": "boolean", + "title": "Capture Messages in JSON Format Only", + "description": "If no schema registry is configured the capture will attempt to parse all data as JSON, and discovered collections will use a key of the message partition & offset. All available topics will be discovered, but if their messages are not encoded as JSON attempting to capture them will result in errors. If your topics contain messages encoded with a schema, you should configure the connector to use the schema registry for optimal results.", + "order": 1 + } + }, + "required": [ + "enable_json_only", + ], + }], } } })) @@ -169,145 +264,107 @@ impl JsonSchema for Configuration { } } -impl connector::ConnectorConfig for Configuration { - type Error = Error; - - fn parse(reader: &str) -> Result { - let configuration: Configuration = serde_json::from_str(reader)?; - - if configuration.bootstrap_servers.is_empty() { - return Err(Error::NoBootstrapServersGiven); - } - - Ok(configuration) - } +pub struct FlowConsumerContext { + auth: Option, } -/// # SASL Mechanism -/// -/// The SASL Mechanism describes _how_ to exchange and authenticate -/// clients/servers. For secure communication, TLS is **required** for all -/// supported mechanisms. -/// -/// For more information about the Simple Authentication and Security Layer (SASL), see RFC 4422: -/// https://datatracker.ietf.org/doc/html/rfc4422 -/// For more information about Salted Challenge Response Authentication -/// Mechanism (SCRAM), see RFC 7677. -/// https://datatracker.ietf.org/doc/html/rfc7677 -#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(rename_all = "SCREAMING-KEBAB-CASE")] -pub enum SaslMechanism { - /// The username and password are sent to the server in the clear. - Plain, - /// SCRAM using SHA-256. - #[serde(rename = "SCRAM-SHA-256")] - ScramSha256, - /// SCRAM using SHA-512. - #[serde(rename = "SCRAM-SHA-512")] - ScramSha512, -} - -impl Display for SaslMechanism { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - SaslMechanism::Plain => write!(f, "PLAIN"), - SaslMechanism::ScramSha256 => write!(f, "SCRAM-SHA-256"), - SaslMechanism::ScramSha512 => write!(f, "SCRAM-SHA-512"), +impl ClientContext for FlowConsumerContext { + const ENABLE_REFRESH_OAUTH_TOKEN: bool = true; + + fn generate_oauth_token( + &self, + _oauthbearer_config: Option<&str>, + ) -> Result> { + match &self.auth { + Some(Credentials::AWS { + aws_access_key_id, + aws_secret_access_key, + region, + }) => { + let (token, lifetime_ms) = crate::msk_oauthbearer::token( + region, + aws_access_key_id, + aws_secret_access_key, + )?; + Ok(OAuthToken { + // This is just a descriptive name of the principal which is + // accessing the resource, not a specific constant + principal_name: "flow-kafka-capture".to_string(), + token, + lifetime_ms, + }) + } + _ => Err(anyhow::anyhow!("generate_oauth_token called without AWS credentials").into()), } } } -/// # Credentials -/// -/// The information necessary to connect to Kafka. 
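// Illustrative sketch (placeholder values, not taken from this diff): a config document
// matching the new EndpointConfig schema above would look like
// {
//   "bootstrap_servers": "broker-1:9092,broker-2:9092",
//   "credentials": {"auth_type": "user_password", "mechanism": "PLAIN",
//                   "username": "alice", "password": "alice-secret"},
//   "tls": "system_certificates",
//   "schema_registry": {"schema_registry_type": "confluent_schema_registry",
//                       "endpoint": "https://registry-id.us-east-2.aws.confluent.cloud",
//                       "username": "registry-key", "password": "registry-secret"}
// }
// while the per-binding resource config is simply of the form {"topic": "my-topic"}.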
-#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(tag="auth_type")] -pub enum Credentials { - UserPassword { - /// # Sasl Mechanism - mechanism: SaslMechanism, - /// # Username - username: String, - /// # Password - password: String, - }, - - AWS { - #[serde(rename="aws_access_key_id")] - access_key_id: String, - #[serde(rename="aws_secret_access_key")] - secret_access_key: String, - region: String, - } -} - -/// # TLS Settings -/// -/// Controls how should TLS certificates be found or used. -#[derive(Debug, Deserialize, Serialize)] -#[serde(rename_all = "snake_case")] -#[derive(Default)] -pub enum TlsSettings { - /// Use the TLS certificates bundled with openssl. - #[default] - SystemCertificates, - // TODO: allow the user to specify custom TLS certs, authorities, etc. - // CustomCertificates(CustomTlsSettings), -} - - +impl ConsumerContext for FlowConsumerContext {} + +impl EndpointConfig { + pub async fn to_consumer(&self) -> Result> { + let mut config = ClientConfig::new(); + + config.set("bootstrap.servers", self.bootstrap_servers.clone()); + config.set("enable.auto.commit", "false"); + config.set("group.id", "source-kafka"); // librdkafka will throw an error if this is left blank + config.set("security.protocol", self.security_protocol()); + + match &self.credentials { + Some(Credentials::UserPassword { + mechanism, + username, + password, + }) => { + config.set("sasl.mechanism", mechanism.to_string()); + config.set("sasl.username", username); + config.set("sasl.password", password); + } + Some(Credentials::AWS { .. }) => { + if self.security_protocol() != "SASL_SSL" { + anyhow::bail!("must use tls=system_certificates for AWS") + } + config.set("sasl.mechanism", "OAUTHBEARER"); + } + None => (), + } -#[cfg(test)] -mod test { - use super::*; + let ctx = FlowConsumerContext { + auth: self.credentials.clone(), + }; - #[test] - fn empty_brokers_test() { - let config = Configuration::default(); - let brokers = config.brokers(); - assert_eq!("", brokers); - } + let consumer: BaseConsumer = config.create_with_context(ctx)?; - #[test] - fn many_brokers_test() { - let config: Configuration = serde_json::from_str( - r#"{ - "bootstrap_servers": "localhost:9092,172.22.36.2:9093,localhost:9094", - "tls": "system_certificates" - }"#, - ) - .expect("to parse the config"); + if let Some(Credentials::AWS { .. }) = &self.credentials { + // In order to generate an initial OAuth Bearer token to be used by the consumer + // we need to call poll once. 
+ // See https://docs.confluent.io/platform/current/clients/librdkafka/html/classRdKafka_1_1OAuthBearerTokenRefreshCb.html + // Note that this is expected to return an error since we have no topic assignments yet + // hence the ignoring of the result + let _ = consumer.poll(Some(std::time::Duration::ZERO)); + } - let brokers = config.brokers(); - assert_eq!("localhost:9092,172.22.36.2:9093,localhost:9094", brokers); + Ok(consumer) } - #[test] - fn parse_config_file_test() { - use connector::ConnectorConfig; - - let input = r#" - { - "bootstrap_servers": "localhost:9093", - "tls": "system_certificates" + fn security_protocol(&self) -> &'static str { + match (&self.credentials, &self.tls) { + (None, Some(TlsSettings::SystemCertificates)) => "SSL", + (None, None) => "PLAINTEXT", + (Some(_), Some(TlsSettings::SystemCertificates)) => "SASL_SSL", + (Some(_), None) => "SASL_PLAINTEXT", } - "#; - - Configuration::parse(input).expect("to parse"); + } +} - let input = r#" - { - "bootstrap_servers": "localhost:9093", - "credentials": { - "auth_type": "UserPassword", - "mechanism": "SCRAM-SHA-256", - "username": "user", - "password": "password" - }, - "tls": null - } - "#; +#[derive(Serialize, Deserialize, JsonSchema)] +pub struct Resource { + #[schemars(title = "Topic", description = "Kafka topic to capture messages from.")] + pub topic: String, +} - Configuration::parse(input).expect("to parse"); - } +pub fn schema_for() -> RootSchema { + schemars::gen::SchemaSettings::draft2019_09() + .into_generator() + .into_root_schema_for::() } diff --git a/source-kafka/src/connector.rs b/source-kafka/src/connector.rs deleted file mode 100644 index e94a01c45b..0000000000 --- a/source-kafka/src/connector.rs +++ /dev/null @@ -1,48 +0,0 @@ -use std::io::Write; - -use proto_flow::capture::{Response, request}; -use proto_flow::flow::{CaptureSpec, RangeSpec}; - -pub trait ConnectorConfig: Sized { - type Error: std::error::Error + Send + Sync + 'static; - - fn parse(reader: &str) -> Result; -} - -pub trait Connector { - type Config: ConnectorConfig; - type State: ConnectorConfig; - - fn spec(output: &mut dyn Write) -> eyre::Result<()>; - fn validate(output: &mut dyn Write, validate: request::Validate) -> eyre::Result<()>; - fn discover(output: &mut dyn Write, discover: request::Discover) -> eyre::Result<()>; - fn apply(output: &mut dyn Write, config: Self::Config) -> eyre::Result<()>; - fn read( - output: &mut dyn Write, - config: Self::Config, - capture: CaptureSpec, - range: Option, - state: Option, - ) -> eyre::Result<()>; -} - -#[derive(Debug, thiserror::Error)] -pub enum StdoutError { - #[error("failed to write message to output")] - Output(#[source] serde_json::Error), - - #[error("failed to emit a newline between messages")] - OutputNewline(#[source] std::io::Error), -} - -pub fn write_message( - mut output: &mut dyn Write, - message: Response, -) -> Result<(), StdoutError> { - serde_json::to_writer(&mut output, &message) - .map_err(StdoutError::Output)?; - - // Include a newline to break up the document stream. 
- writeln!(&mut output).map_err(StdoutError::OutputNewline)?; - Ok(()) -} diff --git a/source-kafka/src/discover.rs b/source-kafka/src/discover.rs new file mode 100644 index 0000000000..231e9907e2 --- /dev/null +++ b/source-kafka/src/discover.rs @@ -0,0 +1,574 @@ +use std::collections::HashMap; + +use anyhow::{Context, Result}; +use apache_avro::schema::Schema as AvroSchema; +use doc::{ + shape::{schema::to_schema, ObjProperty}, + Shape, +}; +use json::schema::{self as JsonSchema, types}; +use proto_flow::capture::{request::Discover, response::discovered}; +use rdkafka::consumer::Consumer; +use schemars::schema::RootSchema; +use serde_json::json; + +use crate::{ + configuration::{EndpointConfig, Resource, SchemaRegistryConfig}, + schema_registry::{ + RegisteredSchema::{Avro, Json, Protobuf}, + SchemaRegistryClient, TopicSchema, + }, + KAFKA_TIMEOUT, +}; + +static KAFKA_INTERNAL_TOPICS: [&str; 3] = ["__consumer_offsets", "__amazon_msk_canary", "_schemas"]; + +pub async fn do_discover(req: Discover) -> Result> { + let config: EndpointConfig = serde_json::from_str(&req.config_json)?; + let consumer = config.to_consumer().await?; + + let meta = consumer + .fetch_metadata(None, KAFKA_TIMEOUT) + .context("Could not connect to bootstrap server with the provided configuration. This may be due to an incorrect configuration for authentication or bootstrap servers. Double check your configuration and try again.")?; + + let mut all_topics: Vec = meta + .topics() + .iter() + .filter_map(|t| { + let name = t.name(); + if KAFKA_INTERNAL_TOPICS.contains(&name) { + None + } else { + Some(name.to_string()) + } + }) + .collect(); + + all_topics.sort(); + + let registered_schemas = match config.schema_registry { + SchemaRegistryConfig::ConfluentSchemaRegistry { + endpoint, + username, + password, + } => { + let client = SchemaRegistryClient::new(endpoint, username, password); + client + .schemas_for_topics(&all_topics) + .await + .context("Could not connect to the configured schema registry. Double check your configuration and try again.")? + } + SchemaRegistryConfig::NoSchemaRegistry { .. } => HashMap::new(), + }; + + all_topics + .into_iter() + .filter_map(|topic| { + let registered_schema = match registered_schemas.get(&topic) { + Some(s) => s, + None => &TopicSchema::default(), + }; + + if matches!(®istered_schema.key, Some(Protobuf)) + || matches!(®istered_schema.value, Some(Protobuf)) + { + // TODO(whb): At some point we may want to support protobuf + // schemas. 
+ return None; + } + + let (collection_schema, key_ptrs) = + match topic_schema_to_collection_spec(registered_schema) { + Ok(s) => s, + Err(e) => return Some(Err(e)), + }; + + Some(Ok(discovered::Binding { + recommended_name: topic.to_owned(), + resource_config_json: serde_json::to_string(&Resource { + topic: topic.to_owned(), + }) + .expect("resource config must serialize"), + document_schema_json: serde_json::to_string(&collection_schema) + .expect("document schema must serialize"), + key: key_ptrs, + resource_path: vec![topic.to_owned()], + ..Default::default() + })) + }) + .collect::>>() +} + +fn topic_schema_to_collection_spec( + topic_schema: &TopicSchema, +) -> Result<(RootSchema, Vec)> { + let mut collection_key = vec!["/_meta/partition".to_string(), "/_meta/offset".to_string()]; + let doc_schema_json = json!({ + "x-infer-schema": true, + "if": { + "properties": { + "_meta": { + "properties": { + "op": { + "const": "d" + } + } + } + } + }, + "then": { + "reduce": { + "delete": true, + "strategy": "merge" + } + }, + "type": "object", + "properties": { + "_meta": { + "type": "object", + "properties": { + "topic": { + "description": "The topic the message was read from", + "type": "string", + }, + "partition": { + "description": "The partition the message was read from", + "type": "integer", + }, + "offset": { + "description": "The offset of the message within the partition", + "type": "integer", + }, + "op": { + "enum": ["c", "u", "d"], + "description": "Change operation type: 'c' Create/Insert, 'u' Update, 'd' Delete.", + } + }, + "required": ["offset", "op", "partition", "topic"] + } + }, + "required": ["_meta"] + }); + + let mut collection_schema: RootSchema = serde_json::from_value(doc_schema_json).unwrap(); + + let mut key_shape = match &topic_schema.key { + Some(Avro(schema)) => avro_key_schema_to_shape(schema)?, + Some(Json(schema)) => json_key_schema_to_shape(schema)?, + Some(Protobuf) => todo!("protobuf schemas are not yet supported"), + None => Shape::nothing(), + }; + + if usable_key_shape(&key_shape) { + if key_shape.type_ != JsonSchema::types::OBJECT { + // The topic key is a single value not part of a record, and + // that cannot be represented as a JSON document. There has + // to be a key/value pair in the document, so transmute this + // unnamed scalar value into a document with a synthetic key + // to reference its value. 
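// Illustrative note (sketch with assumed values): after this transformation, a topic whose
// registered key schema is a bare Avro "string" is discovered with collection key
// ["/_key"] and documents shaped like {"_key": "<key value>", ...}, whereas a record key
// with fields "firstKey" and "secondKey" yields the pointers ["/firstKey", "/secondKey"].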
+ let scalar_shape = key_shape.clone(); + key_shape = Shape::nothing(); + key_shape.type_ = JsonSchema::types::OBJECT; + key_shape.object.properties = vec![ObjProperty { + name: "_key".into(), + is_required: true, + shape: scalar_shape, + }]; + } + + collection_key = key_shape + .locations() + .into_iter() + .filter(|(ptr, _, shape, ..)| { + !shape.type_.overlaps(JsonSchema::types::OBJECT) && ptr.to_string() != "/*" + }) + .map(|(ptr, ..)| ptr.to_string()) + .collect(); + + let mut key_schema = to_schema(key_shape); + + collection_schema + .schema + .object() + .properties + .append(&mut key_schema.schema.object().properties); + + collection_schema + .schema + .object() + .required + .append(&mut key_schema.schema.object().required); + } + + Ok((collection_schema, collection_key)) +} + +fn avro_key_schema_to_shape(schema: &AvroSchema) -> Result { + let mut shape = Shape::nothing(); + + shape.type_ = match schema { + AvroSchema::Boolean => JsonSchema::types::BOOLEAN, + AvroSchema::Int | AvroSchema::Long => JsonSchema::types::INTEGER, + AvroSchema::String => JsonSchema::types::STRING, + AvroSchema::Bytes | AvroSchema::Fixed(_) => { + shape.string.content_encoding = Some("base64".into()); + JsonSchema::types::STRING + } + AvroSchema::Decimal(_) | AvroSchema::BigDecimal => { + shape.string.format = Some(JsonSchema::formats::Format::Number); + JsonSchema::types::STRING + } + AvroSchema::Uuid => { + shape.string.format = Some(JsonSchema::formats::Format::Uuid); + JsonSchema::types::STRING + } + AvroSchema::Date => { + shape.string.format = Some(JsonSchema::formats::Format::Date); + JsonSchema::types::STRING + } + AvroSchema::TimeMillis | AvroSchema::TimeMicros => { + shape.string.format = Some(JsonSchema::formats::Format::Time); + JsonSchema::types::STRING + } + AvroSchema::TimestampMillis + | AvroSchema::TimestampMicros + | AvroSchema::TimestampNanos + | AvroSchema::LocalTimestampMillis + | AvroSchema::LocalTimestampMicros + | AvroSchema::LocalTimestampNanos => { + shape.string.format = Some(JsonSchema::formats::Format::DateTime); + JsonSchema::types::STRING + } + AvroSchema::Duration => { + shape.string.format = Some(JsonSchema::formats::Format::Duration); + JsonSchema::types::STRING + } + AvroSchema::Enum(enum_schema) => { + shape.enum_ = Some( + enum_schema + .symbols + .iter() + .map(|s| s.to_string().into()) + .collect(), + ); + JsonSchema::types::STRING + } + AvroSchema::Record(record_schema) => { + shape.object.properties = record_schema + .fields + .iter() + .map(|field| { + let mut field_shape = avro_key_schema_to_shape(&field.schema)?; + + if let Some(doc) = &field.doc { + field_shape.description = Some(doc.to_string().into()); + } + if let Some(default) = &field.default { + field_shape.default = Some((default.to_owned(), None).into()); + } + + Ok(ObjProperty { + name: field.name.clone().into(), + is_required: field.default.is_none(), + shape: field_shape, + }) + }) + .collect::>>()?; + JsonSchema::types::OBJECT + } + + // Schemas that allow 'null' are not schematized as keys since + // nullable keys are not very useful in practice. + AvroSchema::Null => JsonSchema::types::INVALID, + + // Similarly, schemas with multiple types or a single type with an + // additional explicit 'null' are not very useful in practice, + // although technically allowed by Flow, so they won't be + // schematized. 
+ AvroSchema::Union(_) => JsonSchema::types::INVALID, + + // We could perhaps treat floating points as string-encoded numbers, + // but if that's what they were then they should probably specified + // as decimals. And this is more consistent with what is achievable + // with JSON schemas. + AvroSchema::Float => JsonSchema::types::INVALID, + AvroSchema::Double => JsonSchema::types::INVALID, + + // Arrays and maps just don't make any sense as key schemas. + AvroSchema::Array(_) => JsonSchema::types::INVALID, + AvroSchema::Map(_) => JsonSchema::types::INVALID, + + AvroSchema::Ref { name } => anyhow::bail!("Avro key schema contains reference {}", name), + }; + + Ok(shape) +} + +fn json_key_schema_to_shape(schema: &serde_json::Value) -> Result { + let json_schema = doc::validation::build_bundle(&schema.to_string())?; + let validator = doc::Validator::new(json_schema)?; + Ok(doc::Shape::infer( + &validator.schemas()[0], + validator.schema_index(), + )) +} + +fn usable_key_shape(shape: &Shape) -> bool { + // Schemas may be valid keys if all the properties are keyable and + // non-nullable, including nested properties. Non-nullable here means they + // can't be an explicit null, and either must exist or have a default value. + shape + .locations() + .iter() + .all(|(ptr, pattern, shape, exists)| { + if ptr.to_string() == "/*" && shape.type_ == types::INVALID { + // This represents an "additionalProperties: false" + // configuration, which must be the case for a valid key schema. + return true; + } + + !pattern + && (exists.must() || shape.default.is_some()) + && !shape.type_.overlaps(JsonSchema::types::NULL) + && (shape.type_.is_keyable_type() || shape.type_ == types::OBJECT) + }) +} + +#[cfg(test)] +mod tests { + use insta::assert_snapshot; + + use super::*; + + #[test] + fn test_topic_schema_to_collection_spec() { + let test_cases = [ + ( + "no key", + &TopicSchema { + key: None, + ..Default::default() + }, + ), + ( + "single scalar avro key", + &TopicSchema { + key: Some(Avro( + apache_avro::Schema::parse(&json!({"type": "string"})).unwrap(), + )), + ..Default::default() + }, + ), + ( + "single nullable scalar avro key", + &TopicSchema { + key: Some(Avro( + apache_avro::Schema::parse(&json!({"type": ["null", "string"]})).unwrap(), + )), + ..Default::default() + }, + ), + ( + "single non-scalar avro key", + &TopicSchema { + key: Some(Avro( + apache_avro::Schema::parse(&json!({"type": "array", "items": "string"})) + .unwrap(), + )), + ..Default::default() + }, + ), + ( + "avro record with scalar compound key", + &TopicSchema { + key: Some(Avro( + apache_avro::Schema::parse(&json!({ + "type": "record", + "name": "someRecord", + "fields": [ + {"name": "firstKey", "type": "string", "doc": "the first key field"}, + {"name": "secondKey", "type": "long", "doc": "the second key field"}, + { + "name": "thirdKey", + "type": "enum", + "symbols": ["a", "b", "c"], + "default": "a", + "doc": "the third key field, which is an enum with a default value", + }, + ], + })) + .unwrap(), + )), + ..Default::default() + }, + ), + ( + "nested avro record with scalars", + &TopicSchema { + key: Some(Avro( + apache_avro::Schema::parse(&json!({ + "type": "record", + "name": "someRecord", + "fields": [ + {"name": "firstKey", "type": "string", "doc": "the first key field"}, + { + "name": "nestedRecord", + "type": "record", + "fields": [ + {"name": "secondKeyNested", "type": "long", "doc": "the second key field"}, + {"name": "thirdKeyNested", "type": "bytes", "doc": "the third key field"}, + ], + }, + ], + })) + .unwrap(), + 
)), + ..Default::default() + }, + ), + ( + "single scalar json key", + &TopicSchema { + key: Some(Json(json!({"type": "string"}))), + ..Default::default() + }, + ), + ( + "single nullable scalar json key", + &TopicSchema { + key: Some(Json(json!({"type": ["null", "string"]}))), + ..Default::default() + }, + ), + ( + "nested json object", + &TopicSchema { + key: Some(Json(json!({ + "type": "object", + "properties": { + "firstKey": {"type": "string"}, + "nestedObject": { + "type": "object", + "properties": { + "secondKeyNested": {"type": "integer"}, + "thirdKeyNested": {"type": "boolean"}, + }, + "required": ["secondKeyNested", "thirdKeyNested"], + }, + }, + "required": ["firstKey", "nestedObject"], + "additionalProperties": false + }))), + ..Default::default() + }, + ), + ]; + + for (name, input) in test_cases { + let (discovered_schema, discovered_key) = + topic_schema_to_collection_spec(input).unwrap(); + let mut snap = String::new(); + snap.push_str(&serde_json::to_string(&discovered_key).unwrap()); + snap.push_str("\n"); + snap.push_str(&serde_json::to_string_pretty(&discovered_schema).unwrap()); + assert_snapshot!(name, snap); + } + } + + #[test] + fn test_avro_key_schema_to_shape() { + let test_cases = [ + ( + vec![json!({"type": "boolean"})], + Some(json!({"type": "boolean"})), + ), + ( + vec![json!({"type": "int"}), json!({"type": "long"})], + Some(json!({"type": "integer"})), + ), + ( + vec![json!({"type": "string"})], + Some(json!({"type": "string"})), + ), + ( + vec![ + json!({"type": "bytes"}), + json!({"type": "fixed", "name": "foo", "size": 10}), + ], + Some(json!({"type": "string", "contentEncoding": "base64"})), + ), + ( + vec![ + json!({"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}), + json!({"type": "bytes", "logicalType": "big-decimal", "precision": 4, "scale": 2}), + ], + Some(json!({"type": "string", "format": "number"})), + ), + ( + vec![json!({"type": "string", "logicalType": "uuid"})], + Some(json!({"type": "string", "format": "uuid"})), + ), + ( + vec![json!({"type": "int", "logicalType": "date"})], + Some(json!({"type": "string", "format": "date"})), + ), + ( + vec![ + json!({"type": "int", "logicalType": "time-millis"}), + json!({"type": "long", "logicalType": "time-micros"}), + ], + Some(json!({"type": "string", "format": "time"})), + ), + ( + vec![ + json!({"type": "long", "logicalType": "timestamp-millis"}), + json!({"type": "long", "logicalType": "timestamp-micros"}), + json!({"type": "long", "logicalType": "local-timestamp-millis"}), + json!({"type": "long", "logicalType": "local-timestamp-micros"}), + // TODO(whb): These nanosecond timestamps are not parsed by + // the Avro library and I'm not sure if they are really part + // of the spec. 
+ // json!({"type": "long", "logicalType": "timestamp-nanos"})), + // json!({"type": "long", "logicalType": "local-timestamp-nanos"})), + ], + Some(json!({"type": "string", "format": "date-time"})), + ), + ( + vec![ + json!({"type": "fixed", "name": "foo", "size": 12, "logicalType": "duration"}), + ], + Some(json!({"type": "string", "format": "duration"})), + ), + ( + vec![json!({"type": "enum", "name": "foo", "symbols": ["a", "b", "c"]})], + Some(json!({"type": "string", "enum": ["a", "b", "c"]})), + ), + ( + vec![ + json!({"type": "null"}), + json!({"type": ["null", "long"]}), + json!({"type": "float"}), + json!({"type": "double"}), + json!({"type": "array", "items": "string"}), + json!({"type": "map", "values": "string"}), + ], + None, + ), + ]; + + for (schema_jsons, want) in test_cases { + for schema_json in schema_jsons { + let shape = + avro_key_schema_to_shape(&AvroSchema::parse(&schema_json).unwrap()).unwrap(); + if usable_key_shape(&shape) { + assert_eq!( + serde_json::to_value(&to_schema(shape).schema).unwrap(), + serde_json::to_value(&want.clone().unwrap()).unwrap() + ) + } else { + assert!(want.is_none()) + } + } + } + } +} diff --git a/source-kafka/src/kafka.rs b/source-kafka/src/kafka.rs deleted file mode 100644 index f59a120a33..0000000000 --- a/source-kafka/src/kafka.rs +++ /dev/null @@ -1,330 +0,0 @@ -use std::error::Error as StdError; -use std::time::Duration; - -use proto_flow::capture::{request, response, Response}; -use proto_flow::flow::capture_spec::Binding; -use rdkafka::consumer::{BaseConsumer, Consumer, ConsumerContext}; -use rdkafka::error::KafkaError; -use rdkafka::message::BorrowedMessage; -use rdkafka::metadata::{Metadata, MetadataPartition, MetadataTopic}; -use rdkafka::{ClientConfig, Message, TopicPartitionList}; -use rdkafka::client::{OAuthToken, ClientContext}; -use serde_json::json; - -use crate::catalog::Resource; -use crate::configuration::{Configuration, Credentials}; -use crate::{catalog, state}; - -const KAFKA_TIMEOUT: Duration = Duration::from_secs(5); - -#[derive(Debug, thiserror::Error)] -pub enum Error { - #[error("error creating consumer from config")] - Config(#[source] KafkaError), - - #[error("failed to fetch cluster metadata ({0})")] - Metadata(String, #[source] KafkaError), - - #[error("failed to fetch watermarks")] - Watermarks(#[source] KafkaError), - - #[error("failed to subscribe to topic")] - Subscription(#[source] KafkaError), - - #[error("failed to read message")] - Read(#[source] KafkaError), -} - -pub struct FlowConsumerContext { - auth: Option, -} - -impl ClientContext for FlowConsumerContext { - const ENABLE_REFRESH_OAUTH_TOKEN: bool = true; - - fn generate_oauth_token(&self, _oauthbearer_config: Option<&str>) -> Result> { - if let Some(Credentials::AWS { region, access_key_id, secret_access_key }) = &self.auth { - let (token, lifetime_ms) = crate::msk_oauthbearer::token(region, access_key_id, secret_access_key)?; - return Ok(OAuthToken { - // This is just a descriptive name of the principal which is accessing - // the resource, not a specific constant - principal_name: "flow-kafka-capture".to_string(), - token, - lifetime_ms, - }) - } else { - return Err(eyre::eyre!("generate_oauth_token called without AWS credentials").into()) - } - } -} -impl ConsumerContext for FlowConsumerContext {} - -pub fn consumer_from_config(configuration: &Configuration) -> eyre::Result> { - let mut config = ClientConfig::new(); - - config.set("bootstrap.servers", configuration.brokers()); - - // We want to avoid writing ConsumerGroup commits back to 
Kafka. We manage - // our own transactional semantics within Flow, so we don't need to rely on - // Kafka to help with that. - config.set("enable.auto.commit", "false"); - - // Despite wanting to avoid using ConsumerGroups, we *must* set this - // `group.id` in order to subscribe to topics. librdkafka will throw an - // error if this is left blank. - config.set("group.id", "source-kafka"); - - config.set("security.protocol", configuration.security_protocol()); - - let ctx = FlowConsumerContext { auth: configuration.credentials.clone() }; - - if let Some(Credentials::UserPassword { mechanism, username, password }) = &configuration.credentials { - config.set("sasl.mechanism", mechanism.to_string()); - config.set("sasl.username", username); - config.set("sasl.password", password); - } else if let Some(Credentials::AWS { .. }) = &configuration.credentials { - config.set("sasl.mechanism", "OAUTHBEARER"); - - if configuration.security_protocol() != "SASL_SSL" { - return Err(eyre::eyre!("must use tls=system_certificates for AWS").into()) - } - } - - let consumer: BaseConsumer = config.create_with_context(ctx).map_err(Error::Config)?; - - if let Some(Credentials::AWS { .. }) = &configuration.credentials { - // In order to generate an initial OAuth Bearer token to be used by the consumer - // we need to call poll once. - // See https://docs.confluent.io/platform/current/clients/librdkafka/html/classRdKafka_1_1OAuthBearerTokenRefreshCb.html - // Note that this is expected to return an error since we have no topic assignments yet - // hence the ignoring of the result - let _ = consumer.poll(KAFKA_TIMEOUT); - } - - Ok(consumer) -} - -pub fn test_connection>( - configuration: &Configuration, - consumer: &C, - bindings: Vec, -) -> Result { - let metadata = fetch_metadata(configuration, consumer)?; - Ok(Response { - validated: Some(response::Validated { - bindings: metadata - .topics() - .iter() - .filter(|topic| { - bindings.iter().any(|binding| { - let res: Resource = serde_json::from_str(&binding.resource_config_json) - .expect("parse resource config"); - res.stream == topic.name() - }) - }) - .map(|topic| response::validated::Binding { - resource_path: vec![topic.name().to_string()], - }) - .collect(), - }), - ..Default::default() - }) -} - -pub fn fetch_metadata>( - configuration: &Configuration, - consumer: &C, -) -> Result { - consumer - .fetch_metadata(None, Some(KAFKA_TIMEOUT)) - .map_err(|err| Error::Metadata(configuration.brokers(), err)) -} - -pub fn available_streams(metadata: &Metadata) -> Vec { - metadata - .topics() - .iter() - .filter(reject_internal_topics) - .map(MetadataTopic::name) - .map(|s| response::discovered::Binding { - recommended_name: s.to_owned(), - resource_config_json: serde_json::to_string(&json!({ - "stream": s.to_owned(), - })) - .expect("resource config"), - document_schema_json: serde_json::to_string(&json!({ - "x-infer-schema": true, - "type": "object", - "properties": { - "_meta": { - "type": "object", - "properties": { - "partition": { - "description": "The partition the message was read from", - "type": "integer", - }, - "offset": { - "description": "The offset of the message within the partition", - "type": "integer", - } - }, - "required": ["partition", "offset"] - } - }, - "required": ["_meta"] - })) - .expect("document schema"), - key: vec!["/_meta/partition".to_string(), "/_meta/offset".to_string()], - resource_path: vec![s.to_owned()], - disable: false, - }) - .collect() -} - -pub fn find_topic<'m>(metadata: &'m Metadata, needle: &str) -> Option<&'m 
MetadataTopic> { - metadata - .topics() - .iter() - .find(|topic| topic.name() == needle) -} - -/// Subscribes to the given topic/partitions and begins reading from the specified offsets. -/// -/// **Warning**: This will _unsubscribe_ the consumer from any previous topic partitions. -pub fn subscribe( - consumer: &BaseConsumer, - checkpoints: &state::CheckpointSet, -) -> Result { - let mut topic_partition_list = TopicPartitionList::new(); - for checkpoint in checkpoints.iter() { - topic_partition_list - .add_partition_offset( - &checkpoint.topic, - checkpoint.partition, - checkpoint.offset.next().into(), - ) - .map_err(Error::Subscription)?; - } - - consumer - .assign(&topic_partition_list) - .map_err(Error::Subscription)?; - - Ok(topic_partition_list) -} - -pub fn high_watermarks( - consumer: &BaseConsumer, - checkpoints: &state::CheckpointSet, -) -> Result { - let mut watermarks = state::CheckpointSet::default(); - - for checkpoint in checkpoints.iter() { - // The low watermark represents the first message that can be read. - // The high watermark is the "latest head" offset of the partition. This - // is effectively the index of the next message to be read. - let (low, high) = consumer - .fetch_watermarks(&checkpoint.topic, checkpoint.partition, KAFKA_TIMEOUT) - .map_err(Error::Watermarks)?; - - let offset = if high == 0 || high == low { - // We can consider a partition to be at the beginning if: - // - If the next message we receive will be Offset=0, then we haven't - // read *any* messages yet. - // - The low watermark can change over time as Kafka performs - // compaction on the partition. If we've compacted away _all_ the - // previous messages, the next message we can read will be the very - // first message which currently exists in this partition. - state::Offset::Start - } else { - // We subtract 1 from the high watermark since it is zero-indexed. - // If we try to read up *through* `high`, we'll potentially sit here - // forever waiting on the next message to come through. - state::Offset::UpThrough(high - 1) - }; - - watermarks.add(state::Checkpoint::new( - &checkpoint.topic, - checkpoint.partition, - offset, - )); - } - - Ok(watermarks) -} - -#[derive(Debug, thiserror::Error)] -pub enum ProcessingError { - #[error("failed when interacting with kafka")] - Kafka(#[from] KafkaError), - - #[error("failed to parse message `{0}`")] - Parsing(String, #[source] serde_json::Error), - - #[error("failed to encode message: `{0}`")] - Encoding(#[source] serde_json::Error), - - #[error("message contained no payload")] - EmptyMessage, -} - -pub fn process_message<'m>( - msg: &'m BorrowedMessage<'m>, - bindings: &Vec, -) -> Result<(response::Captured, state::Checkpoint), ProcessingError> { - let mut payload = parse_message(msg)?; - - let binding_index = bindings - .iter() - .position(|s| { - let res = serde_json::from_str::(&s.resource_config_json) - .expect("to parse resource config"); - res.stream == msg.topic() - }) - .expect("got message for unknown binding"); - - let meta = json!({ - "partition": msg.partition(), - "offset": msg.offset() - }); - - payload - .as_object_mut() - .unwrap() - .insert("_meta".to_string(), meta); - - let message = response::Captured { - binding: binding_index as u32, - doc_json: serde_json::to_string(&payload).map_err(ProcessingError::Encoding)?, - }; - let checkpoint = state::Checkpoint::new(msg.topic(), msg.partition(), state::Offset::from(msg)); - Ok((message, checkpoint)) -} - -// TODO: replace with CLI parser? 
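// Background sketch (based on the Confluent wire format, not stated in this diff): a
// registry-encoded record is framed as one magic byte 0x00, a 4-byte big-endian schema ID,
// and then the encoded payload. That is why the old parse_message below skips the first
// 5 bytes, and why the new parse_datum in pull.rs dispatches on datum[0] == 0.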
-fn parse_message<'m>(msg: &'m BorrowedMessage<'m>) -> Result { - let mut bytes = msg.payload().ok_or(ProcessingError::EmptyMessage)?; - - // Strip a Confluent Schema Registry magic byte and schema ID. - if bytes.starts_with(&[0]) && bytes.len() >= 5 { - bytes = &bytes[5..]; - } - - serde_json::from_slice(bytes).map_err(|serde_error| { - // TODO: Capturing the raw_payload as a string is handy for - // debugging, but may not be what we want long term. - ProcessingError::Parsing(String::from_utf8_lossy(bytes).to_string(), serde_error) - }) -} - -static KAFKA_INTERNAL_TOPICS: [&str; 2] = ["__consumer_offsets", "__amazon_msk_canary"]; - -fn reject_internal_topics(topic: &&MetadataTopic) -> bool { - !KAFKA_INTERNAL_TOPICS.contains(&topic.name()) -} - -pub fn build_shard_key(topic: &MetadataTopic, partition: &MetadataPartition) -> catalog::ShardKey { - catalog::ShardKey::default() - .add_str(topic.name()) - .add_int(partition.id()) -} diff --git a/source-kafka/src/lib.rs b/source-kafka/src/lib.rs index 2e8bc34e87..3b4f07dc11 100644 --- a/source-kafka/src/lib.rs +++ b/source-kafka/src/lib.rs @@ -1,172 +1,159 @@ -extern crate serde_with; - -use crate::connector::ConnectorConfig; -use proto_flow::capture::{request, response, Response}; -use proto_flow::flow::{CaptureSpec, ConnectorState, RangeSpec}; -use schemars::schema_for; -use serde_json::json; - -use std::fmt::Debug; use std::io::Write; -pub mod catalog; +use anyhow::{Context, Result}; +use configuration::{schema_for, EndpointConfig, Resource, SchemaRegistryConfig}; +use discover::do_discover; +use proto_flow::capture::{ + request::Validate, + response::{ + validated::Binding as ValidatedBinding, Applied, Discovered, Opened, Spec, Validated, + }, + Request, Response, +}; +use pull::do_pull; +use rdkafka::consumer::Consumer; +use schema_registry::SchemaRegistryClient; +use tokio::io::{self, AsyncBufReadExt}; + pub mod configuration; -pub mod connector; -pub mod kafka; -pub mod state; +pub mod discover; pub mod msk_oauthbearer; - -pub struct KafkaConnector; - -const PROTOCOL_VERSION: u32 = 3032023; - -impl connector::Connector for KafkaConnector { - type Config = configuration::Configuration; - type State = state::CheckpointSet; - - fn spec(output: &mut dyn Write) -> eyre::Result<()> { - let message = Response { - spec: Some(response::Spec { - protocol: PROTOCOL_VERSION, - config_schema_json: serde_json::to_string(&schema_for!( - configuration::Configuration - ))?, - resource_config_schema_json: serde_json::to_string(&json!({ - "type": "object", - "properties": { - "stream": { - "type": "string", - "x-collection-name": true, - } - } - }))?, +pub mod pull; +pub mod schema_registry; + +const KAFKA_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(5); + +pub async fn run_connector( + mut stdin: io::BufReader, + mut stdout: std::io::Stdout, +) -> Result<(), anyhow::Error> { + tracing::info!("running connector"); + + let mut line = String::new(); + + if stdin.read_line(&mut line).await? == 0 { + return Ok(()); // Clean EOF. 
+ }; + let request: Request = serde_json::from_str(&line)?; + + if request.spec.is_some() { + let res = Response { + spec: Some(Spec { + protocol: 3032023, + config_schema_json: serde_json::to_string(&schema_for::())?, + resource_config_schema_json: serde_json::to_string(&schema_for::())?, documentation_url: "https://go.estuary.dev/source-kafka".to_string(), oauth2: None, - resource_path_pointers: vec!["/stream".to_string()], + resource_path_pointers: vec!["/topic".to_string()], }), ..Default::default() }; - connector::write_message(output, message)?; - Ok(()) - } - - fn validate(output: &mut dyn Write, mut validate: request::Validate) -> eyre::Result<()> { - let config = Self::Config::parse(&validate.config_json)?; - let consumer = kafka::consumer_from_config(&config)?; - // This is because validate implements drop (see: `rustc --explain E0509`) - let bindings = std::mem::take(&mut validate.bindings); - let message = kafka::test_connection(&config, &consumer, bindings)?; - - connector::write_message(output, message)?; - Ok(()) - } - - fn discover(output: &mut dyn Write, discover: request::Discover) -> eyre::Result<()> { - let config = Self::Config::parse(&discover.config_json)?; - let consumer = kafka::consumer_from_config(&config)?; - let metadata = kafka::fetch_metadata(&config, &consumer)?; - let bindings = kafka::available_streams(&metadata); - let message = Response { - discovered: Some(response::Discovered { bindings }), + write_capture_response(res, &mut stdout)?; + } else if let Some(req) = request.discover { + let res = Response { + discovered: Some(Discovered { + bindings: do_discover(req).await?, + }), ..Default::default() }; - connector::write_message(output, message)?; - Ok(()) - } - - fn apply(output: &mut dyn Write, _config: Self::Config) -> eyre::Result<()> { - connector::write_message( - output, - Response { - applied: Some(response::Applied { - action_description: "".to_string(), - }), - ..Default::default() - }, - )?; - - Ok(()) - } + write_capture_response(res, &mut stdout)?; + } else if let Some(req) = request.validate { + let res = Response { + validated: Some(Validated { + bindings: do_validate(req).await?, + }), + ..Default::default() + }; - fn read( - output: &mut dyn Write, - config: Self::Config, - capture: CaptureSpec, - range: Option, - persisted_state: Option, - ) -> eyre::Result<()> { - let consumer = kafka::consumer_from_config(&config)?; - let metadata = kafka::fetch_metadata(&config, &consumer)?; - - let mut checkpoints = state::CheckpointSet::reconcile_catalog_state( - &metadata, - &capture, - range.as_ref(), - &persisted_state.unwrap_or_default(), - )?; - kafka::subscribe(&consumer, &checkpoints)?; + write_capture_response(res, &mut stdout)?; + } else if request.apply.is_some() { + let res = Response { + applied: Some(Applied { + action_description: String::new(), + }), + ..Default::default() + }; - connector::write_message( - output, + write_capture_response(res, &mut stdout)?; + } else if let Some(req) = request.open { + write_capture_response( Response { - opened: Some(response::Opened { + opened: Some(Opened { explicit_acknowledgements: false, }), ..Default::default() }, + &mut stdout, )?; - loop { - let msg = consumer - .poll(None) - .expect("Polling without a timeout should always produce a message") - .map_err(kafka::Error::Read)?; - - let (record, checkpoint) = kafka::process_message(&msg, &capture.bindings)?; - - let delta_state = response::Checkpoint { - state: Some(ConnectorState { - updated_json: serde_json::to_string(&json!({ - 
checkpoint.topic.clone(): { - checkpoint.partition.to_string(): checkpoint.offset - } - }))?, - merge_patch: false, - }), - }; - checkpoints.add(checkpoint); - - connector::write_message( - output, - Response { - captured: Some(record), - ..Default::default() - }, - )?; - connector::write_message( - output, - Response { - checkpoint: Some(delta_state), - ..Default::default() - }, - )?; + let eof = tokio::spawn(async move { + match stdin.read_line(&mut line).await? { + 0 => Ok(()), + n => anyhow::bail!( + "read {} bytes from stdin when explicit acknowledgements were not requested", + n + ), + } + }); + + let pull = tokio::spawn(do_pull(req, stdout)); + + tokio::select! { + pull_res = pull => pull_res??, + eof_res = eof => eof_res??, } + } else { + anyhow::bail!("invalid request, expected spec|discover|validate|apply|open"); } -} -#[derive(Debug, thiserror::Error)] -pub enum Error { - #[error("failed when interacting with kafka")] - Kafka(#[from] kafka::Error), + Ok(()) +} - #[error("failed to process message")] - Message(#[from] kafka::ProcessingError), +pub fn write_capture_response( + response: Response, + stdout: &mut std::io::Stdout, +) -> anyhow::Result<()> { + serde_json::to_writer(&mut *stdout, &response).context("serializing response")?; + writeln!(stdout).context("writing response newline")?; - #[error("failed to execute catalog")] - Catalog(#[from] catalog::Error), + if response.captured.is_none() { + stdout.flush().context("flushing stdout")?; + } + Ok(()) +} - #[error("failed to track state")] - State(#[from] state::Error), +async fn do_validate(req: Validate) -> Result> { + let config: EndpointConfig = serde_json::from_str(&req.config_json)?; + let consumer = config.to_consumer().await?; + + consumer + .fetch_metadata(None, KAFKA_TIMEOUT) + .context("Could not connect to bootstrap server with the provided configuration. This may be due to an incorrect configuration for authentication or bootstrap servers. Double check your configuration and try again.")?; + + match config.schema_registry { + SchemaRegistryConfig::ConfluentSchemaRegistry { + endpoint, + username, + password, + } => { + let client = SchemaRegistryClient::new(endpoint, username, password); + client + .schemas_for_topics(&[]) + .await + .context("Could not connect to the configured schema registry. Double check your configuration and try again.")?; + } + SchemaRegistryConfig::NoSchemaRegistry { .. 
} => (), + }; + + req.bindings + .iter() + .map(|binding| { + let res: Resource = serde_json::from_str(&binding.resource_config_json)?; + Ok(ValidatedBinding { + resource_path: vec![res.topic], + }) + }) + .collect() } diff --git a/source-kafka/src/main.rs b/source-kafka/src/main.rs index c46e3f8e41..214079f37f 100644 --- a/source-kafka/src/main.rs +++ b/source-kafka/src/main.rs @@ -1,62 +1,54 @@ -use std::io::{stdout, stdin}; - -use proto_flow::capture::Request; -use source_kafka::connector::{Connector, StdoutError, ConnectorConfig}; -use source_kafka::KafkaConnector; -use tracing_subscriber::fmt::format::FmtSpan; -use tracing_subscriber::EnvFilter; - -fn main() -> eyre::Result<()> { - setup_tracing(); - - let stdout_guard = stdout(); - let mut stdout = Box::new(stdout_guard.lock()); - - let lines = stdin().lines(); - - for line in lines { - let request: Request = serde_json::from_str(&line.unwrap())?; - - let result = if let Some(_spec) = request.spec { - ::spec(&mut stdout) - } else if let Some(validate) = request.validate { - ::validate(&mut stdout, validate) - } else if let Some(discover) = request.discover { - ::discover(&mut stdout, discover) - } else if let Some(apply) = request.apply { - let capture = apply.capture.expect("empty capture"); - ::apply(&mut stdout, ::Config::parse(&capture.config_json)?) - } else if let Some(open) = request.open { - let state = ::State::parse(&open.state_json)?; - let capture = open.capture.expect("empty capture"); - - ::read( - &mut stdout, - ::Config::parse(&capture.config_json)?, - capture, - open.range, - Some(state), - ) - } else { - Ok(()) - }; - - match result { - Err(e) if e.is::() => { - // Stdout has been closed, so we should gracefully shut down rather than panicking. - return Ok(()); - } - otherwise => otherwise.expect("error"), - } +use anyhow::Context; +use source_kafka::run_connector; +use tokio::io; + +fn main() -> anyhow::Result<()> { + let runtime = start_runtime()?; + + let stdin = io::BufReader::new(io::stdin()); + let stdout = std::io::stdout(); + + let result = runtime.block_on(run_connector(stdin, stdout)); + + if let Err(err) = result.as_ref() { + tracing::error!(error = ?err, "operation failed"); + } else { + tracing::debug!("connector run successful"); } - Ok(()) + runtime.shutdown_background(); + + result } -pub fn setup_tracing() { +fn start_runtime() -> anyhow::Result { + // The level string "debug" results in enabling debug logging for all the crates + // in the dependency tree, which produces a ridiculous amount of output. + // So map the debug level to a filter that will still use info level for other crates. 
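// Illustrative behavior of the filter mapping below (assumed example values):
//   LOG_LEVEL=debug  -> EnvFilter "source_kafka=debug,info" (debug for this crate only)
//   LOG_LEVEL=warn   -> passed through unchanged as "warn"
//   LOG_LEVEL unset  -> defaults to "info"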
+ let level_str = match std::env::var("LOG_LEVEL").ok() { + Some(lvl) if lvl.as_str() == "debug" => "source_kafka=debug,info".to_string(), + Some(other) => other, + None => "info".to_string(), + }; + + let log_level = tracing_subscriber::EnvFilter::builder().parse_lossy(level_str); tracing_subscriber::fmt() - .with_span_events(FmtSpan::ENTER | FmtSpan::EXIT) .with_writer(std::io::stderr) - .with_env_filter(EnvFilter::from_default_env()) + .with_env_filter(log_level) + .json() + .flatten_event(true) + .with_timer(tracing_subscriber::fmt::time::UtcTime::rfc_3339()) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE) + .with_current_span(true) + .with_span_list(false) + .with_thread_ids(false) + .with_thread_names(false) + .with_target(false) .init(); + + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .context("building tokio runtime")?; + Ok(runtime) } diff --git a/source-kafka/src/msk_oauthbearer.rs b/source-kafka/src/msk_oauthbearer.rs index 2a02cfe029..c26cadd68a 100644 --- a/source-kafka/src/msk_oauthbearer.rs +++ b/source-kafka/src/msk_oauthbearer.rs @@ -1,9 +1,12 @@ -use base64::prelude::{Engine as _, BASE64_URL_SAFE_NO_PAD}; +use anyhow::Result; use aws_sdk_iam::config::Credentials; -use aws_sigv4::http_request::{sign, SigningSettings, SignableBody, SignableRequest, SignatureLocation}; +use aws_sigv4::http_request::{ + sign, SignableBody, SignableRequest, SignatureLocation, SigningSettings, +}; use aws_sigv4::sign::v4; +use base64::prelude::{Engine as _, BASE64_URL_SAFE_NO_PAD}; use http; -use std::time::{SystemTime, Duration}; +use std::time::{Duration, SystemTime}; /* Generate a token for Amazon Streaming Kafka service. * This is based on AWS V4: https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html @@ -24,8 +27,11 @@ use std::time::{SystemTime, Duration}; // Taken from the Go SDK's implementation // https://github.com/aws/aws-msk-iam-sasl-signer-go/blob/main/signer/msk_auth_token_provider.go#L33 const DEFAULT_EXPIRY_SECONDS: u64 = 900; -pub fn token(region: &str, access_key_id: &str, secret_access_key: &str) -> eyre::Result::<(String, i64)> { - let endpoint = format!("https://kafka.{}.amazonaws.com/?Action=kafka-cluster%3AConnect", region); +pub fn token(region: &str, access_key_id: &str, secret_access_key: &str) -> Result<(String, i64)> { + let endpoint = format!( + "https://kafka.{}.amazonaws.com/?Action=kafka-cluster%3AConnect", + region + ); let expiry_duration = Duration::new(DEFAULT_EXPIRY_SECONDS, 0); let now = SystemTime::now(); @@ -35,8 +41,9 @@ pub fn token(region: &str, access_key_id: &str, secret_access_key: &str) -> eyre secret_access_key, None, None, - "user credentials" - ).into(); + "user credentials", + ) + .into(); let mut signing_settings = SigningSettings::default(); // The default behaviour of the signing library is to put the signature in headers. @@ -62,8 +69,9 @@ pub fn token(region: &str, access_key_id: &str, secret_access_key: &str) -> eyre "GET", &endpoint, std::iter::empty(), - SignableBody::Bytes(&[]) - ).expect("signable request"); + SignableBody::Bytes(&[]), + ) + .expect("signable request"); // Create an empty draft HTTP request. The `sign` function provides us with a bunch of // "signing instructions" which we then apply to this draft HTTP request. The signing @@ -81,10 +89,13 @@ pub fn token(region: &str, access_key_id: &str, secret_access_key: &str) -> eyre // Finally add User Agent to the final signed url. 
This is based on the Go SDK that does this // after signing: https://github.com/aws/aws-msk-iam-sasl-signer-go/blob/main/signer/msk_auth_token_provider.go#L188 - let signed_url = format!("{}&User-Agent=EstuaryFlowCapture", signed_req.uri().to_string()); + let signed_url = format!("{}&User-Agent=EstuaryFlowCapture", signed_req.uri()); let token = BASE64_URL_SAFE_NO_PAD.encode(signed_url); - let expires_in = (now + expiry_duration).duration_since(SystemTime::UNIX_EPOCH).unwrap().as_millis(); + let expires_in = (now + expiry_duration) + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_millis(); - return Ok((token, expires_in.try_into()?)) + Ok((token, expires_in.try_into()?)) } diff --git a/source-kafka/src/pull.rs b/source-kafka/src/pull.rs new file mode 100644 index 0000000000..7121d98775 --- /dev/null +++ b/source-kafka/src/pull.rs @@ -0,0 +1,714 @@ +use crate::{ + configuration::{EndpointConfig, FlowConsumerContext, Resource, SchemaRegistryConfig}, + schema_registry::{RegisteredSchema, SchemaRegistryClient}, + write_capture_response, +}; +use anyhow::{anyhow, Context, Result}; +use apache_avro::{types::Value as AvroValue, Schema as AvroSchema}; +use base64::engine::general_purpose::STANDARD as base64; +use base64::Engine; +use bigdecimal::BigDecimal; +use hex::decode; +use highway::{HighwayHash, HighwayHasher, Key}; +use lazy_static::lazy_static; +use proto_flow::{ + capture::{ + request::Open, + response::{self, Checkpoint}, + Response, + }, + flow::{capture_spec::Binding, ConnectorState, RangeSpec}, +}; +use rdkafka::{ + consumer::{BaseConsumer, Consumer}, + message::Headers, + metadata::MetadataPartition, + Message, Offset, Timestamp, TopicPartitionList, +}; +use serde::{Deserialize, Serialize}; +use serde_json::{json, Map}; +use std::collections::{hash_map::Entry, HashMap}; +use time::{format_description, OffsetDateTime}; + +#[derive(Debug, Deserialize, Serialize, Default)] +struct CaptureState { + #[serde(rename = "bindingStateV1")] + resources: HashMap, +} + +impl CaptureState { + fn state_slice(state_key: &str, partition: i32, offset: i64) -> Self { + let mut partitions = HashMap::new(); + partitions.insert(partition, offset); + let mut resources = HashMap::new(); + resources.insert(state_key.to_string(), ResourceState { partitions }); + Self { resources } + } +} + +#[derive(Debug, Deserialize, Serialize, Default)] +struct ResourceState { + partitions: HashMap, +} + +struct BindingInfo { + binding_index: u32, + state_key: String, +} + +#[derive(Serialize, Deserialize, Default)] +struct Meta { + topic: String, + partition: i32, + offset: i64, + op: String, + headers: Option>, + timestamp: Option, +} + +#[derive(Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +enum MetaTimestamp { + CreationTime(String), + LogAppendTime(String), +} + +pub async fn do_pull(req: Open, mut stdout: std::io::Stdout) -> Result<()> { + let spec = req.capture.expect("open must contain a capture spec"); + + let state = if req.state_json == "{}" { + CaptureState::default() + } else { + serde_json::from_str(&req.state_json)? + }; + + let config: EndpointConfig = serde_json::from_str(&spec.config_json)?; + let mut consumer = config.to_consumer().await?; + let schema_client = match config.schema_registry { + SchemaRegistryConfig::ConfluentSchemaRegistry { + endpoint, + username, + password, + } => Some(SchemaRegistryClient::new(endpoint, username, password)), + SchemaRegistryConfig::NoSchemaRegistry { .. 
} => None, + }; + let mut schema_cache: HashMap = HashMap::new(); + + let topics_to_bindings = + setup_consumer(&mut consumer, state, &spec.bindings, &req.range).await?; + + loop { + let msg = consumer + .poll(None) + .expect("polling without a timeout should always produce a message") + .context("receiving next message")?; + + let mut op = "u"; + let mut doc = match msg.payload() { + Some(bytes) => parse_datum(bytes, false, &mut schema_cache, schema_client.as_ref()) + .await + .with_context(|| format!("parsing message payload for topic {}", msg.topic()))?, + None => { + // We interpret an absent message payload as a deletion + // tombstone. The captured document will otherwise be empty + // except for the _meta field and the message key (if present). + op = "d"; + json!({}) + } + }; + + let mut meta = Meta { + topic: msg.topic().to_string(), + partition: msg.partition(), + offset: msg.offset(), + op: op.to_string(), + ..Default::default() + }; + + if let Some(headers) = msg.headers() { + meta.headers = Some( + headers + .iter() + .map(|h| { + let value = match h.value { + Some(v) => match std::str::from_utf8(v) { + // Prefer capturing header byte values as UTF-8 + // strings if possible, otherwise base64 encode + // them. + Ok(v) => json!(v), + Err(_) => json!(base64.encode(v)), + }, + None => json!(null), + }; + (h.key.to_string(), value) + }) + .collect(), + ) + } + + meta.timestamp = match msg.timestamp() { + Timestamp::NotAvailable => None, + Timestamp::CreateTime(ts) => { + Some(MetaTimestamp::CreationTime(unix_millis_to_rfc3339(ts)?)) + } + Timestamp::LogAppendTime(ts) => { + Some(MetaTimestamp::LogAppendTime(unix_millis_to_rfc3339(ts)?)) + } + }; + + let captured = doc.as_object_mut().unwrap(); + captured.insert("_meta".to_string(), serde_json::to_value(meta).unwrap()); + + if let Some(key_bytes) = msg.key() { + let mut key_parsed = + parse_datum(key_bytes, true, &mut schema_cache, schema_client.as_ref()) + .await + .with_context(|| format!("parsing message key for topic {}", msg.topic()))?; + + // Add key/val pairs from the "key" to root of the captured + // document, which will clobber any collisions with keys from + // the parsed payload. + captured.append(key_parsed.as_object_mut().unwrap()); + } + + let binding_info = topics_to_bindings + .get(msg.topic()) + .with_context(|| format!("got a message for unknown topic {}", msg.topic()))?; + + let message = response::Captured { + binding: binding_info.binding_index, + doc_json: serde_json::to_string(&captured)?, + }; + + let checkpoint = + CaptureState::state_slice(&binding_info.state_key, msg.partition(), msg.offset()); + + write_capture_response( + Response { + captured: Some(message), + ..Default::default() + }, + &mut stdout, + )?; + + write_capture_response( + Response { + checkpoint: Some(Checkpoint { + state: Some(ConnectorState { + updated_json: serde_json::to_string(&checkpoint)?, + merge_patch: true, + }), + }), + ..Default::default() + }, + &mut stdout, + )?; + } +} + +fn unix_millis_to_rfc3339(millis: i64) -> Result { + let time = OffsetDateTime::UNIX_EPOCH + time::Duration::milliseconds(millis); + Ok(time.format(&format_description::well_known::Rfc3339)?) 
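A note on the checkpoint handling above: every captured document is followed by a connector-state merge patch keyed by the binding's state key and the partition the message came from. A minimal sketch of the resulting state document, using a hypothetical state key and offset:

    // Hypothetical example of the merge-patch state emitted after a message;
    // the layout follows the CaptureState/ResourceState structs above.
    let state = serde_json::json!({
        "bindingStateV1": {
            "acmeCo/orders": { "partitions": { "0": 42 } }
        }
    });
    assert_eq!(state["bindingStateV1"]["acmeCo/orders"]["partitions"]["0"], 42);

On restart, setup_consumer (below) resumes each partition at the stored offset plus one, so the last acknowledged message is not read again.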
+} + +async fn setup_consumer( + consumer: &mut BaseConsumer, + state: CaptureState, + bindings: &[Binding], + range: &Option, +) -> Result> { + let meta = consumer.fetch_metadata(None, None)?; + + let extant_partitions: HashMap = meta + .topics() + .iter() + .map(|t| (t.name().to_string(), t.partitions())) + .collect(); + + let mut topics_to_bindings: HashMap = HashMap::new(); + let mut topic_partition_list = TopicPartitionList::new(); + + for (idx, binding) in bindings.iter().enumerate() { + let res: Resource = serde_json::from_str(&binding.resource_config_json)?; + + let state_key = &binding.state_key; + let topic = &res.topic; + + let default_state = ResourceState::default(); + let resource_state = state.resources.get(state_key).unwrap_or(&default_state); + + let partition_info = extant_partitions + .get(topic) + .ok_or(anyhow!("configured topic {} does not exist", topic))?; + + for partition in partition_info.iter() { + let partition = partition.id(); + if !responsible_for_partition(range, &res.topic, partition) { + continue; + } + + let offset = match resource_state.partitions.get(&partition) { + Some(o) => Offset::Offset(*o + 1), // Don't read the same offset again. + None => Offset::Beginning, + }; + topic_partition_list.add_partition_offset(topic, partition, offset)?; + } + + topics_to_bindings.insert( + topic.to_string(), + BindingInfo { + binding_index: idx as u32, + state_key: state_key.to_string(), + }, + ); + } + + consumer + .assign(&topic_partition_list) + .context("could not assign consumer to topic_partition_list")?; + + Ok(topics_to_bindings) +} + +lazy_static! { + // HIGHWAY_HASH_KEY is a fixed 32 bytes (as required by HighwayHash) read from /dev/random. + // DO NOT MODIFY this value, as it is required to have consistent hash results. + // This value is copied from the Go connector source-boilerplate. + static ref HIGHWAY_HASH_KEY: Vec = { + decode("332757d16f0fb1cf2d4f676f85e34c6a8b85aa58f42bb081449d8eb2e4ed529f") + .expect("invalid hex string for HIGHWAY_HASH_KEY") + }; +} + +fn bytes_to_key(key: &[u8]) -> Key { + assert!(key.len() == 32, "The key must be exactly 32 bytes long."); + + Key([ + u64::from_le_bytes(key[0..8].try_into().unwrap()), + u64::from_le_bytes(key[8..16].try_into().unwrap()), + u64::from_le_bytes(key[16..24].try_into().unwrap()), + u64::from_le_bytes(key[24..32].try_into().unwrap()), + ]) +} + +fn responsible_for_partition(range: &Option, topic: &str, partition: i32) -> bool { + let range = match range { + None => return true, + Some(r) => r, + }; + + let mut hasher = HighwayHasher::new(bytes_to_key(&HIGHWAY_HASH_KEY)); + hasher.append(topic.as_bytes()); + hasher.append(&partition.to_le_bytes()); + let hash = (hasher.finalize64() >> 32) as u32; + + hash >= range.key_begin && hash <= range.key_end +} + +async fn parse_datum( + datum: &[u8], + is_key: bool, + schema_cache: &mut HashMap, + schema_client: Option<&SchemaRegistryClient>, +) -> Result { + match (schema_client, datum[0]) { + (Some(schema_client), 0) => { + // Schema registry is available, and this message was encoded with a + // schema. 
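A clarifying sketch of the range-based partition assignment above: a shard's RangeSpec covers a slice of the u32 keyspace, and a partition belongs to the shard whose range contains the upper 32 bits of the HighwayHash of the topic name and little-endian partition id. The range values here are hypothetical and only illustrate how ownership splits across shards:

    // Sketch only: which of a topic's partitions a shard owning the lower half
    // of the u32 keyspace would read, using responsible_for_partition above.
    let range = Some(proto_flow::flow::RangeSpec {
        key_begin: 0,
        key_end: u32::MAX / 2,
        ..Default::default()
    });
    let owned: Vec<i32> = (0..8)
        .filter(|p| responsible_for_partition(&range, "orders", *p))
        .collect();
    // `owned` now holds whichever of partitions 0..8 hash into [key_begin, key_end];
    // a shard with the complementary range picks up the rest.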
+ let schema_id = u32::from_be_bytes(datum[1..5].try_into()?); + if let Entry::Vacant(e) = schema_cache.entry(schema_id) { + e.insert(schema_client.fetch_schema(schema_id).await?); + } + + match schema_cache.get(&schema_id).unwrap() { + RegisteredSchema::Avro(avro_schema) => { + let avro_value = + apache_avro::from_avro_datum(avro_schema, &mut &datum[5..], None)?; + + let is_doc = matches!(avro_value, AvroValue::Map(_) | AvroValue::Record(_)); + let json_value = avro_to_json(avro_value, avro_schema)?; + + if is_key && !is_doc { + // Handle cases where there is an Avro schema, but it's + // not a record type. I'm not sure how common this is in + // practice but it's the first thing I tried to do. + Ok(serde_json::Map::from_iter([("_key".to_string(), json_value)]).into()) + } else { + Ok(json_value) + } + } + RegisteredSchema::Json(_) => Ok(serde_json::from_slice(&datum[5..])?), + RegisteredSchema::Protobuf => { + anyhow::bail!("decoding protobuf messages is not yet supported") + } + } + } + (None, 0) => { + // Schema registry is not available, but the data was encoded with a + // schema. We might as well try to see if the data is a valid JSON + // document. + Ok(serde_json::from_slice(&datum[5..]).context( + "received a message with a schema magic byte, but schema registry is not configured and the message is not valid JSON" + )?) + } + (_, _) => { + // If there is no schema information available for how to parse the + // document, we make our best guess at parsing into something that + // would be useful. A present key will always be able to be captured + // as a base64-encoded string of its bytes. The most reasonable + // thing to do for a "payload" is to try to parse it as a JSON + // document. + if is_key { + Ok( + serde_json::Map::from_iter([("_key".to_string(), base64.encode(datum).into())]) + .into(), + ) + } else { + Ok(serde_json::from_slice(datum)?) 
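For reference, the registry-framed messages handled above follow the Confluent wire format: a zero magic byte, a 4-byte big-endian schema id, then the encoded payload. A small sketch of building such a datum with made-up values:

    // Illustrative only: the byte layout parse_datum expects for
    // schema-registry-encoded messages.
    let schema_id: u32 = 7;
    let mut datum = vec![0u8];                          // magic byte
    datum.extend_from_slice(&schema_id.to_be_bytes());  // bytes 1..5: schema id
    datum.extend_from_slice(br#"{"id": 1}"#);           // bytes 5..: payload
    assert_eq!(u32::from_be_bytes(datum[1..5].try_into().unwrap()), 7);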
+ } + } + } +} + +pub fn avro_to_json(value: AvroValue, schema: &AvroSchema) -> Result { + Ok(match value { + AvroValue::Null => json!(null), + AvroValue::Boolean(v) => json!(v), + AvroValue::Int(v) => json!(v), + AvroValue::Long(v) => json!(v), + AvroValue::Float(v) => match v.is_nan() || v.is_infinite() { + true => json!(v.to_string()), + false => json!(v), + }, + AvroValue::Double(v) => match v.is_nan() || v.is_infinite() { + true => json!(v.to_string()), + false => json!(v), + }, + AvroValue::Bytes(v) => json!(base64.encode(v)), + AvroValue::String(v) => json!(v), + AvroValue::Fixed(_, v) => json!(base64.encode(v)), + AvroValue::Enum(_, v) => json!(v), + AvroValue::Union(idx, v) => match schema { + AvroSchema::Union(s) => avro_to_json(*v, &s.variants()[idx as usize]) + .context("failed to decode union value")?, + _ => anyhow::bail!( + "expected a union schema for a union value but got {}", + schema + ), + }, + AvroValue::Array(v) => match schema { + AvroSchema::Array(s) => json!(v + .into_iter() + .map(|v| avro_to_json(v, &s.items)) + .collect::>>()?), + _ => anyhow::bail!( + "expected an array schema for an array value but got {}", + schema + ), + }, + AvroValue::Map(v) => match schema { + AvroSchema::Map(s) => json!(v + .into_iter() + .map(|(k, v)| Ok((k, avro_to_json(v, &s.types)?))) + .collect::>>()?), + _ => anyhow::bail!("expected a map schema for a map value but got {}", schema), + }, + AvroValue::Record(v) => match schema { + AvroSchema::Record(s) => json!(v + .into_iter() + .zip(s.fields.iter()) + .map(|((k, v), field)| { + if k != field.name { + anyhow::bail!( + "expected record field value with name '{}' but schema had name '{}'", + k, + field.name, + ) + } + Ok((k, avro_to_json(v, &field.schema)?)) + }) + .collect::>>()?), + _ => anyhow::bail!( + "expected a record schema for a record value but got {}", + schema + ), + }, + AvroValue::Date(v) => { + let date = OffsetDateTime::UNIX_EPOCH + time::Duration::days(v.into()); + json!(format!( + "{}-{:02}-{:02}", + date.year(), + date.month() as u8, + date.day() + )) + } + AvroValue::Decimal(v) => match schema { + AvroSchema::Decimal(s) => json!(BigDecimal::new(v.into(), s.scale as i64).to_string()), + _ => anyhow::bail!( + "expected a decimal schema for a decimal value but got {}", + schema + ), + }, + AvroValue::BigDecimal(v) => json!(v.to_string()), + AvroValue::TimeMillis(v) => { + let time = OffsetDateTime::UNIX_EPOCH + time::Duration::milliseconds(v as i64); + json!(format!( + "{:02}:{:02}:{:02}.{:03}", + time.hour(), + time.minute(), + time.second(), + time.millisecond() + )) + } + AvroValue::TimeMicros(v) => { + let time = OffsetDateTime::UNIX_EPOCH + time::Duration::microseconds(v); + json!(format!( + "{:02}:{:02}:{:02}.{:06}", + time.hour(), + time.minute(), + time.second(), + time.microsecond() + )) + } + AvroValue::TimestampMillis(v) => { + let time = OffsetDateTime::UNIX_EPOCH + time::Duration::milliseconds(v); + json!(time + .format(&format_description::well_known::Rfc3339) + .unwrap()) + } + AvroValue::TimestampMicros(v) => { + let time = OffsetDateTime::UNIX_EPOCH + time::Duration::microseconds(v); + json!(time + .format(&format_description::well_known::Rfc3339) + .unwrap()) + } + AvroValue::TimestampNanos(v) => { + let time = OffsetDateTime::UNIX_EPOCH + time::Duration::nanoseconds(v); + json!(time + .format(&format_description::well_known::Rfc3339) + .unwrap()) + } + AvroValue::LocalTimestampMillis(v) => { + let time = OffsetDateTime::UNIX_EPOCH + time::Duration::milliseconds(v); + json!(time + 
.format(&format_description::well_known::Rfc3339) + .unwrap()) + } + AvroValue::LocalTimestampMicros(v) => { + let time = OffsetDateTime::UNIX_EPOCH + time::Duration::microseconds(v); + json!(time + .format(&format_description::well_known::Rfc3339) + .unwrap()) + } + AvroValue::LocalTimestampNanos(v) => { + let time = OffsetDateTime::UNIX_EPOCH + time::Duration::nanoseconds(v); + json!(time + .format(&format_description::well_known::Rfc3339) + .unwrap()) + } + AvroValue::Duration(v) => { + json!(duration_to_duration_string( + v.months().into(), + v.days().into(), + v.millis().into() + )) + } + AvroValue::Uuid(v) => json!(v.to_string()), + }) +} + +fn duration_to_duration_string(months: u32, days: u32, total_milliseconds: u32) -> String { + let total_seconds = total_milliseconds / 1000; + let hours = total_seconds / 3600; + let minutes = (total_seconds % 3600) / 60; + let seconds = total_seconds % 60; + let milliseconds = total_milliseconds % 1000; + + let mut duration = String::from("P"); + if months > 0 { + duration.push_str(&format!("{}M", months)); + } + if days > 0 { + duration.push_str(&format!("{}D", days)); + } + + if hours > 0 || minutes > 0 || seconds > 0 || milliseconds > 0 { + duration.push('T'); + if hours > 0 { + duration.push_str(&format!("{}H", hours)); + } + if minutes > 0 { + duration.push_str(&format!("{}M", minutes)); + } + if seconds > 0 || milliseconds > 0 { + if milliseconds > 0 { + duration.push_str(&format!("{}.{:03}S", seconds, milliseconds)); + } else { + duration.push_str(&format!("{}S", seconds)); + } + } + } + + duration +} + +#[cfg(test)] +mod tests { + use core::{f32, f64}; + use std::{collections::HashMap, i64}; + + use super::*; + use apache_avro::{ + types::{Record, Value as AvroValue}, + Days, Decimal, Duration, Millis, Months, + }; + use bigdecimal::num_bigint::ToBigInt; + use insta::assert_json_snapshot; + use serde_json::json; + + #[test] + fn test_avro_to_json() { + let record_schema_raw = json!({ + "type": "record", + "name": "test", + "fields": [ + {"name": "nullField", "type": "null"}, + {"name": "boolField", "type": "boolean"}, + {"name": "intField", "type": "int"}, + {"name": "longField", "type": "long"}, + {"name": "floatField", "type": "float"}, + {"name": "floatFieldNaN", "type": "float"}, + {"name": "floatFieldPosInf", "type": "float"}, + {"name": "floatFieldNegInf", "type": "float"}, + {"name": "doubleField", "type": "double"}, + {"name": "doubleFieldNaN", "type": "double"}, + {"name": "doubleFieldPosInf", "type": "double"}, + {"name": "doubleFieldNegInf", "type": "double"}, + {"name": "bytesField", "type": "bytes"}, + {"name": "stringField", "type": "string"}, + {"name": "nullableStringField", "type": ["null", "string"]}, + {"name": "fixedBytesField", "type": {"type": "fixed", "name": "foo", "size": 5}}, + {"name": "enumField", "type": {"type": "enum", "name": "foo", "symbols": ["a", "b", "c"]}}, + {"name": "arrayField", "type": "array", "items": "string"}, + {"name": "mapField", "type": "map", "values": "string"}, + {"name": "dateField", "type": "int", "logicalType": "date"}, + {"name": "decimalField", "type": "bytes", "logicalType": "decimal", "precision": 10, "scale": 3}, + {"name": "fixedDecimalField", "type":{"type": "fixed", "size": 2, "name": "decimal"}, "logicalType": "decimal", "precision": 4, "scale": 2}, + {"name": "timeMillisField", "type": "int", "logicalType": "time-millis"}, + {"name": "timeMicrosField", "type": "long", "logicalType": "time-micros"}, + {"name": "timestampMillisField", "type": "long", "logicalType": 
"timestamp-millis"}, + {"name": "timestampMicrosField", "type": "long", "logicalType": "timestamp-micros"}, + {"name": "localTimestampMillisField", "type": "long", "logicalType": "local-timestamp-millis"}, + {"name": "localTimestampMicrosField", "type": "long", "logicalType": "local-timestamp-micros"}, + {"name": "durationField", "type": {"type": "fixed", "size": 12, "name": "duration"}, "logicalType": "duration"}, + {"name": "uuidField", "type": "string", "logicalType": "uuid"}, + {"name": "nestedRecordField", "type": {"type": "record", "name": "nestedRecord", "fields": [ + {"name": "nestedStringField", "type": "string"}, + {"name": "nestedLongField", "type": "long"}, + ]}} + ] + }); + + let nested_record_schema_raw = json!({ + "type": "record", + "name": "nestedRecord", + "fields": [ + {"name": "nestedStringField", "type": "string"}, + {"name": "nestedLongField", "type": "long"}, + ] + }); + + let record_schema_parsed = AvroSchema::parse(&record_schema_raw).unwrap(); + let nested_record_schema_parsed = AvroSchema::parse(&nested_record_schema_raw).unwrap(); + let mut nested_record = Record::new(&nested_record_schema_parsed).unwrap(); + nested_record.put("nestedStringField", "nested string value"); + nested_record.put("nestedLongField", 123); + + let mut record = Record::new(&record_schema_parsed).unwrap(); + record.put("nullField", AvroValue::Null); + record.put("boolField", true); + record.put("intField", i32::MAX); + record.put("longField", i64::MAX); + record.put("floatField", f32::MAX); + record.put("floatFieldNaN", f32::NAN); + record.put("floatFieldPosInf", f32::INFINITY); + record.put("floatFieldNegInf", f32::NEG_INFINITY); + record.put("doubleField", f32::MAX); + record.put("doubleFieldNaN", f64::NAN); + record.put("doubleFieldPosInf", f64::INFINITY); + record.put("doubleFieldNegInf", f64::NEG_INFINITY); + record.put("bytesField", vec![104, 101, 108, 108, 111]); + record.put("stringField", "hello"); + record.put("nullableStringField", AvroValue::Null); + record.put("fixedBytesField", vec![104, 101, 108, 108, 111]); + record.put("enumField", "b"); + record.put( + "arrayField", + AvroValue::Array(vec![ + AvroValue::String("first".into()), + AvroValue::String("second".into()), + ]), + ); + record.put( + "mapField", + HashMap::from([("key".to_string(), "value".to_string())]), + ); + record.put("dateField", 123); + record.put( + "decimalField", + Decimal::from((-32442.to_bigint().unwrap()).to_signed_bytes_be()), + ); + record.put( + "fixedDecimalField", + Decimal::from(9936.to_bigint().unwrap().to_signed_bytes_be()), + ); + record.put("timeMillisField", AvroValue::TimeMillis(73_800_000)); + record.put("timeMicrosField", AvroValue::TimeMicros(73_800_000 * 1000)); + record.put( + "timestampMillisField", + AvroValue::TimestampMillis(1_730_233_606 * 1000), + ); + record.put( + "timestampMicrosField", + AvroValue::TimestampMicros(1_730_233_606 * 1000 * 1000), + ); + record.put( + "localTimestampMillisField", + AvroValue::TimestampMillis(1_730_233_606 * 1000), + ); + record.put( + "localTimestampMicrosField", + AvroValue::TimestampMicros(1_730_233_606 * 1000 * 1000), + ); + record.put( + "durationField", + Duration::new(Months::new(6), Days::new(14), Millis::new(73_800_000)), + ); + record.put("uuidField", uuid::Uuid::nil()); + record.put("nestedRecordField", nested_record); + + assert_json_snapshot!(avro_to_json(record.into(), &record_schema_parsed).unwrap()); + } + + #[test] + fn test_duration_to_duration_string() { + let test_cases = [ + (2, 0, 0, "P2M"), + (0, 5, 0, "P5D"), + (0, 8, 0, 
"P8D"), + (0, 0, 3661001, "PT1H1M1.001S"), + (1, 2, 3661001, "P1M2DT1H1M1.001S"), + (1, 2, 3661000, "P1M2DT1H1M1S"), + (0, 0, 3000, "PT3S"), + (0, 0, 120000, "PT2M"), + (0, 0, 3600000, "PT1H"), + ]; + + for (months, days, milliseconds, want) in test_cases { + assert_eq!( + duration_to_duration_string(months, days, milliseconds), + want + ) + } + } +} diff --git a/source-kafka/src/schema_registry.rs b/source-kafka/src/schema_registry.rs new file mode 100644 index 0000000000..0d302586cc --- /dev/null +++ b/source-kafka/src/schema_registry.rs @@ -0,0 +1,204 @@ +use anyhow::{Context, Result}; +use futures::stream::{self, StreamExt}; +use reqwest::Client; +use serde::{de::DeserializeOwned, Deserialize}; +use std::collections::{HashMap, HashSet}; + +const TOPIC_KEY_SUFFIX: &str = "-key"; +const TOPIC_VALUE_SUFFIX: &str = "-value"; +const CONCURRENT_SCHEMA_REQUESTS: usize = 10; + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct FetchedSchema { + #[serde(default = "SchemaType::default")] + schema_type: SchemaType, + schema: String, + references: Option, // TODO(whb): Schema reference support is not yet implemented. +} + +#[derive(Deserialize, Debug)] +struct FetchedLatestVersion { + id: u32, +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "UPPERCASE")] +enum SchemaType { + Avro, + Json, + Protobuf, +} + +impl SchemaType { + fn default() -> Self { + SchemaType::Avro + } +} + +#[derive(Debug)] +pub enum RegisteredSchema { + Avro(apache_avro::Schema), + Json(serde_json::Value), + Protobuf, // TODO(whb): Protobuf support is not yet implemented. +} + +#[derive(Debug, Default)] +pub struct TopicSchema { + pub key: Option, + pub value: Option, +} + +pub struct SchemaRegistryClient { + endpoint: String, + http: Client, + username: String, + password: String, +} + +impl SchemaRegistryClient { + pub fn new(endpoint: String, username: String, password: String) -> SchemaRegistryClient { + SchemaRegistryClient { + endpoint: endpoint.to_string(), + http: reqwest::Client::default(), + username, + password, + } + } + + pub async fn schemas_for_topics( + &self, + topics: &[String], + ) -> Result> { + let applicable_topics: HashSet = topics.iter().cloned().collect(); + + let subjects: Vec = self + .make_request(format!("{}/subjects", self.endpoint).as_str()) + .await?; + + let filter_by_suffix = |s: &str, suffix: &str| { + if let Some(s) = s.strip_suffix(suffix) { + if !applicable_topics.contains(s) { + return None; + } + return Some(s.to_string()); + } + None + }; + + let topics_with_key_schema: HashSet = subjects + .iter() + .filter_map(|s| filter_by_suffix(s, TOPIC_KEY_SUFFIX)) + .collect(); + + let topics_with_value_schema: HashSet = subjects + .iter() + .filter_map(|s| filter_by_suffix(s, TOPIC_VALUE_SUFFIX)) + .collect(); + + let schema_futures: Vec<_> = applicable_topics + .iter() + .filter_map(|topic| { + let need_key = topics_with_key_schema.contains(topic); + let need_value = topics_with_value_schema.contains(topic); + if !need_key && !need_value { + return None; + } + Some(async move { + let mut schema = TopicSchema { + key: None, + value: None, + }; + + if need_key { + schema.key = Some(self.fetch_latest_schema(topic, true).await?) + } + if need_value { + schema.value = Some(self.fetch_latest_schema(topic, false).await?) 
+ } + + Ok::<(String, TopicSchema), anyhow::Error>((topic.to_owned(), schema)) + }) + }) + .collect(); + + stream::iter(schema_futures) + .buffer_unordered(CONCURRENT_SCHEMA_REQUESTS) + .collect::>() + .await + .into_iter() + .collect::>>() + } + + pub async fn fetch_schema(&self, id: u32) -> Result { + let fetched: FetchedSchema = self + .make_request(format!("{}/schemas/ids/{}", self.endpoint, id).as_str()) + .await?; + + if fetched.references.is_some() { + anyhow::bail!("schema references are not yet supported, and requested schema with id {} has references", id); + } + + match fetched.schema_type { + SchemaType::Avro => { + let schema = apache_avro::Schema::parse_str(&fetched.schema) + .context("failed to parse fetched avro schema")?; + Ok(RegisteredSchema::Avro(schema)) + } + SchemaType::Json => { + let schema = serde_json::from_str(&fetched.schema) + .context("failed to parse fetched json schema")?; + Ok(RegisteredSchema::Json(schema)) + } + SchemaType::Protobuf => Ok(RegisteredSchema::Protobuf), + } + } + + async fn fetch_latest_version(&self, subject: &str) -> Result { + let fetched: FetchedLatestVersion = self + .make_request( + format!("{}/subjects/{}/versions/latest", self.endpoint, subject).as_str(), + ) + .await?; + Ok(fetched.id) + } + + async fn fetch_latest_schema(&self, topic: &str, key: bool) -> Result { + let subject = format!( + "{}{}", + topic, + if key { + TOPIC_KEY_SUFFIX + } else { + TOPIC_VALUE_SUFFIX + } + ); + let version = self.fetch_latest_version(subject.as_str()).await?; + self.fetch_schema(version).await + } + + async fn make_request(&self, url: &str) -> Result + where + T: DeserializeOwned, + { + let res = self + .http + .get(url) + .basic_auth(&self.username, Some(&self.password)) + .send() + .await?; + + if !res.status().is_success() { + let status = res.status(); + let body = res.text().await?; + anyhow::bail!( + "request GET {} failed with status {}: {}", + url, + status, + body + ); + } + + Ok(res.json().await?) 
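As a usage note for the client above: schemas_for_topics assumes each topic's key and value schemas are registered under subjects named after the topic, and resolving the latest schema takes two requests. The topic name below is hypothetical:

    // Subject names derived from a topic (see TOPIC_KEY_SUFFIX / TOPIC_VALUE_SUFFIX above).
    let topic = "orders";
    let key_subject = format!("{topic}{TOPIC_KEY_SUFFIX}");     // "orders-key"
    let value_subject = format!("{topic}{TOPIC_VALUE_SUFFIX}"); // "orders-value"
    // fetch_latest_schema then issues, with basic auth:
    //   GET {endpoint}/subjects/orders-value/versions/latest  -> contains the schema "id"
    //   GET {endpoint}/schemas/ids/{id}                        -> "schemaType", "schema", ...
    assert_eq!(key_subject, "orders-key");
    assert_eq!(value_subject, "orders-value");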
+ } +} diff --git a/source-kafka/src/snapshots/source_kafka__discover__tests__avro record with scalar compound key.snap b/source-kafka/src/snapshots/source_kafka__discover__tests__avro record with scalar compound key.snap new file mode 100644 index 0000000000..2141b2b5a8 --- /dev/null +++ b/source-kafka/src/snapshots/source_kafka__discover__tests__avro record with scalar compound key.snap @@ -0,0 +1,82 @@ +--- +source: src/discover.rs +expression: snap +--- +["/firstKey","/secondKey","/thirdKey"] +{ + "type": "object", + "if": { + "properties": { + "_meta": { + "properties": { + "op": { + "const": "d" + } + } + } + } + }, + "then": { + "reduce": { + "delete": true, + "strategy": "merge" + } + }, + "required": [ + "_meta", + "firstKey", + "secondKey" + ], + "properties": { + "_meta": { + "type": "object", + "required": [ + "offset", + "op", + "partition", + "topic" + ], + "properties": { + "offset": { + "description": "The offset of the message within the partition", + "type": "integer" + }, + "op": { + "description": "Change operation type: 'c' Create/Insert, 'u' Update, 'd' Delete.", + "enum": [ + "c", + "u", + "d" + ] + }, + "partition": { + "description": "The partition the message was read from", + "type": "integer" + }, + "topic": { + "description": "The topic the message was read from", + "type": "string" + } + } + }, + "firstKey": { + "description": "the first key field", + "type": "string" + }, + "secondKey": { + "description": "the second key field", + "type": "integer" + }, + "thirdKey": { + "description": "the third key field, which is an enum with a default value", + "default": "a", + "type": "string", + "enum": [ + "a", + "b", + "c" + ] + } + }, + "x-infer-schema": true +} diff --git a/source-kafka/src/snapshots/source_kafka__discover__tests__nested avro record with scalars.snap b/source-kafka/src/snapshots/source_kafka__discover__tests__nested avro record with scalars.snap new file mode 100644 index 0000000000..292c8fcb35 --- /dev/null +++ b/source-kafka/src/snapshots/source_kafka__discover__tests__nested avro record with scalars.snap @@ -0,0 +1,86 @@ +--- +source: src/discover.rs +expression: snap +--- +["/firstKey","/nestedRecord/secondKeyNested","/nestedRecord/thirdKeyNested"] +{ + "type": "object", + "if": { + "properties": { + "_meta": { + "properties": { + "op": { + "const": "d" + } + } + } + } + }, + "then": { + "reduce": { + "delete": true, + "strategy": "merge" + } + }, + "required": [ + "_meta", + "firstKey", + "nestedRecord" + ], + "properties": { + "_meta": { + "type": "object", + "required": [ + "offset", + "op", + "partition", + "topic" + ], + "properties": { + "offset": { + "description": "The offset of the message within the partition", + "type": "integer" + }, + "op": { + "description": "Change operation type: 'c' Create/Insert, 'u' Update, 'd' Delete.", + "enum": [ + "c", + "u", + "d" + ] + }, + "partition": { + "description": "The partition the message was read from", + "type": "integer" + }, + "topic": { + "description": "The topic the message was read from", + "type": "string" + } + } + }, + "firstKey": { + "description": "the first key field", + "type": "string" + }, + "nestedRecord": { + "type": "object", + "required": [ + "secondKeyNested", + "thirdKeyNested" + ], + "properties": { + "secondKeyNested": { + "description": "the second key field", + "type": "integer" + }, + "thirdKeyNested": { + "description": "the third key field", + "type": "string", + "contentEncoding": "base64" + } + } + } + }, + "x-infer-schema": true +} diff --git 
a/source-kafka/src/snapshots/source_kafka__discover__tests__nested json object.snap b/source-kafka/src/snapshots/source_kafka__discover__tests__nested json object.snap new file mode 100644 index 0000000000..33c6fd545f --- /dev/null +++ b/source-kafka/src/snapshots/source_kafka__discover__tests__nested json object.snap @@ -0,0 +1,82 @@ +--- +source: src/discover.rs +expression: snap +--- +["/firstKey","/nestedObject/secondKeyNested","/nestedObject/thirdKeyNested"] +{ + "type": "object", + "if": { + "properties": { + "_meta": { + "properties": { + "op": { + "const": "d" + } + } + } + } + }, + "then": { + "reduce": { + "delete": true, + "strategy": "merge" + } + }, + "required": [ + "_meta", + "firstKey", + "nestedObject" + ], + "properties": { + "_meta": { + "type": "object", + "required": [ + "offset", + "op", + "partition", + "topic" + ], + "properties": { + "offset": { + "description": "The offset of the message within the partition", + "type": "integer" + }, + "op": { + "description": "Change operation type: 'c' Create/Insert, 'u' Update, 'd' Delete.", + "enum": [ + "c", + "u", + "d" + ] + }, + "partition": { + "description": "The partition the message was read from", + "type": "integer" + }, + "topic": { + "description": "The topic the message was read from", + "type": "string" + } + } + }, + "firstKey": { + "type": "string" + }, + "nestedObject": { + "type": "object", + "required": [ + "secondKeyNested", + "thirdKeyNested" + ], + "properties": { + "secondKeyNested": { + "type": "integer" + }, + "thirdKeyNested": { + "type": "boolean" + } + } + } + }, + "x-infer-schema": true +} diff --git a/source-kafka/src/snapshots/source_kafka__discover__tests__no key.snap b/source-kafka/src/snapshots/source_kafka__discover__tests__no key.snap new file mode 100644 index 0000000000..20a6bd1550 --- /dev/null +++ b/source-kafka/src/snapshots/source_kafka__discover__tests__no key.snap @@ -0,0 +1,62 @@ +--- +source: src/discover.rs +expression: snap +--- +["/_meta/partition","/_meta/offset"] +{ + "type": "object", + "if": { + "properties": { + "_meta": { + "properties": { + "op": { + "const": "d" + } + } + } + } + }, + "then": { + "reduce": { + "delete": true, + "strategy": "merge" + } + }, + "required": [ + "_meta" + ], + "properties": { + "_meta": { + "type": "object", + "required": [ + "offset", + "op", + "partition", + "topic" + ], + "properties": { + "offset": { + "description": "The offset of the message within the partition", + "type": "integer" + }, + "op": { + "description": "Change operation type: 'c' Create/Insert, 'u' Update, 'd' Delete.", + "enum": [ + "c", + "u", + "d" + ] + }, + "partition": { + "description": "The partition the message was read from", + "type": "integer" + }, + "topic": { + "description": "The topic the message was read from", + "type": "string" + } + } + } + }, + "x-infer-schema": true +} diff --git a/source-kafka/src/snapshots/source_kafka__discover__tests__single non-scalar avro key.snap b/source-kafka/src/snapshots/source_kafka__discover__tests__single non-scalar avro key.snap new file mode 100644 index 0000000000..20a6bd1550 --- /dev/null +++ b/source-kafka/src/snapshots/source_kafka__discover__tests__single non-scalar avro key.snap @@ -0,0 +1,62 @@ +--- +source: src/discover.rs +expression: snap +--- +["/_meta/partition","/_meta/offset"] +{ + "type": "object", + "if": { + "properties": { + "_meta": { + "properties": { + "op": { + "const": "d" + } + } + } + } + }, + "then": { + "reduce": { + "delete": true, + "strategy": "merge" + } + }, + "required": [ + "_meta" + 
], + "properties": { + "_meta": { + "type": "object", + "required": [ + "offset", + "op", + "partition", + "topic" + ], + "properties": { + "offset": { + "description": "The offset of the message within the partition", + "type": "integer" + }, + "op": { + "description": "Change operation type: 'c' Create/Insert, 'u' Update, 'd' Delete.", + "enum": [ + "c", + "u", + "d" + ] + }, + "partition": { + "description": "The partition the message was read from", + "type": "integer" + }, + "topic": { + "description": "The topic the message was read from", + "type": "string" + } + } + } + }, + "x-infer-schema": true +} diff --git a/source-kafka/src/snapshots/source_kafka__discover__tests__single nullable scalar avro key.snap b/source-kafka/src/snapshots/source_kafka__discover__tests__single nullable scalar avro key.snap new file mode 100644 index 0000000000..20a6bd1550 --- /dev/null +++ b/source-kafka/src/snapshots/source_kafka__discover__tests__single nullable scalar avro key.snap @@ -0,0 +1,62 @@ +--- +source: src/discover.rs +expression: snap +--- +["/_meta/partition","/_meta/offset"] +{ + "type": "object", + "if": { + "properties": { + "_meta": { + "properties": { + "op": { + "const": "d" + } + } + } + } + }, + "then": { + "reduce": { + "delete": true, + "strategy": "merge" + } + }, + "required": [ + "_meta" + ], + "properties": { + "_meta": { + "type": "object", + "required": [ + "offset", + "op", + "partition", + "topic" + ], + "properties": { + "offset": { + "description": "The offset of the message within the partition", + "type": "integer" + }, + "op": { + "description": "Change operation type: 'c' Create/Insert, 'u' Update, 'd' Delete.", + "enum": [ + "c", + "u", + "d" + ] + }, + "partition": { + "description": "The partition the message was read from", + "type": "integer" + }, + "topic": { + "description": "The topic the message was read from", + "type": "string" + } + } + } + }, + "x-infer-schema": true +} diff --git a/source-kafka/src/snapshots/source_kafka__discover__tests__single nullable scalar json key.snap b/source-kafka/src/snapshots/source_kafka__discover__tests__single nullable scalar json key.snap new file mode 100644 index 0000000000..20a6bd1550 --- /dev/null +++ b/source-kafka/src/snapshots/source_kafka__discover__tests__single nullable scalar json key.snap @@ -0,0 +1,62 @@ +--- +source: src/discover.rs +expression: snap +--- +["/_meta/partition","/_meta/offset"] +{ + "type": "object", + "if": { + "properties": { + "_meta": { + "properties": { + "op": { + "const": "d" + } + } + } + } + }, + "then": { + "reduce": { + "delete": true, + "strategy": "merge" + } + }, + "required": [ + "_meta" + ], + "properties": { + "_meta": { + "type": "object", + "required": [ + "offset", + "op", + "partition", + "topic" + ], + "properties": { + "offset": { + "description": "The offset of the message within the partition", + "type": "integer" + }, + "op": { + "description": "Change operation type: 'c' Create/Insert, 'u' Update, 'd' Delete.", + "enum": [ + "c", + "u", + "d" + ] + }, + "partition": { + "description": "The partition the message was read from", + "type": "integer" + }, + "topic": { + "description": "The topic the message was read from", + "type": "string" + } + } + } + }, + "x-infer-schema": true +} diff --git a/source-kafka/src/snapshots/source_kafka__discover__tests__single scalar avro key.snap b/source-kafka/src/snapshots/source_kafka__discover__tests__single scalar avro key.snap new file mode 100644 index 0000000000..6910a9d4b2 --- /dev/null +++ 
b/source-kafka/src/snapshots/source_kafka__discover__tests__single scalar avro key.snap @@ -0,0 +1,66 @@ +--- +source: src/discover.rs +expression: snap +--- +["/_key"] +{ + "type": "object", + "if": { + "properties": { + "_meta": { + "properties": { + "op": { + "const": "d" + } + } + } + } + }, + "then": { + "reduce": { + "delete": true, + "strategy": "merge" + } + }, + "required": [ + "_key", + "_meta" + ], + "properties": { + "_key": { + "type": "string" + }, + "_meta": { + "type": "object", + "required": [ + "offset", + "op", + "partition", + "topic" + ], + "properties": { + "offset": { + "description": "The offset of the message within the partition", + "type": "integer" + }, + "op": { + "description": "Change operation type: 'c' Create/Insert, 'u' Update, 'd' Delete.", + "enum": [ + "c", + "u", + "d" + ] + }, + "partition": { + "description": "The partition the message was read from", + "type": "integer" + }, + "topic": { + "description": "The topic the message was read from", + "type": "string" + } + } + } + }, + "x-infer-schema": true +} diff --git a/source-kafka/src/snapshots/source_kafka__discover__tests__single scalar json key.snap b/source-kafka/src/snapshots/source_kafka__discover__tests__single scalar json key.snap new file mode 100644 index 0000000000..6910a9d4b2 --- /dev/null +++ b/source-kafka/src/snapshots/source_kafka__discover__tests__single scalar json key.snap @@ -0,0 +1,66 @@ +--- +source: src/discover.rs +expression: snap +--- +["/_key"] +{ + "type": "object", + "if": { + "properties": { + "_meta": { + "properties": { + "op": { + "const": "d" + } + } + } + } + }, + "then": { + "reduce": { + "delete": true, + "strategy": "merge" + } + }, + "required": [ + "_key", + "_meta" + ], + "properties": { + "_key": { + "type": "string" + }, + "_meta": { + "type": "object", + "required": [ + "offset", + "op", + "partition", + "topic" + ], + "properties": { + "offset": { + "description": "The offset of the message within the partition", + "type": "integer" + }, + "op": { + "description": "Change operation type: 'c' Create/Insert, 'u' Update, 'd' Delete.", + "enum": [ + "c", + "u", + "d" + ] + }, + "partition": { + "description": "The partition the message was read from", + "type": "integer" + }, + "topic": { + "description": "The topic the message was read from", + "type": "string" + } + } + } + }, + "x-infer-schema": true +} diff --git a/source-kafka/src/snapshots/source_kafka__pull__tests__avro_to_json.snap b/source-kafka/src/snapshots/source_kafka__pull__tests__avro_to_json.snap new file mode 100644 index 0000000000..04cf586ee9 --- /dev/null +++ b/source-kafka/src/snapshots/source_kafka__pull__tests__avro_to_json.snap @@ -0,0 +1,45 @@ +--- +source: src/pull.rs +expression: "avro_to_json(record.into(), &record_schema_parsed).unwrap()" +--- +{ + "arrayField": [ + "first", + "second" + ], + "boolField": true, + "bytesField": "aGVsbG8=", + "dateField": 123, + "decimalField": "-32.442", + "doubleField": 340282346638528860000000000000000000000.0, + "doubleFieldNaN": "NaN", + "doubleFieldNegInf": "-inf", + "doubleFieldPosInf": "inf", + "durationField": "P6M14DT20H30M", + "enumField": "b", + "fixedBytesField": "aGVsbG8=", + "fixedDecimalField": "99.36", + "floatField": 340282346638528860000000000000000000000.0, + "floatFieldNaN": "NaN", + "floatFieldNegInf": "-inf", + "floatFieldPosInf": "inf", + "intField": 2147483647, + "localTimestampMicrosField": "2024-10-29T20:26:46Z", + "localTimestampMillisField": "2024-10-29T20:26:46Z", + "longField": 9223372036854775807, + "mapField": { + 
"key": "value" + }, + "nestedRecordField": { + "nestedLongField": 123, + "nestedStringField": "nested string value" + }, + "nullField": null, + "nullableStringField": null, + "stringField": "hello", + "timeMicrosField": "20:30:00.000000", + "timeMillisField": "20:30:00.000", + "timestampMicrosField": "2024-10-29T20:26:46Z", + "timestampMillisField": "2024-10-29T20:26:46Z", + "uuidField": "00000000-0000-0000-0000-000000000000" +} diff --git a/source-kafka/src/state.rs b/source-kafka/src/state.rs deleted file mode 100644 index e09c845a1e..0000000000 --- a/source-kafka/src/state.rs +++ /dev/null @@ -1,275 +0,0 @@ -use std::collections::BTreeMap; - -use proto_flow::flow::{CaptureSpec, RangeSpec}; -use rdkafka::message::BorrowedMessage; -use rdkafka::metadata::Metadata; -use serde::{Deserialize, Serialize}; -use tracing::info; - -use crate::{catalog::{self, Resource, responsible_for_shard}, connector, kafka}; - -#[derive(Debug, thiserror::Error)] -pub enum Error { - #[error("failed to read the state file")] - File(#[from] std::io::Error), - - #[error("failed to validate connector state")] - Format(#[from] serde_json::Error), - - #[error("failed to serialize state: {0:?}")] - Serialization(Checkpoint, serde_json::Error), -} - -/// Represents how far into a partition we've already consumed. The `Offset` value -/// stored in a state file is a record of successfully processing a message. -#[derive(Copy, Clone, Debug, Deserialize, PartialEq, Serialize)] -pub enum Offset { - /// The very beginning of the partition. **Not the same as `UpThrough(0)`**. - Start, - /// The very end of the partition. We won't necessarily know the offset number, - /// but we can use it to avoid reading from the beginning. - End, - /// The specific offset we've read. `UpThrough(0)` would mean we've already - /// consumed the message with offset=0. - UpThrough(i64), -} - -impl Offset { - /// Returns a new `Offset` representing the next offset to start consuming. - /// - /// When we're re-subscribing to a partition, we want to avoid re-reading the - /// message we last consumed. - pub fn next(&self) -> Self { - match self { - Self::Start => Self::Start, - Self::End => Self::End, - Self::UpThrough(n) => Self::UpThrough(*n + 1), - } - } -} - -impl PartialOrd for Offset { - fn partial_cmp(&self, other: &Self) -> Option { - use std::cmp::Ordering; - - match (self, other) { - (Offset::Start, Offset::Start) => Some(Ordering::Equal), - (Offset::Start, Offset::End) => Some(Ordering::Less), - (Offset::Start, Offset::UpThrough(-1)) => Some(Ordering::Equal), - (Offset::Start, Offset::UpThrough(_)) => Some(Ordering::Less), - (Offset::End, Offset::Start) => Some(Ordering::Greater), - (Offset::End, Offset::End) => Some(Ordering::Equal), - (Offset::End, Offset::UpThrough(_)) => Some(Ordering::Greater), - (Offset::UpThrough(-1), Offset::Start) => Some(Ordering::Equal), - (Offset::UpThrough(_), Offset::Start) => Some(Ordering::Greater), - (Offset::UpThrough(_), Offset::End) => Some(Ordering::Less), - (Offset::UpThrough(l), Offset::UpThrough(r)) => Some(l.cmp(r)), - } - } -} - -impl From for rdkafka::Offset { - fn from(orig: Offset) -> Self { - match orig { - Offset::Start => rdkafka::Offset::Beginning, - Offset::UpThrough(n) => rdkafka::Offset::Offset(n), - Offset::End => rdkafka::Offset::End, - } - } -} - -impl<'m> From<&BorrowedMessage<'m>> for Offset { - /// When constructing an `Offset` from a Kafka Message directly, we know it's - /// referencing a offset-value from the start of the partition and that it's - /// been consumed. 
- fn from(msg: &BorrowedMessage) -> Self { - use rdkafka::Message; - - Offset::UpThrough(msg.offset()) - } -} - -#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -pub struct Checkpoint { - pub topic: String, - pub partition: i32, - pub offset: Offset, -} - -impl Checkpoint { - pub fn new>(topic: &str, partition: i32, offset: O) -> Self { - Self { - topic: topic.to_owned(), - partition, - offset: offset.into(), - } - } -} - -#[derive(Clone, Debug, Default, Deserialize, PartialEq, Serialize)] -pub struct CheckpointSet(pub BTreeMap>); - -impl CheckpointSet { - pub fn add(&mut self, new_checkpoint: Checkpoint) { - self.0 - .entry(new_checkpoint.topic) - .or_insert_with(Default::default) - .insert(new_checkpoint.partition, new_checkpoint.offset); - } - - /// Finds the union of the topics in: - /// 1. The connected Kafka Cluster - /// 2. The ConfiguredCatalogSpec - /// 3. The StateFile - pub fn reconcile_catalog_state( - metadata: &Metadata, - capture: &CaptureSpec, - range: Option<&RangeSpec>, - loaded_state: &CheckpointSet, - ) -> Result { - let mut reconciled = CheckpointSet::default(); - - for binding in capture.bindings.iter() { - let res: Resource = serde_json::from_str(&binding.resource_config_json)?; - if let Some(topic) = kafka::find_topic(metadata, &res.stream) { - for partition in topic.partitions() { - if let Some(range) = range { - if responsible_for_shard(range, kafka::build_shard_key(topic, partition)) { - info!("Responsible for {}/{}: YES", topic.name(), partition.id()); - - let offset = loaded_state - .offset_for(topic.name(), partition.id()) - .unwrap_or(Offset::Start); - - reconciled.add(Checkpoint::new(topic.name(), partition.id(), offset)); - } else { - info!("Responsible for {}/{}: NO", topic.name(), partition.id()); - } - } else { - let offset = loaded_state - .offset_for(topic.name(), partition.id()) - .unwrap_or(Offset::Start); - - reconciled.add(Checkpoint::new(topic.name(), partition.id(), offset)); - } - } - } else { - return Err(catalog::Error::MissingStream( - res.stream.clone(), - )); - } - } - - Ok(reconciled) - } - - pub fn offset_for(&self, topic: &str, partition_id: i32) -> Option { - self.0 - .get(topic) - .and_then(|partitions| partitions.get(&partition_id).map(Clone::clone)) - } - - pub fn iter(&self) -> impl Iterator + '_ { - self.0.iter().flat_map(|(topic, partitions)| { - partitions - .iter() - .map(move |(partition, offset)| Checkpoint::new(topic, *partition, *offset)) - }) - } -} - -impl connector::ConnectorConfig for CheckpointSet { - type Error = Error; - - fn parse(reader: &str) -> Result { - let checkpoints = serde_json::from_str(reader)?; - - Ok(checkpoints) - } -} - -#[cfg(test)] -mod test { - use super::*; - use connector::ConnectorConfig; - - #[test] - fn merge_test() { - let check_offsets = |checkpoints: &CheckpointSet, foo0, foo1, bar0| { - assert_eq!(checkpoints.0["foo"][&0], foo0); - assert_eq!(checkpoints.0["foo"][&1], foo1); - assert_eq!(checkpoints.0["bar"][&0], bar0); - }; - - let update_offset = |checkpoint, new_offset| { - Checkpoint { - offset: new_offset, - ..checkpoint - } - }; - - let mut checkpoints = CheckpointSet::default(); - - let foo0 = Checkpoint::new("foo", 0, Offset::Start); - let foo1 = Checkpoint::new("foo", 1, Offset::UpThrough(10)); - let bar0 = Checkpoint::new("bar", 0, Offset::Start); - - checkpoints.add(foo0.clone()); - checkpoints.add(foo1.clone()); - checkpoints.add(bar0); - - check_offsets( - &checkpoints, - Offset::Start, - Offset::UpThrough(10), - Offset::Start, - ); - - let foo0 = update_offset(foo0, 
Offset::UpThrough(0)); - checkpoints.add(foo0.clone()); - check_offsets( - &checkpoints, - Offset::UpThrough(0), - Offset::UpThrough(10), - Offset::Start, - ); - - let foo0 = update_offset(foo0, Offset::UpThrough(1)); - checkpoints.add(foo0); - check_offsets( - &checkpoints, - Offset::UpThrough(1), - Offset::UpThrough(10), - Offset::Start, - ); - - let foo1 = update_offset(foo1, Offset::UpThrough(11)); - checkpoints.add(foo1); - check_offsets( - &checkpoints, - Offset::UpThrough(1), - Offset::UpThrough(11), - Offset::Start, - ); - } - - #[test] - fn parse_state_file_test() { - let input = r#" - { - "test": { - "0": { "UpThrough": 100 } - } - } - "#; - - CheckpointSet::parse(input).expect("to parse"); - } - - #[test] - fn parse_empty_state_file_test() { - let input = "{}\n"; - - CheckpointSet::parse(input).expect("to parse"); - } -} diff --git a/source-kafka/tests/README.md b/source-kafka/tests/README.md deleted file mode 100644 index d44b0486b9..0000000000 --- a/source-kafka/tests/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Tests - -## Setup - -The integration tests expect to be able to connect to a kafka cluster accessible via `localhost:9093`. This is specified in `tests/test-config.json`. diff --git a/source-kafka/tests/acmeCo/flow.yaml b/source-kafka/tests/acmeCo/flow.yaml new file mode 100644 index 0000000000..508c6125ab --- /dev/null +++ b/source-kafka/tests/acmeCo/flow.yaml @@ -0,0 +1,77 @@ +--- +collections: + acmeCo/avro-topic: + schema: + properties: + _meta: + properties: + offset: + description: The offset of the message within the partition + type: integer + partition: + description: The partition the message was read from + type: integer + topic: + description: The topic the message was read from + type: string + required: + - partition + - offset + type: object + required: + - _meta + type: object + x-infer-schema: true + key: + - /_meta/partition + - /_meta/offset + acmeCo/json-schema-topic: + schema: + properties: + _meta: + properties: + offset: + description: The offset of the message within the partition + type: integer + partition: + description: The partition the message was read from + type: integer + topic: + description: The topic the message was read from + type: string + required: + - partition + - offset + type: object + required: + - _meta + type: object + x-infer-schema: true + key: + - /_meta/partition + - /_meta/offset + acmeCo/json-raw-topic: + schema: + properties: + _meta: + properties: + offset: + description: The offset of the message within the partition + type: integer + partition: + description: The partition the message was read from + type: integer + topic: + description: The topic the message was read from + type: string + required: + - partition + - offset + type: object + required: + - _meta + type: object + x-infer-schema: true + key: + - /_meta/partition + - /_meta/offset diff --git a/source-kafka/tests/fixtures/todo-list.json b/source-kafka/tests/fixtures/todo-list.json deleted file mode 100644 index 799b8322d2..0000000000 --- a/source-kafka/tests/fixtures/todo-list.json +++ /dev/null @@ -1,1202 +0,0 @@ -[ - { - "userId": 1, - "id": 1, - "title": "delectus aut autem", - "completed": false - }, - { - "userId": 1, - "id": 2, - "title": "quis ut nam facilis et officia qui", - "completed": false - }, - { - "userId": 1, - "id": 3, - "title": "fugiat veniam minus", - "completed": false - }, - { - "userId": 1, - "id": 4, - "title": "et porro tempora", - "completed": true - }, - { - "userId": 1, - "id": 5, - "title": "laboriosam mollitia et enim quasi 
adipisci quia provident illum", - "completed": false - }, - { - "userId": 1, - "id": 6, - "title": "qui ullam ratione quibusdam voluptatem quia omnis", - "completed": false - }, - { - "userId": 1, - "id": 7, - "title": "illo expedita consequatur quia in", - "completed": false - }, - { - "userId": 1, - "id": 8, - "title": "quo adipisci enim quam ut ab", - "completed": true - }, - { - "userId": 1, - "id": 9, - "title": "molestiae perspiciatis ipsa", - "completed": false - }, - { - "userId": 1, - "id": 10, - "title": "illo est ratione doloremque quia maiores aut", - "completed": true - }, - { - "userId": 1, - "id": 11, - "title": "vero rerum temporibus dolor", - "completed": true - }, - { - "userId": 1, - "id": 12, - "title": "ipsa repellendus fugit nisi", - "completed": true - }, - { - "userId": 1, - "id": 13, - "title": "et doloremque nulla", - "completed": false - }, - { - "userId": 1, - "id": 14, - "title": "repellendus sunt dolores architecto voluptatum", - "completed": true - }, - { - "userId": 1, - "id": 15, - "title": "ab voluptatum amet voluptas", - "completed": true - }, - { - "userId": 1, - "id": 16, - "title": "accusamus eos facilis sint et aut voluptatem", - "completed": true - }, - { - "userId": 1, - "id": 17, - "title": "quo laboriosam deleniti aut qui", - "completed": true - }, - { - "userId": 1, - "id": 18, - "title": "dolorum est consequatur ea mollitia in culpa", - "completed": false - }, - { - "userId": 1, - "id": 19, - "title": "molestiae ipsa aut voluptatibus pariatur dolor nihil", - "completed": true - }, - { - "userId": 1, - "id": 20, - "title": "ullam nobis libero sapiente ad optio sint", - "completed": true - }, - { - "userId": 2, - "id": 21, - "title": "suscipit repellat esse quibusdam voluptatem incidunt", - "completed": false - }, - { - "userId": 2, - "id": 22, - "title": "distinctio vitae autem nihil ut molestias quo", - "completed": true - }, - { - "userId": 2, - "id": 23, - "title": "et itaque necessitatibus maxime molestiae qui quas velit", - "completed": false - }, - { - "userId": 2, - "id": 24, - "title": "adipisci non ad dicta qui amet quaerat doloribus ea", - "completed": false - }, - { - "userId": 2, - "id": 25, - "title": "voluptas quo tenetur perspiciatis explicabo natus", - "completed": true - }, - { - "userId": 2, - "id": 26, - "title": "aliquam aut quasi", - "completed": true - }, - { - "userId": 2, - "id": 27, - "title": "veritatis pariatur delectus", - "completed": true - }, - { - "userId": 2, - "id": 28, - "title": "nesciunt totam sit blanditiis sit", - "completed": false - }, - { - "userId": 2, - "id": 29, - "title": "laborum aut in quam", - "completed": false - }, - { - "userId": 2, - "id": 30, - "title": "nemo perspiciatis repellat ut dolor libero commodi blanditiis omnis", - "completed": true - }, - { - "userId": 2, - "id": 31, - "title": "repudiandae totam in est sint facere fuga", - "completed": false - }, - { - "userId": 2, - "id": 32, - "title": "earum doloribus ea doloremque quis", - "completed": false - }, - { - "userId": 2, - "id": 33, - "title": "sint sit aut vero", - "completed": false - }, - { - "userId": 2, - "id": 34, - "title": "porro aut necessitatibus eaque distinctio", - "completed": false - }, - { - "userId": 2, - "id": 35, - "title": "repellendus veritatis molestias dicta incidunt", - "completed": true - }, - { - "userId": 2, - "id": 36, - "title": "excepturi deleniti adipisci voluptatem et neque optio illum ad", - "completed": true - }, - { - "userId": 2, - "id": 37, - "title": "sunt cum tempora", - "completed": false - }, - 
{ - "userId": 2, - "id": 38, - "title": "totam quia non", - "completed": false - }, - { - "userId": 2, - "id": 39, - "title": "doloremque quibusdam asperiores libero corrupti illum qui omnis", - "completed": false - }, - { - "userId": 2, - "id": 40, - "title": "totam atque quo nesciunt", - "completed": true - }, - { - "userId": 3, - "id": 41, - "title": "aliquid amet impedit consequatur aspernatur placeat eaque fugiat suscipit", - "completed": false - }, - { - "userId": 3, - "id": 42, - "title": "rerum perferendis error quia ut eveniet", - "completed": false - }, - { - "userId": 3, - "id": 43, - "title": "tempore ut sint quis recusandae", - "completed": true - }, - { - "userId": 3, - "id": 44, - "title": "cum debitis quis accusamus doloremque ipsa natus sapiente omnis", - "completed": true - }, - { - "userId": 3, - "id": 45, - "title": "velit soluta adipisci molestias reiciendis harum", - "completed": false - }, - { - "userId": 3, - "id": 46, - "title": "vel voluptatem repellat nihil placeat corporis", - "completed": false - }, - { - "userId": 3, - "id": 47, - "title": "nam qui rerum fugiat accusamus", - "completed": false - }, - { - "userId": 3, - "id": 48, - "title": "sit reprehenderit omnis quia", - "completed": false - }, - { - "userId": 3, - "id": 49, - "title": "ut necessitatibus aut maiores debitis officia blanditiis velit et", - "completed": false - }, - { - "userId": 3, - "id": 50, - "title": "cupiditate necessitatibus ullam aut quis dolor voluptate", - "completed": true - }, - { - "userId": 3, - "id": 51, - "title": "distinctio exercitationem ab doloribus", - "completed": false - }, - { - "userId": 3, - "id": 52, - "title": "nesciunt dolorum quis recusandae ad pariatur ratione", - "completed": false - }, - { - "userId": 3, - "id": 53, - "title": "qui labore est occaecati recusandae aliquid quam", - "completed": false - }, - { - "userId": 3, - "id": 54, - "title": "quis et est ut voluptate quam dolor", - "completed": true - }, - { - "userId": 3, - "id": 55, - "title": "voluptatum omnis minima qui occaecati provident nulla voluptatem ratione", - "completed": true - }, - { - "userId": 3, - "id": 56, - "title": "deleniti ea temporibus enim", - "completed": true - }, - { - "userId": 3, - "id": 57, - "title": "pariatur et magnam ea doloribus similique voluptatem rerum quia", - "completed": false - }, - { - "userId": 3, - "id": 58, - "title": "est dicta totam qui explicabo doloribus qui dignissimos", - "completed": false - }, - { - "userId": 3, - "id": 59, - "title": "perspiciatis velit id laborum placeat iusto et aliquam odio", - "completed": false - }, - { - "userId": 3, - "id": 60, - "title": "et sequi qui architecto ut adipisci", - "completed": true - }, - { - "userId": 4, - "id": 61, - "title": "odit optio omnis qui sunt", - "completed": true - }, - { - "userId": 4, - "id": 62, - "title": "et placeat et tempore aspernatur sint numquam", - "completed": false - }, - { - "userId": 4, - "id": 63, - "title": "doloremque aut dolores quidem fuga qui nulla", - "completed": true - }, - { - "userId": 4, - "id": 64, - "title": "voluptas consequatur qui ut quia magnam nemo esse", - "completed": false - }, - { - "userId": 4, - "id": 65, - "title": "fugiat pariatur ratione ut asperiores necessitatibus magni", - "completed": false - }, - { - "userId": 4, - "id": 66, - "title": "rerum eum molestias autem voluptatum sit optio", - "completed": false - }, - { - "userId": 4, - "id": 67, - "title": "quia voluptatibus voluptatem quos similique maiores repellat", - "completed": false - }, - { - "userId": 
4, - "id": 68, - "title": "aut id perspiciatis voluptatem iusto", - "completed": false - }, - { - "userId": 4, - "id": 69, - "title": "doloribus sint dolorum ab adipisci itaque dignissimos aliquam suscipit", - "completed": false - }, - { - "userId": 4, - "id": 70, - "title": "ut sequi accusantium et mollitia delectus sunt", - "completed": false - }, - { - "userId": 4, - "id": 71, - "title": "aut velit saepe ullam", - "completed": false - }, - { - "userId": 4, - "id": 72, - "title": "praesentium facilis facere quis harum voluptatibus voluptatem eum", - "completed": false - }, - { - "userId": 4, - "id": 73, - "title": "sint amet quia totam corporis qui exercitationem commodi", - "completed": true - }, - { - "userId": 4, - "id": 74, - "title": "expedita tempore nobis eveniet laborum maiores", - "completed": false - }, - { - "userId": 4, - "id": 75, - "title": "occaecati adipisci est possimus totam", - "completed": false - }, - { - "userId": 4, - "id": 76, - "title": "sequi dolorem sed", - "completed": true - }, - { - "userId": 4, - "id": 77, - "title": "maiores aut nesciunt delectus exercitationem vel assumenda eligendi at", - "completed": false - }, - { - "userId": 4, - "id": 78, - "title": "reiciendis est magnam amet nemo iste recusandae impedit quaerat", - "completed": false - }, - { - "userId": 4, - "id": 79, - "title": "eum ipsa maxime ut", - "completed": true - }, - { - "userId": 4, - "id": 80, - "title": "tempore molestias dolores rerum sequi voluptates ipsum consequatur", - "completed": true - }, - { - "userId": 5, - "id": 81, - "title": "suscipit qui totam", - "completed": true - }, - { - "userId": 5, - "id": 82, - "title": "voluptates eum voluptas et dicta", - "completed": false - }, - { - "userId": 5, - "id": 83, - "title": "quidem at rerum quis ex aut sit quam", - "completed": true - }, - { - "userId": 5, - "id": 84, - "title": "sunt veritatis ut voluptate", - "completed": false - }, - { - "userId": 5, - "id": 85, - "title": "et quia ad iste a", - "completed": true - }, - { - "userId": 5, - "id": 86, - "title": "incidunt ut saepe autem", - "completed": true - }, - { - "userId": 5, - "id": 87, - "title": "laudantium quae eligendi consequatur quia et vero autem", - "completed": true - }, - { - "userId": 5, - "id": 88, - "title": "vitae aut excepturi laboriosam sint aliquam et et accusantium", - "completed": false - }, - { - "userId": 5, - "id": 89, - "title": "sequi ut omnis et", - "completed": true - }, - { - "userId": 5, - "id": 90, - "title": "molestiae nisi accusantium tenetur dolorem et", - "completed": true - }, - { - "userId": 5, - "id": 91, - "title": "nulla quis consequatur saepe qui id expedita", - "completed": true - }, - { - "userId": 5, - "id": 92, - "title": "in omnis laboriosam", - "completed": true - }, - { - "userId": 5, - "id": 93, - "title": "odio iure consequatur molestiae quibusdam necessitatibus quia sint", - "completed": true - }, - { - "userId": 5, - "id": 94, - "title": "facilis modi saepe mollitia", - "completed": false - }, - { - "userId": 5, - "id": 95, - "title": "vel nihil et molestiae iusto assumenda nemo quo ut", - "completed": true - }, - { - "userId": 5, - "id": 96, - "title": "nobis suscipit ducimus enim asperiores voluptas", - "completed": false - }, - { - "userId": 5, - "id": 97, - "title": "dolorum laboriosam eos qui iure aliquam", - "completed": false - }, - { - "userId": 5, - "id": 98, - "title": "debitis accusantium ut quo facilis nihil quis sapiente necessitatibus", - "completed": true - }, - { - "userId": 5, - "id": 99, - "title": "neque 
voluptates ratione", - "completed": false - }, - { - "userId": 5, - "id": 100, - "title": "excepturi a et neque qui expedita vel voluptate", - "completed": false - }, - { - "userId": 6, - "id": 101, - "title": "explicabo enim cumque porro aperiam occaecati minima", - "completed": false - }, - { - "userId": 6, - "id": 102, - "title": "sed ab consequatur", - "completed": false - }, - { - "userId": 6, - "id": 103, - "title": "non sunt delectus illo nulla tenetur enim omnis", - "completed": false - }, - { - "userId": 6, - "id": 104, - "title": "excepturi non laudantium quo", - "completed": false - }, - { - "userId": 6, - "id": 105, - "title": "totam quia dolorem et illum repellat voluptas optio", - "completed": true - }, - { - "userId": 6, - "id": 106, - "title": "ad illo quis voluptatem temporibus", - "completed": true - }, - { - "userId": 6, - "id": 107, - "title": "praesentium facilis omnis laudantium fugit ad iusto nihil nesciunt", - "completed": false - }, - { - "userId": 6, - "id": 108, - "title": "a eos eaque nihil et exercitationem incidunt delectus", - "completed": true - }, - { - "userId": 6, - "id": 109, - "title": "autem temporibus harum quisquam in culpa", - "completed": true - }, - { - "userId": 6, - "id": 110, - "title": "aut aut ea corporis", - "completed": true - }, - { - "userId": 6, - "id": 111, - "title": "magni accusantium labore et id quis provident", - "completed": false - }, - { - "userId": 6, - "id": 112, - "title": "consectetur impedit quisquam qui deserunt non rerum consequuntur eius", - "completed": false - }, - { - "userId": 6, - "id": 113, - "title": "quia atque aliquam sunt impedit voluptatum rerum assumenda nisi", - "completed": false - }, - { - "userId": 6, - "id": 114, - "title": "cupiditate quos possimus corporis quisquam exercitationem beatae", - "completed": false - }, - { - "userId": 6, - "id": 115, - "title": "sed et ea eum", - "completed": false - }, - { - "userId": 6, - "id": 116, - "title": "ipsa dolores vel facilis ut", - "completed": true - }, - { - "userId": 6, - "id": 117, - "title": "sequi quae est et qui qui eveniet asperiores", - "completed": false - }, - { - "userId": 6, - "id": 118, - "title": "quia modi consequatur vero fugiat", - "completed": false - }, - { - "userId": 6, - "id": 119, - "title": "corporis ducimus ea perspiciatis iste", - "completed": false - }, - { - "userId": 6, - "id": 120, - "title": "dolorem laboriosam vel voluptas et aliquam quasi", - "completed": false - }, - { - "userId": 7, - "id": 121, - "title": "inventore aut nihil minima laudantium hic qui omnis", - "completed": true - }, - { - "userId": 7, - "id": 122, - "title": "provident aut nobis culpa", - "completed": true - }, - { - "userId": 7, - "id": 123, - "title": "esse et quis iste est earum aut impedit", - "completed": false - }, - { - "userId": 7, - "id": 124, - "title": "qui consectetur id", - "completed": false - }, - { - "userId": 7, - "id": 125, - "title": "aut quasi autem iste tempore illum possimus", - "completed": false - }, - { - "userId": 7, - "id": 126, - "title": "ut asperiores perspiciatis veniam ipsum rerum saepe", - "completed": true - }, - { - "userId": 7, - "id": 127, - "title": "voluptatem libero consectetur rerum ut", - "completed": true - }, - { - "userId": 7, - "id": 128, - "title": "eius omnis est qui voluptatem autem", - "completed": false - }, - { - "userId": 7, - "id": 129, - "title": "rerum culpa quis harum", - "completed": false - }, - { - "userId": 7, - "id": 130, - "title": "nulla aliquid eveniet harum laborum libero alias ut unde", - 
"completed": true - }, - { - "userId": 7, - "id": 131, - "title": "qui ea incidunt quis", - "completed": false - }, - { - "userId": 7, - "id": 132, - "title": "qui molestiae voluptatibus velit iure harum quisquam", - "completed": true - }, - { - "userId": 7, - "id": 133, - "title": "et labore eos enim rerum consequatur sunt", - "completed": true - }, - { - "userId": 7, - "id": 134, - "title": "molestiae doloribus et laborum quod ea", - "completed": false - }, - { - "userId": 7, - "id": 135, - "title": "facere ipsa nam eum voluptates reiciendis vero qui", - "completed": false - }, - { - "userId": 7, - "id": 136, - "title": "asperiores illo tempora fuga sed ut quasi adipisci", - "completed": false - }, - { - "userId": 7, - "id": 137, - "title": "qui sit non", - "completed": false - }, - { - "userId": 7, - "id": 138, - "title": "placeat minima consequatur rem qui ut", - "completed": true - }, - { - "userId": 7, - "id": 139, - "title": "consequatur doloribus id possimus voluptas a voluptatem", - "completed": false - }, - { - "userId": 7, - "id": 140, - "title": "aut consectetur in blanditiis deserunt quia sed laboriosam", - "completed": true - }, - { - "userId": 8, - "id": 141, - "title": "explicabo consectetur debitis voluptates quas quae culpa rerum non", - "completed": true - }, - { - "userId": 8, - "id": 142, - "title": "maiores accusantium architecto necessitatibus reiciendis ea aut", - "completed": true - }, - { - "userId": 8, - "id": 143, - "title": "eum non recusandae cupiditate animi", - "completed": false - }, - { - "userId": 8, - "id": 144, - "title": "ut eum exercitationem sint", - "completed": false - }, - { - "userId": 8, - "id": 145, - "title": "beatae qui ullam incidunt voluptatem non nisi aliquam", - "completed": false - }, - { - "userId": 8, - "id": 146, - "title": "molestiae suscipit ratione nihil odio libero impedit vero totam", - "completed": true - }, - { - "userId": 8, - "id": 147, - "title": "eum itaque quod reprehenderit et facilis dolor autem ut", - "completed": true - }, - { - "userId": 8, - "id": 148, - "title": "esse quas et quo quasi exercitationem", - "completed": false - }, - { - "userId": 8, - "id": 149, - "title": "animi voluptas quod perferendis est", - "completed": false - }, - { - "userId": 8, - "id": 150, - "title": "eos amet tempore laudantium fugit a", - "completed": false - }, - { - "userId": 8, - "id": 151, - "title": "accusamus adipisci dicta qui quo ea explicabo sed vero", - "completed": true - }, - { - "userId": 8, - "id": 152, - "title": "odit eligendi recusandae doloremque cumque non", - "completed": false - }, - { - "userId": 8, - "id": 153, - "title": "ea aperiam consequatur qui repellat eos", - "completed": false - }, - { - "userId": 8, - "id": 154, - "title": "rerum non ex sapiente", - "completed": true - }, - { - "userId": 8, - "id": 155, - "title": "voluptatem nobis consequatur et assumenda magnam", - "completed": true - }, - { - "userId": 8, - "id": 156, - "title": "nam quia quia nulla repellat assumenda quibusdam sit nobis", - "completed": true - }, - { - "userId": 8, - "id": 157, - "title": "dolorem veniam quisquam deserunt repellendus", - "completed": true - }, - { - "userId": 8, - "id": 158, - "title": "debitis vitae delectus et harum accusamus aut deleniti a", - "completed": true - }, - { - "userId": 8, - "id": 159, - "title": "debitis adipisci quibusdam aliquam sed dolore ea praesentium nobis", - "completed": true - }, - { - "userId": 8, - "id": 160, - "title": "et praesentium aliquam est", - "completed": false - }, - { - "userId": 9, 
- "id": 161, - "title": "ex hic consequuntur earum omnis alias ut occaecati culpa", - "completed": true - }, - { - "userId": 9, - "id": 162, - "title": "omnis laboriosam molestias animi sunt dolore", - "completed": true - }, - { - "userId": 9, - "id": 163, - "title": "natus corrupti maxime laudantium et voluptatem laboriosam odit", - "completed": false - }, - { - "userId": 9, - "id": 164, - "title": "reprehenderit quos aut aut consequatur est sed", - "completed": false - }, - { - "userId": 9, - "id": 165, - "title": "fugiat perferendis sed aut quidem", - "completed": false - }, - { - "userId": 9, - "id": 166, - "title": "quos quo possimus suscipit minima ut", - "completed": false - }, - { - "userId": 9, - "id": 167, - "title": "et quis minus quo a asperiores molestiae", - "completed": false - }, - { - "userId": 9, - "id": 168, - "title": "recusandae quia qui sunt libero", - "completed": false - }, - { - "userId": 9, - "id": 169, - "title": "ea odio perferendis officiis", - "completed": true - }, - { - "userId": 9, - "id": 170, - "title": "quisquam aliquam quia doloribus aut", - "completed": false - }, - { - "userId": 9, - "id": 171, - "title": "fugiat aut voluptatibus corrupti deleniti velit iste odio", - "completed": true - }, - { - "userId": 9, - "id": 172, - "title": "et provident amet rerum consectetur et voluptatum", - "completed": false - }, - { - "userId": 9, - "id": 173, - "title": "harum ad aperiam quis", - "completed": false - }, - { - "userId": 9, - "id": 174, - "title": "similique aut quo", - "completed": false - }, - { - "userId": 9, - "id": 175, - "title": "laudantium eius officia perferendis provident perspiciatis asperiores", - "completed": true - }, - { - "userId": 9, - "id": 176, - "title": "magni soluta corrupti ut maiores rem quidem", - "completed": false - }, - { - "userId": 9, - "id": 177, - "title": "et placeat temporibus voluptas est tempora quos quibusdam", - "completed": false - }, - { - "userId": 9, - "id": 178, - "title": "nesciunt itaque commodi tempore", - "completed": true - }, - { - "userId": 9, - "id": 179, - "title": "omnis consequuntur cupiditate impedit itaque ipsam quo", - "completed": true - }, - { - "userId": 9, - "id": 180, - "title": "debitis nisi et dolorem repellat et", - "completed": true - }, - { - "userId": 10, - "id": 181, - "title": "ut cupiditate sequi aliquam fuga maiores", - "completed": false - }, - { - "userId": 10, - "id": 182, - "title": "inventore saepe cumque et aut illum enim", - "completed": true - }, - { - "userId": 10, - "id": 183, - "title": "omnis nulla eum aliquam distinctio", - "completed": true - }, - { - "userId": 10, - "id": 184, - "title": "molestias modi perferendis perspiciatis", - "completed": false - }, - { - "userId": 10, - "id": 185, - "title": "voluptates dignissimos sed doloribus animi quaerat aut", - "completed": false - }, - { - "userId": 10, - "id": 186, - "title": "explicabo odio est et", - "completed": false - }, - { - "userId": 10, - "id": 187, - "title": "consequuntur animi possimus", - "completed": false - }, - { - "userId": 10, - "id": 188, - "title": "vel non beatae est", - "completed": true - }, - { - "userId": 10, - "id": 189, - "title": "culpa eius et voluptatem et", - "completed": true - }, - { - "userId": 10, - "id": 190, - "title": "accusamus sint iusto et voluptatem exercitationem", - "completed": true - }, - { - "userId": 10, - "id": 191, - "title": "temporibus atque distinctio omnis eius impedit tempore molestias pariatur", - "completed": true - }, - { - "userId": 10, - "id": 192, - "title": "ut 
quas possimus exercitationem sint voluptates", - "completed": false - }, - { - "userId": 10, - "id": 193, - "title": "rerum debitis voluptatem qui eveniet tempora distinctio a", - "completed": true - }, - { - "userId": 10, - "id": 194, - "title": "sed ut vero sit molestiae", - "completed": false - }, - { - "userId": 10, - "id": 195, - "title": "rerum ex veniam mollitia voluptatibus pariatur", - "completed": true - }, - { - "userId": 10, - "id": 196, - "title": "consequuntur aut ut fugit similique", - "completed": true - }, - { - "userId": 10, - "id": 197, - "title": "dignissimos quo nobis earum saepe", - "completed": true - }, - { - "userId": 10, - "id": 198, - "title": "quis eius est sint explicabo", - "completed": true - }, - { - "userId": 10, - "id": 199, - "title": "numquam repellendus a magnam", - "completed": true - }, - { - "userId": 10, - "id": 200, - "title": "ipsam aperiam voluptates qui", - "completed": false - } -] \ No newline at end of file diff --git a/source-kafka/tests/it/main.rs b/source-kafka/tests/it/main.rs deleted file mode 100644 index 783a91fdec..0000000000 --- a/source-kafka/tests/it/main.rs +++ /dev/null @@ -1,142 +0,0 @@ -use std::fs::File; -use std::io::Read; - -use proto_flow::capture::request; -use proto_flow::flow::capture_spec; -use proto_flow::flow::CaptureSpec; -use proto_flow::flow::CollectionSpec; -use source_kafka::catalog; -use source_kafka::configuration; -use source_kafka::connector::Connector; -use source_kafka::connector::ConnectorConfig; -use source_kafka::state; -use support::assert_valid_json; -use support::mock_stdout; - -use crate::support::parse_from_output; -use crate::support::parse_messages_from_output; - -mod support; - -#[test] -fn spec_test() { - let mut stdout = mock_stdout(); - - source_kafka::KafkaConnector::spec(&mut stdout).expect("spec command to succeed"); - - insta::assert_yaml_snapshot!(parse_from_output(&stdout)); -} - -#[test] -fn check_test() { - let mut stdout = mock_stdout(); - let config = include_str!("../test-config.json"); - let req = request::Validate { - name: "test/source-kafka".to_string(), - connector_type: capture_spec::ConnectorType::Image as i32, - config_json: config.to_string(), - // The connector seems to ignore bindings during validation. - // That seems wrong, but not something I'm going to try to address at the moment. - bindings: Vec::new(), - last_capture: None, - last_version: String::new(), - }; - - source_kafka::KafkaConnector::validate(&mut stdout, req).expect("check command to succeed"); - - insta::assert_yaml_snapshot!(parse_from_output(&stdout), { - ".connectionStatus.message" => "{{ NUM_TOPICS_FOUND }}" - }); -} - -#[test] -fn discover_test() { - let mut stdout = mock_stdout(); - let config = include_str!("../test-config.json"); - let req = request::Discover { - connector_type: capture_spec::ConnectorType::Image as i32, - config_json: config.to_string(), - }; - - source_kafka::KafkaConnector::discover(&mut stdout, req).expect("discover command to succeed"); - - // This is tricky to snapshot. It detects any other topics within the - // connected Kafka, which will vary by dev machine. - assert_valid_json(&stdout); -} - -#[test] -fn read_simple_catalog_test() { - let mut stdout = mock_stdout(); - let config = local_config(); - let catalog = local_capture("todo-list"); - - source_kafka::KafkaConnector::read(&mut stdout, config, catalog, None, None) - .expect("read command to succeed"); - - let messages = parse_messages_from_output(&stdout); - // Only look at the last 10 messages. 
Otherwise the snapshot file gets unwieldy. - let last_ten = (messages.len() - 10)..; - insta::assert_yaml_snapshot!(&messages[last_ten], { - "[].record.emitted_at" => "{{ UNIX_TIMESTAMP }}" - }); -} - -#[test] -fn read_resume_from_state_test() { - let mut stdout = mock_stdout(); - let config = local_config(); - let catalog = local_capture("todo-list"); - - let mut state = state::CheckpointSet::default(); - state.add(state::Checkpoint::new( - "todo-list", - 0, - state::Offset::UpThrough(37), - )); - state.add(state::Checkpoint::new( - "todo-list", - 2, - state::Offset::UpThrough(57), - )); - - source_kafka::KafkaConnector::read(&mut stdout, config, catalog, None, Some(state)) - .expect("read command to succeed"); - - insta::assert_yaml_snapshot!(parse_messages_from_output(&stdout), { - "[].record.emitted_at" => "{{ UNIX_TIMESTAMP }}" - }); -} - -fn local_config() -> configuration::Configuration { - let mut file = File::open("tests/test-config.json").expect("to open test config file"); - let mut buf = String::new(); - file.read_to_string(&mut buf) - .expect("to read test config file"); - configuration::Configuration::parse(&buf).expect("to parse test config file") -} - -fn local_capture(binding_name: &str) -> CaptureSpec { - let config = include_str!("../test-config.json"); - CaptureSpec { - name: "capture".to_string(), - bindings: vec![capture_spec::Binding { - resource_config_json: serde_json::to_string(&catalog::Resource { - stream: binding_name.to_string(), - }) - .unwrap(), - resource_path: vec![binding_name.to_string()], - collection: Some(CollectionSpec { - ..Default::default() - }), - backfill: 0, - state_key: "".to_string(), - }], - connector_type: capture_spec::ConnectorType::Image as i32, - config_json: config.to_string(), - interval_seconds: 30, - shard_template: None, - recovery_log_template: None, - network_ports: Vec::new(), - } -} diff --git a/source-kafka/tests/it/snapshots/it__check_test.snap b/source-kafka/tests/it/snapshots/it__check_test.snap deleted file mode 100644 index 13d17ab5ff..0000000000 --- a/source-kafka/tests/it/snapshots/it__check_test.snap +++ /dev/null @@ -1,6 +0,0 @@ ---- -source: tests/it/main.rs -expression: parse_from_output(&stdout) ---- -validated: {} - diff --git a/source-kafka/tests/it/snapshots/it__read_resume_from_state_test.snap b/source-kafka/tests/it/snapshots/it__read_resume_from_state_test.snap deleted file mode 100644 index 5f68fa8423..0000000000 --- a/source-kafka/tests/it/snapshots/it__read_resume_from_state_test.snap +++ /dev/null @@ -1,70 +0,0 @@ ---- -source: tests/it/main.rs -expression: parse_messages_from_output(&stdout) - ---- -- record: - data: - completed: true - id: 159 - title: debitis adipisci quibusdam aliquam sed dolore ea praesentium nobis - userId: 8 - emitted_at: "{{ UNIX_TIMESTAMP }}" - namespace: Partition 0 - stream: todo-list - type: RECORD -- state: - data: - todo-list: - "0": - UpThrough: 38 - type: STATE -- record: - data: - completed: false - id: 160 - title: et praesentium aliquam est - userId: 8 - emitted_at: "{{ UNIX_TIMESTAMP }}" - namespace: Partition 0 - stream: todo-list - type: RECORD -- state: - data: - todo-list: - "0": - UpThrough: 39 - type: STATE -- record: - data: - completed: true - id: 199 - title: numquam repellendus a magnam - userId: 10 - emitted_at: "{{ UNIX_TIMESTAMP }}" - namespace: Partition 2 - stream: todo-list - type: RECORD -- state: - data: - todo-list: - "2": - UpThrough: 58 - type: STATE -- record: - data: - completed: false - id: 200 - title: ipsam aperiam voluptates qui - userId: 10 
- emitted_at: "{{ UNIX_TIMESTAMP }}" - namespace: Partition 2 - stream: todo-list - type: RECORD -- state: - data: - todo-list: - "2": - UpThrough: 59 - type: STATE - diff --git a/source-kafka/tests/it/snapshots/it__read_simple_catalog_test.snap b/source-kafka/tests/it/snapshots/it__read_simple_catalog_test.snap deleted file mode 100644 index ecd6d7f38c..0000000000 --- a/source-kafka/tests/it/snapshots/it__read_simple_catalog_test.snap +++ /dev/null @@ -1,86 +0,0 @@ ---- -source: tests/it/main.rs -expression: "&messages[last_ten]" - ---- -- record: - data: - completed: true - id: 156 - title: nam quia quia nulla repellat assumenda quibusdam sit nobis - userId: 8 - emitted_at: "{{ UNIX_TIMESTAMP }}" - namespace: Partition 0 - stream: todo-list - type: RECORD -- state: - data: - todo-list: - "0": - UpThrough: 35 - type: STATE -- record: - data: - completed: true - id: 157 - title: dolorem veniam quisquam deserunt repellendus - userId: 8 - emitted_at: "{{ UNIX_TIMESTAMP }}" - namespace: Partition 0 - stream: todo-list - type: RECORD -- state: - data: - todo-list: - "0": - UpThrough: 36 - type: STATE -- record: - data: - completed: true - id: 158 - title: debitis vitae delectus et harum accusamus aut deleniti a - userId: 8 - emitted_at: "{{ UNIX_TIMESTAMP }}" - namespace: Partition 0 - stream: todo-list - type: RECORD -- state: - data: - todo-list: - "0": - UpThrough: 37 - type: STATE -- record: - data: - completed: true - id: 159 - title: debitis adipisci quibusdam aliquam sed dolore ea praesentium nobis - userId: 8 - emitted_at: "{{ UNIX_TIMESTAMP }}" - namespace: Partition 0 - stream: todo-list - type: RECORD -- state: - data: - todo-list: - "0": - UpThrough: 38 - type: STATE -- record: - data: - completed: false - id: 160 - title: et praesentium aliquam est - userId: 8 - emitted_at: "{{ UNIX_TIMESTAMP }}" - namespace: Partition 0 - stream: todo-list - type: RECORD -- state: - data: - todo-list: - "0": - UpThrough: 39 - type: STATE - diff --git a/source-kafka/tests/it/snapshots/it__spec_test.snap b/source-kafka/tests/it/snapshots/it__spec_test.snap deleted file mode 100644 index ab787ecbaa..0000000000 --- a/source-kafka/tests/it/snapshots/it__spec_test.snap +++ /dev/null @@ -1,100 +0,0 @@ ---- -source: tests/it/main.rs -expression: parse_from_output(&stdout) ---- -spec: - configSchema: - $schema: "http://json-schema.org/draft-07/schema#" - properties: - bootstrap_servers: - description: "The initial servers in the Kafka cluster to initially connect to, separated by commas. The Kafka client will be informed of the rest of the cluster nodes by connecting to one of these nodes." - order: 0 - title: Bootstrap Servers - type: string - credentials: - description: "The connection details for authenticating a client connection to Kafka via SASL. When not provided, the client connection will attempt to use PLAINTEXT (insecure) protocol. This must only be used in dev/test environments." - discriminator: - propertyName: auth_type - oneOf: - - properties: - auth_type: - const: UserPassword - default: UserPassword - type: string - mechanism: - default: PLAIN - description: The SASL Mechanism describes how to exchange and authenticate clients/servers. 
- enum: - - PLAIN - - SCRAM-SHA-256 - - SCRAM-SHA-512 - order: 0 - title: SASL Mechanism - type: string - password: - order: 2 - secret: true - title: Password - type: string - username: - order: 1 - secret: true - title: Username - type: string - required: - - auth_type - - mechanism - - password - - username - title: SASL (User & Password) - - properties: - auth_type: - const: AWS - default: AWS - type: string - aws_access_key_id: - order: 0 - title: AWS Access Key ID - type: string - aws_secret_access_key: - order: 1 - secret: true - title: AWS Secret Access Key - type: string - region: - order: 2 - title: AWS Region - type: string - required: - - auth_type - - aws_access_key_id - - aws_secret_access_key - - region - title: AWS MSK IAM - order: 1 - title: Credentials - type: object - tls: - default: system_certificates - description: Controls how should TLS certificates be found or used. - enum: - - system_certificates - order: 2 - title: TLS Settings - type: string - required: - - bootstrap_servers - - credentials - title: Kafka Source Configuration - type: object - documentationUrl: "https://go.estuary.dev/source-kafka" - protocol: 3032023 - resourceConfigSchema: - properties: - stream: - type: string - x-collection-name: true - type: object - resourcePathPointers: - - /stream - diff --git a/source-kafka/tests/it/support.rs b/source-kafka/tests/it/support.rs deleted file mode 100644 index 6d2292547b..0000000000 --- a/source-kafka/tests/it/support.rs +++ /dev/null @@ -1,23 +0,0 @@ -use serde_json::Value; - -pub(crate) fn assert_valid_json(output: &[u8]) { - serde_json::from_reader::<_, Value>(output).expect("output to be valid json"); -} - -pub(crate) fn mock_stdout() -> Vec { - Vec::new() -} - -pub(crate) fn parse_from_output(stdout: &[u8]) -> Value { - let mut messages = parse_messages_from_output(stdout); - assert_eq!(messages.len(), 1); - messages.pop().unwrap() -} - -pub(crate) fn parse_messages_from_output(stdout: &[u8]) -> Vec { - std::str::from_utf8(stdout) - .unwrap() - .lines() - .map(|doc| serde_json::from_str(doc).unwrap()) - .collect() -} diff --git a/source-kafka/tests/snapshots/test__capture.snap b/source-kafka/tests/snapshots/test__capture.snap new file mode 100644 index 0000000000..a0a1091de2 --- /dev/null +++ b/source-kafka/tests/snapshots/test__capture.snap @@ -0,0 +1,44 @@ +--- +source: tests/test.rs +expression: snap +--- +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-0"},"offset":0,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-02T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":0,"nested":{"sub_id":0},"value":"value-0"}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-3"},"offset":1,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-05T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":3,"nested":{"sub_id":3},"value":"value-3"}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-6"},"offset":2,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-08T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":6,"nested":{"sub_id":6},"value":"value-6"}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-0"},"offset":3,"op":"d","partition":0,"timestamp":{"creationTime":"1970-01-02T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":0,"nested":{"sub_id":0}}] 
+["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-1"},"offset":0,"op":"u","partition":1,"timestamp":{"creationTime":"1970-01-03T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":1,"nested":{"sub_id":1},"value":"value-1"}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-4"},"offset":1,"op":"u","partition":1,"timestamp":{"creationTime":"1970-01-06T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":4,"nested":{"sub_id":4},"value":"value-4"}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-7"},"offset":2,"op":"u","partition":1,"timestamp":{"creationTime":"1970-01-09T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":7,"nested":{"sub_id":7},"value":"value-7"}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-1"},"offset":3,"op":"d","partition":1,"timestamp":{"creationTime":"1970-01-03T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":1,"nested":{"sub_id":1}}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-2"},"offset":0,"op":"u","partition":2,"timestamp":{"creationTime":"1970-01-04T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":2,"nested":{"sub_id":2},"value":"value-2"}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-5"},"offset":1,"op":"u","partition":2,"timestamp":{"creationTime":"1970-01-07T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":5,"nested":{"sub_id":5},"value":"value-5"}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-8"},"offset":2,"op":"u","partition":2,"timestamp":{"creationTime":"1970-01-10T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":8,"nested":{"sub_id":8},"value":"value-8"}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-2"},"offset":3,"op":"d","partition":2,"timestamp":{"creationTime":"1970-01-04T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":2,"nested":{"sub_id":2}}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-0"},"offset":0,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-02T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":0,"nested":{"sub_id":0},"value":"value-0"}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-3"},"offset":1,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-05T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":3,"nested":{"sub_id":3},"value":"value-3"}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-6"},"offset":2,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-08T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":6,"nested":{"sub_id":6},"value":"value-6"}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-0"},"offset":3,"op":"d","partition":0,"timestamp":{"creationTime":"1970-01-02T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":0,"nested":{"sub_id":0}}] 
+["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-1"},"offset":0,"op":"u","partition":1,"timestamp":{"creationTime":"1970-01-03T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":1,"nested":{"sub_id":1},"value":"value-1"}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-4"},"offset":1,"op":"u","partition":1,"timestamp":{"creationTime":"1970-01-06T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":4,"nested":{"sub_id":4},"value":"value-4"}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-7"},"offset":2,"op":"u","partition":1,"timestamp":{"creationTime":"1970-01-09T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":7,"nested":{"sub_id":7},"value":"value-7"}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-1"},"offset":3,"op":"d","partition":1,"timestamp":{"creationTime":"1970-01-03T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":1,"nested":{"sub_id":1}}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-2"},"offset":0,"op":"u","partition":2,"timestamp":{"creationTime":"1970-01-04T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":2,"nested":{"sub_id":2},"value":"value-2"}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-5"},"offset":1,"op":"u","partition":2,"timestamp":{"creationTime":"1970-01-07T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":5,"nested":{"sub_id":5},"value":"value-5"}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-8"},"offset":2,"op":"u","partition":2,"timestamp":{"creationTime":"1970-01-10T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":8,"nested":{"sub_id":8},"value":"value-8"}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-2"},"offset":3,"op":"d","partition":2,"timestamp":{"creationTime":"1970-01-04T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":2,"nested":{"sub_id":2}}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjB9","_meta":{"headers":{"header-key":"header-value-0"},"offset":0,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-02T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"payload":0}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjN9","_meta":{"headers":{"header-key":"header-value-3"},"offset":1,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-05T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"payload":3}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjZ9","_meta":{"headers":{"header-key":"header-value-6"},"offset":2,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-08T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"payload":6}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjB9","_meta":{"headers":{"header-key":"header-value-0"},"offset":3,"op":"d","partition":0,"timestamp":{"creationTime":"1970-01-02T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"}}] 
+["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjF9","_meta":{"headers":{"header-key":"header-value-1"},"offset":0,"op":"u","partition":1,"timestamp":{"creationTime":"1970-01-03T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"payload":1}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjR9","_meta":{"headers":{"header-key":"header-value-4"},"offset":1,"op":"u","partition":1,"timestamp":{"creationTime":"1970-01-06T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"payload":4}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjd9","_meta":{"headers":{"header-key":"header-value-7"},"offset":2,"op":"u","partition":1,"timestamp":{"creationTime":"1970-01-09T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"payload":7}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjF9","_meta":{"headers":{"header-key":"header-value-1"},"offset":3,"op":"d","partition":1,"timestamp":{"creationTime":"1970-01-03T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"}}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjJ9","_meta":{"headers":{"header-key":"header-value-2"},"offset":0,"op":"u","partition":2,"timestamp":{"creationTime":"1970-01-04T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"payload":2}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjV9","_meta":{"headers":{"header-key":"header-value-5"},"offset":1,"op":"u","partition":2,"timestamp":{"creationTime":"1970-01-07T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"payload":5}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjh9","_meta":{"headers":{"header-key":"header-value-8"},"offset":2,"op":"u","partition":2,"timestamp":{"creationTime":"1970-01-10T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"payload":8}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjJ9","_meta":{"headers":{"header-key":"header-value-2"},"offset":3,"op":"d","partition":2,"timestamp":{"creationTime":"1970-01-04T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"}}] +["connectorState",{"updated":{"bindingStateV1":{"json-schema-topic":{"partitions":{"2":0}}}},"mergePatch":true}] +["connectorState",{"updated":{"bindingStateV1":{"avro-topic":{"partitions":{"0":3,"1":3,"2":3}},"json-raw-topic":{"partitions":{"0":3,"1":3,"2":3}},"json-schema-topic":{"partitions":{"0":3,"1":3,"2":3}}}},"mergePatch":true}] +["connectorState",{}] +["connectorState",{"updated":{"bindingStateV1":{"avro-topic":{"partitions":{"0":3,"1":3,"2":3}},"json-raw-topic":{"partitions":{"0":3,"1":3,"2":3}},"json-schema-topic":{"partitions":{"0":3,"1":3,"2":3}}}}}] diff --git a/source-kafka/tests/snapshots/test__capture_resume.snap b/source-kafka/tests/snapshots/test__capture_resume.snap new file mode 100644 index 0000000000..10ccc24a99 --- /dev/null +++ b/source-kafka/tests/snapshots/test__capture_resume.snap @@ -0,0 +1,32 @@ +--- +source: tests/test.rs +expression: snap +--- +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-0"},"offset":0,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-02T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":0,"nested":{"sub_id":0},"value":"value-0"}] 
+["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-3"},"offset":1,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-05T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":3,"nested":{"sub_id":3},"value":"value-3"}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-6"},"offset":2,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-08T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":6,"nested":{"sub_id":6},"value":"value-6"}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-0"},"offset":3,"op":"d","partition":0,"timestamp":{"creationTime":"1970-01-02T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":0,"nested":{"sub_id":0}}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-7"},"offset":2,"op":"u","partition":1,"timestamp":{"creationTime":"1970-01-09T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":7,"nested":{"sub_id":7},"value":"value-7"}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-1"},"offset":3,"op":"d","partition":1,"timestamp":{"creationTime":"1970-01-03T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":1,"nested":{"sub_id":1}}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-8"},"offset":2,"op":"u","partition":2,"timestamp":{"creationTime":"1970-01-10T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":8,"nested":{"sub_id":8},"value":"value-8"}] +["acmeCo/avro-topic",{"_meta":{"headers":{"header-key":"header-value-2"},"offset":3,"op":"d","partition":2,"timestamp":{"creationTime":"1970-01-04T00:00:00Z"},"topic":"avro-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":2,"nested":{"sub_id":2}}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-0"},"offset":0,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-02T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":0,"nested":{"sub_id":0},"value":"value-0"}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-3"},"offset":1,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-05T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":3,"nested":{"sub_id":3},"value":"value-3"}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-6"},"offset":2,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-08T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":6,"nested":{"sub_id":6},"value":"value-6"}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-0"},"offset":3,"op":"d","partition":0,"timestamp":{"creationTime":"1970-01-02T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":0,"nested":{"sub_id":0}}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-7"},"offset":2,"op":"u","partition":1,"timestamp":{"creationTime":"1970-01-09T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":7,"nested":{"sub_id":7},"value":"value-7"}] 
+["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-1"},"offset":3,"op":"d","partition":1,"timestamp":{"creationTime":"1970-01-03T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":1,"nested":{"sub_id":1}}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-8"},"offset":2,"op":"u","partition":2,"timestamp":{"creationTime":"1970-01-10T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":8,"nested":{"sub_id":8},"value":"value-8"}] +["acmeCo/json-schema-topic",{"_meta":{"headers":{"header-key":"header-value-2"},"offset":3,"op":"d","partition":2,"timestamp":{"creationTime":"1970-01-04T00:00:00Z"},"topic":"json-schema-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"idx":2,"nested":{"sub_id":2}}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjB9","_meta":{"headers":{"header-key":"header-value-0"},"offset":0,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-02T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"payload":0}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjN9","_meta":{"headers":{"header-key":"header-value-3"},"offset":1,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-05T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"payload":3}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjZ9","_meta":{"headers":{"header-key":"header-value-6"},"offset":2,"op":"u","partition":0,"timestamp":{"creationTime":"1970-01-08T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"payload":6}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjB9","_meta":{"headers":{"header-key":"header-value-0"},"offset":3,"op":"d","partition":0,"timestamp":{"creationTime":"1970-01-02T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"}}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjd9","_meta":{"headers":{"header-key":"header-value-7"},"offset":2,"op":"u","partition":1,"timestamp":{"creationTime":"1970-01-09T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"payload":7}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjF9","_meta":{"headers":{"header-key":"header-value-1"},"offset":3,"op":"d","partition":1,"timestamp":{"creationTime":"1970-01-03T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"}}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjh9","_meta":{"headers":{"header-key":"header-value-8"},"offset":2,"op":"u","partition":2,"timestamp":{"creationTime":"1970-01-10T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"},"payload":8}] +["acmeCo/json-raw-topic",{"_key":"eyJrZXkiOjJ9","_meta":{"headers":{"header-key":"header-value-2"},"offset":3,"op":"d","partition":2,"timestamp":{"creationTime":"1970-01-04T00:00:00Z"},"topic":"json-raw-topic","uuid":"DocUUIDPlaceholder-329Bb50aa48EAa9ef"}}] +["connectorState",{"updated":{"bindingStateV1":{"avro-topic":{"partitions":{"1":2}}}},"mergePatch":true}] +["connectorState",{"updated":{"bindingStateV1":{"avro-topic":{"partitions":{"0":3,"1":3,"2":3}},"json-raw-topic":{"partitions":{"0":3,"1":3,"2":3}},"json-schema-topic":{"partitions":{"0":3,"1":3,"2":3}}}},"mergePatch":true}] +["connectorState",{}] +["connectorState",{"updated":{"bindingStateV1":{"avro-topic":{"partitions":{"0":3,"1":3,"2":3}},"json-raw-topic":{"partitions":{"0":3,"1":3,"2":3}},"json-schema-topic":{"partitions":{"0":3,"1":3,"2":3}}}}}] diff --git 
a/source-kafka/tests/snapshots/test__discover.snap b/source-kafka/tests/snapshots/test__discover.snap new file mode 100644 index 0000000000..6411eec723 --- /dev/null +++ b/source-kafka/tests/snapshots/test__discover.snap @@ -0,0 +1,247 @@ +--- +source: tests/test.rs +expression: snap +--- +{ + "documentSchema": { + "if": { + "properties": { + "_meta": { + "properties": { + "op": { + "const": "d" + } + } + } + } + }, + "properties": { + "_meta": { + "properties": { + "offset": { + "description": "The offset of the message within the partition", + "type": "integer" + }, + "op": { + "description": "Change operation type: 'c' Create/Insert, 'u' Update, 'd' Delete.", + "enum": [ + "c", + "u", + "d" + ] + }, + "partition": { + "description": "The partition the message was read from", + "type": "integer" + }, + "topic": { + "description": "The topic the message was read from", + "type": "string" + } + }, + "required": [ + "offset", + "op", + "partition", + "topic" + ], + "type": "object" + }, + "idx": { + "type": "integer" + }, + "nested": { + "properties": { + "sub_id": { + "type": "integer" + } + }, + "required": [ + "sub_id" + ], + "type": "object" + } + }, + "required": [ + "_meta", + "idx", + "nested" + ], + "then": { + "reduce": { + "delete": true, + "strategy": "merge" + } + }, + "type": "object", + "x-infer-schema": true + }, + "key": [ + "/idx", + "/nested/sub_id" + ], + "recommendedName": "avro-topic", + "resourceConfig": { + "topic": "avro-topic" + }, + "resourcePath": [ + "avro-topic" + ] +} +{ + "documentSchema": { + "if": { + "properties": { + "_meta": { + "properties": { + "op": { + "const": "d" + } + } + } + } + }, + "properties": { + "_meta": { + "properties": { + "offset": { + "description": "The offset of the message within the partition", + "type": "integer" + }, + "op": { + "description": "Change operation type: 'c' Create/Insert, 'u' Update, 'd' Delete.", + "enum": [ + "c", + "u", + "d" + ] + }, + "partition": { + "description": "The partition the message was read from", + "type": "integer" + }, + "topic": { + "description": "The topic the message was read from", + "type": "string" + } + }, + "required": [ + "offset", + "op", + "partition", + "topic" + ], + "type": "object" + } + }, + "required": [ + "_meta" + ], + "then": { + "reduce": { + "delete": true, + "strategy": "merge" + } + }, + "type": "object", + "x-infer-schema": true + }, + "key": [ + "/_meta/partition", + "/_meta/offset" + ], + "recommendedName": "json-raw-topic", + "resourceConfig": { + "topic": "json-raw-topic" + }, + "resourcePath": [ + "json-raw-topic" + ] +} +{ + "documentSchema": { + "if": { + "properties": { + "_meta": { + "properties": { + "op": { + "const": "d" + } + } + } + } + }, + "properties": { + "_meta": { + "properties": { + "offset": { + "description": "The offset of the message within the partition", + "type": "integer" + }, + "op": { + "description": "Change operation type: 'c' Create/Insert, 'u' Update, 'd' Delete.", + "enum": [ + "c", + "u", + "d" + ] + }, + "partition": { + "description": "The partition the message was read from", + "type": "integer" + }, + "topic": { + "description": "The topic the message was read from", + "type": "string" + } + }, + "required": [ + "offset", + "op", + "partition", + "topic" + ], + "type": "object" + }, + "idx": { + "type": "integer" + }, + "nested": { + "properties": { + "sub_id": { + "type": "integer" + } + }, + "required": [ + "sub_id" + ], + "title": "NestedJsonKeyRecord", + "type": "object" + } + }, + "required": [ + "_meta", + "idx", + "nested" + 
], + "then": { + "reduce": { + "delete": true, + "strategy": "merge" + } + }, + "type": "object", + "x-infer-schema": true + }, + "key": [ + "/idx", + "/nested/sub_id" + ], + "recommendedName": "json-schema-topic", + "resourceConfig": { + "topic": "json-schema-topic" + }, + "resourcePath": [ + "json-schema-topic" + ] +} diff --git a/source-kafka/tests/snapshots/test__spec.snap b/source-kafka/tests/snapshots/test__spec.snap new file mode 100644 index 0000000000..83e3558f9a --- /dev/null +++ b/source-kafka/tests/snapshots/test__spec.snap @@ -0,0 +1,204 @@ +--- +source: tests/test.rs +expression: "serde_json::to_string_pretty(&got).unwrap()" +--- +{ + "configSchema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "bootstrap_servers": { + "description": "The initial servers in the Kafka cluster to initially connect to, separated by commas. The Kafka client will be informed of the rest of the cluster nodes by connecting to one of these nodes.", + "order": 0, + "title": "Bootstrap Servers", + "type": "string" + }, + "credentials": { + "description": "The connection details for authenticating a client connection to Kafka via SASL. When not provided, the client connection will attempt to use PLAINTEXT (insecure) protocol. This must only be used in dev/test environments.", + "discriminator": { + "propertyName": "auth_type" + }, + "oneOf": [ + { + "properties": { + "auth_type": { + "const": "user_password", + "default": "user_password", + "order": 0, + "type": "string" + }, + "mechanism": { + "default": "PLAIN", + "description": "The SASL Mechanism describes how to exchange and authenticate clients/servers.", + "enum": [ + "PLAIN", + "SCRAM-SHA-256", + "SCRAM-SHA-512" + ], + "order": 1, + "title": "SASL Mechanism", + "type": "string" + }, + "password": { + "order": 3, + "secret": true, + "title": "Password", + "type": "string" + }, + "username": { + "order": 2, + "title": "Username", + "type": "string" + } + }, + "required": [ + "auth_type", + "mechanism", + "password", + "username" + ], + "title": "SASL (User & Password)" + }, + { + "properties": { + "auth_type": { + "const": "AWS", + "default": "AWS", + "order": 0, + "type": "string" + }, + "aws_access_key_id": { + "order": 1, + "title": "AWS Access Key ID", + "type": "string" + }, + "aws_secret_access_key": { + "order": 2, + "secret": true, + "title": "AWS Secret Access Key", + "type": "string" + }, + "region": { + "order": 3, + "title": "AWS Region", + "type": "string" + } + }, + "required": [ + "auth_type", + "aws_access_key_id", + "aws_secret_access_key", + "region" + ], + "title": "AWS MSK IAM" + } + ], + "order": 1, + "title": "Credentials", + "type": "object" + }, + "schema_registry": { + "description": "Connection details for interacting with a schema registry.", + "discriminator": { + "propertyName": "schema_registry_type" + }, + "oneOf": [ + { + "properties": { + "endpoint": { + "description": "Schema registry API endpoint. For example: https://registry-id.us-east-2.aws.confluent.cloud", + "order": 1, + "title": "Schema Registry Endpoint", + "type": "string" + }, + "password": { + "description": "Schema registry password to use for authentication. 
If you are using Confluent Cloud, this will be the 'Secret' from your schema registry API key.", + "order": 3, + "secret": true, + "title": "Schema Registry Password", + "type": "string" + }, + "schema_registry_type": { + "const": "confluent_schema_registry", + "default": "confluent_schema_registry", + "order": 0, + "type": "string" + }, + "username": { + "description": "Schema registry username to use for authentication. If you are using Confluent Cloud, this will be the 'Key' from your schema registry API key.", + "order": 2, + "title": "Schema Registry Username", + "type": "string" + } + }, + "required": [ + "endpoint", + "password", + "username" + ], + "title": "Confluent Schema Registry" + }, + { + "properties": { + "enable_json_only": { + "description": "If no schema registry is configured the capture will attempt to parse all data as JSON, and discovered collections will use a key of the message partition & offset. All available topics will be discovered, but if their messages are not encoded as JSON attempting to capture them will result in errors. If your topics contain messages encoded with a schema, you should configure the connector to use the schema registry for optimal results.", + "order": 1, + "title": "Capture Messages in JSON Format Only", + "type": "boolean" + }, + "schema_registry_type": { + "const": "no_schema_registry", + "default": "no_schema_registry", + "order": 0, + "type": "string" + } + }, + "required": [ + "enable_json_only" + ], + "title": "No Schema Registry" + } + ], + "order": 3, + "title": "Schema Registry", + "type": "object" + }, + "tls": { + "default": "system_certificates", + "description": "Controls how should TLS certificates be found or used.", + "enum": [ + "system_certificates" + ], + "order": 2, + "title": "TLS Settings", + "type": "string" + } + }, + "required": [ + "bootstrap_servers", + "credentials", + "schema_registry" + ], + "title": "Kafka Source Configuration", + "type": "object" + }, + "documentationUrl": "https://go.estuary.dev/source-kafka", + "protocol": 3032023, + "resourceConfigSchema": { + "$schema": "https://json-schema.org/draft/2019-09/schema", + "properties": { + "topic": { + "description": "Kafka topic to capture messages from.", + "title": "Topic", + "type": "string" + } + }, + "required": [ + "topic" + ], + "title": "Resource", + "type": "object" + }, + "resourcePathPointers": [ + "/topic" + ] +} diff --git a/source-kafka/tests/test-config.json b/source-kafka/tests/test-config.json deleted file mode 100644 index d9350d0483..0000000000 --- a/source-kafka/tests/test-config.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "bootstrap_servers": "localhost:9092", - "credentials": { - "auth_type": "UserPassword", - "mechanism": "SCRAM-SHA-256", - "username": "alice", - "password": "alice-pass" - }, - "tls": null -} diff --git a/source-kafka/tests/test.flow.yaml b/source-kafka/tests/test.flow.yaml new file mode 100644 index 0000000000..fe8a4d91cb --- /dev/null +++ b/source-kafka/tests/test.flow.yaml @@ -0,0 +1,27 @@ +--- +import: + - acmeCo/flow.yaml +captures: + acmeCo/source-kafka: + endpoint: + local: + command: + - cargo + - run + config: + bootstrap_servers: "localhost:9092" + schema_registry: + schema_registry_type: confluent_schema_registry + endpoint: http://localhost:8081 + username: user + password: password + bindings: + - resource: + topic: avro-topic + target: acmeCo/avro-topic + - resource: + topic: json-schema-topic + target: acmeCo/json-schema-topic + - resource: + topic: json-raw-topic + target: acmeCo/json-raw-topic diff 
--git a/source-kafka/tests/test.rs b/source-kafka/tests/test.rs new file mode 100644 index 0000000000..76cea4e636 --- /dev/null +++ b/source-kafka/tests/test.rs @@ -0,0 +1,502 @@ +use std::time::Duration; + +use anyhow::Result; +use apache_avro::types::{Record, Value}; +use apache_avro::Schema; +use rdkafka::admin::{AdminClient, AdminOptions, NewTopic, TopicReplication}; +use rdkafka::message::{Header, OwnedHeaders, ToBytes}; +use rdkafka::producer::{FutureProducer, FutureRecord}; +use rdkafka::ClientConfig; +use schema_registry_converter::async_impl::avro::AvroEncoder; +use schema_registry_converter::async_impl::json::JsonEncoder; +use schema_registry_converter::async_impl::schema_registry::SrSettings; +use schema_registry_converter::schema_registry_common::SubjectNameStrategy; +use serde_json::json; + +#[test] +fn test_spec() { + let output = std::process::Command::new("flowctl") + .args(["raw", "spec", "--source", "tests/test.flow.yaml"]) + .output() + .unwrap(); + + assert!(output.status.success()); + let got: serde_json::Value = serde_json::from_slice(&output.stdout).unwrap(); + insta::assert_snapshot!(serde_json::to_string_pretty(&got).unwrap()); +} + +#[tokio::test] +async fn test_discover() { + setup_test().await; + + let output = std::process::Command::new("flowctl") + .args([ + "raw", + "discover", + "--source", + "tests/test.flow.yaml", + "-o", + "json", + "--emit-raw", + ]) + .output() + .unwrap(); + + assert!(output.status.success()); + + let snap = std::str::from_utf8(&output.stdout) + .unwrap() + .lines() + .map(|line| serde_json::from_str::<serde_json::Value>(line).unwrap()) + .map(|line| serde_json::to_string_pretty(&line).unwrap()) + .reduce(|snap, line| format!("{}\n{}", snap, line)) + .unwrap(); + + insta::assert_snapshot!(snap); +} + +#[tokio::test] +async fn test_capture() { + setup_test().await; + + let output = std::process::Command::new("flowctl") + .args([ + "preview", + "--source", + "tests/test.flow.yaml", + "--sessions", + "1", + "--delay", + "2s", + "--output-state", + ]) + .output() + .unwrap(); + + println!("{}", std::str::from_utf8(&output.stderr).unwrap()); + + assert!(output.status.success()); + + let snap = std::str::from_utf8(&output.stdout).unwrap(); + + insta::assert_snapshot!(snap); +} + +#[tokio::test] +async fn test_capture_resume() { + setup_test().await; + + let initial_state = json!({ + "bindingStateV1": { + "avro-topic": { + "partitions": { + "1": 1, + "2": 1 + } + }, + "json-schema-topic": { + "partitions": { + "1": 1, + "2": 1 + } + }, + "json-raw-topic": { + "partitions": { + "1": 1, + "2": 1 + } + } + } + }); + + let output = std::process::Command::new("flowctl") + .args([ + "preview", + "--source", + "tests/test.flow.yaml", + "--sessions", + "1", + "--delay", + "2s", + "--output-state", + "--initial-state", + &initial_state.to_string(), + ]) + .output() + .unwrap(); + + assert!(output.status.success()); + + let snap = std::str::from_utf8(&output.stdout).unwrap(); + + insta::assert_snapshot!(snap); +} + +async fn setup_test() { + let bootstrap_servers = "localhost:9092"; + let schema_registry_endpoint = "http://localhost:8081"; + let num_messages = 9; + let num_partitions = 3; + let topic_replication = 1; + + let test_cases: &[(&dyn TestDataEncoder, &str)] = &[ + (&AvroTestDataEncoder::new(), "avro-topic"), + (&JsonSchemaTestDataEncoder::new(), "json-schema-topic"), + (&JsonRawTestDataEncoder::new(), "json-raw-topic"), + ]; + + let http = reqwest::Client::default(); + + let admin: AdminClient<_> = ClientConfig::new() + .set("bootstrap.servers",
bootstrap_servers) + .create() + .unwrap(); + + let producer: FutureProducer = ClientConfig::new() + .set("bootstrap.servers", bootstrap_servers) + .create() + .unwrap(); + + let opts = AdminOptions::default().request_timeout(Some(Duration::from_secs(1))); + + for (enc, topic) in test_cases { + admin.delete_topics(&[topic], &opts).await.unwrap(); + admin + .create_topics( + &[NewTopic::new( + topic, + num_partitions, + TopicReplication::Fixed(topic_replication), + )], + &opts, + ) + .await + .unwrap() + .into_iter() + .collect::<Result<Vec<_>, _>>() + .unwrap(); + + // Register schemas if the encoder uses schemas. + if let Some(schema_type) = enc.schema_type_string() { + for (topic, suffix, schema) in [ + (topic, "key", &enc.key_schema_string()), + (topic, "value", &enc.payload_schema_string()), + ] { + // Try to delete the existing schema if it exists, ignoring any errors + // that are returned, which may be 404's. + http.delete(format!( + "{}/subjects/{}-{}", + schema_registry_endpoint, topic, suffix + )) + .send() + .await + .unwrap(); + + // You have to do a "soft" delete before a permanent "hard" delete. + http.delete(format!( + "{}/subjects/{}-{}?permanent=true", + schema_registry_endpoint, topic, suffix + )) + .send() + .await + .unwrap(); + + // Register the schema, which must be successful. + assert!(http + .post(format!( + "{}/subjects/{}-{}/versions", + schema_registry_endpoint, topic, suffix + )) + .json(&json!({"schema": schema, "schemaType": schema_type})) + .send() + .await + .unwrap() + .status() + .is_success()); + } + } + + // Populate regular "data" records. + for idx in 0..num_messages { + send_message( + topic, + &enc.key_for_idx(idx, topic).await, + Some(&enc.payload_for_idx(idx, topic).await), + idx, + num_partitions, + &producer, + ) + .await; + } + + // Populate deletion records.
+        for idx in 0..num_partitions {
+            send_message(
+                topic,
+                &enc.key_for_idx(idx as usize, topic).await,
+                None::<&[u8]>,
+                idx as usize,
+                num_partitions,
+                &producer,
+            )
+            .await;
+        }
+    }
+}
+
+async fn send_message<K, P>(
+    topic: &str,
+    key: &K,
+    payload: Option<&P>,
+    idx: usize,
+    num_partitions: i32,
+    producer: &FutureProducer,
+) where
+    K: ToBytes + ?Sized,
+    P: ToBytes + ?Sized,
+{
+    let mut rec = FutureRecord::to(topic)
+        .partition(idx as i32 % num_partitions)
+        .key(key)
+        .timestamp(unix_millis_fixture(idx))
+        .headers(OwnedHeaders::new().insert(Header {
+            key: "header-key",
+            value: Some(&format!("header-value-{}", idx)),
+        }));
+
+    if let Some(payload) = payload {
+        rec = rec.payload(payload);
+    }
+
+    producer.send(rec, None).await.unwrap();
+}
+
+fn unix_millis_fixture(idx: usize) -> i64 {
+    ((idx + 1) * 86_400_000) as i64
+}
+
+#[async_trait::async_trait]
+trait TestDataEncoder {
+    async fn key_for_idx<'a>(&'a self, idx: usize, topic: &'a str) -> Vec<u8>;
+    async fn payload_for_idx<'a>(&'a self, idx: usize, topic: &'a str) -> Vec<u8>;
+    fn key_schema_string(&self) -> String;
+    fn payload_schema_string(&self) -> String;
+    fn schema_type_string(&self) -> Option<String>;
+}
+
+struct AvroTestDataEncoder {}
+
+impl AvroTestDataEncoder {
+    fn new() -> Self {
+        AvroTestDataEncoder {}
+    }
+}
+
+#[async_trait::async_trait]
+impl TestDataEncoder for AvroTestDataEncoder {
+    async fn key_for_idx<'a>(&'a self, idx: usize, topic: &'a str) -> Vec<u8> {
+        let enc = AvroEncoder::new(SrSettings::new(String::from("http://localhost:8081")));
+        let schema =
+            Schema::parse(&serde_json::from_str(&self.key_schema_string()).unwrap()).unwrap();
+
+        let mut key = Record::new(&schema).unwrap();
+        key.put("idx", Value::Int(idx as i32));
+        key.put(
+            "nested",
+            Value::Record(vec![("sub_id".to_string(), Value::Int(idx as i32))]),
+        );
+
+        enc.encode_value(
+            key.into(),
+            &SubjectNameStrategy::TopicNameStrategy(topic.to_string(), true),
+        )
+        .await
+        .unwrap()
+    }
+
+    async fn payload_for_idx<'a>(&'a self, idx: usize, topic: &'a str) -> Vec<u8> {
+        let enc = AvroEncoder::new(SrSettings::new(String::from("http://localhost:8081")));
+        let schema =
+            Schema::parse(&serde_json::from_str(&self.payload_schema_string()).unwrap()).unwrap();
+
+        let mut value = Record::new(&schema).unwrap();
+        value.put("value", Value::String(format!("value-{}", idx)));
+
+        enc.encode_value(
+            value.into(),
+            &SubjectNameStrategy::TopicNameStrategy(topic.to_string(), false),
+        )
+        .await
+        .unwrap()
+    }
+
+    fn key_schema_string(&self) -> String {
+        let parsed = Schema::parse(&json!({
+            "type": "record",
+            "name": "AvroKey",
+            "fields": [
+                {
+                    "name": "idx",
+                    "type": "int"
+                },
+                {
+                    "name": "nested",
+                    "type": {
+                        "type": "record",
+                        "name": "NestedAvroKeyRecord",
+                        "fields": [
+                            {
+                                "name": "sub_id",
+                                "type": "int"
+                            }
+                        ]
+                    }
+                }
+            ]
+        }))
+        .unwrap();
+
+        parsed.canonical_form()
+    }
+
+    fn payload_schema_string(&self) -> String {
+        let parsed = Schema::parse(&json!({
+            "type": "record",
+            "name": "AvroValue",
+            "fields": [
+                {
+                    "name": "value",
+                    "type": "string"
+                }
+            ]
+        }))
+        .unwrap();
+
+        parsed.canonical_form()
+    }
+
+    fn schema_type_string(&self) -> Option<String> {
+        Some("AVRO".to_string())
+    }
+}
+
+struct JsonSchemaTestDataEncoder {}
+
+impl JsonSchemaTestDataEncoder {
+    fn new() -> Self {
+        Self {}
+    }
+}
+
+#[async_trait::async_trait]
+impl TestDataEncoder for JsonSchemaTestDataEncoder {
+    async fn key_for_idx<'a>(&'a self, idx: usize, topic: &'a str) -> Vec<u8> {
+        let enc = JsonEncoder::new(SrSettings::new(String::from("http://localhost:8081")));
+        enc.encode(
+            &json!({
+                "idx": idx,
+                "nested": {
+                    "sub_id": idx
+                },
+            }),
+            SubjectNameStrategy::TopicNameStrategy(topic.to_string(), true),
+        )
+        .await
+        .unwrap()
+    }
+
+    async fn payload_for_idx<'a>(&'a self, idx: usize, topic: &'a str) -> Vec<u8> {
+        let enc = JsonEncoder::new(SrSettings::new(String::from("http://localhost:8081")));
+        enc.encode(
+            &json!({
+                "value": format!("value-{}", idx),
+            }),
+            SubjectNameStrategy::TopicNameStrategy(topic.to_string(), false),
+        )
+        .await
+        .unwrap()
+    }
+
+    fn key_schema_string(&self) -> String {
+        serde_json::to_string(&json!({
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "title": "JsonKey",
+            "type": "object",
+            "properties": {
+                "idx": {
+                    "type": "integer"
+                },
+                "nested": {
+                    "type": "object",
+                    "title": "NestedJsonKeyRecord",
+                    "properties": {
+                        "sub_id": {
+                            "type": "integer"
+                        }
+                    },
+                    "required": ["sub_id"]
+                }
+            },
+            "required": ["idx", "nested"],
+            "additionalProperties": false
+        }))
+        .unwrap()
+    }
+
+    fn payload_schema_string(&self) -> String {
+        serde_json::to_string(&json!({
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "title": "JsonValue",
+            "type": "object",
+            "properties": {
+                "value": {
+                    "type": "string"
+                }
+            },
+            "required": ["value"],
+            "additionalProperties": false
+        }))
+        .unwrap()
+    }
+
+    fn schema_type_string(&self) -> Option<String> {
+        Some("JSON".to_string())
+    }
+}
+
+struct JsonRawTestDataEncoder {}
+
+impl JsonRawTestDataEncoder {
+    fn new() -> Self {
+        Self {}
+    }
+}
+
+#[async_trait::async_trait]
+impl TestDataEncoder for JsonRawTestDataEncoder {
+    async fn key_for_idx<'a>(&'a self, idx: usize, _: &'a str) -> Vec<u8> {
+        serde_json::to_vec(&json!({
+            "key": idx,
+        }))
+        .unwrap()
+    }
+
+    async fn payload_for_idx<'a>(&'a self, idx: usize, _: &'a str) -> Vec<u8> {
+        serde_json::to_vec(&json!({
+            "payload": idx,
+        }))
+        .unwrap()
+    }
+
+    fn key_schema_string(&self) -> String {
+        panic!("not implemented")
+    }
+
+    fn payload_schema_string(&self) -> String {
+        panic!("not implemented")
+    }
+
+    fn schema_type_string(&self) -> Option<String> {
+        None
+    }
+}
diff --git a/tests/source-kafka/cleanup.sh b/tests/source-kafka/cleanup.sh
index d939e899dd..7da176aa0a 100755
--- a/tests/source-kafka/cleanup.sh
+++ b/tests/source-kafka/cleanup.sh
@@ -1,12 +1,4 @@
 #!/bin/bash
 set -e
 
-root_dir="$(git rev-parse --show-toplevel)"
-kafkactl_config="$root_dir/tests/source-kafka/kafkactl.yaml"
-
-function kctl() {
-    docker run -i --network flow-test --mount "type=bind,src=$kafkactl_config,target=/kafkactl.yaml" deviceinsight/kafkactl --config-file=/kafkactl.yaml $@
-}
-
-# Remove the test topic
-kctl delete topic $TEST_STREAM
+docker compose -f source-kafka/docker-compose.yaml down -v
diff --git a/tests/source-kafka/kafkactl.yaml b/tests/source-kafka/kafkactl.yaml
deleted file mode 100644
index 4bb5bc3fef..0000000000
--- a/tests/source-kafka/kafkactl.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-contexts:
-  alice:
-    brokers:
-      - infra-kafka-1.flow-test:9092
-    sasl:
-      enabled: true
-      mechanism: scram-sha256
-      password: alice-pass
-      username: alice
-current-context: alice
diff --git a/tests/source-kafka/setup.sh b/tests/source-kafka/setup.sh
index 6bc1c2ab5e..7ce7d4595e 100755
--- a/tests/source-kafka/setup.sh
+++ b/tests/source-kafka/setup.sh
@@ -2,49 +2,11 @@
 set -e
 
 export TEST_STREAM="estuary-test-$(shuf -zer -n6 {a..z} | tr -d '\0')"
-export RESOURCE="{ \"stream\": \"${TEST_STREAM}\", \"syncMode\": \"incremental\" }"
+export RESOURCE="{\"topic\": 
\"${TEST_STREAM}\"}" +export CONNECTOR_CONFIG='{"bootstrap_servers": "source-kafka-db-1.flow-test:9092", "schema_registry": {"schema_registry_type": "no_schema_registry", "enable_json_only": true}}' -# Because Flow uses network=host, the port exposed to Flow is different than the -# one we use when running `docker exec` below. -export CONNECTOR_CONFIG='{ - "bootstrap_servers": "infra-kafka-1.flow-test:9092", - "credentials": { - "auth_type": "UserPassword", - "mechanism": "SCRAM-SHA-256", - "username": "alice", - "password": "alice-pass" - }, - "tls": null -}' +LISTENER_HOST="source-kafka-db-1.flow-test" docker compose -f source-kafka/docker-compose.yaml up --wait --detach -root_dir="$(git rev-parse --show-toplevel)" -kafkactl_config="$root_dir/tests/source-kafka/kafkactl.yaml" -TOTAL_PARTITIONS=4 - -function kctl() { - docker run -i --network flow-test --mount "type=bind,src=$kafkactl_config,target=/kafkactl.yaml" deviceinsight/kafkactl --config-file=/kafkactl.yaml $@ -} - -# Ensure we can connect to a broker. -for i in $(seq 1 10); do - if [ -n "$(kctl get topics)" ]; then - break - else - if [ $i -ge 10 ]; then - echo "Can't connect to Kafka. Is the kafkactl config correct?" - kctl config view - exit 1 - fi - sleep 2 - fi -done - -# Create the topic with n partitions -kctl create topic $TEST_STREAM --partitions $TOTAL_PARTITIONS - -# Seed the topic with documents -for i in $(seq 1 $TOTAL_PARTITIONS); do - cat $root_dir/tests/files/d.jsonl | - jq -cs "map(select(.id % $TOTAL_PARTITIONS == $i - 1)) | .[]" | - kctl produce $TEST_STREAM -done +docker exec source-kafka-db-1 sh -c "/bin/kafka-topics --create --topic ${TEST_STREAM} --bootstrap-server localhost:9092" +docker cp tests/files/d.jsonl source-kafka-db-1:/ +docker exec source-kafka-db-1 sh -c "cat /d.jsonl | /bin/kafka-console-producer --topic ${TEST_STREAM} --bootstrap-server localhost:9092"