diff --git a/docs/pages/product/configuration/data-sources/_meta.js b/docs/pages/product/configuration/data-sources/_meta.js
index 0dc644cda455b..099d845cb0b8a 100644
--- a/docs/pages/product/configuration/data-sources/_meta.js
+++ b/docs/pages/product/configuration/data-sources/_meta.js
@@ -23,5 +23,6 @@ module.exports = {
   "singlestore": "SingleStore",
   "snowflake": "Snowflake",
   "sqlite": "SQLite",
-  "trino": "Trino"
+  "trino": "Trino",
+  "pinot": "Pinot"
 }
diff --git a/docs/pages/product/configuration/data-sources/pinot.mdx b/docs/pages/product/configuration/data-sources/pinot.mdx
new file mode 100644
index 0000000000000..85a4e0b6a4801
--- /dev/null
+++ b/docs/pages/product/configuration/data-sources/pinot.mdx
@@ -0,0 +1,87 @@
+---
+redirect_from:
+  - /config/databases/pinot
+---
+
+# Pinot
+
+Apache Pinot is a real-time distributed OLAP datastore purpose-built for low-latency, high-throughput analytics, making it well suited for user-facing analytical workloads.
+
+## Prerequisites
+
+- The hostname for the [Pinot][pinot] broker
+- The port for the [Pinot][pinot] broker
+
+## Setup
+
+### Manual
+
+Add the following to a `.env` file in your Cube project:
+
+```dotenv
+CUBEJS_DB_TYPE=pinot
+CUBEJS_DB_HOST=http[s]://pinot.broker.host
+CUBEJS_DB_PORT=8099
+CUBEJS_DB_USER=pinot_user
+CUBEJS_DB_PASS=**********
+```
+
+## Environment Variables
+
+| Environment Variable | Description                                | Possible Values     | Required |
+| -------------------- | ------------------------------------------ | ------------------- | :------: |
+| `CUBEJS_DB_HOST`     | The host URL for your Pinot broker         | A valid host URL    |    ✅    |
+| `CUBEJS_DB_PORT`     | The port for the database connection       | A valid port number |    ✅    |
+| `CUBEJS_DB_USER`     | The username used to connect to the broker | A valid username    |    ❌    |
+| `CUBEJS_DB_PASS`     | The password used to connect to the broker | A valid password    |    ❌    |
+
+## Pre-Aggregation Feature Support
+
+### count_distinct_approx
+
+Measures of type
+[`count_distinct_approx`][ref-schema-ref-types-formats-countdistinctapprox] can
+be used in pre-aggregations when using Pinot as a source database. To learn more
+about Pinot support for approximate aggregate functions, [click
+here][pinot-docs-approx-agg-fns].
+
+## Pre-aggregation build strategies
+
+
+
+To learn more about pre-aggregation build strategies, [head
+here][ref-caching-using-preaggs-build-strats].
+
+
+
+| Feature       | Works with read-only mode? | Is default? |
+| ------------- | :------------------------: | :---------: |
+| Simple        |             ✅             |     ✅      |
+| Batching      |             -              |      -      |
+| Export bucket |             -              |      -      |
+
+By default, Pinot uses a simple strategy to build pre-aggregations.
+
+### Simple
+
+No extra configuration is required to configure simple pre-aggregation builds
+for Pinot.
+
+### Batching
+
+Pinot does not support batching.
+
+### Export bucket
+
+Pinot does not support export buckets.
+
+## SSL
+
+Cube does not require any additional configuration to enable SSL: to connect to the broker over TLS, use an `https://` URL in `CUBEJS_DB_HOST`.
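As an end-to-end illustration (not part of the driver or of this page's required setup), the sketch below defines a hypothetical cube over the `scores` table that ships with this driver's test resources and adds a `count_distinct_approx` measure; with Pinot as the data source, `PinotQuery` renders that measure with Pinot's `DistinctCountHLLPlus` function. The cube, measure, and file names are illustrative only.

```javascript
// Hypothetical data model file, e.g. model/cubes/scores.js
cube(`scores`, {
  // Offline table loaded by the driver's test resources (scores.table.json)
  sql_table: 'scores',

  measures: {
    // Rendered via PinotQuery.countDistinctApprox(), i.e. DistinctCountHLLPlus(studentID)
    unique_students: {
      type: 'count_distinct_approx',
      sql: 'studentID'
    }
  },

  dimensions: {
    subject: {
      type: 'string',
      sql: 'subject'
    },
    score_date: {
      type: 'time',
      sql: 'score_date'
    }
  }
});
```

Because `count_distinct_approx` is supported in pre-aggregations (see above), a measure like `unique_students` can also be referenced from a pre-aggregation definition without extra configuration.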
+ +[pinot]: https://docs.pinot.apache.org/ +[pinot-docs-approx-agg-fns]: + https://docs.pinot.apache.org/users/user-guide-query/query-syntax/how-to-handle-unique-counting +[ref-recipe-enable-ssl]: + /guides/recipes/data-sources/using-ssl-connections-to-data-source +[ref-schema-ref-types-formats-countdistinctapprox]: /reference/data-model/types-and-formats#count_distinct_approx diff --git a/packages/cubejs-docker/dev.Dockerfile b/packages/cubejs-docker/dev.Dockerfile index 592ed23f90640..5c24cc59e68fe 100644 --- a/packages/cubejs-docker/dev.Dockerfile +++ b/packages/cubejs-docker/dev.Dockerfile @@ -63,6 +63,7 @@ COPY packages/cubejs-questdb-driver/package.json packages/cubejs-questdb-driver/ COPY packages/cubejs-materialize-driver/package.json packages/cubejs-materialize-driver/package.json COPY packages/cubejs-prestodb-driver/package.json packages/cubejs-prestodb-driver/package.json COPY packages/cubejs-trino-driver/package.json packages/cubejs-trino-driver/package.json +COPY packages/cubejs-pinot-driver/package.json packages/cubejs-pinot-driver/package.json COPY packages/cubejs-query-orchestrator/package.json packages/cubejs-query-orchestrator/package.json COPY packages/cubejs-schema-compiler/package.json packages/cubejs-schema-compiler/package.json COPY packages/cubejs-server/package.json packages/cubejs-server/package.json @@ -138,6 +139,7 @@ COPY packages/cubejs-questdb-driver/ packages/cubejs-questdb-driver/ COPY packages/cubejs-materialize-driver/ packages/cubejs-materialize-driver/ COPY packages/cubejs-prestodb-driver/ packages/cubejs-prestodb-driver/ COPY packages/cubejs-trino-driver/ packages/cubejs-trino-driver/ +COPY packages/cubejs-pinot-driver/ packages/cubejs-pinot-driver/ COPY packages/cubejs-query-orchestrator/ packages/cubejs-query-orchestrator/ COPY packages/cubejs-schema-compiler/ packages/cubejs-schema-compiler/ COPY packages/cubejs-server/ packages/cubejs-server/ diff --git a/packages/cubejs-docker/package.json b/packages/cubejs-docker/package.json index 5994ba549a26a..841c04d66676d 100644 --- a/packages/cubejs-docker/package.json +++ b/packages/cubejs-docker/package.json @@ -35,6 +35,7 @@ "@cubejs-backend/snowflake-driver": "^0.36.1", "@cubejs-backend/sqlite-driver": "^0.36.0", "@cubejs-backend/trino-driver": "^0.36.2", + "@inthememory/pinot-driver": "^0.35.78", "cubejs-cli": "^0.36.2", "typescript": "~5.2.2" }, diff --git a/packages/cubejs-docker/package.json.local b/packages/cubejs-docker/package.json.local index f3f3a77ef2d57..1be5e72927ddf 100644 --- a/packages/cubejs-docker/package.json.local +++ b/packages/cubejs-docker/package.json.local @@ -38,6 +38,7 @@ "@cubejs-backend/snowflake-driver": "file:/cube-build/packages/cubejs-snowflake-driver", "@cubejs-backend/sqlite-driver": "file:/cube-build/packages/cubejs-sqlite-driver", "@cubejs-backend/trino-driver": "file:/cube-build/packages/cubejs-trino-driver", + "@cubejs-backend/pinot-driver": "file:/cube-build/packages/cubejs-pinot-driver", "cubejs-cli": "file:/cube-build/packages/cubejs-cli", "typescript": "~4.1.5" }, diff --git a/packages/cubejs-docker/testing-drivers.Dockerfile b/packages/cubejs-docker/testing-drivers.Dockerfile index 36773d9e292bc..4ef851146a22d 100644 --- a/packages/cubejs-docker/testing-drivers.Dockerfile +++ b/packages/cubejs-docker/testing-drivers.Dockerfile @@ -57,6 +57,7 @@ COPY packages/cubejs-questdb-driver/package.json packages/cubejs-questdb-driver/ COPY packages/cubejs-materialize-driver/package.json packages/cubejs-materialize-driver/package.json COPY 
packages/cubejs-prestodb-driver/package.json packages/cubejs-prestodb-driver/package.json COPY packages/cubejs-trino-driver/package.json packages/cubejs-trino-driver/package.json +COPY packages/cubejs-pinot-driver/package.json packages/cubejs-pinot-driver/package.json COPY packages/cubejs-query-orchestrator/package.json packages/cubejs-query-orchestrator/package.json COPY packages/cubejs-schema-compiler/package.json packages/cubejs-schema-compiler/package.json COPY packages/cubejs-server/package.json packages/cubejs-server/package.json @@ -134,6 +135,7 @@ COPY packages/cubejs-questdb-driver/ packages/cubejs-questdb-driver/ COPY packages/cubejs-materialize-driver/ packages/cubejs-materialize-driver/ COPY packages/cubejs-prestodb-driver/ packages/cubejs-prestodb-driver/ COPY packages/cubejs-trino-driver/ packages/cubejs-trino-driver/ +COPY packages/cubejs-pinot-driver/ packages/cubejs-pinot-driver/ COPY packages/cubejs-query-orchestrator/ packages/cubejs-query-orchestrator/ COPY packages/cubejs-schema-compiler/ packages/cubejs-schema-compiler/ COPY packages/cubejs-server/ packages/cubejs-server/ diff --git a/packages/cubejs-pinot-driver/.gitignore b/packages/cubejs-pinot-driver/.gitignore new file mode 100644 index 0000000000000..1521c8b7652b1 --- /dev/null +++ b/packages/cubejs-pinot-driver/.gitignore @@ -0,0 +1 @@ +dist diff --git a/packages/cubejs-pinot-driver/CHANGELOG.md b/packages/cubejs-pinot-driver/CHANGELOG.md new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/packages/cubejs-pinot-driver/LICENSE b/packages/cubejs-pinot-driver/LICENSE new file mode 100644 index 0000000000000..da67a9a3301be --- /dev/null +++ b/packages/cubejs-pinot-driver/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2020 Cube Dev, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/packages/cubejs-pinot-driver/README.md b/packages/cubejs-pinot-driver/README.md new file mode 100644 index 0000000000000..a9b7a5d4e76ca --- /dev/null +++ b/packages/cubejs-pinot-driver/README.md @@ -0,0 +1,46 @@ +
Cube.js
+ +[Website](https://cube.dev) • [Docs](https://cube.dev/docs) • [Blog](https://cube.dev/blog) • [Slack](https://slack.cube.dev) • [Twitter](https://twitter.com/the_cube_dev) + +[![npm version](https://badge.fury.io/js/%40cubejs-backend%2Fserver.svg)](https://badge.fury.io/js/%40cubejs-backend%2Fserver) +[![GitHub Actions](https://github.com/cube-js/cube.js/workflows/Build/badge.svg)](https://github.com/cube-js/cube.js/actions?query=workflow%3ABuild+branch%3Amaster) + +# Cube.js Pinot Database Driver + +Cube.js Pinot driver. + +[Learn more](https://github.com/cube-js/cube.js#getting-started) + +### Project Status + +In Review. [#8689](https://github.com/cube-js/cube/pull/8689) + +### Installation + +`npm i @inthememory/pinot-driver` + +### Usage +#### For Docker + +Build development [docker image](https://github.com/cube-js/cube/blob/master/packages/cubejs-docker/DEVELOPMENT.md). + +Assuming the built image is tagged `cubejs/cube:dev` + +``` +FROM cubejs/cube:dev +RUN npm i @inthememory/pinot-driver +``` + +``` + environment: + - CUBEJS_DB_TYPE=pinot + - CUBEJS_DB_HOST= #broker_host + - CUBEJS_DB_PORT= #broker_port + - CUBEJS_DB_USER= #database user + - CUBEJS_DB_PASS= #database password + - CUBEJS_DEV_MODE=true #if running locally +``` + +### License + +Cube.js Pinot Database Driver is [Apache 2.0 licensed](./LICENSE). \ No newline at end of file diff --git a/packages/cubejs-pinot-driver/docker-compose.yml b/packages/cubejs-pinot-driver/docker-compose.yml new file mode 100644 index 0000000000000..26f8e5ca04df5 --- /dev/null +++ b/packages/cubejs-pinot-driver/docker-compose.yml @@ -0,0 +1,78 @@ +services: + pinot-controller: + image: apachepinot/pinot:1.1.0 + hostname: pinot-controller + restart: unless-stopped + command: StartController -configFileName /tmp/data/test-resources/controller.conf + container_name: pinot-controller-cube-tests + ports: + - 9000:9000 + depends_on: + zookeeper: + condition: service_healthy + healthcheck: + test: curl -f "http://localhost:9000/health" + interval: 5s + timeout: 5s + retries: 3 + start_period: 10s + volumes: + - ./pinot-resources:/tmp/data/test-resources:ro + + pinot-broker: + image: apachepinot/pinot:1.1.0 + hostname: pinot-broker + restart: unless-stopped + command: StartBroker -zkAddress zookeeper:2181 -configFileName /tmp/data/test-resources/broker.conf + container_name: pinot-broker-cube-tests + volumes: + - ./pinot-resources:/tmp/data/test-resources:ro + ports: + - 8099:8099 + depends_on: + pinot-controller: + condition: service_healthy + healthcheck: + test: curl -f "http://localhost:8099/health" + interval: 5s + timeout: 5s + retries: 3 + start_period: 10s + + pinot-server: + image: apachepinot/pinot:1.1.0 + hostname: pinot-server + restart: unless-stopped + container_name: pinot-server-cube-tests + command: StartServer -zkAddress zookeeper:2181 + ports: + - 8098:8098 + - 8097:8097 + depends_on: + pinot-broker: + condition: service_healthy + healthcheck: + test: curl -f "http://localhost:8097/health" + interval: 5s + timeout: 5s + retries: 3 + start_period: 10s + + ### + # Utils (Zookeeper, Kafka, Spark, ...) 
+ ### + zookeeper: + image: zookeeper:latest + hostname: zookeeper + ports: + - 2181:2181 + environment: + ZOO_MY_ID: 1 + ZOO_PORT: 2181 + ZOO_SERVERS: server.1=zookeeper:2888:3888;2181 + healthcheck: + test: nc -z localhost 2181 || exit -1 + interval: 10s + timeout: 5s + retries: 3 + start_period: 2s diff --git a/packages/cubejs-pinot-driver/index.js b/packages/cubejs-pinot-driver/index.js new file mode 100644 index 0000000000000..f8c1ed9927178 --- /dev/null +++ b/packages/cubejs-pinot-driver/index.js @@ -0,0 +1,11 @@ +const fromExports = require('./dist/src'); +const { PinotDriver } = require('./dist/src/PinotDriver'); + +const toExport = PinotDriver; + +// eslint-disable-next-line no-restricted-syntax +for (const [key, module] of Object.entries(fromExports)) { + toExport[key] = module; +} + +module.exports = toExport; diff --git a/packages/cubejs-pinot-driver/package.json b/packages/cubejs-pinot-driver/package.json new file mode 100644 index 0000000000000..d6d866d9f2b65 --- /dev/null +++ b/packages/cubejs-pinot-driver/package.json @@ -0,0 +1,55 @@ +{ + "name": "@inthememory/pinot-driver", + "description": "Cube.js Pinot database driver", + "author": "Julian Ronsse, InTheMemory", + "version": "0.35.80", + "repository": { + "type": "git", + "url": "https://github.com/cube-js/cube.git", + "directory": "packages/cubejs-pinotdb-driver" + }, + "engines": { + "node": "^14.0.0 || ^16.0.0 || >=17.0.0" + }, + "files": [ + "dist/src", + "index.js" + ], + "main": "index.js", + "typings": "dist/src/index.d.ts", + "scripts": { + "build": "rm -rf dist && npm run tsc", + "tsc": "tsc", + "watch": "tsc -w", + "integration": "jest dist/test", + "integration:pinot": "jest dist/test", + "lint": "eslint src/* --ext .ts", + "lint:fix": "eslint --fix src/* --ext .ts" + }, + "dependencies": { + "@cubejs-backend/base-driver": "^0.36.0", + "@cubejs-backend/schema-compiler": "^0.36.0", + "@cubejs-backend/shared": "^0.36.0", + "node-fetch": "^2.6.1", + "ramda": "^0.27.0", + "sqlstring": "^2.3.3" + }, + "license": "Apache-2.0", + "publishConfig": { + "access": "public" + }, + "devDependencies": { + "@cubejs-backend/linter": "^0.36.0", + "@types/jest": "^27", + "jest": "^27", + "should": "^13.2.3", + "testcontainers": "^10.13.0", + "typescript": "~5.2.2" + }, + "jest": { + "testEnvironment": "node" + }, + "eslintConfig": { + "extends": "../cubejs-linter" + } +} diff --git a/packages/cubejs-pinot-driver/pinot-resources/broker.conf b/packages/cubejs-pinot-driver/pinot-resources/broker.conf new file mode 100644 index 0000000000000..dffabb32f2cce --- /dev/null +++ b/packages/cubejs-pinot-driver/pinot-resources/broker.conf @@ -0,0 +1,3 @@ +pinot.broker.access.control.class=org.apache.pinot.broker.broker.BasicAuthAccessControlFactory +pinot.broker.access.control.principals=admin +pinot.broker.access.control.principals.admin.password=mysecret \ No newline at end of file diff --git a/packages/cubejs-pinot-driver/pinot-resources/controller.conf b/packages/cubejs-pinot-driver/pinot-resources/controller.conf new file mode 100644 index 0000000000000..2651bb565a182 --- /dev/null +++ b/packages/cubejs-pinot-driver/pinot-resources/controller.conf @@ -0,0 +1,9 @@ +controller.helix.cluster.name=PinotCluster +controller.port=9000 +controller.vip.host=pinot-controller-cube-tests +controller.vip.port=9000 +controller.data.dir=/var/pinot/controller/data +controller.zk.str=zookeeper:2181 +pinot.set.instance.id.to.hostname=true +controller.admin.access.control.principals=admin 
+controller.admin.access.control.principals.admin.password=mysecret \ No newline at end of file diff --git a/packages/cubejs-pinot-driver/pinot-resources/rawdata/scores/scores.csv b/packages/cubejs-pinot-driver/pinot-resources/rawdata/scores/scores.csv new file mode 100644 index 0000000000000..dd7392757ffd9 --- /dev/null +++ b/packages/cubejs-pinot-driver/pinot-resources/rawdata/scores/scores.csv @@ -0,0 +1,7 @@ +studentID,subject,score,score_date +200,Maths,3.8,1725462007792 +200,English,3.5,1725348240000 +201,English,3.2,1725348240000 +202,Maths,3.1,1725348240000 +201,Maths,3.2,1725276600000 +202,Physics,3.6,1724329800000 \ No newline at end of file diff --git a/packages/cubejs-pinot-driver/pinot-resources/rawdata/students/students.csv b/packages/cubejs-pinot-driver/pinot-resources/rawdata/students/students.csv new file mode 100644 index 0000000000000..387c664f87dee --- /dev/null +++ b/packages/cubejs-pinot-driver/pinot-resources/rawdata/students/students.csv @@ -0,0 +1,4 @@ +studentID,firstName,lastName,gender +200,Lucy,Smith,Female +201,Bob,King,Male +202,Nick,Young,Male \ No newline at end of file diff --git a/packages/cubejs-pinot-driver/pinot-resources/scores.jobspec.yml b/packages/cubejs-pinot-driver/pinot-resources/scores.jobspec.yml new file mode 100644 index 0000000000000..90ef1093c3ee8 --- /dev/null +++ b/packages/cubejs-pinot-driver/pinot-resources/scores.jobspec.yml @@ -0,0 +1,23 @@ +executionFrameworkSpec: + name: 'standalone' + segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner' + segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner' + segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner' +jobType: SegmentCreationAndTarPush +inputDirURI: '/tmp/data/test-resources/rawdata/scores/' +includeFileNamePattern: 'glob:**/*.csv' +outputDirURI: '/tmp/data/' +overwriteOutput: true +pinotFSSpecs: + - scheme: file + className: org.apache.pinot.spi.filesystem.LocalPinotFS +recordReaderSpec: + dataFormat: 'csv' + className: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReader' + configClassName: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReaderConfig' +tableSpec: + tableName: 'scores' +pinotClusterSpecs: + - controllerURI: 'http://localhost:9000' +pushJobSpec: + pushAttempts: 1 \ No newline at end of file diff --git a/packages/cubejs-pinot-driver/pinot-resources/scores.schema.json b/packages/cubejs-pinot-driver/pinot-resources/scores.schema.json new file mode 100644 index 0000000000000..77b9ef32c682f --- /dev/null +++ b/packages/cubejs-pinot-driver/pinot-resources/scores.schema.json @@ -0,0 +1,26 @@ +{ + "schemaName": "scores", + "dimensionFieldSpecs": [ + { + "name": "studentID", + "dataType": "INT" + }, + { + "name": "subject", + "dataType": "STRING" + } + ], + "metricFieldSpecs": [ + { + "name": "score", + "dataType": "DOUBLE" + } + ], + "dateTimeFieldSpecs": [{ + "name": "score_date", + "dataType": "TIMESTAMP", + "format" : "1:MILLISECONDS:EPOCH", + "granularity": "1:MILLISECONDS" + }], + "primaryKeyColumns": ["studentID", "subject", "score_date"] +} \ No newline at end of file diff --git a/packages/cubejs-pinot-driver/pinot-resources/scores.table.json b/packages/cubejs-pinot-driver/pinot-resources/scores.table.json new file mode 100644 index 0000000000000..2f498415e0774 --- /dev/null +++ b/packages/cubejs-pinot-driver/pinot-resources/scores.table.json @@ -0,0 +1,16 @@ +{ + "tableName": "scores", + 
"tableType":"OFFLINE", + "segmentsConfig" : { + "timeColumnName": "score_date", + "replication" : "1" + }, + "tableIndexConfig" : { + "loadMode" : "MMAP" + }, + "tenants" : { + "broker":"DefaultTenant", + "server":"DefaultTenant" + }, + "metadata": {} +} \ No newline at end of file diff --git a/packages/cubejs-pinot-driver/pinot-resources/students.jobspec.yml b/packages/cubejs-pinot-driver/pinot-resources/students.jobspec.yml new file mode 100644 index 0000000000000..5edd00883ab42 --- /dev/null +++ b/packages/cubejs-pinot-driver/pinot-resources/students.jobspec.yml @@ -0,0 +1,23 @@ +executionFrameworkSpec: + name: 'standalone' + segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner' + segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner' + segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner' +jobType: SegmentCreationAndTarPush +inputDirURI: '/tmp/data/test-resources/rawdata/students/' +includeFileNamePattern: 'glob:**/*.csv' +outputDirURI: '/tmp/data/' +overwriteOutput: true +pinotFSSpecs: + - scheme: file + className: org.apache.pinot.spi.filesystem.LocalPinotFS +recordReaderSpec: + dataFormat: 'csv' + className: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReader' + configClassName: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReaderConfig' +tableSpec: + tableName: 'students' +pinotClusterSpecs: + - controllerURI: 'http://localhost:9000' +pushJobSpec: + pushAttempts: 1 \ No newline at end of file diff --git a/packages/cubejs-pinot-driver/pinot-resources/students.schema.json b/packages/cubejs-pinot-driver/pinot-resources/students.schema.json new file mode 100644 index 0000000000000..9f751e7669031 --- /dev/null +++ b/packages/cubejs-pinot-driver/pinot-resources/students.schema.json @@ -0,0 +1,22 @@ +{ + "schemaName": "students", + "dimensionFieldSpecs": [ + { + "name": "studentID", + "dataType": "INT" + }, + { + "name": "firstName", + "dataType": "STRING" + }, + { + "name": "lastName", + "dataType": "STRING" + }, + { + "name": "gender", + "dataType": "STRING" + } + ], + "primaryKeyColumns": ["studentID"] +} \ No newline at end of file diff --git a/packages/cubejs-pinot-driver/pinot-resources/students.table.json b/packages/cubejs-pinot-driver/pinot-resources/students.table.json new file mode 100644 index 0000000000000..d76d31015d61c --- /dev/null +++ b/packages/cubejs-pinot-driver/pinot-resources/students.table.json @@ -0,0 +1,34 @@ +{ + "tableName": "students", + "tableType": "OFFLINE", + "isDimTable": true, + "routing": { + "segmentPrunerTypes": ["partition"], + "instanceSelectorType": "replicaGroup" + }, + "segmentsConfig": { + "retentionTimeUnit": "DAYS", + "retentionTimeValue": "180", + "replication": "5", + "schemaName": "students" + }, + "tenants": { + "broker": "DefaultTenant", + "server": "DefaultTenant" + }, + "tableIndexConfig": { + }, + "metadata": {}, + "quota": { + "storage": "20M" + }, + "ingestionConfig": { + "batchIngestionConfig": { + "segmentIngestionType": "REFRESH", + "segmentIngestionFrequency": "DAILY" + } + }, + "dimensionTableConfig": { + "disablePreload": false + } +} diff --git a/packages/cubejs-pinot-driver/src/PinotDriver.ts b/packages/cubejs-pinot-driver/src/PinotDriver.ts new file mode 100644 index 0000000000000..e997650dcc0fa --- /dev/null +++ b/packages/cubejs-pinot-driver/src/PinotDriver.ts @@ -0,0 +1,182 @@ +/** + * @copyright Cube Dev, Inc. 
+ * @license Apache-2.0
+ * @fileoverview The `PinotDriver` and related types declaration.
+ */
+
+import {
+  DriverInterface,
+  StreamTableData,
+  BaseDriver
+} from '@cubejs-backend/base-driver';
+import {
+  getEnv,
+  assertDataSource,
+} from '@cubejs-backend/shared';
+
+import type { ConnectionOptions as TLSConnectionOptions } from 'tls';
+
+import {
+  map, zipObj
+} from 'ramda';
+import SqlString from 'sqlstring';
+import fetch, { Headers, Request, Response } from 'node-fetch';
+import { PinotQuery } from './PinotQuery';
+
+export type PinotDriverConfiguration = {
+  host?: string;
+  port?: string;
+  user?: string;
+  basicAuth?: { user: string, password: string };
+  ssl?: string | TLSConnectionOptions;
+  dataSource?: string;
+  queryTimeout?: number;
+};
+
+type PinotResponse = {
+  exceptions: any[],
+  minConsumingFreshnessTimeMs: number,
+  numConsumingSegmentsQueried: number,
+  numDocsScanned: number,
+  numEntriesScannedInFilter: number,
+  numEntriesScannedPostFilter: number,
+  numGroupsLimitReached: boolean,
+  numSegmentsMatched: number,
+  numSegmentsProcessed: number,
+  numSegmentsQueried: number,
+  numServersQueried: number,
+  numServersResponded: number,
+  resultTable: {
+    dataSchema: {
+      columnDataTypes: string[],
+      columnNames: string[]
+    },
+    rows: any[][]
+  },
+  segmentStatistics: any[],
+  timeUsedMs: number,
+  totalDocs: number,
+  traceInfo: any
+};
+
+/**
+ * Pinot driver class.
+ */
+export class PinotDriver extends BaseDriver implements DriverInterface {
+  /**
+   * Returns default concurrency value.
+   */
+  public static getDefaultConcurrency() {
+    return 2;
+  }
+
+  private config: PinotDriverConfiguration;
+
+  private url: string;
+
+  public static dialectClass() {
+    return PinotQuery;
+  }
+
+  /**
+   * Class constructor.
+   */
+  public constructor(config: PinotDriverConfiguration = {}) {
+    super();
+
+    const dataSource =
+      config.dataSource ||
+      assertDataSource('default');
+
+    this.config = {
+      host: getEnv('dbHost', { dataSource }),
+      port: getEnv('dbPort', { dataSource }),
+      user: getEnv('dbUser', { dataSource }),
+      basicAuth: getEnv('dbPass', { dataSource })
+        ? {
+          user: getEnv('dbUser', { dataSource }),
+          password: getEnv('dbPass', { dataSource }),
+        }
+        : undefined,
+      ssl: this.getSslOptions(dataSource),
+      queryTimeout: getEnv('dbQueryTimeout', { dataSource }),
+      ...config
+    };
+
+    this.url = `${this.config.host}:${this.config.port}/query/sql`;
+  }
+
+  public testConnection() {
+    const query = SqlString.format('select 1');
+
+    return (<Promise<any[]>> this.queryPromised(query))
+      .then(response => {
+        if (response.length === 0) {
+          throw new Error('Unable to connect to your Pinot instance');
+        }
+      });
+  }
+
+  public query(query: string, values: unknown[]): Promise<any[]> {
+    return <Promise<any[]>> this.queryPromised(this.prepareQueryWithParams(query, values));
+  }
+
+  public prepareQueryWithParams(query: string, values: unknown[]) {
+    return SqlString.format(query, (values || []).map(value => (typeof value === 'string' ? {
+      toSqlString: () => SqlString.escape(value).replace(/\\\\([_%])/g, '\\$1'),
+    } : value)));
+  }
+
+  public authorizationHeaders(): { Authorization?: string } {
+    if (!this.config.basicAuth) {
+      return {};
+    }
+
+    const encodedCredentials = Buffer.from(`${this.config.basicAuth.user}:${this.config.basicAuth.password}`).toString('base64');
+
+    return { Authorization: `Basic ${encodedCredentials}` };
+  }
+
+  public queryPromised(query: string): Promise<any[]> {
+    const toError = (error: any) => new Error(error.error ?
`${error.message}\n${error.error}` : error.message); + + const request: Request = new Request(this.url, { + method: 'POST', + headers: new Headers({ + 'Content-Type': 'application/json', + ...this.authorizationHeaders() + }), + body: JSON.stringify({ sql: query, queryOptions: `useMultistageEngine=true;timeoutMs=${this.config.queryTimeout}` }) + }); + + return new Promise((resolve, reject) => { + fetch(request) + .then(async (response: Response) => { + if (!response.ok) { + if (response.status === 401) { + return reject(toError({ message: 'Unauthorized request' })); + } + + return reject(toError({ message: 'Unexpected error' })); + } + const pinotResponse: PinotResponse = await response.json(); + + if (pinotResponse?.exceptions?.length) { + return reject(toError(pinotResponse.exceptions[0])); + } + + return resolve(this.normalizeResultOverColumns(pinotResponse.resultTable.rows, pinotResponse.resultTable.dataSchema.columnNames)); + }) + .catch((error: any) => reject(toError(error))); + }); + } + + protected override quoteIdentifier(identifier: string): string { + return identifier; + } + + public normalizeResultOverColumns(data: any[], columns: string[]) { + const arrayToObject = zipObj(columns); + return map(arrayToObject, data || []); + } +} diff --git a/packages/cubejs-pinot-driver/src/PinotQuery.ts b/packages/cubejs-pinot-driver/src/PinotQuery.ts new file mode 100644 index 0000000000000..fcf82809a348f --- /dev/null +++ b/packages/cubejs-pinot-driver/src/PinotQuery.ts @@ -0,0 +1,165 @@ +import { BaseFilter, BaseQuery, BaseTimeDimension } from '@cubejs-backend/schema-compiler'; + +enum GRANULARITY_TO_INTERVAL { + day = 'day', + week = 'week', + hour = 'hour', + minute = 'minute', + second = 'second', + month = 'month', + quarter = 'quarter', + year = 'year' +} + +type GRANULARITY_ID = keyof typeof GRANULARITY_TO_INTERVAL; + +const DATE_TIME_FORMAT = '\'yyyy-MM-dd HH:mm:ss.SSS\''; + +class PinotTimeDimension extends BaseTimeDimension { + public formatFromDate(date: string) { + return super.formatFromDate(date).replace('T', ' '); + } + + public formatToDate(date: string) { + return super.formatToDate(date).replace('T', ' '); + } + + public timeSeries(): string[][] { + if (!this.granularity) return super.timeSeries(); + + return super.timeSeries().map(([from, to]) => ([from.replace('T', ' '), to.replace('T', ' ')])); + } +} + +class PinotFilter extends BaseFilter { + public likeIgnoreCase(column: any, not: any, param: any, type: string) { + const p = (!type || type === 'contains' || type === 'ends') ? '%' : ''; + const s = (!type || type === 'contains' || type === 'starts') ? '%' : ''; + return `LOWER(${column})${not ? ' NOT' : ''} LIKE CONCAT('${p}', LOWER(${this.allocateParam(param)}) , '${s}') ESCAPE '\\'`; + } + + public castParameter() { + if (this.definition().type === 'boolean') { + return 'CAST(? AS BOOLEAN)'; + } else if (this.measure || this.definition().type === 'number') { + // TODO here can be measure type of string actually + return 'CAST(? 
AS DOUBLE)'; + } + + return '?'; + } +} + +export class PinotQuery extends BaseQuery { + public newFilter(filter: any): PinotFilter { + return new PinotFilter(this, filter); + } + + public timeStampParam() { + return '?'; + } + + public timeStampCast(value: string) { + return `CAST(${value} as TIMESTAMP)`; + } + + public dateTimeCast(value: string) { + return value; + } + + public convertTz(field: string) { + return this.timeStampCast(`toDateTime(${field}, ${DATE_TIME_FORMAT}, '${this.timezone}')`); + } + + public timeGroupedColumn(granularity: GRANULARITY_ID, dimension: string) { + return this.timeStampCast(`dateTrunc('${GRANULARITY_TO_INTERVAL[granularity]}', ${dimension})`); + } + + public subtractInterval(date: string, interval: string) { + const [intervalValue, intervalUnit] = interval.split(' '); + return `${this.timeStampCast(date)} - fromEpoch${intervalUnit}(${intervalValue})`; + } + + public addInterval(date: string, interval: string) { + const [intervalValue, intervalUnit] = interval.split(' '); + return `${this.timeStampCast(date)} + fromEpoch${intervalUnit}(${intervalValue})`; + } + + public seriesSql(timeDimension: BaseTimeDimension) { + const values = timeDimension.timeSeries().map( + ([from, to]) => `select '${from}' f, '${to}' t` + ).join(' UNION ALL '); + return `SELECT ${this.timeStampCast('dates.f')} date_from, ${this.timeStampCast('dates.t')} date_to FROM (${values}) AS dates`; + } + + public applyMeasureFilters(evaluateSql: '*' | string, symbol: any, cubeName: string) { + if (!symbol.filters || !symbol.filters.length) { + return evaluateSql; + } + + const where = this.evaluateMeasureFilters(symbol, cubeName); + + return `${evaluateSql === '*' ? '1' : evaluateSql}) FILTER (WHERE ${where}`; + } + + /** + * @return {string} + */ + public timestampFormat() { + return 'YYYY-MM-DD HH:mm:ss.SSS'; + } + + public unixTimestampSql() { + return this.nowTimestampSql(); + } + + public defaultRefreshKeyRenewalThreshold() { + return 120; + } + + public defaultEveryRefreshKey() { + return { + every: '2 minutes' + }; + } + + public hllInit(sql: string) { + return this.countDistinctApprox(sql); // todo: ensure the correct way to do so in pinot + } + + public hllMerge(sql: string) { + return this.countDistinctApprox(sql); // todo: ensure the correct way to do so in pinot + } + + public countDistinctApprox(sql: string) { + return `DistinctCountHLLPlus(${sql})`; + } + + protected limitOffsetClause(limit: string | number, offset: string | number) { + const limitClause = limit != null ? ` LIMIT ${limit}` : ''; + const offsetClause = offset != null ? 
` OFFSET ${offset}` : ''; + return `${offsetClause}${limitClause}`; + } + + public newTimeDimension(timeDimension: any): BaseTimeDimension { + return new PinotTimeDimension(this, timeDimension); + } + + public sqlTemplates() { + const templates = super.sqlTemplates(); + templates.functions.DATETRUNC = 'DATE_TRUNC({{ args_concat }})'; + templates.statements.select = 'SELECT {{ select_concat | map(attribute=\'aliased\') | join(\', \') }} \n' + + 'FROM (\n {{ from }}\n) AS {{ from_alias }} \n' + + '{% if group_by %} GROUP BY {{ group_by }}{% endif %}' + + '{% if order_by %} ORDER BY {{ order_by | map(attribute=\'expr\') | join(\', \') }}{% endif %}' + + '{% if offset %}\nOFFSET {{ offset }}{% endif %}' + + '{% if limit %}\nLIMIT {{ limit }}{% endif %}'; + templates.expressions.extract = 'EXTRACT({{ date_part }} FROM {{ expr }})'; + templates.expressions.timestamp_literal = `fromDateTime('{{ value }}', ${DATE_TIME_FORMAT})`; + templates.quotes.identifiers = '"'; + delete templates.types.time; + delete templates.types.interval; + delete templates.types.binary; + return templates; + } +} diff --git a/packages/cubejs-pinot-driver/src/index.ts b/packages/cubejs-pinot-driver/src/index.ts new file mode 100644 index 0000000000000..c8d0df2648881 --- /dev/null +++ b/packages/cubejs-pinot-driver/src/index.ts @@ -0,0 +1,4 @@ +import { PinotDriver } from './PinotDriver'; + +export default PinotDriver; +export { PinotDriver }; diff --git a/packages/cubejs-pinot-driver/test/Pinot.test.ts b/packages/cubejs-pinot-driver/test/Pinot.test.ts new file mode 100644 index 0000000000000..2c41d79336395 --- /dev/null +++ b/packages/cubejs-pinot-driver/test/Pinot.test.ts @@ -0,0 +1,314 @@ +// eslint-disable-next-line import/no-extraneous-dependencies +import { DockerComposeEnvironment, Wait, StartedDockerComposeEnvironment } from 'testcontainers'; +import { prepareCompiler as originalPrepareCompiler } from '@cubejs-backend/schema-compiler'; +import { PinotQuery } from '../src/PinotQuery'; +import { PinotDriver } from '../src/PinotDriver'; + +const path = require('path'); + +const prepareCompiler = (content: string, options: any[]) => originalPrepareCompiler({ + localPath: () => __dirname, + dataSchemaFiles: () => Promise.resolve([ + { fileName: 'main.js', content } + ]) +}, { adapter: 'postgres', ...options }); + +describe('Pinot', () => { + jest.setTimeout(6 * 60 * 1000); + + let env: StartedDockerComposeEnvironment; + let config: { basicAuth: { user: string, password: string }, host: string, port: string }; + + const doWithDriver = async (callback: any) => { + const driver = new PinotDriver(config); + const result = await callback(driver); + return result; + }; + + // eslint-disable-next-line consistent-return,func-names + beforeAll(async () => { + if (process.env.TEST_PINOT_HOST) { + config = { + host: process.env.TEST_PINOT_HOST || 'http://localhost', + port: process.env.TEST_PINOT_PORT || '8099', + basicAuth: { + user: 'admin', + password: 'mysecret' + } + }; + + return; + } + + const dc = new DockerComposeEnvironment( + path.resolve(path.dirname(__filename), '../../'), + 'docker-compose.yml' + ); + + env = await dc + .withStartupTimeout(2 * 60 * 1000) + .withWaitStrategy('pinot-server-cube-tests', Wait.forHealthCheck()) + .up(); + + config = { + host: `http://${env.getContainer('pinot-broker-cube-tests').getHost()}`, + port: env.getContainer('pinot-broker-cube-tests').getMappedPort(8099).toString(), + basicAuth: { + user: 'admin', + password: 'mysecret' + } + }; + + const controller = 
env.getContainer('pinot-controller-cube-tests'); + + await controller.exec(['/opt/pinot/bin/pinot-admin.sh', 'AddTable', '-controllerPort', '9000', '-schemaFile', '/tmp/data/test-resources/students.schema.json', '-tableConfigFile', '/tmp/data/test-resources/students.table.json', '-exec']); + await controller.exec(['/opt/pinot/bin/pinot-admin.sh', 'AddTable', '-controllerPort', '9000', '-schemaFile', '/tmp/data/test-resources/scores.schema.json', '-tableConfigFile', '/tmp/data/test-resources/scores.table.json', '-exec']); + await controller.exec(['/opt/pinot/bin/pinot-admin.sh', 'LaunchDataIngestionJob', '-jobSpecFile', '/tmp/data/test-resources/students.jobspec.yml']); + await controller.exec(['/opt/pinot/bin/pinot-admin.sh', 'LaunchDataIngestionJob', '-jobSpecFile', '/tmp/data/test-resources/scores.jobspec.yml']); + }); + + // eslint-disable-next-line consistent-return,func-names + afterAll(async () => { + if (env) { + await env.down(); + } + }); + + describe('PinotDriver', () => { + it('constructs', async () => { + await doWithDriver(() => { + // + }); + }); + + // eslint-disable-next-line func-names + it('tests the connection', async () => { + await doWithDriver(async (driver: any) => { + await driver.testConnection(); + }); + }); + }); + + describe('PinotQuery', () => { + const { compiler, joinGraph, cubeEvaluator } = prepareCompiler(` + cube(\`students\`, { + sql_table: 'students', + + dimensions: { + studentID: { + type: 'number', + sql: 'studentID', + primary_key: true, + public: true + }, + firstName: { + type: 'string', + sql: 'firstName' + }, + lastName: { + type: 'string', + sql: 'lastName' + }, + gender: { + type: 'string', + sql: 'gender' + } + } + }); + + cube(\`scores\`, { + sql_table: 'scores', + + joins: { + students: { + relationship: 'many_to_one', + sql: \`\${CUBE}.studentID = \${students.studentID}\`, + }, + }, + + measures: { + count: { + type: 'count', + sql : '*', + }, + unboundedCount: { + type: 'count', + sql : '*', + rollingWindow: { + trailing: 'unbounded' + } + }, + maxScore: { + type: 'max', + sql: 'score' + }, + maxScoreEnglish: { + type: 'max', + sql: 'score', + filters: [ + { sql: \`\${CUBE}.subject = 'English'\` } + ] + } + }, + + dimensions: { + id: { + type: 'string', + sql: \`\${CUBE}.studentID || \${CUBE}.subject || toDateTime(\${CUBE}.score_date, 'yyyy-MM-dd')\`, + primary_key: true, + public: true + }, + scoreDate: { + type: 'time', + sql: 'score_date' + }, + subject: { + type: 'string', + sql: 'subject' + } + } + }); + `, []); + + const runQueryTest = async (q: any, expectedResult: any[]) => { + await compiler.compile(); + const query = new PinotQuery({ joinGraph, cubeEvaluator, compiler }, q); + + const [sqlQuery, sqlParams] = query.buildSqlAndParams() as [string, unknown[]]; + + console.log('SQL To execute', sqlQuery, sqlParams); + + const result = await doWithDriver(async (driver: PinotDriver) => driver.query(sqlQuery, sqlParams)); + + expect(result).toEqual( + expectedResult + ); + }; + + it('works simple join with equal filters', async () => { + const filterValuesVariants = [ + [['Lucy'], [{ scores__max_score: 3.8 }]], + [[null], [{ scores__max_score: null }]], + ]; + + for (const [values, expectedResult] of filterValuesVariants) { + await runQueryTest({ + measures: [ + 'scores.maxScore' + ], + timeDimensions: [], + filters: [{ + member: 'students.firstName', + operator: 'equals', + values + }], + timezone: 'America/Los_Angeles' + }, expectedResult); + } + }); + + it('works with a date range', async () => runQueryTest({ + measures: [ + 
'scores.maxScore' + ], + timeDimensions: [ + { + dimension: 'scores.scoreDate', + dateRange: ['2024-09-01', '2024-09-07'] + } + ], + timezone: 'America/Los_Angeles' + }, [{ scores__max_score: 3.8 }])); + + it('works with a date range', async () => runQueryTest({ + measures: [ + 'scores.maxScore' + ], + timeDimensions: [ + { + dimension: 'scores.scoreDate', + dateRange: ['2024-09-01', '2024-09-07'] + } + ], + timezone: 'America/Los_Angeles' + }, [{ scores__max_score: 3.8 }])); + + it('works with a filtered measure', async () => runQueryTest({ + measures: [ + 'scores.maxScoreEnglish' + ], + timeDimensions: [ + { + dimension: 'scores.scoreDate', + dateRange: ['2024-09-01', '2024-09-07'] + } + ], + timezone: 'America/Los_Angeles' + }, [{ scores__max_score_english: 3.5 }])); + + it('works with a date range and granularity', async () => runQueryTest({ + measures: [ + 'scores.maxScore' + ], + timeDimensions: [ + { + dimension: 'scores.scoreDate', + dateRange: ['2024-09-01', '2024-09-07'], + granularity: 'day' + } + ], + timezone: 'America/Los_Angeles', + order: [ + { id: 'scores.scoreDate' } + ] + }, + [ + { + scores__score_date_day: '2024-09-02 00:00:00.0', + scores__max_score: 3.2 + }, + { + scores__score_date_day: '2024-09-03 00:00:00.0', + scores__max_score: 3.5 + }, + { + scores__score_date_day: '2024-09-04 00:00:00.0', + scores__max_score: 3.8 + }, + ])); + + it('groups by the score_date field on the calculated granularity for unbounded trailing windows with dimension', async () => runQueryTest({ + measures: [ + 'scores.count', 'scores.unboundedCount' + ], + timeDimensions: [ + { + dimension: 'scores.scoreDate', + dateRange: ['2024-09-01', '2024-09-07'], + granularity: 'day' + } + ], + timezone: 'America/Los_Angeles', + order: [ + { id: 'scores.scoreDate' } + ] + }, + [ + { + scores__score_date_day: '2024-09-02 00:00:00.0', + scores__count: 1, + scores__unbounded_count: 2 + }, + { + scores__score_date_day: '2024-09-03 00:00:00.0', + scores__count: 3, + scores__unbounded_count: 5 + }, + { + scores__score_date_day: '2024-09-04 00:00:00.0', + scores__count: 1, + scores__unbounded_count: 6 + }, + ])); + }); +}); diff --git a/packages/cubejs-pinot-driver/tsconfig.json b/packages/cubejs-pinot-driver/tsconfig.json new file mode 100644 index 0000000000000..889114bc38306 --- /dev/null +++ b/packages/cubejs-pinot-driver/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "../../tsconfig.base.json", + "include": [ + "src", + "test" + ], + "compilerOptions": { + "outDir": "dist", + "rootDir": ".", + "baseUrl": "." 
+ } +} diff --git a/packages/cubejs-server-core/src/core/DriverDependencies.js b/packages/cubejs-server-core/src/core/DriverDependencies.js index 7e9f1d1812d31..47317889a285a 100644 --- a/packages/cubejs-server-core/src/core/DriverDependencies.js +++ b/packages/cubejs-server-core/src/core/DriverDependencies.js @@ -26,6 +26,7 @@ module.exports = { ksql: '@cubejs-backend/ksql-driver', questdb: '@cubejs-backend/questdb-driver', materialize: '@cubejs-backend/materialize-driver', + pinot: '@inthememory/pinot-driver', // List for JDBC drivers 'databricks-jdbc': '@cubejs-backend/databricks-jdbc-driver', }; diff --git a/packages/cubejs-server-core/src/core/types.ts b/packages/cubejs-server-core/src/core/types.ts index 763f86b6051cb..9ff62e0f57c0a 100644 --- a/packages/cubejs-server-core/src/core/types.ts +++ b/packages/cubejs-server-core/src/core/types.ts @@ -117,7 +117,8 @@ export type DatabaseType = | 'snowflake' | 'sqlite' | 'questdb' - | 'materialize'; + | 'materialize' + | 'pinot'; export type ContextToAppIdFn = (context: RequestContext) => string | Promise; export type ContextToOrchestratorIdFn = (context: RequestContext) => string | Promise; diff --git a/tsconfig.json b/tsconfig.json index f783c3da47a36..a467a74d4ce97 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -97,6 +97,9 @@ { "path": "packages/cubejs-trino-driver" }, + { + "path": "packages/cubejs-pinot-driver" + }, { "path": "packages/cubejs-server-core" }, diff --git a/yarn.lock b/yarn.lock index d93983347c86c..2319082e22035 100644 --- a/yarn.lock +++ b/yarn.lock @@ -27341,7 +27341,7 @@ string-length@^5.0.1: char-regex "^2.0.0" strip-ansi "^7.0.1" -"string-width-cjs@npm:string-width@^4.2.0": +"string-width-cjs@npm:string-width@^4.2.0", "string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.2, string-width@^4.2.3: version "4.2.3" resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== @@ -27359,15 +27359,6 @@ string-width@^1.0.1: is-fullwidth-code-point "^1.0.0" strip-ansi "^3.0.0" -"string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.2, string-width@^4.2.3: - version "4.2.3" - resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" - integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== - dependencies: - emoji-regex "^8.0.0" - is-fullwidth-code-point "^3.0.0" - strip-ansi "^6.0.1" - string-width@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/string-width/-/string-width-2.1.1.tgz#ab93f27a8dc13d28cac815c462143a6d9012ae9e" @@ -27466,7 +27457,7 @@ string_decoder@~1.1.1: dependencies: safe-buffer "~5.1.0" -"strip-ansi-cjs@npm:strip-ansi@^6.0.1": +"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.0, strip-ansi@^6.0.1: version "6.0.1" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== @@ -27494,13 +27485,6 @@ strip-ansi@^5.0.0, strip-ansi@^5.1.0, strip-ansi@^5.2.0: dependencies: ansi-regex "^4.1.0" -strip-ansi@^6.0.0, strip-ansi@^6.0.1: - version "6.0.1" - resolved 
"https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" - integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== - dependencies: - ansi-regex "^5.0.1" - strip-ansi@^7.0.1: version "7.1.0" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-7.1.0.tgz#d5b6568ca689d8561370b0707685d22434faff45" @@ -29828,7 +29812,7 @@ wordwrap@^1.0.0: resolved "https://registry.yarnpkg.com/wordwrap/-/wordwrap-1.0.0.tgz#27584810891456a4171c8d0226441ade90cbcaeb" integrity sha1-J1hIEIkUVqQXHI0CJkQa3pDLyus= -"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0": +"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== @@ -29863,15 +29847,6 @@ wrap-ansi@^6.2.0: string-width "^4.1.0" strip-ansi "^6.0.0" -wrap-ansi@^7.0.0: - version "7.0.0" - resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" - integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== - dependencies: - ansi-styles "^4.0.0" - string-width "^4.1.0" - strip-ansi "^6.0.0" - wrap-ansi@^8.1.0: version "8.1.0" resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-8.1.0.tgz#56dc22368ee570face1b49819975d9b9a5ead214"