From e1e0a0747f097a1669f52c61caf7b54a38694d0c Mon Sep 17 00:00:00 2001 From: Ayush Goyal <36241930+agl29@users.noreply.github.com> Date: Thu, 9 Jan 2025 16:56:20 +0530 Subject: [PATCH] [Trino] Update Trino integration guide in Hue (#3943) * [Trino] Update Trino integration guide in Hue * remove concurrency flag * remove links giving 403 --------- Co-authored-by: Harsh Gupta <42064744+Harshg999@users.noreply.github.com> (cherry picked from commit e119138402a4b1a10cc1a15579592cc6db756fab) (cherry picked from commit df47fe06cbf63f156fde3778035ad9df45a9f155) --- .../configuration/connectors/_index.md | 48 ++++++------------- .../administrator/installation/_index.md | 3 +- .../content/releases/release-notes-4.3.0.md | 2 +- .../content/releases/release-notes-4.9.0.md | 2 - tools/ci/check_for_website_dead_links.sh | 2 +- 5 files changed, 17 insertions(+), 40 deletions(-) diff --git a/docs/docs-site/content/administrator/configuration/connectors/_index.md b/docs/docs-site/content/administrator/configuration/connectors/_index.md index dfeee0b6fe7..c3f344f725e 100644 --- a/docs/docs-site/content/administrator/configuration/connectors/_index.md +++ b/docs/docs-site/content/administrator/configuration/connectors/_index.md @@ -165,49 +165,29 @@ Currently just substitute 'presto' with 'trino' or vice versa. ### Trino -Fork of PrestoSQL (hence still having 'presto' name in several parameters). 
+To support Trino in Hue, we're leveraging the official [Trino Python client](https://github.com/trinodb/trino-python-client). -Install at least the 0.6.2 version of pyhive from https://github.com/gethue/PyHive or https://github.com/dropbox/PyHive +Install at least the 0.329.0 version of trino from https://github.com/trinodb/trino-python-client or https://pypi.org/project/trino/ - ./build/env/bin/pip install pyhive - -Then give Hue the information about the database source following the `trino://{trino-coordinator}:{port}/{catalog}/{schema}` format: - - [[[presto]]] - name = Trino - interface=sqlalchemy - options='{"url": "trino://localhost:8080/tpch/default"}' - -**Note**: keep `[[[presto]]]` if not using the [connectors](/administrator/configuration/connectors/#connectors). - -With impersonation: - - options='{"url": "trino://localhost:8080/tpch/default", "has_impersonation": true}' + ./build/env/bin/pip install trino -With Kerberos: +Then give Hue the information about the Trino cluster: - options='{"url": "trino://localhost:8080/tpch/default?KerberosKeytabPath=/path/to/keytab&KerberosPrincipal=principal&KerberosRemoteServiceName=service&protocol=https"' + [[[trino]]] + name=Trino + interface=trino + options='{"url": "http://localhost:8080"}' With credentials: - options='{"url": "trino://username:password@localhost:8080/tpch/default"}' - -With LDAPS enabled over HTTPS: - - options='{"url": "trino://username:password@localhost:8443/tpch/default","connect_args":"{\"protocol\": \"https\"}"}' + options='{"url": "http://localhost:8080", "auth_username": "", "auth_password":""}' -Pass Trino Session properties along with HTTPS: +With password script: - options='{"url": "trino://username:password@localhost:8443/tpch/default","connect_args":"{\"protocol\": \"https\", \"session_props\": {\"query_max_run_time\": \"1m\"}}"}' - -Pass Trino Session Properties without HTTPS enabled: - - options='{"url": 
"trino://username:password@localhost:8080/tpch/default","connect_args":"{\"session_props\": {\"query_max_run_time\": \"1m\"}}"}' + options='{"url": "http://localhost:8080", "auth_username": "", "auth_password_script":""}' **Note** -In the past Hue did not use trino specific dialect of SQLAlchemy which may lead to a *catalog must be specified* error. This can be solved by setting `protocol.v1.alternate-header-name=Presto` in the Trino's configuration. More details about his can be found at [Migrating from PrestoSQL to Trino](https://trino.io/blog/2021/01/04/migrating-from-prestosql-to-trino.html) - -Also give a try to https://github.com/dungdm93/sqlalchemy-trino for the 'trino://' and avoiding the [old protocol issue](https://github.com/dropbox/PyHive/issues/378). +Currently, only [basic LDAP authentication](https://github.com/trinodb/trino-python-client?tab=readme-ov-file#basic-authentication) using username and password or password script is supported. Alternatively, you can establish unsecured Trino connections. ### Oracle @@ -596,7 +576,7 @@ Then give Hue the information about the database source: Alternative: -Vertica’s JDBC client drivers can be downloaded here: [Vertica JDBC Client Drivers](http://my.vertica.com/download/vertica/client-drivers/). Be sure to download the driver for the right version and OS. +Be sure to download the Vertica JDBC client driver for the right version and OS. [[[vertica]]] name=Vertica JDBC @@ -955,7 +935,7 @@ Hue's filebrowser can now allow users to explore, manage, and upload data in an Read more about it in the [S3 User Documentation](/user/browsing#s3). 
-In order to add an S3 account to Hue, you'll need to configure Hue with valid S3 credentials, including the access key ID and secret access key: [AWSCredentials](http://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSGettingStartedGuide/AWSCredentials.html) +In order to add an S3 account to Hue, you'll need to configure Hue with valid S3 credentials, including the access key ID and secret access key. These keys can securely stored in a script that outputs the actual access key and secret key to stdout to be read by Hue (this is similar to how Hue reads password scripts). In order to use script files, add the following section to your hue.ini configuration file: diff --git a/docs/docs-site/content/administrator/installation/_index.md b/docs/docs-site/content/administrator/installation/_index.md index c2374f5d9ed..8cc68d69fad 100644 --- a/docs/docs-site/content/administrator/installation/_index.md +++ b/docs/docs-site/content/administrator/installation/_index.md @@ -20,7 +20,6 @@ Continue to the [next sections](/administrator/installation/dependencies/) for d You can also find distributions of Hue via these companies: * [Cloudera Data Platform - Hue](https://www.cloudera.com/products/cloudera-data-platform.html) -* [Amazon AWS EMR - Hue](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-hue.html) -* [Open Data Hub - Data Catalog](https://opendatahub.io/docs/advanced-tutorials/data-exploration.html) +* Amazon AWS EMR - Hue * [Google Cloud Dataproc - Hue](https://github.com/GoogleCloudPlatform/dataproc-initialization-actions/tree/master/hue) * [Azure HDInsight - Hue](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-hue-linux) diff --git a/docs/docs-site/content/releases/release-notes-4.3.0.md b/docs/docs-site/content/releases/release-notes-4.3.0.md index 5cd2739e874..4b6978e2197 100644 --- a/docs/docs-site/content/releases/release-notes-4.3.0.md +++ b/docs/docs-site/content/releases/release-notes-4.3.0.md @@ -696,7 +696,7 @@ List of 900+ 
Commits * dc2febd HUE-3287 [core] Django 1.11 upgrade - To upgrade Django 1.11, following changes are needed: * 7e419c2 HUE-3287 [core] Django 1.11 upgrade - Adding PyYAML-3.12 colorama-0.3.2 docutils-0.14 nose-1.3.7 openpyxl-2.5.3 repoze.who-2.3 wheel-0.31.0 zope.interface-4.5.0 python modules * 386b2b7 HUE-3287 [core] Django 1.11 upgrade - Fixing navigator error -* a96e334 HUE-3287 [core] Django 1.11 upgrade - fixing django-openid-auth-0.14/django_openid_auth/models.py from registering BLOB (Large object) with 2048 char length see https://stackoverflow.com/questions/44358506/sql-error-1170 +* a96e334 HUE-3287 [core] Django 1.11 upgrade - fixing django-openid-auth-0.14/django_openid_auth/models.py from registering BLOB (Large object) with 2048 char length * 64ff0d2 HUE-3287 [core] Django 1.11 upgrade - Reverting to requests-kerberos-0.6.1 * 5ad587f HUE-3287 [core] Django 1.11 upgrade - Upgrading following modules: - avro-1.8.2 - backports.csv-1.0.5 - configobj-5.0.6 - cryptography-2.1.4 - defusedxml-0.5.0 - django-openid-auth-0.14 - djangosaml2-0.16.11 - elementtree-1.2.6-20050316 - enum34-1.1.6 * 3186126 HUE-3287 [core] Django 1.11 upgrade - Fixing Hue Port :8888 and :8889 access issue diff --git a/docs/docs-site/content/releases/release-notes-4.9.0.md b/docs/docs-site/content/releases/release-notes-4.9.0.md index c0cdba46025..7c517040b36 100644 --- a/docs/docs-site/content/releases/release-notes-4.9.0.md +++ b/docs/docs-site/content/releases/release-notes-4.9.0.md @@ -23,8 +23,6 @@ First, the support of many SQL dialects has been improved, with in particular Ap * [Query live HBase data with Phoenix SQL](https://gethue.com/blog/querying-live-kafka-data-in-apache-hbase-with-phoenix/) * [Getting started with the Spark SQL Editor](https://gethue.com/blog/querying-spark-sql-with-spark-thrift-server-and-hue-editor/) -!["Flink SQL - SELECT and INSERT data into Kafka Topics"](https://cdn.gethue.com/uploads/2021/02/peek-log-streams.gif) - The Editor is also getting split 
up into components, so that they are cleaner and easier to reuse (e.g. SQL parsers, SQL Scratchpad...). This will bring a new version of the Editor, which is currently in beta. Another improvement in the introduction of the Connectors (in beta too) so that SQL dialects can be easily added without any server restart. Secondly, special thanks to the community who contributed a brand new Dask SQL autocomplete (ISSUE-1480) and also Prometheus Alerting for Kubernetes (PR-1648). diff --git a/tools/ci/check_for_website_dead_links.sh b/tools/ci/check_for_website_dead_links.sh index ff01df9c285..e3b39e56df6 100755 --- a/tools/ci/check_for_website_dead_links.sh +++ b/tools/ci/check_for_website_dead_links.sh @@ -46,7 +46,7 @@ if [ "$?" -eq "0" ]; |http://demo.gethue.com*|https://twitter.com/gethue|https://github.com*|https://cdn.gethue.com/downloads/*|https://pypi.org*" \ --ignore-fragments \ --timeout 15 \ - --concurrency 10 + --buffer-size 8192 LINT_EXIT_CODE=$? kill $HUGO_PID