diff --git a/backend/dataall/modules/worksheets/aws/bedrock_client.py b/backend/dataall/modules/worksheets/aws/bedrock_client.py index 3c799c3ce..13a0e286f 100644 --- a/backend/dataall/modules/worksheets/aws/bedrock_client.py +++ b/backend/dataall/modules/worksheets/aws/bedrock_client.py @@ -3,11 +3,11 @@ from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import PromptTemplate from dataall.base.db import exceptions -from dataall.modules.worksheets.aws.bedrock_prompts import ( - SQL_EXAMPLES, - TEXT_TO_SQL_PROMPT_TEMPLATE, - PROCESS_TEXT_PROMPT_TEMPLATE, -) +import os + +TEXT_TO_SQL_EXAMPLES_PATH = os.path.join(os.path.dirname(__file__), 'bedrock_prompts', 'text_to_sql_examples.txt') +TEXT_TO_SQL_TEMPLATE_PATH = os.path.join(os.path.dirname(__file__), 'bedrock_prompts', 'test_to_sql_template.txt') +PROCESS_TEXT_TEMPLATE_PATH = os.path.join(os.path.dirname(__file__), 'bedrock_prompts', 'process_text_template.txt') class BedrockClient: @@ -29,16 +29,19 @@ def __init__(self): ) def invoke_model_text_to_sql(self, prompt: str, metadata: str): - prompt_template = PromptTemplate.from_template(TEXT_TO_SQL_PROMPT_TEMPLATE) - + prompt_template = PromptTemplate.from_file(TEXT_TO_SQL_TEMPLATE_PATH) chain = prompt_template | self._model | StrOutputParser() - response = chain.invoke({'prompt': prompt, 'context': metadata, 'examples': SQL_EXAMPLES}) + + with open(TEXT_TO_SQL_EXAMPLES_PATH, 'r') as f: + examples = f.read() + + response = chain.invoke({'prompt': prompt, 'context': metadata, 'examples': examples}) if response.startswith('Error:'): raise exceptions.ModelGuardrailException(response) return response def invoke_model_process_text(self, prompt: str, content: str): - prompt_template = PromptTemplate.from_template(PROCESS_TEXT_PROMPT_TEMPLATE) + prompt_template = PromptTemplate.from_file(PROCESS_TEXT_TEMPLATE_PATH) chain = prompt_template | self._model | StrOutputParser() response = chain.invoke({'prompt': prompt, 'content': content}) diff 
--git a/backend/dataall/modules/worksheets/aws/bedrock_prompts.py b/backend/dataall/modules/worksheets/aws/bedrock_prompts.py deleted file mode 100644 index b90fdbbb7..000000000 --- a/backend/dataall/modules/worksheets/aws/bedrock_prompts.py +++ /dev/null @@ -1,104 +0,0 @@ -SQL_EXAMPLES = [ - { - 'User': """I want to get the average area of all listings \\n\\nBased on on the following glue metadata: \n - - Database Name : dataall_homes_11p3uu8f - Table name: listings - Column Metadata: [{'Name': 'price', 'Type': 'bigint'}, {'Name': 'area', 'Type': 'bigint'}, {'Name': 'bedrooms', 'Type': 'bigint'}, {'Name': 'bathrooms', 'Type': 'bigint'}, {'Name': 'stories', 'Type': 'bigint'}, {'Name': 'mainroad', 'Type': 'string'}, {'Name': 'guestroom', 'Type': 'string'}, {'Name': 'basement', 'Type': 'string'}, {'Name': 'hotwaterheating', 'Type': 'string'}, {'Name': 'airconditioning', 'Type': 'string'}, {'Name': 'parking', 'Type': 'bigint'}, {'Name': 'prefarea', 'Type': 'string'}, {'Name': 'furnishingstatus', 'Type': 'string'}, {'Name': 'passengerid', 'Type': 'bigint'}, {'Name': 'survived', 'Type': 'bigint'}, {'Name': 'pclass', 'Type': 'bigint'}, {'Name': 'name', 'Type': 'string'}, {'Name': 'sex', 'Type': 'string'}, {'Name': 'age', 'Type': 'double'}, {'Name': 'sibsp', 'Type': 'bigint'}, {'Name': 'parch', 'Type': 'bigint'}, {'Name': 'ticket', 'Type': 'string'}, {'Name': 'fare', 'Type': 'double'}, {'Name': 'cabin', 'Type': 'string'}, {'Name': 'embarked', 'Type': 'string'}] - Partition Metadata: [] - """, - 'AI': """SELECT AVG(CAST(area AS DOUBLE)) -FROM dataall_homes_11p3uu8f.listings -WHERE area IS NOT NULL;""", - }, - { - 'User': """I want to get the average of the 3 most expensive listings with less than 3 bedrooms\\n\\nBased on on the following glue metadata: \n - - Database Name : dataall_homes_11p3uu8f - Table name: listings - Column Metadata: [{'Name': 'price', 'Type': 'bigint'}, {'Name': 'area', 'Type': 'bigint'}, {'Name': 'bedrooms', 'Type': 'bigint'}, {'Name': 'bathrooms', 
'Type': 'bigint'}, {'Name': 'stories', 'Type': 'bigint'}, {'Name': 'mainroad', 'Type': 'string'}, {'Name': 'guestroom', 'Type': 'string'}, {'Name': 'basement', 'Type': 'string'}, {'Name': 'hotwaterheating', 'Type': 'string'}, {'Name': 'airconditioning', 'Type': 'string'}, {'Name': 'parking', 'Type': 'bigint'}, {'Name': 'prefarea', 'Type': 'string'}, {'Name': 'furnishingstatus', 'Type': 'string'}, {'Name': 'passengerid', 'Type': 'bigint'}, {'Name': 'survived', 'Type': 'bigint'}, {'Name': 'pclass', 'Type': 'bigint'}, {'Name': 'name', 'Type': 'string'}, {'Name': 'sex', 'Type': 'string'}, {'Name': 'age', 'Type': 'double'}, {'Name': 'sibsp', 'Type': 'bigint'}, {'Name': 'parch', 'Type': 'bigint'}, {'Name': 'ticket', 'Type': 'string'}, {'Name': 'fare', 'Type': 'double'}, {'Name': 'cabin', 'Type': 'string'}, {'Name': 'embarked', 'Type': 'string'}] - Partition Metadata: [] - """, - 'AI': """SELECT AVG(price) AS average_price -FROM ( - SELECT price - FROM dataall_homes_11p3uu8f.listings - WHERE bedrooms > 3 - ORDER BY price DESC - LIMIT 3)""", - }, - { - 'User': """I want to see if any letter has been sent from 900 Somerville Avenue to 2 Finnigan Street and what is the content n\nBased on the following glue metadata: \n - - ["Database name: dataall_packages_omf768qq \n Table name: packages \n Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'contents', 'Type': 'string'}, {'Name': 'from_address_id', 'Type': 'bigint'}, {'Name': 'to_address_id', 'Type': 'bigint'}]\n Partition Metadata: []\n ", "\n Table name: addresses \n Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'address', 'Type': 'string'}, {'Name': 'type', 'Type': 'string'}]\n Partition Metadata: []\n ", "\n Table name: drivers \n Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'name', 'Type': 'string'}]\n Partition Metadata: []\n ", "\n Table name: scans \n Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'driver_id', 'Type': 'bigint'}, {'Name': 'package_id', 'Type': 
'bigint'}, {'Name': 'address_id', 'Type': 'bigint'}, {'Name': 'action', 'Type': 'string'}, {'Name': 'timestamp', 'Type': 'string'}]\n Partition Metadata: []\n "] - - - """, - 'AI': """SELECT p.contents -FROM dataall_packages_omf768qq.packages p -JOIN dataall_packages_omf768qq.addresses a1 ON p.from_address_id = a1.id -JOIN dataall_packages_omf768qq.addresses a2 ON p.to_address_id = a2.id -WHERE a1.address = '900 Somerville Avenue' AND a2.address = '2 Finnigan Street'""", - }, -] - -TEXT_TO_SQL_PROMPT_TEMPLATE = """ -You will be given the name of an AWS Glue Database, metadata from one or more AWS Glue Table(s) and a user prompt from a user. - -Based on this information your job is to turn the prompt into a SQL query that will be sent to query the data within the tables in Amazon Athena. - -Take the following points into consideration. It is crucial that you follow them: - -- I only want you to return the SQL needed (NO EXPLANATION or anything else). - -- Tables are referenced on the following form 'database_name.table_name' (for example 'Select * FROM database_name.table_name ...' and not 'SELECT * FROM table_name ...) since we dont have access to the table name directly since its not global variable. - -- Take relations between tables into consideration, for example if you have a table with columns that might reference the other tables, you would need to join them in the query. - -- The generate SQL statement MUST be Read only (no WRITE, INSERT, ALTER or DELETE keywords) - -- Answer on the same form as the examples given below. - -Examples: -{examples} - - -I want you to follow the following steps when generating the SQL statement: - -Step 1: Determine if the given tables columns are suitable to answer the question. -If not respond with "Error: The tables provided does not give enough information" - -Step 2: Determine if the user wants to perform any mutations, if so return "Error: Only READ queries are allowed" - -Step 3: Determine if joins will be needed. 
- -Step 4: Generate the SQL in order to solve the problem. - - -Based on the following glue metadata: - -{context} - - -User prompt: {prompt} - - -""" - - -PROCESS_TEXT_PROMPT_TEMPLATE = """ -You are an AI assistant tasked with analyzing and processing text content. Your goal is to provide accurate and helpful responses based on the given content and user prompt. -You must follow the steps: - -1. Detetermine if the document has the information to be able to answer the question. If not respond with "Error: The Document does not provide the information needed to answer you question" -2. I want you to answer the question based on the information in the document. -3. At the bottom I want you to provide the sources (the parts of the document where you found the results). The sources should be listed in order - - -Content to analyze: -{content} - -User prompt: {prompt} - -Please provide a response that addresses the user's prompt in the context of the given content. Be thorough, accurate, and helpful in your analysis. -""" diff --git a/backend/dataall/modules/worksheets/aws/bedrock_prompts/process_text_template.txt b/backend/dataall/modules/worksheets/aws/bedrock_prompts/process_text_template.txt new file mode 100644 index 000000000..bb22fcc8e --- /dev/null +++ b/backend/dataall/modules/worksheets/aws/bedrock_prompts/process_text_template.txt @@ -0,0 +1,14 @@ +You are an AI assistant tasked with analyzing and processing text content. Your goal is to provide accurate and helpful responses based on the given content and user prompt. +You must follow the steps: + +1. Detetermine if the document has the information to be able to answer the question. If not respond with "Error: The Document does not provide the information needed to answer you question" +2. I want you to answer the question based on the information in the document. +3. At the bottom I want you to provide the sources (the parts of the document where you found the results). 
The sources should be listed in order + + +Content to analyze: +{content} + +User prompt: {prompt} + +Please provide a response that addresses the user's prompt in the context of the given content. Be thorough, accurate, and helpful in your analysis. diff --git a/backend/dataall/modules/worksheets/aws/bedrock_prompts/test_to_sql_template.txt b/backend/dataall/modules/worksheets/aws/bedrock_prompts/test_to_sql_template.txt new file mode 100644 index 000000000..c24f5f9f0 --- /dev/null +++ b/backend/dataall/modules/worksheets/aws/bedrock_prompts/test_to_sql_template.txt @@ -0,0 +1,38 @@ +You will be given the name of an AWS Glue Database, metadata from one or more AWS Glue Table(s) and a user prompt from a user. + +Based on this information your job is to turn the prompt into a SQL query that will be sent to query the data within the tables in Amazon Athena. + +Take the following points into consideration. It is crucial that you follow them: + +- I only want you to return the SQL needed (NO EXPLANATION or anything else). + +- Tables are referenced on the following form 'database_name.table_name' (for example 'Select * FROM database_name.table_name ...' and not 'SELECT * FROM table_name ...) since we dont have access to the table name directly since its not global variable. + +- Take relations between tables into consideration, for example if you have a table with columns that might reference the other tables, you would need to join them in the query. + +- The generate SQL statement MUST be Read only (no WRITE, INSERT, ALTER or DELETE keywords) + +- Answer on the same form as the examples given below. + +Examples: +{examples} + + +I want you to follow the following steps when generating the SQL statement: + +Step 1: Determine if the given tables columns are suitable to answer the question. 
+If not respond with "Error: The tables provided does not give enough information" + +Step 2: Determine if the user wants to perform any mutations, if so return "Error: Only READ queries are allowed" + +Step 3: Determine if joins will be needed. + +Step 4: Generate the SQL in order to solve the problem. + + +Based on the following metadata: +{context} + + +User prompt: {prompt} + diff --git a/backend/dataall/modules/worksheets/aws/bedrock_prompts/text_to_sql_examples.txt b/backend/dataall/modules/worksheets/aws/bedrock_prompts/text_to_sql_examples.txt new file mode 100644 index 000000000..8fa9bd84c --- /dev/null +++ b/backend/dataall/modules/worksheets/aws/bedrock_prompts/text_to_sql_examples.txt @@ -0,0 +1,49 @@ +Example 1. +User prompt: I want to get the average area of all listings + +Context: Based on the following metadata +Database Name : dataall_homes_11p3uu8f +Table Name: listings +Column Metadata: [{'Name': 'price', 'Type': 'bigint'}, {'Name': 'area', 'Type': 'bigint'}, {'Name': 'bedrooms', 'Type': 'bigint'}, {'Name': 'bathrooms', 'Type': 'bigint'}, {'Name': 'stories', 'Type': 'bigint'}, {'Name': 'mainroad', 'Type': 'string'}, {'Name': 'guestroom', 'Type': 'string'}, {'Name': 'basement', 'Type': 'string'}, {'Name': 'hotwaterheating', 'Type': 'string'}, {'Name': 'airconditioning', 'Type': 'string'}, {'Name': 'parking', 'Type': 'bigint'}, {'Name': 'prefarea', 'Type': 'string'}, {'Name': 'furnishingstatus', 'Type': 'string'}, {'Name': 'passengerid', 'Type': 'bigint'}, {'Name': 'survived', 'Type': 'bigint'}, {'Name': 'pclass', 'Type': 'bigint'}, {'Name': 'name', 'Type': 'string'}, {'Name': 'sex', 'Type': 'string'}, {'Name': 'age', 'Type': 'double'}, {'Name': 'sibsp', 'Type': 'bigint'}, {'Name': 'parch', 'Type': 'bigint'}, {'Name': 'ticket', 'Type': 'string'}, {'Name': 'fare', 'Type': 'double'}, {'Name': 'cabin', 'Type': 'string'}, {'Name': 'embarked', 'Type': 'string'}] +Partition Metadata: [] + +Response: SELECT AVG(CAST(area AS DOUBLE)) FROM 
dataall_homes_11p3uu8f.listings WHERE area IS NOT NULL; + + +Example 2. +User prompt: I want to get the average of the 3 most expensive listings with less than 3 bedrooms + +Context: Based on the following metadata +Database Name : dataall_homes_11p3uu8f +Table Name: listings +Column Metadata: [{'Name': 'price', 'Type': 'bigint'}, {'Name': 'area', 'Type': 'bigint'}, {'Name': 'bedrooms', 'Type': 'bigint'}, {'Name': 'bathrooms', 'Type': 'bigint'}, {'Name': 'stories', 'Type': 'bigint'}, {'Name': 'mainroad', 'Type': 'string'}, {'Name': 'guestroom', 'Type': 'string'}, {'Name': 'basement', 'Type': 'string'}, {'Name': 'hotwaterheating', 'Type': 'string'}, {'Name': 'airconditioning', 'Type': 'string'}, {'Name': 'parking', 'Type': 'bigint'}, {'Name': 'prefarea', 'Type': 'string'}, {'Name': 'furnishingstatus', 'Type': 'string'}, {'Name': 'passengerid', 'Type': 'bigint'}, {'Name': 'survived', 'Type': 'bigint'}, {'Name': 'pclass', 'Type': 'bigint'}, {'Name': 'name', 'Type': 'string'}, {'Name': 'sex', 'Type': 'string'}, {'Name': 'age', 'Type': 'double'}, {'Name': 'sibsp', 'Type': 'bigint'}, {'Name': 'parch', 'Type': 'bigint'}, {'Name': 'ticket', 'Type': 'string'}, {'Name': 'fare', 'Type': 'double'}, {'Name': 'cabin', 'Type': 'string'}, {'Name': 'embarked', 'Type': 'string'}] +Partition Metadata: [] + +Response: SELECT AVG(price) AS average_price FROM (SELECT price FROM dataall_homes_11p3uu8f.listings WHERE bedrooms < 3 ORDER BY price DESC LIMIT 3); + + +Example 3. 
+User prompt: I want to see if any letter has been sent from 900 Somerville Avenue to 2 Finnigan Street and what is the content + +Context: Based on the following metadata +Database Name : dataall_packages_omf768qq +Table Name: packages +Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'contents', 'Type': 'string'}, {'Name': 'from_address_id', 'Type': 'bigint'}, {'Name': 'to_address_id', 'Type': 'bigint'}] +Partition Metadata: [] + +Database Name : dataall_packages_omf768qq +Table Name: addresses +Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'address', 'Type': 'string'}, {'Name': 'type', 'Type': 'string'}] +Partition Metadata: [] + +Database Name : dataall_packages_omf768qq +Table Name: drivers +Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'name', 'Type': 'string'}] +Partition Metadata: [] + +Database Name : dataall_packages_omf768qq +Table Name: scans +Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'driver_id', 'Type': 'bigint'}, {'Name': 'package_id', 'Type': 'bigint'}, {'Name': 'address_id', 'Type': 'bigint'}, {'Name': 'action', 'Type': 'string'}, {'Name': 'timestamp', 'Type': 'string'}] +Partition Metadata: [] + +Response: SELECT p.contents FROM dataall_packages_omf768qq.packages p JOIN dataall_packages_omf768qq.addresses a1 ON p.from_address_id = a1.id JOIN dataall_packages_omf768qq.addresses a2 ON p.to_address_id = a2.id WHERE a1.address = '900 Somerville Avenue' AND a2.address = '2 Finnigan Street'; diff --git a/backend/dataall/modules/worksheets/aws/glue_client.py b/backend/dataall/modules/worksheets/aws/glue_client.py index 295e0e8f4..d123a7662 100644 --- a/backend/dataall/modules/worksheets/aws/glue_client.py +++ b/backend/dataall/modules/worksheets/aws/glue_client.py @@ -25,8 +25,8 @@ def get_table_metadata(self, database, table_name): column_metadata = table_metadata['Table']['StorageDescriptor']['Columns'] partition_metadata = table_metadata['Table']['PartitionKeys'] meta_data = f""" - 
Database name: {database} - Table name: {table_name} + Database Name: {database} + Table Name: {table_name} Column Metadata: {column_metadata} Partition Metadata: {partition_metadata} """ diff --git a/frontend/src/modules/DatasetsBase/components/DatasetGovernance.js b/frontend/src/modules/DatasetsBase/components/DatasetGovernance.js index f2e86a115..2cda63d26 100644 --- a/frontend/src/modules/DatasetsBase/components/DatasetGovernance.js +++ b/frontend/src/modules/DatasetsBase/components/DatasetGovernance.js @@ -9,9 +9,8 @@ import { Typography } from '@mui/material'; import PropTypes from 'prop-types'; -import { Label } from 'design'; +import { Label, UserModal } from 'design'; import { isFeatureEnabled } from 'utils'; -import { UserModal } from 'design'; import { useState } from 'react'; export const DatasetGovernance = (props) => { diff --git a/frontend/src/modules/Environments/components/EnvironmentOverview.js b/frontend/src/modules/Environments/components/EnvironmentOverview.js index ae6f012f0..2991c67e6 100644 --- a/frontend/src/modules/Environments/components/EnvironmentOverview.js +++ b/frontend/src/modules/Environments/components/EnvironmentOverview.js @@ -1,8 +1,7 @@ import React, { useState } from 'react'; import { Box, Grid } from '@mui/material'; import PropTypes from 'prop-types'; -import { ObjectBrief, ObjectMetadata } from 'design'; -import { UserModal } from 'design'; +import { ObjectBrief, ObjectMetadata, UserModal } from 'design'; import { EnvironmentConsoleAccess } from './EnvironmentConsoleAccess'; import { EnvironmentFeatures } from './EnvironmentFeatures'; diff --git a/frontend/src/modules/Organizations/components/OrganizationOverview.js b/frontend/src/modules/Organizations/components/OrganizationOverview.js index 95234dd1d..4f58e6102 100644 --- a/frontend/src/modules/Organizations/components/OrganizationOverview.js +++ b/frontend/src/modules/Organizations/components/OrganizationOverview.js @@ -1,8 +1,7 @@ import React, { useState } from 
'react'; import { Box, Grid } from '@mui/material'; import PropTypes from 'prop-types'; -import { ObjectBrief, ObjectMetadata } from 'design'; -import { UserModal } from 'design'; +import { ObjectBrief, ObjectMetadata, UserModal } from 'design'; export const OrganizationOverview = (props) => { const { organization, ...other } = props; diff --git a/frontend/src/modules/Worksheets/components/TextDisplay.js b/frontend/src/modules/Worksheets/components/TextDisplay.js deleted file mode 100644 index bf89112b3..000000000 --- a/frontend/src/modules/Worksheets/components/TextDisplay.js +++ /dev/null @@ -1,33 +0,0 @@ -import React from 'react'; -import PropTypes from 'prop-types'; -import { THEMES, useSettings } from 'design'; - -export const TextDisplay = ({ text }) => { - const { settings } = useSettings(); - - const containerStyle = { - width: '600px', - height: '390px', - maxWidth: '100%', - margin: '0 auto', - padding: '20px', - border: - settings.theme === THEMES.LIGHT ? '1px solid #eee' : '1px solid #333', - borderRadius: '5px', - backgroundColor: settings.theme === THEMES.LIGHT ? '#ffffff' : '#1e1e1e', - color: settings.theme === THEMES.LIGHT ? '#333333' : '#d4d4d4', - fontFamily: 'Arial, sans-serif', - fontSize: '14px', - lineHeight: '1.6', - whiteSpace: 'pre-wrap', - wordWrap: 'break-word', - overflowY: 'auto', - maxHeight: '400px' - }; - - return
{text}
; -}; - -TextDisplay.propTypes = { - text: PropTypes.string.isRequired -}; diff --git a/frontend/src/modules/Worksheets/components/WorksheetTextToSQLEditor.js b/frontend/src/modules/Worksheets/components/WorksheetTextToSQLEditor.js index e0bec7086..1dd1764a3 100644 --- a/frontend/src/modules/Worksheets/components/WorksheetTextToSQLEditor.js +++ b/frontend/src/modules/Worksheets/components/WorksheetTextToSQLEditor.js @@ -210,6 +210,12 @@ export const WorksheetTextToSQLEditor = ({