diff --git a/backend/dataall/modules/worksheets/aws/bedrock_client.py b/backend/dataall/modules/worksheets/aws/bedrock_client.py
index 3c799c3ce..13a0e286f 100644
--- a/backend/dataall/modules/worksheets/aws/bedrock_client.py
+++ b/backend/dataall/modules/worksheets/aws/bedrock_client.py
@@ -3,11 +3,11 @@
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from dataall.base.db import exceptions
-from dataall.modules.worksheets.aws.bedrock_prompts import (
- SQL_EXAMPLES,
- TEXT_TO_SQL_PROMPT_TEMPLATE,
- PROCESS_TEXT_PROMPT_TEMPLATE,
-)
+import os
+
+TEXT_TO_SQL_EXAMPLES_PATH = os.path.join(os.path.dirname(__file__), 'bedrock_prompts', 'text_to_sql_examples.txt')
+TEXT_TO_SQL_TEMPLATE_PATH = os.path.join(os.path.dirname(__file__), 'bedrock_prompts', 'test_to_sql_template.txt')
+PROCESS_TEXT_TEMPLATE_PATH = os.path.join(os.path.dirname(__file__), 'bedrock_prompts', 'process_text_template.txt')
class BedrockClient:
@@ -29,16 +29,19 @@ def __init__(self):
)
def invoke_model_text_to_sql(self, prompt: str, metadata: str):
- prompt_template = PromptTemplate.from_template(TEXT_TO_SQL_PROMPT_TEMPLATE)
-
+ prompt_template = PromptTemplate.from_file(TEXT_TO_SQL_TEMPLATE_PATH)
chain = prompt_template | self._model | StrOutputParser()
- response = chain.invoke({'prompt': prompt, 'context': metadata, 'examples': SQL_EXAMPLES})
+
+ with open(TEXT_TO_SQL_EXAMPLES_PATH, 'r') as f:
+ examples = f.read()
+
+ response = chain.invoke({'prompt': prompt, 'context': metadata, 'examples': examples})
if response.startswith('Error:'):
raise exceptions.ModelGuardrailException(response)
return response
def invoke_model_process_text(self, prompt: str, content: str):
- prompt_template = PromptTemplate.from_template(PROCESS_TEXT_PROMPT_TEMPLATE)
+ prompt_template = PromptTemplate.from_file(PROCESS_TEXT_TEMPLATE_PATH)
chain = prompt_template | self._model | StrOutputParser()
response = chain.invoke({'prompt': prompt, 'content': content})
diff --git a/backend/dataall/modules/worksheets/aws/bedrock_prompts.py b/backend/dataall/modules/worksheets/aws/bedrock_prompts.py
deleted file mode 100644
index b90fdbbb7..000000000
--- a/backend/dataall/modules/worksheets/aws/bedrock_prompts.py
+++ /dev/null
@@ -1,104 +0,0 @@
-SQL_EXAMPLES = [
- {
- 'User': """I want to get the average area of all listings \\n\\nBased on on the following glue metadata: \n
-
- Database Name : dataall_homes_11p3uu8f
- Table name: listings
- Column Metadata: [{'Name': 'price', 'Type': 'bigint'}, {'Name': 'area', 'Type': 'bigint'}, {'Name': 'bedrooms', 'Type': 'bigint'}, {'Name': 'bathrooms', 'Type': 'bigint'}, {'Name': 'stories', 'Type': 'bigint'}, {'Name': 'mainroad', 'Type': 'string'}, {'Name': 'guestroom', 'Type': 'string'}, {'Name': 'basement', 'Type': 'string'}, {'Name': 'hotwaterheating', 'Type': 'string'}, {'Name': 'airconditioning', 'Type': 'string'}, {'Name': 'parking', 'Type': 'bigint'}, {'Name': 'prefarea', 'Type': 'string'}, {'Name': 'furnishingstatus', 'Type': 'string'}, {'Name': 'passengerid', 'Type': 'bigint'}, {'Name': 'survived', 'Type': 'bigint'}, {'Name': 'pclass', 'Type': 'bigint'}, {'Name': 'name', 'Type': 'string'}, {'Name': 'sex', 'Type': 'string'}, {'Name': 'age', 'Type': 'double'}, {'Name': 'sibsp', 'Type': 'bigint'}, {'Name': 'parch', 'Type': 'bigint'}, {'Name': 'ticket', 'Type': 'string'}, {'Name': 'fare', 'Type': 'double'}, {'Name': 'cabin', 'Type': 'string'}, {'Name': 'embarked', 'Type': 'string'}]
- Partition Metadata: []
- """,
- 'AI': """SELECT AVG(CAST(area AS DOUBLE))
-FROM dataall_homes_11p3uu8f.listings
-WHERE area IS NOT NULL;""",
- },
- {
- 'User': """I want to get the average of the 3 most expensive listings with less than 3 bedrooms\\n\\nBased on on the following glue metadata: \n
-
- Database Name : dataall_homes_11p3uu8f
- Table name: listings
- Column Metadata: [{'Name': 'price', 'Type': 'bigint'}, {'Name': 'area', 'Type': 'bigint'}, {'Name': 'bedrooms', 'Type': 'bigint'}, {'Name': 'bathrooms', 'Type': 'bigint'}, {'Name': 'stories', 'Type': 'bigint'}, {'Name': 'mainroad', 'Type': 'string'}, {'Name': 'guestroom', 'Type': 'string'}, {'Name': 'basement', 'Type': 'string'}, {'Name': 'hotwaterheating', 'Type': 'string'}, {'Name': 'airconditioning', 'Type': 'string'}, {'Name': 'parking', 'Type': 'bigint'}, {'Name': 'prefarea', 'Type': 'string'}, {'Name': 'furnishingstatus', 'Type': 'string'}, {'Name': 'passengerid', 'Type': 'bigint'}, {'Name': 'survived', 'Type': 'bigint'}, {'Name': 'pclass', 'Type': 'bigint'}, {'Name': 'name', 'Type': 'string'}, {'Name': 'sex', 'Type': 'string'}, {'Name': 'age', 'Type': 'double'}, {'Name': 'sibsp', 'Type': 'bigint'}, {'Name': 'parch', 'Type': 'bigint'}, {'Name': 'ticket', 'Type': 'string'}, {'Name': 'fare', 'Type': 'double'}, {'Name': 'cabin', 'Type': 'string'}, {'Name': 'embarked', 'Type': 'string'}]
- Partition Metadata: []
- """,
- 'AI': """SELECT AVG(price) AS average_price
-FROM (
- SELECT price
- FROM dataall_homes_11p3uu8f.listings
- WHERE bedrooms > 3
- ORDER BY price DESC
- LIMIT 3)""",
- },
- {
- 'User': """I want to see if any letter has been sent from 900 Somerville Avenue to 2 Finnigan Street and what is the content n\nBased on the following glue metadata: \n
-
- ["Database name: dataall_packages_omf768qq \n Table name: packages \n Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'contents', 'Type': 'string'}, {'Name': 'from_address_id', 'Type': 'bigint'}, {'Name': 'to_address_id', 'Type': 'bigint'}]\n Partition Metadata: []\n ", "\n Table name: addresses \n Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'address', 'Type': 'string'}, {'Name': 'type', 'Type': 'string'}]\n Partition Metadata: []\n ", "\n Table name: drivers \n Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'name', 'Type': 'string'}]\n Partition Metadata: []\n ", "\n Table name: scans \n Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'driver_id', 'Type': 'bigint'}, {'Name': 'package_id', 'Type': 'bigint'}, {'Name': 'address_id', 'Type': 'bigint'}, {'Name': 'action', 'Type': 'string'}, {'Name': 'timestamp', 'Type': 'string'}]\n Partition Metadata: []\n "]
-
-
- """,
- 'AI': """SELECT p.contents
-FROM dataall_packages_omf768qq.packages p
-JOIN dataall_packages_omf768qq.addresses a1 ON p.from_address_id = a1.id
-JOIN dataall_packages_omf768qq.addresses a2 ON p.to_address_id = a2.id
-WHERE a1.address = '900 Somerville Avenue' AND a2.address = '2 Finnigan Street'""",
- },
-]
-
-TEXT_TO_SQL_PROMPT_TEMPLATE = """
-You will be given the name of an AWS Glue Database, metadata from one or more AWS Glue Table(s) and a user prompt from a user.
-
-Based on this information your job is to turn the prompt into a SQL query that will be sent to query the data within the tables in Amazon Athena.
-
-Take the following points into consideration. It is crucial that you follow them:
-
-- I only want you to return the SQL needed (NO EXPLANATION or anything else).
-
-- Tables are referenced on the following form 'database_name.table_name' (for example 'Select * FROM database_name.table_name ...' and not 'SELECT * FROM table_name ...) since we dont have access to the table name directly since its not global variable.
-
-- Take relations between tables into consideration, for example if you have a table with columns that might reference the other tables, you would need to join them in the query.
-
-- The generate SQL statement MUST be Read only (no WRITE, INSERT, ALTER or DELETE keywords)
-
-- Answer on the same form as the examples given below.
-
-Examples:
-{examples}
-
-
-I want you to follow the following steps when generating the SQL statement:
-
-Step 1: Determine if the given tables columns are suitable to answer the question.
-If not respond with "Error: The tables provided does not give enough information"
-
-Step 2: Determine if the user wants to perform any mutations, if so return "Error: Only READ queries are allowed"
-
-Step 3: Determine if joins will be needed.
-
-Step 4: Generate the SQL in order to solve the problem.
-
-
-Based on the following glue metadata:
-
-{context}
-
-
-User prompt: {prompt}
-
-
-"""
-
-
-PROCESS_TEXT_PROMPT_TEMPLATE = """
-You are an AI assistant tasked with analyzing and processing text content. Your goal is to provide accurate and helpful responses based on the given content and user prompt.
-You must follow the steps:
-
-1. Detetermine if the document has the information to be able to answer the question. If not respond with "Error: The Document does not provide the information needed to answer you question"
-2. I want you to answer the question based on the information in the document.
-3. At the bottom I want you to provide the sources (the parts of the document where you found the results). The sources should be listed in order
-
-
-Content to analyze:
-{content}
-
-User prompt: {prompt}
-
-Please provide a response that addresses the user's prompt in the context of the given content. Be thorough, accurate, and helpful in your analysis.
-"""
diff --git a/backend/dataall/modules/worksheets/aws/bedrock_prompts/process_text_template.txt b/backend/dataall/modules/worksheets/aws/bedrock_prompts/process_text_template.txt
new file mode 100644
index 000000000..bb22fcc8e
--- /dev/null
+++ b/backend/dataall/modules/worksheets/aws/bedrock_prompts/process_text_template.txt
@@ -0,0 +1,14 @@
+You are an AI assistant tasked with analyzing and processing text content. Your goal is to provide accurate and helpful responses based on the given content and user prompt.
+You must follow the steps:
+
+1. Determine if the document has the information to be able to answer the question. If not, respond with "Error: The Document does not provide the information needed to answer your question"
+2. I want you to answer the question based on the information in the document.
+3. At the bottom I want you to provide the sources (the parts of the document where you found the results). The sources should be listed in order
+
+
+Content to analyze:
+{content}
+
+User prompt: {prompt}
+
+Please provide a response that addresses the user's prompt in the context of the given content. Be thorough, accurate, and helpful in your analysis.
diff --git a/backend/dataall/modules/worksheets/aws/bedrock_prompts/test_to_sql_template.txt b/backend/dataall/modules/worksheets/aws/bedrock_prompts/test_to_sql_template.txt
new file mode 100644
index 000000000..c24f5f9f0
--- /dev/null
+++ b/backend/dataall/modules/worksheets/aws/bedrock_prompts/test_to_sql_template.txt
@@ -0,0 +1,38 @@
+You will be given the name of an AWS Glue Database, metadata from one or more AWS Glue Table(s) and a user prompt from a user.
+
+Based on this information your job is to turn the prompt into a SQL query that will be sent to query the data within the tables in Amazon Athena.
+
+Take the following points into consideration. It is crucial that you follow them:
+
+- I only want you to return the SQL needed (NO EXPLANATION or anything else).
+
+- Tables are referenced in the following form 'database_name.table_name' (for example 'SELECT * FROM database_name.table_name ...' and not 'SELECT * FROM table_name ...'), since we don't have access to the table name directly because it is not a global variable.
+
+- Take relations between tables into consideration, for example if you have a table with columns that might reference the other tables, you would need to join them in the query.
+
+- The generated SQL statement MUST be read-only (no WRITE, INSERT, ALTER or DELETE keywords)
+
+- Answer on the same form as the examples given below.
+
+Examples:
+{examples}
+
+
+I want you to follow the following steps when generating the SQL statement:
+
+Step 1: Determine if the given tables' columns are suitable to answer the question.
+If not, respond with "Error: The tables provided do not give enough information"
+
+Step 2: Determine if the user wants to perform any mutations, if so return "Error: Only READ queries are allowed"
+
+Step 3: Determine if joins will be needed.
+
+Step 4: Generate the SQL in order to solve the problem.
+
+
+Based on the following metadata:
+{context}
+
+
+User prompt: {prompt}
+
diff --git a/backend/dataall/modules/worksheets/aws/bedrock_prompts/text_to_sql_examples.txt b/backend/dataall/modules/worksheets/aws/bedrock_prompts/text_to_sql_examples.txt
new file mode 100644
index 000000000..8fa9bd84c
--- /dev/null
+++ b/backend/dataall/modules/worksheets/aws/bedrock_prompts/text_to_sql_examples.txt
@@ -0,0 +1,49 @@
+Example 1.
+User prompt: I want to get the average area of all listings
+
+Context: Based on the following metadata
+Database Name: dataall_homes_11p3uu8f
+Table Name: listings
+Column Metadata: [{'Name': 'price', 'Type': 'bigint'}, {'Name': 'area', 'Type': 'bigint'}, {'Name': 'bedrooms', 'Type': 'bigint'}, {'Name': 'bathrooms', 'Type': 'bigint'}, {'Name': 'stories', 'Type': 'bigint'}, {'Name': 'mainroad', 'Type': 'string'}, {'Name': 'guestroom', 'Type': 'string'}, {'Name': 'basement', 'Type': 'string'}, {'Name': 'hotwaterheating', 'Type': 'string'}, {'Name': 'airconditioning', 'Type': 'string'}, {'Name': 'parking', 'Type': 'bigint'}, {'Name': 'prefarea', 'Type': 'string'}, {'Name': 'furnishingstatus', 'Type': 'string'}, {'Name': 'passengerid', 'Type': 'bigint'}, {'Name': 'survived', 'Type': 'bigint'}, {'Name': 'pclass', 'Type': 'bigint'}, {'Name': 'name', 'Type': 'string'}, {'Name': 'sex', 'Type': 'string'}, {'Name': 'age', 'Type': 'double'}, {'Name': 'sibsp', 'Type': 'bigint'}, {'Name': 'parch', 'Type': 'bigint'}, {'Name': 'ticket', 'Type': 'string'}, {'Name': 'fare', 'Type': 'double'}, {'Name': 'cabin', 'Type': 'string'}, {'Name': 'embarked', 'Type': 'string'}]
+Partition Metadata: []
+
+Response: SELECT AVG(CAST(area AS DOUBLE)) FROM dataall_homes_11p3uu8f.listings WHERE area IS NOT NULL;
+
+
+Example 2.
+User prompt: I want to get the average of the 3 most expensive listings with less than 3 bedrooms
+
+Context: Based on the following metadata
+Database Name: dataall_homes_11p3uu8f
+Table Name: listings
+Column Metadata: [{'Name': 'price', 'Type': 'bigint'}, {'Name': 'area', 'Type': 'bigint'}, {'Name': 'bedrooms', 'Type': 'bigint'}, {'Name': 'bathrooms', 'Type': 'bigint'}, {'Name': 'stories', 'Type': 'bigint'}, {'Name': 'mainroad', 'Type': 'string'}, {'Name': 'guestroom', 'Type': 'string'}, {'Name': 'basement', 'Type': 'string'}, {'Name': 'hotwaterheating', 'Type': 'string'}, {'Name': 'airconditioning', 'Type': 'string'}, {'Name': 'parking', 'Type': 'bigint'}, {'Name': 'prefarea', 'Type': 'string'}, {'Name': 'furnishingstatus', 'Type': 'string'}, {'Name': 'passengerid', 'Type': 'bigint'}, {'Name': 'survived', 'Type': 'bigint'}, {'Name': 'pclass', 'Type': 'bigint'}, {'Name': 'name', 'Type': 'string'}, {'Name': 'sex', 'Type': 'string'}, {'Name': 'age', 'Type': 'double'}, {'Name': 'sibsp', 'Type': 'bigint'}, {'Name': 'parch', 'Type': 'bigint'}, {'Name': 'ticket', 'Type': 'string'}, {'Name': 'fare', 'Type': 'double'}, {'Name': 'cabin', 'Type': 'string'}, {'Name': 'embarked', 'Type': 'string'}]
+Partition Metadata: []
+
+Response: SELECT AVG(price) AS average_price FROM (SELECT price FROM dataall_homes_11p3uu8f.listings WHERE bedrooms < 3 ORDER BY price DESC LIMIT 3);
+
+
+Example 3.
+User prompt: I want to see if any letter has been sent from 900 Somerville Avenue to 2 Finnigan Street and what is the content
+
+Context: Based on the following metadata
+Database Name: dataall_packages_omf768qq
+Table Name: packages
+Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'contents', 'Type': 'string'}, {'Name': 'from_address_id', 'Type': 'bigint'}, {'Name': 'to_address_id', 'Type': 'bigint'}]
+Partition Metadata: []
+
+Database Name: dataall_packages_omf768qq
+Table Name: addresses
+Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'address', 'Type': 'string'}, {'Name': 'type', 'Type': 'string'}]
+Partition Metadata: []
+
+Database Name: dataall_packages_omf768qq
+Table Name: drivers
+Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'name', 'Type': 'string'}]
+Partition Metadata: []
+
+Database Name: dataall_packages_omf768qq
+Table Name: scans
+Column Metadata: [{'Name': 'id', 'Type': 'bigint'}, {'Name': 'driver_id', 'Type': 'bigint'}, {'Name': 'package_id', 'Type': 'bigint'}, {'Name': 'address_id', 'Type': 'bigint'}, {'Name': 'action', 'Type': 'string'}, {'Name': 'timestamp', 'Type': 'string'}]
+Partition Metadata: []
+
+Response: SELECT p.contents FROM dataall_packages_omf768qq.packages p JOIN dataall_packages_omf768qq.addresses a1 ON p.from_address_id = a1.id JOIN dataall_packages_omf768qq.addresses a2 ON p.to_address_id = a2.id WHERE a1.address = '900 Somerville Avenue' AND a2.address = '2 Finnigan Street';
diff --git a/backend/dataall/modules/worksheets/aws/glue_client.py b/backend/dataall/modules/worksheets/aws/glue_client.py
index 295e0e8f4..d123a7662 100644
--- a/backend/dataall/modules/worksheets/aws/glue_client.py
+++ b/backend/dataall/modules/worksheets/aws/glue_client.py
@@ -25,8 +25,8 @@ def get_table_metadata(self, database, table_name):
column_metadata = table_metadata['Table']['StorageDescriptor']['Columns']
partition_metadata = table_metadata['Table']['PartitionKeys']
meta_data = f"""
- Database name: {database}
- Table name: {table_name}
+ Database Name: {database}
+ Table Name: {table_name}
Column Metadata: {column_metadata}
Partition Metadata: {partition_metadata}
"""
diff --git a/frontend/src/modules/DatasetsBase/components/DatasetGovernance.js b/frontend/src/modules/DatasetsBase/components/DatasetGovernance.js
index f2e86a115..2cda63d26 100644
--- a/frontend/src/modules/DatasetsBase/components/DatasetGovernance.js
+++ b/frontend/src/modules/DatasetsBase/components/DatasetGovernance.js
@@ -9,9 +9,8 @@ import {
Typography
} from '@mui/material';
import PropTypes from 'prop-types';
-import { Label } from 'design';
+import { Label, UserModal } from 'design';
import { isFeatureEnabled } from 'utils';
-import { UserModal } from 'design';
import { useState } from 'react';
export const DatasetGovernance = (props) => {
diff --git a/frontend/src/modules/Environments/components/EnvironmentOverview.js b/frontend/src/modules/Environments/components/EnvironmentOverview.js
index ae6f012f0..2991c67e6 100644
--- a/frontend/src/modules/Environments/components/EnvironmentOverview.js
+++ b/frontend/src/modules/Environments/components/EnvironmentOverview.js
@@ -1,8 +1,7 @@
import React, { useState } from 'react';
import { Box, Grid } from '@mui/material';
import PropTypes from 'prop-types';
-import { ObjectBrief, ObjectMetadata } from 'design';
-import { UserModal } from 'design';
+import { ObjectBrief, ObjectMetadata, UserModal } from 'design';
import { EnvironmentConsoleAccess } from './EnvironmentConsoleAccess';
import { EnvironmentFeatures } from './EnvironmentFeatures';
diff --git a/frontend/src/modules/Organizations/components/OrganizationOverview.js b/frontend/src/modules/Organizations/components/OrganizationOverview.js
index 95234dd1d..4f58e6102 100644
--- a/frontend/src/modules/Organizations/components/OrganizationOverview.js
+++ b/frontend/src/modules/Organizations/components/OrganizationOverview.js
@@ -1,8 +1,7 @@
import React, { useState } from 'react';
import { Box, Grid } from '@mui/material';
import PropTypes from 'prop-types';
-import { ObjectBrief, ObjectMetadata } from 'design';
-import { UserModal } from 'design';
+import { ObjectBrief, ObjectMetadata, UserModal } from 'design';
export const OrganizationOverview = (props) => {
const { organization, ...other } = props;
diff --git a/frontend/src/modules/Worksheets/components/TextDisplay.js b/frontend/src/modules/Worksheets/components/TextDisplay.js
deleted file mode 100644
index bf89112b3..000000000
--- a/frontend/src/modules/Worksheets/components/TextDisplay.js
+++ /dev/null
@@ -1,33 +0,0 @@
-import React from 'react';
-import PropTypes from 'prop-types';
-import { THEMES, useSettings } from 'design';
-
-export const TextDisplay = ({ text }) => {
- const { settings } = useSettings();
-
- const containerStyle = {
- width: '600px',
- height: '390px',
- maxWidth: '100%',
- margin: '0 auto',
- padding: '20px',
- border:
- settings.theme === THEMES.LIGHT ? '1px solid #eee' : '1px solid #333',
- borderRadius: '5px',
- backgroundColor: settings.theme === THEMES.LIGHT ? '#ffffff' : '#1e1e1e',
- color: settings.theme === THEMES.LIGHT ? '#333333' : '#d4d4d4',
- fontFamily: 'Arial, sans-serif',
- fontSize: '14px',
- lineHeight: '1.6',
- whiteSpace: 'pre-wrap',
- wordWrap: 'break-word',
- overflowY: 'auto',
- maxHeight: '400px'
- };
-
- return {text}
;
-};
-
-TextDisplay.propTypes = {
- text: PropTypes.string.isRequired
-};
diff --git a/frontend/src/modules/Worksheets/components/WorksheetTextToSQLEditor.js b/frontend/src/modules/Worksheets/components/WorksheetTextToSQLEditor.js
index e0bec7086..1dd1764a3 100644
--- a/frontend/src/modules/Worksheets/components/WorksheetTextToSQLEditor.js
+++ b/frontend/src/modules/Worksheets/components/WorksheetTextToSQLEditor.js
@@ -210,6 +210,12 @@ export const WorksheetTextToSQLEditor = ({