diff --git a/docs/CNAME b/docs/CNAME deleted file mode 100644 index ad1c2a23..00000000 --- a/docs/CNAME +++ /dev/null @@ -1 +0,0 @@ -docs.vanna.ai \ No newline at end of file diff --git a/docs/databases.md b/docs/databases.md deleted file mode 100644 index 6924d34c..00000000 --- a/docs/databases.md +++ /dev/null @@ -1,142 +0,0 @@ -# How to use Vanna with various databases - -You can use Vanna with any database that you can connect to via Python. Here are some examples of how to connect to various databases. - -All you have to do is provide Vanna with a function that takes in a SQL query and returns a Pandas DataFrame. Here are some examples of how to do that. - -## **PostgreSQL** - -```python -import pandas as pd -import psycopg2 - -conn_details = {...} # fill this with your connection details -conn_postgres = psycopg2.connect(**conn_details) - -def run_sql_postgres(sql: str) -> pd.DataFrame: - df = pd.read_sql_query(sql, conn_postgres) - return df - -vn.run_sql = run_sql_postgres -``` - -## **Snowflake** - -We have a built-in function for Snowflake, so you don't need to write your own. - -```python -vn.connect_to_snowflake(account='my-account', username='my-username', password='my-password', database='my-database') -``` - -```python -import pandas as pd -from snowflake.connector.pandas_tools import pd_read_sql -from snowflake.connector import connect - -conn_details = {...} # fill this with your connection details -conn_snowflake = connect(**conn_details) - -def run_sql_snowflake(sql: str) -> pd.DataFrame: - df = pd_read_sql(sql, conn_snowflake) - return df - -vn.run_sql = run_sql_snowflake -``` - -## **Google BigQuery** - -```python -from google.cloud import bigquery -import pandas as pd - -project_id = 'your-project-id' # replace with your Project ID -client_bigquery = bigquery.Client(project=project_id) - -def run_sql_bigquery(sql: str) -> pd.DataFrame: - df = client_bigquery.query(sql).to_dataframe() - return df - -vn.run_sql = run_sql_bigquery -``` - -## **Amazon Athena** - -```python -import pandas as pd -from pyathena import connect - -conn_details = {...} # fill this with your connection details -conn_athena = connect(**conn_details) - -def run_sql_athena(sql: str) -> pd.DataFrame: - df = pd.read_sql(sql, conn_athena) - return df - -vn.run_sql = run_sql_athena -``` - -## **Amazon Redshift** - -```python -import pandas as pd -import psycopg2 - -conn_details = {...} # fill this with your connection details -conn_redshift = psycopg2.connect(**conn_details) - -def run_sql_redshift(sql: str) -> pd.DataFrame: - df = pd.read_sql_query(sql, conn_redshift) - return df - -vn.run_sql = run_sql_redshift -``` - -Sure, here is an example for Google Cloud SQL using the MySQL connector: - -## **Google Cloud SQL (MySQL)** - -```python -import pandas as pd -import mysql.connector - -conn_details = {...} # fill this with your connection details -conn_google_cloud_sql = mysql.connector.connect(**conn_details) - -def run_sql_google_cloud_sql(sql: str) -> pd.DataFrame: - df = pd.read_sql(sql, conn_google_cloud_sql) - return df -``` - -Note: Google Cloud SQL supports MySQL, PostgreSQL, and SQL Server. The above example uses MySQL. If you are using PostgreSQL or SQL Server, you should use the appropriate connector. - -## **SQLite** - -```python -import sqlite3 -import pandas as pd - -db_path = 'path_to_your_db' # replace with your SQLite DB path -conn_sqlite = sqlite3.connect(db_path) - -def run_sql_sqlite(sql: str) -> pd.DataFrame: - df = pd.read_sql_query(sql, conn_sqlite) - return df - -vn.run_sql = run_sql_sqlite -``` - -## **Microsoft SQL Server** - -```python -import pandas as pd -import pyodbc - -conn_details = {...} # fill this with your connection details -conn_sql_server = pyodbc.connect(**conn_details) - -def run_sql_sql_server(sql: str) -> pd.DataFrame: - df = pd.read_sql(sql, conn_sql_server) - return df - -vn.run_sql = run_sql_sql_server -``` diff --git a/docs/index.md b/docs/index.md deleted file mode 100644 index 02d2fc63..00000000 --- a/docs/index.md +++ /dev/null @@ -1,239 +0,0 @@ -# Vanna.AI - Personalized AI SQL Agent - -**Let Vanna.AI write your nasty SQL for you**. Vanna is a Python based AI SQL agent trained on your schema that writes complex SQL in seconds. `pip install vanna` to get started now. - - - -## An example - -A business user asks you **"who are the top 2 customers in each region?"**. Right in the middle of lunch. And they need it for a presentation this afternoon. 😡😡😡 - -### The old way 😡 😫 💩 -Simple question to ask, not so fun to answer. You spend over an hour a) finding the tables, b) figuring out out the joins, c) look up the syntax for ranking, d) putting this into a CTE, e) filtering by rank, and f) choosing the correct metrics. Finally, you come up with this ugly mess - - -```sql -with ranked_customers as (SELECT c.c_name as customer_name, - r.r_name as region_name, - row_number() OVER (PARTITION BY r.r_name - ORDER BY sum(l.l_quantity * l.l_extendedprice) desc) as rank - FROM snowflake_sample_data.tpch_sf1.customer c join snowflake_sample_data.tpch_sf1.orders o - ON c.c_custkey = o.o_custkey join snowflake_sample_data.tpch_sf1.lineitem l - ON o.o_orderkey = l.l_orderkey join snowflake_sample_data.tpch_sf1.nation n - ON c.c_nationkey = n.n_nationkey join snowflake_sample_data.tpch_sf1.region r - ON n.n_regionkey = r.r_regionkey - GROUP BY customer_name, region_name) -SELECT region_name, - customer_name -FROM ranked_customers -WHERE rank <= 2; -``` - -And you had to skip your lunch. **HANGRY!** - -### The Vanna way 😍 🌟 🚀 -With Vanna, you train up a custom model on your data warehouse, and simply enter this in your Jupyter Notebook - - -```python -import vanna as vn -vn.set_model('your-model') -vn.ask('who are the top 2 customers in each region?') -``` - -Vanna generates that nasty SQL above for you, runs it (locally & securely) and gives you back a Dataframe in seconds: - -| region_name | customer_name | total_sales | -| ----------- | ------------- | ----------- | -| ASIA | Customer#000000001 | 68127.72 | -| ASIA | Customer#000000002 | 65898.69 | -... - -And you ate your lunch in peace. **YUMMY!** - -## How Vanna works -Vanna works in two easy steps - train a model on your data, and then ask questions. - -1. **Train a model on your data**. -2. **Ask questions**. - -When you ask a question, we utilize a custom model for your dataset to generate SQL, as seen below. Your model performance and accuracy depends on the quality and quantity of training data you use to train your model. - - - - -## Why Vanna? - -1. **High accuracy on complex datasets.** - - Vanna’s capabilities are tied to the training data you give it - - More training data means better accuracy for large and complex datasets -2. **Secure and private.** - - Your database contents are never sent to Vanna’s servers - - We only see the bare minimum - schemas & queries. -3. **Isolated, custom model.** - - You train a custom model specific to your database and your schema. - - Nobody else can use your model or view your model’s training data unless you choose to add members to your model or make it public - - We use a combination of third-party foundational models (OpenAI, Google) and our own LLM. -4. **Self learning.** - - As you use Vanna more, your model continuously improves as we augment your training data -5. **Supports many databases.** - - We have out-of-the-box support Snowflake, BigQuery, Postgres - - You can easily make a connector for any [database](https://docs.vanna.ai/databases/) -6. **Pretrained models.** - - If you’re a data provider you can publish your models for anyone to use - - As part of our roadmap, we are in the process of pre-training models for common datasets (Google Ads, Facebook ads, etc) -7. **Choose your front end.** - - Start in a Jupyter Notebook. - - Expose to business users via Slackbot, web app, Streamlit app, or Excel plugin. - - Even integrate in your web app for customers. - -## Getting started -You can start by [automatically training Vanna (currently works for Snowflake)](https://docs.vanna.ai/notebooks/vn-train/) or add manual training data. - -### Train with DDL Statements -If you prefer to manually train, you do not need to connect to a database. You can use the train function with other parmaeters like ddl - - -```python -vn.train(ddl=""" - CREATE TABLE IF NOT EXISTS my-table ( - id INT PRIMARY KEY, - name VARCHAR(100), - age INT - ) -""") -``` - -### Train with Documentation -Sometimes you may want to add documentation about your business terminology or definitions. - -```python -vn.train(documentation="Our business defines OTIF score as the percentage of orders that are delivered on time and in full") -``` - -### Train with SQL -You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL. - -```python -vn.train(sql="SELECT * FROM my-table WHERE name = 'John Doe'") -``` - - - -## Asking questions -```python -vn.ask("What are the top 10 customers by sales?") -``` - - SELECT c.c_name as customer_name, - sum(l.l_extendedprice * (1 - l.l_discount)) as total_sales - FROM snowflake_sample_data.tpch_sf1.lineitem l join snowflake_sample_data.tpch_sf1.orders o - ON l.l_orderkey = o.o_orderkey join snowflake_sample_data.tpch_sf1.customer c - ON o.o_custkey = c.c_custkey - GROUP BY customer_name - ORDER BY total_sales desc limit 10; - - - -
- | CUSTOMER_NAME | -TOTAL_SALES | -
---|---|---|
0 | -Customer#000143500 | -6757566.0218 | -
1 | -Customer#000095257 | -6294115.3340 | -
2 | -Customer#000087115 | -6184649.5176 | -
3 | -Customer#000131113 | -6080943.8305 | -
4 | -Customer#000134380 | -6075141.9635 | -
5 | -Customer#000103834 | -6059770.3232 | -
6 | -Customer#000069682 | -6057779.0348 | -
7 | -Customer#000102022 | -6039653.6335 | -
8 | -Customer#000098587 | -6027021.5855 | -
9 | -Customer#000064660 | -5905659.6159 | -
\n", - " | Name | \n", - "totalsales | \n", - "
---|---|---|
0 | \n", - "Iron Maiden | \n", - "140 | \n", - "
1 | \n", - "U2 | \n", - "107 | \n", - "
2 | \n", - "Metallica | \n", - "91 | \n", - "
3 | \n", - "Led Zeppelin | \n", - "87 | \n", - "
4 | \n", - "Os Paralamas Do Sucesso | \n", - "45 | \n", - "
\n", - " | CUSTOMER_NAME | \n", - "TOTAL_SALES | \n", - "
---|---|---|
0 | \n", - "Customer#000143500 | \n", - "6757566.0218 | \n", - "
1 | \n", - "Customer#000095257 | \n", - "6294115.3340 | \n", - "
2 | \n", - "Customer#000087115 | \n", - "6184649.5176 | \n", - "
3 | \n", - "Customer#000131113 | \n", - "6080943.8305 | \n", - "
4 | \n", - "Customer#000134380 | \n", - "6075141.9635 | \n", - "
5 | \n", - "Customer#000103834 | \n", - "6059770.3232 | \n", - "
6 | \n", - "Customer#000069682 | \n", - "6057779.0348 | \n", - "
7 | \n", - "Customer#000102022 | \n", - "6039653.6335 | \n", - "
8 | \n", - "Customer#000098587 | \n", - "6027021.5855 | \n", - "
9 | \n", - "Customer#000064660 | \n", - "5905659.6159 | \n", - "
\n", - " | id | \n", - "training_data_type | \n", - "question | \n", - "content | \n", - "
---|---|---|---|---|
0 | \n", - "15-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the PARTSUPP table.\\n\\nThe ... | \n", - "
1 | \n", - "11-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the CUSTOMER table.\\n\\nThe ... | \n", - "
2 | \n", - "14-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the ORDERS table.\\n\\nThe fo... | \n", - "
3 | \n", - "1244-sql | \n", - "sql | \n", - "What are the names of the top 10 customers? | \n", - "SELECT c.c_name as customer_name\\nFROM snowf... | \n", - "
4 | \n", - "1242-sql | \n", - "sql | \n", - "What are the top 5 customers in terms of total... | \n", - "SELECT c.c_name AS customer_name, SUM(l.l_quan... | \n", - "
5 | \n", - "17-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the REGION table.\\n\\nThe fo... | \n", - "
6 | \n", - "16-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the PART table.\\n\\nThe foll... | \n", - "
7 | \n", - "1243-sql | \n", - "sql | \n", - "What are the top 10 customers with the highest... | \n", - "SELECT c.c_name as customer_name,\\n sum(... | \n", - "
8 | \n", - "1239-sql | \n", - "sql | \n", - "What are the top 100 customers based on their ... | \n", - "SELECT c.c_name as customer_name,\\n sum(... | \n", - "
9 | \n", - "13-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the SUPPLIER table.\\n\\nThe ... | \n", - "
10 | \n", - "1241-sql | \n", - "sql | \n", - "What are the top 10 customers in terms of tota... | \n", - "SELECT c.c_name as customer_name,\\n sum(... | \n", - "
11 | \n", - "12-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the LINEITEM table.\\n\\nThe ... | \n", - "
12 | \n", - "18-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the NATION table.\\n\\nThe fo... | \n", - "
13 | \n", - "1248-sql | \n", - "sql | \n", - "How many customers are in each country? | \n", - "SELECT n.n_name as country,\\n count(*) a... | \n", - "
14 | \n", - "1240-sql | \n", - "sql | \n", - "What is the number of orders placed each week? | \n", - "SELECT date_trunc('week', o_orderdate) as week... | \n", - "
\n", - " | CUSTOMER_NAME | \n", - "TOTAL_SALES | \n", - "
---|---|---|
0 | \n", - "Customer#000143500 | \n", - "6757566.0218 | \n", - "
1 | \n", - "Customer#000095257 | \n", - "6294115.3340 | \n", - "
2 | \n", - "Customer#000087115 | \n", - "6184649.5176 | \n", - "
3 | \n", - "Customer#000131113 | \n", - "6080943.8305 | \n", - "
4 | \n", - "Customer#000134380 | \n", - "6075141.9635 | \n", - "
5 | \n", - "Customer#000103834 | \n", - "6059770.3232 | \n", - "
6 | \n", - "Customer#000069682 | \n", - "6057779.0348 | \n", - "
7 | \n", - "Customer#000102022 | \n", - "6039653.6335 | \n", - "
8 | \n", - "Customer#000098587 | \n", - "6027021.5855 | \n", - "
9 | \n", - "Customer#000064660 | \n", - "5905659.6159 | \n", - "
\n", - " | COUNTRY_NAME | \n", - "TOTAL_SALES | \n", - "
---|---|---|
0 | \n", - "FRANCE | \n", - "8960205391.8314 | \n", - "
1 | \n", - "INDONESIA | \n", - "8942575217.6237 | \n", - "
2 | \n", - "RUSSIA | \n", - "8925318302.0710 | \n", - "
3 | \n", - "MOZAMBIQUE | \n", - "8892984086.0088 | \n", - "
4 | \n", - "JORDAN | \n", - "8873862546.7864 | \n", - "
\n", - " | REGION_NAME | \n", - "CUSTOMER_NAME | \n", - "
---|---|---|
0 | \n", - "ASIA | \n", - "Customer#000102022 | \n", - "
1 | \n", - "ASIA | \n", - "Customer#000148750 | \n", - "
2 | \n", - "AMERICA | \n", - "Customer#000095257 | \n", - "
3 | \n", - "AMERICA | \n", - "Customer#000091630 | \n", - "
4 | \n", - "EUROPE | \n", - "Customer#000028180 | \n", - "
5 | \n", - "EUROPE | \n", - "Customer#000053809 | \n", - "
6 | \n", - "MIDDLE EAST | \n", - "Customer#000143500 | \n", - "
7 | \n", - "MIDDLE EAST | \n", - "Customer#000103834 | \n", - "
8 | \n", - "AFRICA | \n", - "Customer#000131113 | \n", - "
9 | \n", - "AFRICA | \n", - "Customer#000134380 | \n", - "
\n", - " | submitter_id | \n", - "case_id | \n", - "diag__treat__count | \n", - "primary_site | \n", - "disease_type | \n", - "proj__name | \n", - "proj__project_id | \n", - "demo__demographic_id | \n", - "demo__gender | \n", - "demo__race | \n", - "... | \n", - "exp__bmi | \n", - "exp__years_smoked | \n", - "exp__pack_years_smoked | \n", - "exp__cigarettes_per_day | \n", - "exp__alcohol_history | \n", - "exp__state | \n", - "exp__created_datetime | \n", - "exp__updated_datetime | \n", - "state | \n", - "updated_datetime | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "TCGA-CN-5363 | \n", - "291b069c-9dde-4e1e-8430-85146bc94338 | \n", - "2 | \n", - "Larynx | \n", - "Squamous Cell Neoplasms | \n", - "Head and Neck Squamous Cell Carcinoma | \n", - "TCGA-HNSC | \n", - "2611cb61-6d05-5286-b94a-ce6cac2ba37b | \n", - "male | \n", - "black or african american | \n", - "... | \n", - "NaN | \n", - "NaN | \n", - "15.0 | \n", - "0.821918 | \n", - "Yes | \n", - "released | \n", - "None | \n", - "2019-07-31T18:43:25.167078-05:00 | \n", - "released | \n", - "2019-08-06T14:25:25.511101-05:00 | \n", - "
1 | \n", - "TCGA-CN-5365 | \n", - "4cffea0b-90a7-4c86-a73f-bb8feca3ada7 | \n", - "2 | \n", - "Tonsil | \n", - "Squamous Cell Neoplasms | \n", - "Head and Neck Squamous Cell Carcinoma | \n", - "TCGA-HNSC | \n", - "97a7f69b-0f40-5450-bbeb-92084a100a9d | \n", - "male | \n", - "white | \n", - "... | \n", - "NaN | \n", - "NaN | \n", - "26.0 | \n", - "1.424658 | \n", - "Yes | \n", - "released | \n", - "None | \n", - "2019-07-31T19:39:51.442671-05:00 | \n", - "released | \n", - "2019-08-06T14:25:25.511101-05:00 | \n", - "
2 | \n", - "TCGA-CN-A642 | \n", - "a1ded1e8-eb28-49dd-8f3d-1ce8f40eed8f | \n", - "2 | \n", - "Other and unspecified parts of tongue | \n", - "Squamous Cell Neoplasms | \n", - "Head and Neck Squamous Cell Carcinoma | \n", - "TCGA-HNSC | \n", - "4bc58619-47fc-5c2d-aaec-9d9e562e049b | \n", - "male | \n", - "white | \n", - "... | \n", - "NaN | \n", - "NaN | \n", - "5.0 | \n", - "0.273973 | \n", - "Yes | \n", - "released | \n", - "None | \n", - "2019-07-31T19:30:27.901248-05:00 | \n", - "released | \n", - "2019-08-06T14:25:39.854271-05:00 | \n", - "
3 | \n", - "TCGA-CR-7380 | \n", - "53b254b7-021f-43df-af9b-3fc01b87479e | \n", - "2 | \n", - "Other and ill-defined sites in lip, oral cavit... | \n", - "Squamous Cell Neoplasms | \n", - "Head and Neck Squamous Cell Carcinoma | \n", - "TCGA-HNSC | \n", - "be41a712-ebee-52e1-907c-80b1917daa45 | \n", - "male | \n", - "white | \n", - "... | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "Yes | \n", - "released | \n", - "None | \n", - "2019-07-31T19:40:20.032260-05:00 | \n", - "released | \n", - "2019-08-06T14:26:05.315718-05:00 | \n", - "
4 | \n", - "TCGA-CV-5978 | \n", - "e16e9535-b20f-4c9a-8b5b-82df80c99448 | \n", - "2 | \n", - "Larynx | \n", - "Squamous Cell Neoplasms | \n", - "Head and Neck Squamous Cell Carcinoma | \n", - "TCGA-HNSC | \n", - "92d1d967-c8a0-52cb-a62d-1d11bdf85068 | \n", - "female | \n", - "black or african american | \n", - "... | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "Yes | \n", - "released | \n", - "None | \n", - "2019-07-31T19:52:06.976359-05:00 | \n", - "released | \n", - "2019-08-06T14:26:05.315718-05:00 | \n", - "
5 | \n", - "TCGA-CV-6948 | \n", - "fcf0dc48-b889-4593-a15b-aa715aae7bf5 | \n", - "2 | \n", - "Floor of mouth | \n", - "Squamous Cell Neoplasms | \n", - "Head and Neck Squamous Cell Carcinoma | \n", - "TCGA-HNSC | \n", - "2fd1a926-7584-50d5-b6b7-9b9d02710f47 | \n", - "female | \n", - "white | \n", - "... | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "No | \n", - "released | \n", - "None | \n", - "2019-07-31T19:55:16.152855-05:00 | \n", - "released | \n", - "2019-08-06T14:26:16.536997-05:00 | \n", - "
6 | \n", - "TCGA-CV-7409 | \n", - "acd98e20-d2da-4256-99a5-13e261bc88e6 | \n", - "2 | \n", - "Other and ill-defined sites in lip, oral cavit... | \n", - "Squamous Cell Neoplasms | \n", - "Head and Neck Squamous Cell Carcinoma | \n", - "TCGA-HNSC | \n", - "2a3f5bb4-3606-5549-8d85-ec413eadd7ab | \n", - "male | \n", - "black or african american | \n", - "... | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "No | \n", - "released | \n", - "None | \n", - "2019-07-31T19:48:25.311492-05:00 | \n", - "released | \n", - "2019-08-06T14:26:28.608672-05:00 | \n", - "
7 | \n", - "TCGA-CV-A6JU | \n", - "b1b3983d-37d2-4bef-bd17-708e3e600146 | \n", - "2 | \n", - "Other and unspecified parts of tongue | \n", - "Squamous Cell Neoplasms | \n", - "Head and Neck Squamous Cell Carcinoma | \n", - "TCGA-HNSC | \n", - "604e3dac-30be-589d-b622-df0b41cd9a7f | \n", - "female | \n", - "white | \n", - "... | \n", - "NaN | \n", - "NaN | \n", - "81.0 | \n", - "4.438356 | \n", - "Yes | \n", - "released | \n", - "None | \n", - "2019-07-31T19:48:40.594893-05:00 | \n", - "released | \n", - "2019-08-06T14:26:39.780396-05:00 | \n", - "
8 | \n", - "TCGA-QK-A6IH | \n", - "c1f286f6-d4a1-494a-88c8-ff8e2a3df2ce | \n", - "2 | \n", - "Gum | \n", - "Squamous Cell Neoplasms | \n", - "Head and Neck Squamous Cell Carcinoma | \n", - "TCGA-HNSC | \n", - "83e5c705-bd2e-5516-9700-ed3803dde268 | \n", - "female | \n", - "white | \n", - "... | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "Yes | \n", - "released | \n", - "None | \n", - "2019-07-31T19:49:42.057478-05:00 | \n", - "released | \n", - "2019-08-06T14:27:02.392779-05:00 | \n", - "
9 | \n", - "TCGA-QK-A8Z8 | \n", - "ac511727-185b-4ac0-b6c0-dc3a79657be6 | \n", - "2 | \n", - "Larynx | \n", - "Squamous Cell Neoplasms | \n", - "Head and Neck Squamous Cell Carcinoma | \n", - "TCGA-HNSC | \n", - "fd1e46fb-43bb-54ae-b713-a579ba857ed4 | \n", - "female | \n", - "black or african american | \n", - "... | \n", - "NaN | \n", - "NaN | \n", - "80.0 | \n", - "4.383562 | \n", - "Yes | \n", - "released | \n", - "None | \n", - "2019-07-31T19:48:22.125112-05:00 | \n", - "released | \n", - "2019-08-06T14:27:02.392779-05:00 | \n", - "
10 rows × 70 columns
\n", - "\n", - " | id | \n", - "timestamp | \n", - "userstamp | \n", - "descr | \n", - "current_release | \n", - "full_descr | \n", - "alive | \n", - "for_release | \n", - "display_name | \n", - "project_id | \n", - "avg_length | \n", - "min_length | \n", - "max_length | \n", - "num_sequences | \n", - "num_organisms | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "5 | \n", - "2017-05-17 00:00:00.000000 | \n", - "RNACEN | \n", - "VEGA | \n", - "98 | \n", - "VEGA | \n", - "N | \n", - "None | \n", - "VEGA | \n", - "PRJEB4568 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "0 | \n", - "
1 | \n", - "1 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "ENA | \n", - "731 | \n", - "ENA | \n", - "Y | \n", - "None | \n", - "ENA | \n", - "None | \n", - "412.0 | \n", - "10.0 | \n", - "900074.0 | \n", - "12086180 | \n", - "814855 | \n", - "
2 | \n", - "26 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "GENCODE | \n", - "450 | \n", - "GENCODE | \n", - "N | \n", - "None | \n", - "GENCODE | \n", - "None | \n", - "889.0 | \n", - "32.0 | \n", - "205012.0 | \n", - "47677 | \n", - "2 | \n", - "
3 | \n", - "3 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "SRPDB | \n", - "732 | \n", - "SRPDB | \n", - "Y | \n", - "None | \n", - "SRPDB | \n", - "PRJEB4384 | \n", - "173.0 | \n", - "30.0 | \n", - "533.0 | \n", - "503 | \n", - "684 | \n", - "
4 | \n", - "15 | \n", - "2017-05-02 00:00:00.000000 | \n", - "RNACEN | \n", - "WORMBASE | \n", - "735 | \n", - "WormBase | \n", - "Y | \n", - "None | \n", - "WormBase | \n", - "PRJNA13758 | \n", - "174.0 | \n", - "17.0 | \n", - "84141.0 | \n", - "26116 | \n", - "1 | \n", - "
5 | \n", - "24 | \n", - "2017-05-02 00:00:00.000000 | \n", - "RNACEN | \n", - "FLYBASE | \n", - "739 | \n", - "FlyBase | \n", - "Y | \n", - "None | \n", - "FlyBase | \n", - "PRJ_FLY | \n", - "765.0 | \n", - "18.0 | \n", - "21216.0 | \n", - "4210 | \n", - "1 | \n", - "
6 | \n", - "14 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "TAIR | \n", - "720 | \n", - "TAIR | \n", - "Y | \n", - "None | \n", - "TAIR | \n", - "PRJ_TAIR | \n", - "384.0 | \n", - "19.0 | \n", - "6227.0 | \n", - "4406 | \n", - "1 | \n", - "
7 | \n", - "2 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "RFAM | \n", - "635 | \n", - "RFAM | \n", - "Y | \n", - "None | \n", - "Rfam | \n", - "None | \n", - "135.0 | \n", - "24.0 | \n", - "11047.0 | \n", - "1872081 | \n", - "14735 | \n", - "
8 | \n", - "25 | \n", - "2017-05-11 00:00:00.000000 | \n", - "RNACEN | \n", - "ENSEMBL | \n", - "721 | \n", - "Ensembl | \n", - "Y | \n", - "None | \n", - "Ensembl | \n", - "None | \n", - "908.0 | \n", - "20.0 | \n", - "347561.0 | \n", - "1294678 | \n", - "264 | \n", - "
9 | \n", - "52 | \n", - "2022-12-21 11:20:36.071013 | \n", - "RNACEN | \n", - "RIBOCENTRE | \n", - "729 | \n", - "Ribocentre | \n", - "Y | \n", - "None | \n", - "Ribocentre | \n", - "None | \n", - "67.0 | \n", - "28.0 | \n", - "1619.0 | \n", - "151770 | \n", - "5536 | \n", - "
10 | \n", - "53 | \n", - "2023-01-24 11:56:51.648817 | \n", - "RNACEN | \n", - "EVLNCRNAS | \n", - "730 | \n", - "EVLncRNAs | \n", - "Y | \n", - "None | \n", - "EVLncRNAs | \n", - "None | \n", - "2311.0 | \n", - "199.0 | \n", - "205012.0 | \n", - "933 | \n", - "3 | \n", - "
11 | \n", - "6 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "TMRNA_WEB | \n", - "414 | \n", - "tmRNA Website | \n", - "Y | \n", - "None | \n", - "tmRNA Website | \n", - "PRJEB4570 | \n", - "335.0 | \n", - "23.0 | \n", - "644.0 | \n", - "2857 | \n", - "7560 | \n", - "
12 | \n", - "51 | \n", - "2022-09-22 15:14:59.500143 | \n", - "RNACEN | \n", - "EXPRESSION_ATLAS | \n", - "636 | \n", - "Expression Atlas | \n", - "Y | \n", - "None | \n", - "Expression Atlas | \n", - "None | \n", - "953.0 | \n", - "72.0 | \n", - "32709.0 | \n", - "11030 | \n", - "3 | \n", - "
13 | \n", - "35 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "ENSEMBL_PROTISTS | \n", - "724 | \n", - "Ensembl Protists | \n", - "Y | \n", - "None | \n", - "Ensembl Protists | \n", - "None | \n", - "212.0 | \n", - "11.0 | \n", - "7940.0 | \n", - "5261 | \n", - "32 | \n", - "
14 | \n", - "36 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "ENSEMBL_FUNGI | \n", - "725 | \n", - "Ensembl Fungi | \n", - "Y | \n", - "None | \n", - "Ensembl Fungi | \n", - "None | \n", - "210.0 | \n", - "19.0 | \n", - "16569.0 | \n", - "15817 | \n", - "62 | \n", - "
15 | \n", - "22 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "MODOMICS | \n", - "92 | \n", - "Modomics | \n", - "Y | \n", - "None | \n", - "Modomics | \n", - "None | \n", - "140.0 | \n", - "54.0 | \n", - "5025.0 | \n", - "319 | \n", - "60 | \n", - "
16 | \n", - "20 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "LNCIPEDIA | \n", - "612 | \n", - "LNCipedia | \n", - "Y | \n", - "None | \n", - "LNCipedia | \n", - "None | \n", - "1534.0 | \n", - "200.0 | \n", - "152544.0 | \n", - "126876 | \n", - "1 | \n", - "
17 | \n", - "28 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "RGD | \n", - "194 | \n", - "Rat Genome Database | \n", - "Y | \n", - "None | \n", - "RGD | \n", - "None | \n", - "2340.0 | \n", - "49.0 | \n", - "27956.0 | \n", - "11124 | \n", - "1 | \n", - "
18 | \n", - "27 | \n", - "2017-10-23 00:00:00.000000 | \n", - "RNACEN | \n", - "MGI | \n", - "174 | \n", - "Mouse Genome Database | \n", - "Y | \n", - "None | \n", - "MGI | \n", - "None | \n", - "851.0 | \n", - "21.0 | \n", - "84395.0 | \n", - "16719 | \n", - "1 | \n", - "
19 | \n", - "50 | \n", - "2022-08-16 15:52:33.990145 | \n", - "RNACEN | \n", - "PLNCDB | \n", - "606 | \n", - "PLncDB | \n", - "Y | \n", - "None | \n", - "PLncDB | \n", - "None | \n", - "6659.0 | \n", - "199.0 | \n", - "985945.0 | \n", - "936926 | \n", - "80 | \n", - "
20 | \n", - "49 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "RIBOVISION | \n", - "560 | \n", - "RiboVision | \n", - "Y | \n", - "None | \n", - "RiboVision | \n", - "None | \n", - "1676.0 | \n", - "30.0 | \n", - "5070.0 | \n", - "36 | \n", - "15 | \n", - "
21 | \n", - "42 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "INTACT | \n", - "740 | \n", - "IntAct | \n", - "Y | \n", - "None | \n", - "IntAct | \n", - "None | \n", - "705.0 | \n", - "18.0 | \n", - "93092.0 | \n", - "386 | \n", - "9 | \n", - "
22 | \n", - "48 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "PSICQUIC | \n", - "632 | \n", - "PSICQUIC | \n", - "Y | \n", - "None | \n", - "PSICQUIC | \n", - "None | \n", - "2251.0 | \n", - "21.0 | \n", - "84395.0 | \n", - "95 | \n", - "8 | \n", - "
23 | \n", - "34 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "ENSEMBL_METAZOA | \n", - "723 | \n", - "Ensembl Metazoa | \n", - "Y | \n", - "None | \n", - "Ensembl Metazoa | \n", - "None | \n", - "712.0 | \n", - "18.0 | \n", - "46956.0 | \n", - "285241 | \n", - "152 | \n", - "
24 | \n", - "40 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "MALACARDS | \n", - "726 | \n", - "MalaCards | \n", - "Y | \n", - "None | \n", - "MalaCards | \n", - "None | \n", - "1861.0 | \n", - "16.0 | \n", - "220253.0 | \n", - "57129 | \n", - "1 | \n", - "
25 | \n", - "41 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "GENECARDS | \n", - "727 | \n", - "MalaCards | \n", - "Y | \n", - "None | \n", - "GeneCards | \n", - "None | \n", - "1301.0 | \n", - "16.0 | \n", - "347561.0 | \n", - "515365 | \n", - "1 | \n", - "
26 | \n", - "47 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "ENSEMBL_GENCODE | \n", - "728 | \n", - "ENSEMBL_GENCODE | \n", - "Y | \n", - "None | \n", - "Ensembl/GENCODE | \n", - "None | \n", - "1274.0 | \n", - "32.0 | \n", - "347561.0 | \n", - "76095 | \n", - "2 | \n", - "
27 | \n", - "19 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "DICTYBASE | \n", - "86 | \n", - "dictyBase | \n", - "Y | \n", - "None | \n", - "dictyBase | \n", - "PRJ_DICTY | \n", - "82.0 | \n", - "32.0 | \n", - "1060.0 | \n", - "149 | \n", - "1 | \n", - "
28 | \n", - "12 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "SNOPY | \n", - "511 | \n", - "snOPY | \n", - "Y | \n", - "None | \n", - "snOPY | \n", - "PRJEB8122 | \n", - "117.0 | \n", - "42.0 | \n", - "1004.0 | \n", - "2501 | \n", - "7 | \n", - "
29 | \n", - "46 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "PIRBASE | \n", - "485 | \n", - "piRBase | \n", - "Y | \n", - "None | \n", - "piRBase | \n", - "None | \n", - "28.0 | \n", - "15.0 | \n", - "40.0 | \n", - "200827 | \n", - "17 | \n", - "
30 | \n", - "45 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "CRW | \n", - "593 | \n", - "CRW | \n", - "Y | \n", - "None | \n", - "CRW | \n", - "None | \n", - "1365.0 | \n", - "107.0 | \n", - "4381.0 | \n", - "934 | \n", - "685 | \n", - "
31 | \n", - "8 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "GTRNADB | \n", - "733 | \n", - "GtRNAdb | \n", - "Y | \n", - "None | \n", - "GtRNAdb | \n", - "PRJEB5173 | \n", - "76.0 | \n", - "53.0 | \n", - "356.0 | \n", - "93098 | \n", - "4337 | \n", - "
32 | \n", - "9 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "REFSEQ | \n", - "719 | \n", - "RefSeq | \n", - "Y | \n", - "None | \n", - "RefSeq | \n", - "None | \n", - "637.0 | \n", - "15.0 | \n", - "91671.0 | \n", - "107976 | \n", - "22247 | \n", - "
33 | \n", - "31 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "ENSEMBL_PLANTS | \n", - "722 | \n", - "Ensembl Plants | \n", - "Y | \n", - "None | \n", - "Ensembl Plants | \n", - "None | \n", - "279.0 | \n", - "15.0 | \n", - "79788.0 | \n", - "76428 | \n", - "59 | \n", - "
34 | \n", - "43 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "SNORNADB | \n", - "446 | \n", - "snoRNA Database | \n", - "Y | \n", - "None | \n", - "snoRNA Database | \n", - "None | \n", - "50.0 | \n", - "45.0 | \n", - "74.0 | \n", - "680 | \n", - "10 | \n", - "
35 | \n", - "10 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "RDP | \n", - "85 | \n", - "RDP | \n", - "Y | \n", - "None | \n", - "RDP | \n", - "None | \n", - "1536.0 | \n", - "1337.0 | \n", - "1600.0 | \n", - "4779 | \n", - "2487 | \n", - "
36 | \n", - "7 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "LNCRNADB | \n", - "464 | \n", - "lncRNAdb | \n", - "Y | \n", - "None | \n", - "lncRNAdb | \n", - "PRJEB6238 | \n", - "3086.0 | \n", - "61.0 | \n", - "32753.0 | \n", - "62 | \n", - "10 | \n", - "
37 | \n", - "39 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "MIRGENEDB | \n", - "559 | \n", - "MirGeneDB | \n", - "Y | \n", - "None | \n", - "MirGeneDB | \n", - "None | \n", - "41.0 | \n", - "20.0 | \n", - "282.0 | \n", - "18901 | \n", - "75 | \n", - "
38 | \n", - "17 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "SILVA | \n", - "610 | \n", - "SILVA | \n", - "Y | \n", - "None | \n", - "SILVA | \n", - "None | \n", - "1080.0 | \n", - "300.0 | \n", - "4997.0 | \n", - "8193988 | \n", - "665540 | \n", - "
39 | \n", - "30 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "ZWD | \n", - "615 | \n", - "ZWD | \n", - "Y | \n", - "None | \n", - "ZWD | \n", - "None | \n", - "86.0 | \n", - "21.0 | \n", - "951.0 | \n", - "44332 | \n", - "6763 | \n", - "
40 | \n", - "33 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "LNCBOOK | \n", - "616 | \n", - "LncBook | \n", - "Y | \n", - "None | \n", - "LncBook | \n", - "None | \n", - "1686.0 | \n", - "54.0 | \n", - "205012.0 | \n", - "322552 | \n", - "1 | \n", - "
41 | \n", - "11 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "PDBE | \n", - "734 | \n", - "PDBe | \n", - "Y | \n", - "None | \n", - "PDBe | \n", - "None | \n", - "591.0 | \n", - "10.0 | \n", - "19000.0 | \n", - "3760 | \n", - "382 | \n", - "
42 | \n", - "16 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "SGD | \n", - "736 | \n", - "SGD | \n", - "Y | \n", - "None | \n", - "SGD | \n", - "PRJ_SGD | \n", - "243.0 | \n", - "58.0 | \n", - "6858.0 | \n", - "212 | \n", - "1 | \n", - "
43 | \n", - "18 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "POMBASE | \n", - "737 | \n", - "PomBase | \n", - "Y | \n", - "None | \n", - "PomBase | \n", - "PRJNA13836 | \n", - "179.0 | \n", - "47.0 | \n", - "3485.0 | \n", - "200 | \n", - "1 | \n", - "
44 | \n", - "23 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "HGNC | \n", - "738 | \n", - "HUGO Gene Nomenclature Committee | \n", - "Y | \n", - "None | \n", - "HGNC | \n", - "None | \n", - "1058.0 | \n", - "33.0 | \n", - "205012.0 | \n", - "8438 | \n", - "1 | \n", - "
45 | \n", - "44 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "ZFIN | \n", - "741 | \n", - "ZFIN | \n", - "Y | \n", - "None | \n", - "ZFIN | \n", - "None | \n", - "907.0 | \n", - "83.0 | \n", - "13525.0 | \n", - "1025 | \n", - "1 | \n", - "
46 | \n", - "37 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "SNODB | \n", - "444 | \n", - "snoDB | \n", - "Y | \n", - "None | \n", - "snoDB | \n", - "None | \n", - "117.0 | \n", - "33.0 | \n", - "791.0 | \n", - "1970 | \n", - "1 | \n", - "
47 | \n", - "13 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "GREENGENES | \n", - "71 | \n", - "Greengenes | \n", - "Y | \n", - "None | \n", - "Greengenes | \n", - "None | \n", - "1403.0 | \n", - "1253.0 | \n", - "2368.0 | \n", - "1004892 | \n", - "92684 | \n", - "
48 | \n", - "38 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "5SRRNADB | \n", - "387 | \n", - "5SrRNAdb | \n", - "Y | \n", - "None | \n", - "5SrRNAdb | \n", - "None | \n", - "120.0 | \n", - "95.0 | \n", - "180.0 | \n", - "9487 | \n", - "7158 | \n", - "
49 | \n", - "32 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "LNCBASE | \n", - "248 | \n", - "LncBase | \n", - "Y | \n", - "None | \n", - "LncBase | \n", - "None | \n", - "22.0 | \n", - "17.0 | \n", - "26.0 | \n", - "1151 | \n", - "2 | \n", - "
50 | \n", - "29 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "TARBASE | \n", - "223 | \n", - "TarBase | \n", - "Y | \n", - "None | \n", - "TarBase | \n", - "None | \n", - "22.0 | \n", - "16.0 | \n", - "27.0 | \n", - "1291 | \n", - "2 | \n", - "
51 | \n", - "4 | \n", - "2017-05-01 00:00:00.000000 | \n", - "RNACEN | \n", - "MIRBASE | \n", - "435 | \n", - "MIRBASE | \n", - "Y | \n", - "None | \n", - "miRBase | \n", - "PRJEB4451 | \n", - "55.0 | \n", - "15.0 | \n", - "2354.0 | \n", - "65389 | \n", - "271 | \n", - "
52 | \n", - "21 | \n", - "2017-05-02 00:00:00.000000 | \n", - "RNACEN | \n", - "NONCODE | \n", - "146 | \n", - "NONCODE | \n", - "Y | \n", - "None | \n", - "NONCODE | \n", - "None | \n", - "1130.0 | \n", - "201.0 | \n", - "244296.0 | \n", - "234669 | \n", - "7 | \n", - "
\n", - " | id | \n", - "training_data_type | \n", - "question | \n", - "content | \n", - "
---|---|---|---|---|
0 | \n", - "15-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the PARTSUPP table.\\n\\nThe ... | \n", - "
1 | \n", - "11-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the CUSTOMER table.\\n\\nThe ... | \n", - "
2 | \n", - "14-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the ORDERS table.\\n\\nThe fo... | \n", - "
3 | \n", - "1244-sql | \n", - "sql | \n", - "What are the names of the top 10 customers? | \n", - "SELECT c.c_name as customer_name\\nFROM snowf... | \n", - "
4 | \n", - "1242-sql | \n", - "sql | \n", - "What are the top 5 customers in terms of total... | \n", - "SELECT c.c_name AS customer_name, SUM(l.l_quan... | \n", - "
5 | \n", - "17-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the REGION table.\\n\\nThe fo... | \n", - "
6 | \n", - "16-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the PART table.\\n\\nThe foll... | \n", - "
7 | \n", - "1243-sql | \n", - "sql | \n", - "What are the top 10 customers with the highest... | \n", - "SELECT c.c_name as customer_name,\\n sum(... | \n", - "
8 | \n", - "1239-sql | \n", - "sql | \n", - "What are the top 100 customers based on their ... | \n", - "SELECT c.c_name as customer_name,\\n sum(... | \n", - "
9 | \n", - "13-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the SUPPLIER table.\\n\\nThe ... | \n", - "
10 | \n", - "1241-sql | \n", - "sql | \n", - "What are the top 10 customers in terms of tota... | \n", - "SELECT c.c_name as customer_name,\\n sum(... | \n", - "
11 | \n", - "12-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the LINEITEM table.\\n\\nThe ... | \n", - "
12 | \n", - "18-doc | \n", - "documentation | \n", - "None | \n", - "This is a table in the NATION table.\\n\\nThe fo... | \n", - "
13 | \n", - "1248-sql | \n", - "sql | \n", - "How many customers are in each country? | \n", - "SELECT n.n_name as country,\\n count(*) a... | \n", - "
14 | \n", - "1240-sql | \n", - "sql | \n", - "What is the number of orders placed each week? | \n", - "SELECT date_trunc('week', o_orderdate) as week... | \n", - "