From 65706a34be473bf412b01f8d7bc3cb942b55f688 Mon Sep 17 00:00:00 2001
From: haiyenvu96 <haiyen96.hp@gmail.com>
Date: Wed, 22 May 2024 00:29:50 +0000
Subject: [PATCH 1/4] extract table

---
 notebooks/yen/debug-ollama-extract.ipynb | 190 +++++++++++++++++++++++
 ollama-extract.py                        |  13 +-
 2 files changed, 202 insertions(+), 1 deletion(-)
 create mode 100644 notebooks/yen/debug-ollama-extract.ipynb

diff --git a/notebooks/yen/debug-ollama-extract.ipynb b/notebooks/yen/debug-ollama-extract.ipynb
new file mode 100644
index 0000000..1618708
--- /dev/null
+++ b/notebooks/yen/debug-ollama-extract.ipynb
@@ -0,0 +1,190 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import fitz\n",
+    "import ollama"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "doc = fitz.open(\"../../example.pdf\")\n",
+    "for pageNumber, page in enumerate(doc.pages(), start=1):\n",
+    "    if pageNumber > 2 and pageNumber < 10:\n",
+    "        text = page.get_text().encode(\"utf8\")\n",
+    "        with open(f\"texts/output_{pageNumber}.txt\", \"wb\") as out:\n",
+    "            out.write(text)  # write text of page\n",
+    "            out.write(bytes((12,)))  # write page delimiter (form feed 0x0C)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Assessing the impact of observations \n",
+      " \n",
+      " \n",
+      " \n",
+      "Technical Memorandum No. 916  \n",
+      " \n",
+      " \n",
+      " \n",
+      "3 \n",
+      "Abstract \n",
+      " \n",
+      "We review the assessment of existing observations with Observing System Experiments (OSEs) and the \n",
+      "Forecast Sensitivity to Observation Impact (FSOI) approach. Although care is needed when interpreting their \n",
+      "results, the information they provide is largely consistent. The Ensemble of Data Assimilations (EDA) provides \n",
+      "an affordable and manageable framework for simulating the impact of future observing systems. Recent \n",
+      "experience comparing EDA predictions with the subsequent impact of real measurements gives us some \n",
+      "confidence that, with appropriate interpretation and care, they provide useful information that can help guide \n",
+      "the future evolution of the global observing system. \n",
+      "Plain Language Summary \n",
+      " \n",
+      "This “Special Topic Paper” was originally presented to the ECMWF Science Advisory Committee (SAC) in \n",
+      "October 2022. It has been reproduced here, with minor editorial changes, as an ECMWF Technical \n",
+      "Memorandum to enable broader access to the document.  \n",
+      "The central question addressed in the paper is: How can we predict the potential impact of future observations \n",
+      "on the quality weather forecasts produced with numerical weather prediction systems? This is clearly \n",
+      "fundamental when trying to plan how the global observing system (GOS) should evolve, but it is extremely \n",
+      "difficult to address in practice. To provide appropriate context, this paper starts by reviewing how the impact \n",
+      "of the current, real observations is assessed, emphasising that this apparently straightforward task requires \n",
+      "considerable skill and care when interpreting the results. We then discuss the use of ensemble methods \n",
+      "introduced by ECMWF in 2007 designed to predict the impact of the future observations on theoretical \n",
+      "estimates of analysis and short-range forecast error statistics. The strengths and weaknesses of these ensemble \n",
+      "methods are discussed, and examples using both current, real and future satellite observations are presented.     \n",
+      " \n",
+      " \n",
+      " \n",
+      "\f\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Program to read the entire file using read() function\n",
+    "file = open(\"texts/output_3.txt\", \"r\")\n",
+    "content = file.read()\n",
+    "print(content)\n",
+    "file.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import camelot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "tables = camelot.read_pdf(\"../../example.pdf\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tabula\n",
+    "import os\n",
+    "from tabula.io import read_pdf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Got stderr: May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "WARNING: No Unicode mapping for 10 (36) in font AAAABI+CambriaMath\n",
+      "May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "WARNING: No Unicode mapping for 5 (37) in font AAAABI+CambriaMath\n",
+      "May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "WARNING: No Unicode mapping for 7 (38) in font AAAABI+CambriaMath\n",
+      "May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "WARNING: No Unicode mapping for 4 (40) in font AAAABI+CambriaMath\n",
+      "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "WARNING: No Unicode mapping for 8 (41) in font AAAABI+CambriaMath\n",
+      "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "WARNING: No Unicode mapping for 26 (51) in font AAAABG+CambriaMath\n",
+      "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "WARNING: No Unicode mapping for 27 (54) in font AAAABG+CambriaMath\n",
+      "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "WARNING: No Unicode mapping for 28 (55) in font AAAABG+CambriaMath\n",
+      "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "WARNING: No Unicode mapping for 9 (42) in font AAAABI+CambriaMath\n",
+      "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "WARNING: No Unicode mapping for 6 (43) in font AAAABI+CambriaMath\n",
+      "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "WARNING: No Unicode mapping for 3 (44) in font AAAABI+CambriaMath\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "# read PDF file\n",
+    "tables = read_pdf(\"../../example.pdf\", pages=\"all\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.makedirs('tables', exist_ok = True) \n",
+    "for table_idx in range(len(tables)):\n",
+    "    tables[table_idx].to_csv(f\"tables/output_{table_idx}.csv\")\n",
+    "    "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "dev",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/ollama-extract.py b/ollama-extract.py
index 5020b24..eee869d 100644
--- a/ollama-extract.py
+++ b/ollama-extract.py
@@ -1,6 +1,10 @@
 import fitz
 import ollama
+import tabula
+import os
+from tabula.io import read_pdf
 
+# extract texts
 doc = fitz.open("example.pdf")
 for pageNumber, page in enumerate(doc.pages(), start=1):
     if pageNumber > 2 and pageNumber < 10:
@@ -9,6 +13,12 @@
             out.write(text)  # write text of page
             out.write(bytes((12,)))  # write page delimiter (form feed 0x0C)
 
+# extract tables
+tables = read_pdf("example.pdf", pages="all")# read PDF file
+os.makedirs('tables', exist_ok = True) 
+for table_idx, table in enumerate(tables):
+    table.to_csv(f"tables/output_{table_idx}.csv")
+
 
 system_promt = "You are a helpful Natural Language Processing expert who extracts relevant information and store them on a Knowledge Graph"
 
@@ -46,7 +56,8 @@
 """
 
 # for page in np.arange(3,10):
-with open("texts/output_4.txt", "r", encoding="ascii") as f:
+# with open("texts/output_4.txt", "r", encoding="ascii") as f:
+with open("texts/output_4.txt", "r", encoding='utf-8') as f:
     text = f.readlines()
     text = " ".join(text)
     text = text.replace("/n", "")

From 0bc684f71f339a2b8baf6951a897b401fd1fe182 Mon Sep 17 00:00:00 2001
From: haiyenvu96 <haiyen96.hp@gmail.com>
Date: Sun, 2 Jun 2024 19:15:47 +0000
Subject: [PATCH 2/4] refactor git

---
 ...ipynb => debug-extract-texts-tables.ipynb} | 54 +++++++++----------
 1 file changed, 26 insertions(+), 28 deletions(-)
 rename notebooks/{yen/debug-ollama-extract.ipynb => debug-extract-texts-tables.ipynb} (76%)

diff --git a/notebooks/yen/debug-ollama-extract.ipynb b/notebooks/debug-extract-texts-tables.ipynb
similarity index 76%
rename from notebooks/yen/debug-ollama-extract.ipynb
rename to notebooks/debug-extract-texts-tables.ipynb
index 1618708..eb48ecc 100644
--- a/notebooks/yen/debug-ollama-extract.ipynb
+++ b/notebooks/debug-extract-texts-tables.ipynb
@@ -6,28 +6,27 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import fitz\n",
-    "import ollama"
+    "import fitz"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
-    "doc = fitz.open(\"../../example.pdf\")\n",
+    "doc = fitz.open(\"../example.pdf\")\n",
     "for pageNumber, page in enumerate(doc.pages(), start=1):\n",
     "    if pageNumber > 2 and pageNumber < 10:\n",
     "        text = page.get_text().encode(\"utf8\")\n",
-    "        with open(f\"texts/output_{pageNumber}.txt\", \"wb\") as out:\n",
+    "        with open(f\"../texts/output_{pageNumber}.txt\", \"wb\") as out:\n",
     "            out.write(text)  # write text of page\n",
     "            out.write(bytes((12,)))  # write page delimiter (form feed 0x0C)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -75,7 +74,7 @@
    ],
    "source": [
     "# Program to read the entire file using read() function\n",
-    "file = open(\"texts/output_3.txt\", \"r\")\n",
+    "file = open(\"../texts/output_3.txt\", \"r\")\n",
     "content = file.read()\n",
     "print(content)\n",
     "file.close()"
@@ -83,7 +82,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -92,17 +91,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
     "\n",
-    "tables = camelot.read_pdf(\"../../example.pdf\")"
+    "tables = camelot.read_pdf(\"../example.pdf\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -113,43 +112,42 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Got stderr: May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "Jun 02, 2024 7:14:35 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
       "WARNING: No Unicode mapping for 10 (36) in font AAAABI+CambriaMath\n",
-      "May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "Jun 02, 2024 7:14:35 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
       "WARNING: No Unicode mapping for 5 (37) in font AAAABI+CambriaMath\n",
-      "May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "Jun 02, 2024 7:14:35 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
       "WARNING: No Unicode mapping for 7 (38) in font AAAABI+CambriaMath\n",
-      "May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "Jun 02, 2024 7:14:35 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
       "WARNING: No Unicode mapping for 4 (40) in font AAAABI+CambriaMath\n",
-      "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "Jun 02, 2024 7:14:37 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
       "WARNING: No Unicode mapping for 8 (41) in font AAAABI+CambriaMath\n",
-      "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "Jun 02, 2024 7:14:37 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
       "WARNING: No Unicode mapping for 26 (51) in font AAAABG+CambriaMath\n",
-      "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "Jun 02, 2024 7:14:37 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
       "WARNING: No Unicode mapping for 27 (54) in font AAAABG+CambriaMath\n",
-      "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "Jun 02, 2024 7:14:37 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
       "WARNING: No Unicode mapping for 28 (55) in font AAAABG+CambriaMath\n",
-      "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "Jun 02, 2024 7:14:37 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
       "WARNING: No Unicode mapping for 9 (42) in font AAAABI+CambriaMath\n",
-      "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "Jun 02, 2024 7:14:37 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
       "WARNING: No Unicode mapping for 6 (43) in font AAAABI+CambriaMath\n",
-      "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
-      "WARNING: No Unicode mapping for 3 (44) in font AAAABI+CambriaMath\n",
-      "\n"
+      "Jun 02, 2024 7:14:37 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n",
+      "WARNING: No Unicode mapping for 3 (44) in font AAAABI+CambriaMath\n"
      ]
     }
    ],
    "source": [
     "\n",
     "# read PDF file\n",
-    "tables = read_pdf(\"../../example.pdf\", pages=\"all\")"
+    "tables = read_pdf(\"../example.pdf\", pages=\"all\")"
    ]
   },
   {
@@ -158,9 +156,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "os.makedirs('tables', exist_ok = True) \n",
+    "# os.makedirs('tables', exist_ok = True) \n",
     "for table_idx in range(len(tables)):\n",
-    "    tables[table_idx].to_csv(f\"tables/output_{table_idx}.csv\")\n",
+    "    tables[table_idx].to_csv(f\"../tables/output_{table_idx}.csv\")\n",
     "    "
    ]
   }

From 42ffa804076026358c01988c878425feec408bc4 Mon Sep 17 00:00:00 2001
From: haiyenvu96 <haiyen96.hp@gmail.com>
Date: Wed, 12 Jun 2024 18:06:37 +0000
Subject: [PATCH 3/4] add table output examples

---
 tables/output_0.csv | 11 +++++++++++
 tables/output_1.csv |  6 ++++++
 2 files changed, 17 insertions(+)
 create mode 100644 tables/output_0.csv
 create mode 100644 tables/output_1.csv

diff --git a/tables/output_0.csv b/tables/output_0.csv
new file mode 100644
index 0000000..0ae3061
--- /dev/null
+++ b/tables/output_0.csv
@@ -0,0 +1,11 @@
+,Constellation name Type of orbits,Number of,Number of.1
+0,,orbital planes,satellites
+1,Real data,,
+2,No MW sounders -,0,0
+3,Metop/JPSS baseline Sun-synchronous,2,4 (Metop-A/B; S-
+4,,,"NPP, NOAA-20)"
+5,Metop/JPSS+ Sun-synchronous,5,8* (Metop-A/B; S-
+6,,,"NPP, NOAA-15/"
+7,,,"18/19/20, F-17)"
+8,"Simulated new data, added to the Metop/JPSS baseline with real data",,
+9,Polar Sun-synchronous,4,8
diff --git a/tables/output_1.csv b/tables/output_1.csv
new file mode 100644
index 0000000..a026e5d
--- /dev/null
+++ b/tables/output_1.csv
@@ -0,0 +1,6 @@
+,Polar+,Sun-synchronous,7,14
+0,Polar++,Sun-synchronous,10.0,20.0
+1,4x2,Mid-inclination (60o),4.0,8.0
+2,6x2,Mid-inclination (60o),6.0,12.0
+3,Polar + 4x2,Sun-synchronous +,8.0,16.0
+4,,mid-inclination (60o),,

From 5062426ecc1e31db204dffc649ebc28cce84023d Mon Sep 17 00:00:00 2001
From: haiyenvu96 <haiyen96.hp@gmail.com>
Date: Tue, 6 Aug 2024 14:14:05 +0000
Subject: [PATCH 4/4] add interactive frontend

---
 frontend/app-interactive.py | 315 ++++++++++++++++++++++++++++++++++++
 1 file changed, 315 insertions(+)
 create mode 100644 frontend/app-interactive.py

diff --git a/frontend/app-interactive.py b/frontend/app-interactive.py
new file mode 100644
index 0000000..e5b3097
--- /dev/null
+++ b/frontend/app-interactive.py
@@ -0,0 +1,315 @@
+import streamlit as st
+from dotenv import load_dotenv
+import os
+from langchain_openai import ChatOpenAI
+from langchain_core.messages import AIMessage, HumanMessage
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+import logging
+from neo4j import GraphDatabase
+from pyvis.network import Network
+import streamlit.components.v1 as components
+
+
+logging.basicConfig(level=logging.INFO)
+
+# Load variables from a local .env file (if present) into the environment.
+_ = load_dotenv()
+# Connection settings; os.getenv returns None for any variable that is unset.
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+URI = os.getenv("NEO4J_URI")
+user = os.getenv("NEO4J_USERNAME")
+password = os.getenv("NEO4J_PASSWORD")
+
+
+def get_response(user_query, contexts, chat_history):
+    """Stream the model's answer to ``user_query``.
+
+    The prompt template is filled with the chat history, the retrieved
+    contexts and the question, then piped through ``ChatOpenAI`` and
+    ``StrOutputParser``; the result of ``stream`` on that chain is returned.
+    """
+    template = """
+    You are a helpful assistant. Answer the following questions considering the history of the conversation:
+
+    Chat history: {chat_history}
+
+    Contexts: {contexts}
+
+    User question: {user_question}
+    """
+
+    prompt_inputs = {
+        "chat_history": chat_history,
+        "contexts": contexts,
+        "user_question": user_query,
+    }
+    pipeline = ChatPromptTemplate.from_template(template) | ChatOpenAI() | StrOutputParser()
+    return pipeline.stream(prompt_inputs)
+
+
+def get_answer_neo4j(driver, question):
+    """Return (contexts, chunkIds, score) for the top-5 chunks matching ``question``.
+
+    ``score`` is the last row's score, or None when the query returns no rows.
+    """
+    contexts, chunkIds, score = [], [], None
+    with driver.session() as session:
+        query = """
+                WITH genai.vector.encode(
+                    $question,
+                    "OpenAI",
+                    {
+                    token: $openAiApiKey
+                    }) AS question_embedding
+                CALL db.index.vector.queryNodes(
+                    'chunk_content_embeddings',
+                    $top_k,
+                    question_embedding
+                    ) YIELD node AS chunk, score
+                RETURN chunk.name, chunk.content, score
+            """
+        result = session.run(
+            query, {"question": question, "openAiApiKey": OPENAI_API_KEY, "top_k": 5}
+        )
+        for record in result:
+            name = record["chunk.name"]
+            score = record["score"]
+            chunkIds.append(name)
+            contexts.append(record["chunk.content"])
+            logging.info("Name: %s, score: %s", name, score)
+    return contexts, chunkIds, score
+
+
+def query_subgraph(driver, chunkIds):
+    """Fetch every node named in ``chunkIds`` together with its neighbours.
+
+    Returns a list with one record per query row (keys ``node``/``neighbors``).
+    """
+    query = """
+    WITH $chunkIds AS names
+    MATCH (n)
+    WHERE n.name IN names
+    OPTIONAL MATCH (n)-[r]-(neighbor)
+    RETURN
+    {name: n.name, properties: apoc.map.fromLists(keys(n), [p in keys(n) | n[p]])} AS node,
+    collect({
+          neighbor: {name: neighbor.name, properties: apoc.map.fromLists(keys(neighbor), [p in keys(neighbor) | neighbor[p]])},
+          relationship: {label: type(r), properties: apoc.map.fromLists(keys(r), [p in keys(r) | r[p]])}
+  }) AS neighbors
+    """
+
+    with driver.session() as session:
+        return list(session.run(query, {"chunkIds": chunkIds}))
+
+
+def process_subgraph_to_pyvis(subgraph):
+    """Build a pyvis ``Network`` from the records returned by ``query_subgraph``.
+
+    Each record's node is added in red and its neighbours in blue; edges carry
+    the relationship type as label. Placeholder neighbours with a null name (as
+    an unmatched OPTIONAL MATCH would presumably produce) are skipped.
+    """
+    net = Network(height="750px", width="100%", notebook=True)
+    for record in subgraph:
+        node = record["node"]
+        node_id = node["name"]
+        net.add_node(node_id, label=node_id, title=str(node["properties"]), color="red")
+
+        for neighbor_info in record["neighbors"]:
+            neighbor = neighbor_info["neighbor"]
+            relationship = neighbor_info["relationship"]
+            # A dict like {"name": None, ...} is truthy, so test the name itself.
+            if not neighbor or neighbor["name"] is None:
+                continue
+            neighbor_id = neighbor["name"]
+            net.add_node(
+                neighbor_id,
+                label=neighbor_id,
+                title=str(neighbor["properties"]),
+                color="blue",
+            )
+            if relationship:
+                net.add_edge(
+                    node_id,
+                    neighbor_id,
+                    label=relationship["label"],
+                    title=str(relationship["properties"]),
+                )
+    return net
+
+
+def main():
+    """Run the Streamlit app: chat window, saved-graph list and graph viewer."""
+    import ast  # local import: only needed to parse clicked-node titles safely
+
+    st.set_page_config(page_title="Study with me", page_icon=":books:", layout="wide")
+    driver = GraphDatabase.driver(URI, auth=(user, password))
+    col1, col2, col3 = st.columns([3, 2, 5], gap="small")  # Adjusted column widths
+
+    if "count" not in st.session_state:
+        st.session_state.count = 0
+
+    graph_path = "./graphs"
+    # exist_ok makes this idempotent and avoids a check-then-create race.
+    os.makedirs(graph_path, exist_ok=True)
+
+    # Left Column: Chat Window
+    with col1:
+        st.subheader("Chat window")
+        if "chat_history" not in st.session_state:
+            st.session_state.chat_history = [
+                AIMessage(content="Hello, I am a bot. How can I help you?"),
+            ]
+
+        # Display conversation
+        for message in st.session_state.chat_history:
+            if isinstance(message, AIMessage):
+                with st.chat_message("AI"):
+                    st.write(message.content)
+            elif isinstance(message, HumanMessage):
+                with st.chat_message("Human"):
+                    st.write(message.content)
+
+        # User input
+        user_query = st.chat_input("Type your message here...")
+
+        if user_query is not None and user_query != "":
+            st.session_state.count += 1
+            contexts, chunkIds, score = get_answer_neo4j(driver, user_query)
+            contexts_string = "\n".join(contexts)
+            print(contexts)
+            subgraph = query_subgraph(driver, chunkIds)
+            net = process_subgraph_to_pyvis(subgraph)
+            html_file_path = f"graphs/graph_{st.session_state.count}.html"
+            net.save_graph(html_file_path)
+
+            st.session_state.chat_history.append(HumanMessage(content=user_query))
+
+            with st.chat_message("Human"):
+                st.markdown(user_query)
+
+            with st.chat_message("AI"):
+                response = st.write_stream(
+                    get_response(
+                        user_query, contexts_string, st.session_state.chat_history
+                    )
+                )
+            st.session_state.chat_history.append(AIMessage(content=response))
+
+    # Middle Column: List of HTML Files
+    with col2:
+        st.subheader("Graph History")
+        graph_dir = "graphs/"  # renamed from `dir`, which shadowed the builtin
+        html_files = [file for file in os.listdir(graph_dir) if file.endswith(".html")]
+        if "selected_html" not in st.session_state:
+            st.session_state.selected_html = None
+
+        for file in html_files:
+            if st.button(file):
+                st.session_state.selected_html = file
+
+    # Right Column: Graph Visualization and Node Information
+    with col3:
+        st.subheader("Graph Visualization")
+
+        if st.session_state.selected_html:
+            file_path = os.path.join(graph_dir, st.session_state.selected_html)
+
+            # Top Row: Interactive Graph Visualization
+            with st.container():
+                # Read the saved graph page so the click-handling script can be appended.
+                with open(file_path, "r", encoding="utf-8") as file:
+                    graph_content = file.read()
+
+                # Inject JavaScript for click detection
+                script = """
+                <script>
+                // ----------------------------------------------------
+                // Just copy/paste these functions as-is:
+
+                function sendMessageToStreamlitClient(type, data) {
+                    var outData = Object.assign({
+                    isStreamlitMessage: true,
+                    type: type,
+                    }, data);
+                    window.parent.postMessage(outData, "*");
+                }
+
+                function init() {
+                    sendMessageToStreamlitClient("streamlit:componentReady", {apiVersion: 1});
+                }
+
+                function setFrameHeight(height) {
+                    sendMessageToStreamlitClient("streamlit:setFrameHeight", {height: height});
+                }
+
+                // The `data` argument can be any JSON-serializable value.
+                function sendDataToPython(data) {
+                    sendMessageToStreamlitClient("streamlit:setComponentValue", data);
+                }
+
+                // -------------- Receive info from Graph -----------------------
+
+                function onClick(event) {
+                        const nodeId = event.nodes[0];
+                        if (nodeId) {
+                        var clickedNode = allNodes[nodeId]
+                            
+                            sendDataToPython({
+                            value: clickedNode,
+                            dataType: "json",
+                            });
+                        }
+                    }
+
+                    network.on('click', onClick);
+
+                // ----------------------------------------------------
+                // Now modify this part of the code to fit your needs:
+
+                // Hook things up!
+                init();
+
+                // Hack to autoset the iframe height.
+                // window.addEventListener("load", function() {
+                //     window.setTimeout(function() {
+                //     setFrameHeight(document.documentElement.clientHeight)
+                //     }, 0);
+                // });
+
+                // Optionally, if the automatic height computation fails you, give this component a height manually
+                // by commenting out below:
+                setFrameHeight(500);
+                </script>
+
+                """
+                index_content = graph_content + script
+                # Write the patched page to ./index.html for the component below.
+                index_path = "./index.html"
+                with open(index_path, "w", encoding="utf-8") as file:
+                    file.write(index_content)
+
+                # Declare a component served from "." (it loads ./index.html).
+                # NOTE(review): path="." may expose the whole cwd (e.g. .env) - confirm.
+                mycomponent = components.declare_component(
+                    name=os.path.basename(file_path),
+                    path=".",
+                )
+                node_info = mycomponent()
+
+            # Bottom Row: Node Information
+            with st.container():
+                st.subheader("Node Information")
+                if node_info is not None:
+                    # The title comes back from the browser: parse it as a Python
+                    # literal rather than eval()-ing client-controlled text.
+                    information = ast.literal_eval(node_info['title'])
+                    information.pop('contentEmbedding', None)
+                    st.write(information)
+
+    driver.close()
+
+
+if __name__ == "__main__":  # start the app only when executed as a script
+    main()