From 65706a34be473bf412b01f8d7bc3cb942b55f688 Mon Sep 17 00:00:00 2001 From: haiyenvu96 Date: Wed, 22 May 2024 00:29:50 +0000 Subject: [PATCH 1/4] extract table --- notebooks/yen/debug-ollama-extract.ipynb | 190 +++++++++++++++++++++++ ollama-extract.py | 13 +- 2 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 notebooks/yen/debug-ollama-extract.ipynb diff --git a/notebooks/yen/debug-ollama-extract.ipynb b/notebooks/yen/debug-ollama-extract.ipynb new file mode 100644 index 0000000..1618708 --- /dev/null +++ b/notebooks/yen/debug-ollama-extract.ipynb @@ -0,0 +1,190 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import fitz\n", + "import ollama" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "doc = fitz.open(\"../../example.pdf\")\n", + "for pageNumber, page in enumerate(doc.pages(), start=1):\n", + " if pageNumber > 2 and pageNumber < 10:\n", + " text = page.get_text().encode(\"utf8\")\n", + " with open(f\"texts/output_{pageNumber}.txt\", \"wb\") as out:\n", + " out.write(text) # write text of page\n", + " out.write(bytes((12,))) # write page delimiter (form feed 0x0C)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Assessing the impact of observations \n", + " \n", + " \n", + " \n", + "Technical Memorandum No. 916 \n", + " \n", + " \n", + " \n", + "3 \n", + "Abstract \n", + " \n", + "We review the assessment of existing observations with Observing System Experiments (OSEs) and the \n", + "Forecast Sensitivity to Observation Impact (FSOI) approach. Although care is needed when interpreting their \n", + "results, the information they provide is largely consistent. 
The Ensemble of Data Assimilations (EDA) provides \n", + "an affordable and manageable framework for simulating the impact of future observing systems. Recent \n", + "experience comparing EDA predictions with the subsequent impact of real measurements gives us some \n", + "confidence that, with appropriate interpretation and care, they provide useful information that can help guide \n", + "the future evolution of the global observing system. \n", + "Plain Language Summary \n", + " \n", + "This “Special Topic Paper” was originally presented to the ECMWF Science Advisory Committee (SAC) in \n", + "October 2022. It has been reproduced here, with minor editorial changes, as an ECMWF Technical \n", + "Memorandum to enable broader access to the document. \n", + "The central question addressed in the paper is: How can we predict the potential impact of future observations \n", + "on the quality weather forecasts produced with numerical weather prediction systems? This is clearly \n", + "fundamental when trying to plan how the global observing system (GOS) should evolve, but it is extremely \n", + "difficult to address in practice. To provide appropriate context, this paper starts by reviewing how the impact \n", + "of the current, real observations is assessed, emphasising that this apparently straightforward task requires \n", + "considerable skill and care when interpreting the results. We then discuss the use of ensemble methods \n", + "introduced by ECMWF in 2007 designed to predict the impact of the future observations on theoretical \n", + "estimates of analysis and short-range forecast error statistics. The strengths and weaknesses of these ensemble \n", + "methods are discussed, and examples using both current, real and future satellite observations are presented. 
\n", + " \n", + " \n", + " \n", + "\f\n" + ] + } + ], + "source": [ + "# Program to read the entire file using read() function\n", + "file = open(\"texts/output_3.txt\", \"r\")\n", + "content = file.read()\n", + "print(content)\n", + "file.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "import camelot" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "tables = camelot.read_pdf(\"../../example.pdf\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "import tabula\n", + "import os\n", + "from tabula.io import read_pdf" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Got stderr: May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "WARNING: No Unicode mapping for 10 (36) in font AAAABI+CambriaMath\n", + "May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "WARNING: No Unicode mapping for 5 (37) in font AAAABI+CambriaMath\n", + "May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "WARNING: No Unicode mapping for 7 (38) in font AAAABI+CambriaMath\n", + "May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "WARNING: No Unicode mapping for 4 (40) in font AAAABI+CambriaMath\n", + "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "WARNING: No Unicode mapping for 8 (41) in font AAAABI+CambriaMath\n", + "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "WARNING: No Unicode mapping for 26 (51) in font AAAABG+CambriaMath\n", + "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "WARNING: No Unicode mapping for 27 (54) in font 
AAAABG+CambriaMath\n", + "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "WARNING: No Unicode mapping for 28 (55) in font AAAABG+CambriaMath\n", + "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "WARNING: No Unicode mapping for 9 (42) in font AAAABI+CambriaMath\n", + "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "WARNING: No Unicode mapping for 6 (43) in font AAAABI+CambriaMath\n", + "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "WARNING: No Unicode mapping for 3 (44) in font AAAABI+CambriaMath\n", + "\n" + ] + } + ], + "source": [ + "\n", + "# read PDF file\n", + "tables = read_pdf(\"../../example.pdf\", pages=\"all\")" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "os.makedirs('tables', exist_ok = True) \n", + "for table_idx in range(len(tables)):\n", + " tables[table_idx].to_csv(f\"tables/output_{table_idx}.csv\")\n", + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/ollama-extract.py b/ollama-extract.py index 5020b24..eee869d 100644 --- a/ollama-extract.py +++ b/ollama-extract.py @@ -1,6 +1,10 @@ import fitz import ollama +import tabula +import os +from tabula.io import read_pdf +# extract texts doc = fitz.open("example.pdf") for pageNumber, page in enumerate(doc.pages(), start=1): if pageNumber > 2 and pageNumber < 10: @@ -9,6 +13,12 @@ out.write(text) # write text of page out.write(bytes((12,))) # write page delimiter (form feed 0x0C) 
+# extract tables +tables = read_pdf("example.pdf", pages="all")# read PDF file +os.makedirs('tables', exist_ok = True) +for table_idx, table in enumerate(tables): + table.to_csv(f"tables/output_{table_idx}.csv") + system_promt = "You are a helpful Natural Language Processing expert who extracts relevant information and store them on a Knowledge Graph" @@ -46,7 +56,8 @@ """ # for page in np.arange(3,10): -with open("texts/output_4.txt", "r", encoding="ascii") as f: +# with open("texts/output_4.txt", "r", encoding="ascii") as f: +with open("texts/output_4.txt", "r", encoding='utf-8') as f: text = f.readlines() text = " ".join(text) text = text.replace("/n", "") From 0bc684f71f339a2b8baf6951a897b401fd1fe182 Mon Sep 17 00:00:00 2001 From: haiyenvu96 Date: Sun, 2 Jun 2024 19:15:47 +0000 Subject: [PATCH 2/4] refactor git --- ...ipynb => debug-extract-texts-tables.ipynb} | 54 +++++++++---------- 1 file changed, 26 insertions(+), 28 deletions(-) rename notebooks/{yen/debug-ollama-extract.ipynb => debug-extract-texts-tables.ipynb} (76%) diff --git a/notebooks/yen/debug-ollama-extract.ipynb b/notebooks/debug-extract-texts-tables.ipynb similarity index 76% rename from notebooks/yen/debug-ollama-extract.ipynb rename to notebooks/debug-extract-texts-tables.ipynb index 1618708..eb48ecc 100644 --- a/notebooks/yen/debug-ollama-extract.ipynb +++ b/notebooks/debug-extract-texts-tables.ipynb @@ -6,28 +6,27 @@ "metadata": {}, "outputs": [], "source": [ - "import fitz\n", - "import ollama" + "import fitz" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "doc = fitz.open(\"../../example.pdf\")\n", + "doc = fitz.open(\"../example.pdf\")\n", "for pageNumber, page in enumerate(doc.pages(), start=1):\n", " if pageNumber > 2 and pageNumber < 10:\n", " text = page.get_text().encode(\"utf8\")\n", - " with open(f\"texts/output_{pageNumber}.txt\", \"wb\") as out:\n", + " with open(f\"../texts/output_{pageNumber}.txt\", 
\"wb\") as out:\n", " out.write(text) # write text of page\n", " out.write(bytes((12,))) # write page delimiter (form feed 0x0C)" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -75,7 +74,7 @@ ], "source": [ "# Program to read the entire file using read() function\n", - "file = open(\"texts/output_3.txt\", \"r\")\n", + "file = open(\"../texts/output_3.txt\", \"r\")\n", "content = file.read()\n", "print(content)\n", "file.close()" @@ -83,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -92,17 +91,17 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "\n", - "tables = camelot.read_pdf(\"../../example.pdf\")" + "tables = camelot.read_pdf(\"../example.pdf\")" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -113,43 +112,42 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Got stderr: May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "Jun 02, 2024 7:14:35 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", "WARNING: No Unicode mapping for 10 (36) in font AAAABI+CambriaMath\n", - "May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "Jun 02, 2024 7:14:35 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", "WARNING: No Unicode mapping for 5 (37) in font AAAABI+CambriaMath\n", - "May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "Jun 02, 2024 7:14:35 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", "WARNING: No Unicode mapping for 7 (38) in font AAAABI+CambriaMath\n", - "May 21, 2024 11:17:45 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont 
toUnicode\n", + "Jun 02, 2024 7:14:35 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", "WARNING: No Unicode mapping for 4 (40) in font AAAABI+CambriaMath\n", - "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "Jun 02, 2024 7:14:37 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", "WARNING: No Unicode mapping for 8 (41) in font AAAABI+CambriaMath\n", - "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "Jun 02, 2024 7:14:37 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", "WARNING: No Unicode mapping for 26 (51) in font AAAABG+CambriaMath\n", - "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "Jun 02, 2024 7:14:37 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", "WARNING: No Unicode mapping for 27 (54) in font AAAABG+CambriaMath\n", - "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "Jun 02, 2024 7:14:37 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", "WARNING: No Unicode mapping for 28 (55) in font AAAABG+CambriaMath\n", - "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "Jun 02, 2024 7:14:37 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", "WARNING: No Unicode mapping for 9 (42) in font AAAABI+CambriaMath\n", - "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "Jun 02, 2024 7:14:37 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", "WARNING: No Unicode mapping for 6 (43) in font AAAABI+CambriaMath\n", - "May 21, 2024 11:17:47 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", - "WARNING: No Unicode mapping for 3 (44) in font AAAABI+CambriaMath\n", - "\n" + "Jun 02, 2024 7:14:37 PM org.apache.pdfbox.pdmodel.font.PDSimpleFont toUnicode\n", + "WARNING: No Unicode mapping for 3 (44) in font AAAABI+CambriaMath\n" ] } ], "source": [ "\n", "# read PDF file\n", - 
"tables = read_pdf(\"../../example.pdf\", pages=\"all\")" + "tables = read_pdf(\"../example.pdf\", pages=\"all\")" ] }, { @@ -158,9 +156,9 @@ "metadata": {}, "outputs": [], "source": [ - "os.makedirs('tables', exist_ok = True) \n", + "# os.makedirs('tables', exist_ok = True) \n", "for table_idx in range(len(tables)):\n", - " tables[table_idx].to_csv(f\"tables/output_{table_idx}.csv\")\n", + " tables[table_idx].to_csv(f\"../tables/output_{table_idx}.csv\")\n", " " ] } From 42ffa804076026358c01988c878425feec408bc4 Mon Sep 17 00:00:00 2001 From: haiyenvu96 Date: Wed, 12 Jun 2024 18:06:37 +0000 Subject: [PATCH 3/4] add table output examples --- tables/output_0.csv | 11 +++++++++++ tables/output_1.csv | 6 ++++++ 2 files changed, 17 insertions(+) create mode 100644 tables/output_0.csv create mode 100644 tables/output_1.csv diff --git a/tables/output_0.csv b/tables/output_0.csv new file mode 100644 index 0000000..0ae3061 --- /dev/null +++ b/tables/output_0.csv @@ -0,0 +1,11 @@ +,Constellation name Type of orbits,Number of,Number of.1 +0,,orbital planes,satellites +1,Real data,, +2,No MW sounders -,0,0 +3,Metop/JPSS baseline Sun-synchronous,2,4 (Metop-A/B; S- +4,,,"NPP, NOAA-20)" +5,Metop/JPSS+ Sun-synchronous,5,8* (Metop-A/B; S- +6,,,"NPP, NOAA-15/" +7,,,"18/19/20, F-17)" +8,"Simulated new data, added to the Metop/JPSS baseline with real data",, +9,Polar Sun-synchronous,4,8 diff --git a/tables/output_1.csv b/tables/output_1.csv new file mode 100644 index 0000000..a026e5d --- /dev/null +++ b/tables/output_1.csv @@ -0,0 +1,6 @@ +,Polar+,Sun-synchronous,7,14 +0,Polar++,Sun-synchronous,10.0,20.0 +1,4x2,Mid-inclination (60o),4.0,8.0 +2,6x2,Mid-inclination (60o),6.0,12.0 +3,Polar + 4x2,Sun-synchronous +,8.0,16.0 +4,,mid-inclination (60o),, From 5062426ecc1e31db204dffc649ebc28cce84023d Mon Sep 17 00:00:00 2001 From: haiyenvu96 Date: Tue, 6 Aug 2024 14:14:05 +0000 Subject: [PATCH 4/4] add interactive frontend --- frontend/app-interactive.py | 315 
"""Streamlit front end: chat over a Neo4j knowledge graph with a graph viewer.

The page has three columns: a chat window whose answers are grounded in
chunks retrieved from a Neo4j vector index, a list of previously saved
graph HTML files, and an interactive view of the selected graph.
"""
import ast
import logging
import os

import streamlit as st
import streamlit.components.v1 as components
from dotenv import load_dotenv
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from neo4j import GraphDatabase
from pyvis.network import Network


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


_ = load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
URI = os.getenv("NEO4J_URI")
user = os.getenv("NEO4J_USERNAME")
password = os.getenv("NEO4J_PASSWORD")

# Directory holding one saved pyvis HTML file per answered question.
GRAPH_DIR = "graphs"


def get_response(user_query, contexts, chat_history):
    """Stream the LLM answer to ``user_query``.

    Args:
        user_query: The question typed by the user.
        contexts: Retrieved context text inserted into the prompt.
        chat_history: Conversation so far; formatted into the prompt as-is.

    Returns:
        The iterator produced by ``chain.stream`` (string chunks, since the
        chain ends with ``StrOutputParser``).
    """
    template = """
    You are a helpful assistant. Answer the following questions considering the history of the conversation:

    Chat history: {chat_history}

    Contexts: {contexts}

    User question: {user_question}
    """

    prompt = ChatPromptTemplate.from_template(template)

    llm = ChatOpenAI()

    chain = prompt | llm | StrOutputParser()

    return chain.stream(
        {
            "chat_history": chat_history,
            "contexts": contexts,
            "user_question": user_query,
        }
    )


def get_answer_neo4j(driver, question, top_k=5):
    """Retrieve the chunks most similar to ``question`` from Neo4j.

    The question is embedded inside the database (``genai.vector.encode``)
    and matched against the ``chunk_content_embeddings`` vector index.

    Args:
        driver: An open Neo4j driver.
        question: The user's question.
        top_k: Number of chunks requested from the index.

    Returns:
        A tuple ``(contexts, chunkIds, score)`` where ``contexts`` is the list
        of chunk contents, ``chunkIds`` the list of chunk names, and ``score``
        the score of the last record returned, or ``None`` when the query
        returned no records.
    """
    contexts = []
    chunkIds = []
    # Initialised up front: with zero result rows the loop body never runs
    # and ``score`` would otherwise be unbound at the return statement.
    score = None

    query = """
    WITH genai.vector.encode(
        $question,
        "OpenAI",
        {
        token: $openAiApiKey
        }) AS question_embedding
    CALL db.index.vector.queryNodes(
        'chunk_content_embeddings',
        $top_k,
        question_embedding
    ) YIELD node AS chunk, score
    RETURN chunk.name, chunk.content, score
    """

    with driver.session() as session:
        result = session.run(
            query,
            {"question": question, "openAiApiKey": OPENAI_API_KEY, "top_k": top_k},
        )
        for record in result:
            name = record["chunk.name"]
            score = record["score"]
            chunkIds.append(name)
            contexts.append(record["chunk.content"])
            logger.info("Retrieved chunk %s (score=%s)", name, score)

    return contexts, chunkIds, score


def query_subgraph(driver, chunkIds):
    """Fetch the nodes named in ``chunkIds`` together with their neighbours.

    Args:
        driver: An open Neo4j driver.
        chunkIds: Node names to look up.

    Returns:
        A list of records, each with a ``node`` map (``name``, ``properties``)
        and a ``neighbors`` list of ``{neighbor, relationship}`` maps.
    """
    query = """
    WITH $chunkIds AS names
    MATCH (n)
    WHERE n.name IN names
    OPTIONAL MATCH (n)-[r]-(neighbor)
    RETURN
        {name: n.name, properties: apoc.map.fromLists(keys(n), [p in keys(n) | n[p]])} AS node,
        collect({
            neighbor: {name: neighbor.name, properties: apoc.map.fromLists(keys(neighbor), [p in keys(neighbor) | neighbor[p]])},
            relationship: {label: type(r), properties: apoc.map.fromLists(keys(r), [p in keys(r) | r[p]])}
        }) AS neighbors
    """

    # Materialise inside the session so the records outlive it.
    with driver.session() as session:
        return list(session.run(query, {"chunkIds": chunkIds}))


def process_subgraph_to_pyvis(subgraph):
    """Build a pyvis ``Network`` from the records of ``query_subgraph``.

    Queried nodes are drawn red and their neighbours blue; each node's
    tooltip (``title``) is the ``str()`` of its property map.

    Args:
        subgraph: Iterable of records with ``node`` and ``neighbors`` entries.

    Returns:
        The populated ``Network``.
    """
    records = list(subgraph)
    net = Network(height="750px", width="100%", notebook=True)

    # Add every queried node first so that a node which is also some other
    # node's neighbour is registered as red rather than blue.
    # NOTE(review): relies on pyvis ignoring a repeated add_node for an
    # existing id — confirm against the installed pyvis version.
    for record in records:
        node = record["node"]
        node_id = node["name"]
        net.add_node(
            node_id, label=node_id, title=str(node["properties"]), color="red"
        )

    for record in records:
        node_id = record["node"]["name"]

        for neighbor_info in record["neighbors"]:
            neighbor = neighbor_info["neighbor"]
            relationship = neighbor_info["relationship"]

            # The query builds a map even when OPTIONAL MATCH found nothing,
            # so the map itself is always truthy; a missing neighbour shows
            # up as a null name instead.
            if not neighbor or neighbor["name"] is None:
                continue

            neighbor_id = neighbor["name"]
            net.add_node(
                neighbor_id,
                label=neighbor_id,
                title=str(neighbor["properties"]),
                color="blue",
            )

            # Only connect when the relationship is real, and only to the
            # neighbour just added (never to a stale id from a prior pass).
            if relationship and relationship["label"] is not None:
                net.add_edge(
                    node_id,
                    neighbor_id,
                    label=relationship["label"],
                    title=str(relationship["properties"]),
                )

    return net


def _parse_node_title(title):
    """Parse a node tooltip back into a dict, or return ``None`` on failure.

    Tooltips are written as ``str(properties)``, i.e. a Python literal, so
    ``ast.literal_eval`` is sufficient and, unlike ``eval``, cannot execute
    code contained in a value sent back from the browser.
    """
    try:
        parsed = ast.literal_eval(title)
    except (ValueError, SyntaxError, TypeError):
        return None
    return parsed if isinstance(parsed, dict) else None


def _render_chat_column(driver):
    """Render the chat history and answer a newly submitted question."""
    st.subheader("Chat window")
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = [
            AIMessage(content="Hello, I am a bot. How can I help you?"),
        ]

    # Display conversation
    for message in st.session_state.chat_history:
        if isinstance(message, AIMessage):
            with st.chat_message("AI"):
                st.write(message.content)
        elif isinstance(message, HumanMessage):
            with st.chat_message("Human"):
                st.write(message.content)

    # User input
    user_query = st.chat_input("Type your message here...")
    if not user_query:
        return

    st.session_state.count += 1
    contexts, chunkIds, _score = get_answer_neo4j(driver, user_query)
    contexts_string = "\n".join(contexts)
    logger.info("Contexts: %s", contexts)

    # Save the retrieved subgraph so it appears in the graph history.
    subgraph = query_subgraph(driver, chunkIds)
    net = process_subgraph_to_pyvis(subgraph)
    html_file_path = os.path.join(GRAPH_DIR, f"graph_{st.session_state.count}.html")
    net.save_graph(html_file_path)

    st.session_state.chat_history.append(HumanMessage(content=user_query))

    with st.chat_message("Human"):
        st.markdown(user_query)

    with st.chat_message("AI"):
        response = st.write_stream(
            get_response(user_query, contexts_string, st.session_state.chat_history)
        )
    st.session_state.chat_history.append(AIMessage(content=response))


def _render_history_column():
    """List the saved graph files as buttons and remember the clicked one."""
    st.subheader("Graph History")
    if "selected_html" not in st.session_state:
        st.session_state.selected_html = None

    # Sorted so the button order is stable across reruns.
    html_files = sorted(
        name for name in os.listdir(GRAPH_DIR) if name.endswith(".html")
    )
    for file_name in html_files:
        if st.button(file_name):
            st.session_state.selected_html = file_name


def _render_graph_column():
    """Show the selected graph and the properties of the clicked node."""
    st.subheader("Graph Visualization")

    selected_html = st.session_state.get("selected_html")
    if not selected_html:
        return

    file_path = os.path.join(GRAPH_DIR, selected_html)

    # Top Row: Interactive Graph Visualization
    with st.container():
        with open(file_path, "r", encoding="utf-8") as graph_file:
            graph_content = graph_file.read()

        # Inject JavaScript for click detection
        # NOTE(review): this string is whitespace-only in the revision under
        # review; node clicks can only be reported back if the intended
        # <script> payload is present here — TODO confirm and restore.
        script = """


        """
        index_content = graph_content + script

        # The component is served from the working directory, so the page is
        # copied to ./index.html next to whatever assets it references.
        # NOTE(review): path="." presumably makes every file under the
        # working directory (e.g. .env) reachable through the component
        # endpoint — consider a dedicated directory once asset paths allow.
        index_path = "./index.html"
        with open(index_path, "w", encoding="utf-8") as index_file:
            index_file.write(index_content)

        # Create a new component which reads from ./index.html
        mycomponent = components.declare_component(
            name=os.path.basename(file_path),
            path=".",
        )
        node_info = mycomponent()

    # Bottom Row: Node Information
    with st.container():
        st.subheader("Node Information")
        if node_info is not None:
            title = node_info["title"]
            information = _parse_node_title(title)
            if information is None:
                # Not a plain literal dict: show the raw tooltip text.
                st.write(title)
            else:
                # The embedding vector is large and not useful to display.
                information.pop("contentEmbedding", None)
                st.write(information)


def main():
    """Run the Streamlit page: chat, graph history and graph viewer."""
    st.set_page_config(page_title="Study with me", page_icon=":books:", layout="wide")

    if "count" not in st.session_state:
        st.session_state.count = 0

    os.makedirs(GRAPH_DIR, exist_ok=True)

    driver = GraphDatabase.driver(URI, auth=(user, password))
    try:
        col1, col2, col3 = st.columns([3, 2, 5], gap="small")  # Adjusted column widths

        # Left Column: Chat Window
        with col1:
            _render_chat_column(driver)

        # Middle Column: List of HTML Files
        with col2:
            _render_history_column()

        # Right Column: Graph Visualization and Node Information
        with col3:
            _render_graph_column()
    finally:
        # Always release the connection, even when rendering raises.
        driver.close()


if __name__ == "__main__":
    main()