From ade1520543c0c8f7b68d8ae05bff53a2e922993d Mon Sep 17 00:00:00 2001 From: Andreas Date: Mon, 19 Aug 2024 13:01:39 +0300 Subject: [PATCH] feat: add Sources Page (#144) * feat: add Sources page Create new Sources page to display data source information from Elasticsearch * Add expandable document view for each source Implement a new feature that allows users to quickly view documents associated with each source directly from the sources table, allowing for better understanding of the current scraped content for each data source. * Add document content viewer to Sources page Implement a modal that displays full document details when clicking the view icon in the Sources table. This allows to inspect all fields of individual documents fetched from Elasticsearch, providing deeper insights into the indexed content. --- src/components/sources/DocumentModal.tsx | 126 ++++++++++++++ src/components/sources/Documents.tsx | 146 +++++++++++++++++ src/hooks/useDocumentContent.ts | 39 +++++ src/hooks/useSourceDocuments.ts | 47 ++++++ src/hooks/useSources.ts | 45 +++++ .../elasticSearchProxy/getDocumentContent.ts | 55 +++++++ .../api/elasticSearchProxy/sourceDocuments.ts | 63 +++++++ src/pages/api/elasticSearchProxy/sources.ts | 71 ++++++++ src/pages/sources.tsx | 154 ++++++++++++++++++ src/types.ts | 8 + src/utils/dateUtils.ts | 18 ++ 11 files changed, 772 insertions(+) create mode 100644 src/components/sources/DocumentModal.tsx create mode 100644 src/components/sources/Documents.tsx create mode 100644 src/hooks/useDocumentContent.ts create mode 100644 src/hooks/useSourceDocuments.ts create mode 100644 src/hooks/useSources.ts create mode 100644 src/pages/api/elasticSearchProxy/getDocumentContent.ts create mode 100644 src/pages/api/elasticSearchProxy/sourceDocuments.ts create mode 100644 src/pages/api/elasticSearchProxy/sources.ts create mode 100644 src/pages/sources.tsx create mode 100644 src/utils/dateUtils.ts diff --git a/src/components/sources/DocumentModal.tsx b/src/components/sources/DocumentModal.tsx new file mode 100644 index 0000000..849cab8 --- /dev/null +++ b/src/components/sources/DocumentModal.tsx @@ -0,0 +1,126 @@ +import React from "react"; +import { + Modal, + ModalOverlay, + ModalContent, + ModalHeader, + ModalBody, + ModalCloseButton, + Text, + Link, + UnorderedList, + ListItem, + Box, + VStack, +} from "@chakra-ui/react"; + +interface DocumentModalProps { + isOpen: boolean; + onClose: () => void; + document: Record | null; + isLoading: boolean; + isError: boolean; + error?: string; +} + +const formatValue = (value: any): string => { + if (typeof value === "string") { + return value; + } else if (typeof value === "number" || typeof value === "boolean") { + return value.toString(); + } else if (value instanceof Date) { + return value.toISOString(); + } + return ""; +}; + +const RenderField = ({ name, value }: { name: string; value: any }) => { + if (Array.isArray(value)) { + return ( + + {name}: + + {value.map((item, index) => ( + + {typeof item === "object" ? ( + + ) : ( + formatValue(item) + )} + + ))} + + + ); + } else if (typeof value === "object" && value !== null) { + return ( + + {name}: + + + + + ); + } else { + return ( + + + {name}: + {" "} + {formatValue(value)} + + ); + } +}; + +const RenderObject = ({ object }: { object: Record }) => { + return ( + + {Object.entries(object).map(([key, value]) => ( + + ))} + + ); +}; + +const DocumentModal: React.FC = ({ + isOpen, + onClose, + document, + isLoading, + isError, + error, +}) => { + return ( + + + + {document?.title || "Document Details"} + + + {isLoading && Loading document content...} + {isError && ( + Error loading document: {error} + )} + {!isLoading && !isError && document && ( + + {document.url && ( + + {document.url} + + )} + + + )} + + + + ); +}; + +export default DocumentModal; diff --git a/src/components/sources/Documents.tsx b/src/components/sources/Documents.tsx new file mode 100644 index 0000000..948bb20 --- /dev/null +++ b/src/components/sources/Documents.tsx @@ -0,0 +1,146 @@ +import React, { useState } from "react"; +import { FaEye } from "react-icons/fa"; + +import { useSourceDocuments } from "@/hooks/useSourceDocuments"; +import { useDocumentContent } from "@/hooks/useDocumentContent"; +import { formatTimeAgo } from "@/utils/dateUtils"; +import DocumentModal from "./DocumentModal"; + +interface SourceDocumentsProps { + domain: string; +} + +const trimUrl = (url: string, domain: string): string => { + const domainPattern = new RegExp( + `^(https?:\/\/)?(www\.)?${domain.replace(".", ".")}/?`, + "i" + ); + const trimmed = url.replace(domainPattern, ""); + return trimmed.startsWith("/") ? trimmed : `/${trimmed}`; +}; + +const Documents: React.FC = ({ domain }) => { + const [page, setPage] = useState(1); + const { sourceDocuments, total, isLoading, isError, error } = + useSourceDocuments(domain, page); + const [selectedDocumentUrl, setSelectedDocumentUrl] = useState( + null + ); + const { + documentContent, + isLoading: isContentLoading, + isError: isContentError, + error: contentError, + } = useDocumentContent(selectedDocumentUrl); + + if (isLoading) return
Loading source documents...
; + if (isError) + return
Error loading source documents: {error.message}
; + + const totalPages = Math.ceil(total / 10); + + const handleViewDocument = (url: string) => { + setSelectedDocumentUrl(url); + }; + + return ( +
+

Documents for {domain}

+
+ + + + + + + + + + + {sourceDocuments?.map((doc, index) => ( + + + + + + + ))} + +
+ Title + + URL + + Indexed At +
+
+ {doc.title} +
+
+ + +
+ {formatTimeAgo(doc.indexed_at)} + + {new Date(doc.indexed_at).toLocaleString()} + +
+
+ +
+
+
+ + + Page {page} of {totalPages} + + +
+ setSelectedDocumentUrl(null)} + document={documentContent} + isLoading={isContentLoading} + isError={isContentError} + error={contentError?.message} + /> +
+ ); +}; + +export default Documents; diff --git a/src/hooks/useDocumentContent.ts b/src/hooks/useDocumentContent.ts new file mode 100644 index 0000000..4d0be53 --- /dev/null +++ b/src/hooks/useDocumentContent.ts @@ -0,0 +1,39 @@ +import { useQuery } from "@tanstack/react-query"; + +interface DocumentContent { + title: string; + url: string; + content: string; + indexed_at: string; +} + +const fetchDocumentContent = async (url: string): Promise => { + const response = await fetch("/api/elasticSearchProxy/getDocumentContent", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ url }), + }); + const data = await response.json(); + if (!data.success) throw new Error(data.message); + return data.data; +}; + +export const useDocumentContent = (url: string) => { + const { data, isLoading, isError, error } = useQuery({ + queryKey: ["documentContent", url], + queryFn: () => fetchDocumentContent(url), + enabled: !!url, + cacheTime: Infinity, + staleTime: Infinity, + refetchOnWindowFocus: false, + }); + + return { + documentContent: data, + isLoading, + isError, + error, + }; +}; diff --git a/src/hooks/useSourceDocuments.ts b/src/hooks/useSourceDocuments.ts new file mode 100644 index 0000000..0500a95 --- /dev/null +++ b/src/hooks/useSourceDocuments.ts @@ -0,0 +1,47 @@ +import { useQuery } from "@tanstack/react-query"; + +interface Document { + title: string; + url: string; + indexed_at: string; +} + +interface SourceDocumentsResponse { + documents: Document[]; + total: number; +} + +const fetchSourceDocuments = async ( + domain: string, + page: number +): Promise => { + const response = await fetch("/api/elasticSearchProxy/sourceDocuments", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ domain, page }), + }); + const data = await response.json(); + if (!data.success) throw new Error(data.message); + return data.data; +}; + +export const useSourceDocuments = (domain: string, page: number) => { + const { data, isLoading, isError, error } = useQuery< + SourceDocumentsResponse, + Error + >({ + queryKey: ["sourceDocuments", domain, page], + queryFn: () => fetchSourceDocuments(domain, page), + cacheTime: Infinity, + staleTime: Infinity, + refetchOnWindowFocus: false, + }); + + return { + sourceDocuments: data?.documents, + total: data?.total, + isLoading, + isError, + error, + }; +}; diff --git a/src/hooks/useSources.ts b/src/hooks/useSources.ts new file mode 100644 index 0000000..47a7ef5 --- /dev/null +++ b/src/hooks/useSources.ts @@ -0,0 +1,45 @@ +import { useQuery } from "@tanstack/react-query"; +import { EsSourcesResponse } from "@/types"; + +type FetchSources = (url?: string) => Promise; + +const fetchSources: FetchSources = async (url) => { + return fetch(url ?? "/api/elasticSearchProxy/sources", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({}), + }) + .then(async (res) => { + const data = await res.json(); + if (!data.success) { + const errMessage = data.message || "Error while fetching sources"; + throw new Error(errMessage); + } + return data.data?.result; + }) + .catch((err) => { + throw new Error(err.message ?? "Error fetching sources"); + }); +}; + +export const useSources = () => { + const { data, isLoading, isError, error } = useQuery< + EsSourcesResponse, + Error + >({ + queryKey: ["sources"], + queryFn: () => fetchSources(), + cacheTime: Infinity, + staleTime: Infinity, + refetchOnWindowFocus: false, + }); + + return { + sources: data, + isLoading, + isError, + error, + }; +}; diff --git a/src/pages/api/elasticSearchProxy/getDocumentContent.ts b/src/pages/api/elasticSearchProxy/getDocumentContent.ts new file mode 100644 index 0000000..4e63c79 --- /dev/null +++ b/src/pages/api/elasticSearchProxy/getDocumentContent.ts @@ -0,0 +1,55 @@ +import type { NextApiRequest, NextApiResponse } from "next"; +import { client } from "@/config/elasticsearch"; + +export default async function handler( + req: NextApiRequest, + res: NextApiResponse +) { + if (req.method !== "POST") { + return res.status(405).json({ + error: + "Invalid request method. This endpoint only supports POST requests.", + }); + } + + const { url } = req.body; + + if (!url) { + return res.status(400).json({ + error: "URL is required", + }); + } + + try { + const result = await client.search({ + index: process.env.INDEX, + body: { + query: { + term: { "url.keyword": url }, + }, + size: 1, + }, + }); + + if (result.hits.hits.length === 0) { + return res.status(404).json({ + success: false, + message: "Document not found", + }); + } + + const document = result.hits.hits[0]._source; + + return res.status(200).json({ + success: true, + data: document, + }); + } catch (error) { + console.error(error); + return res.status(400).json({ + success: false, + message: + error.message || "An error occurred while fetching document content", + }); + } +} diff --git a/src/pages/api/elasticSearchProxy/sourceDocuments.ts b/src/pages/api/elasticSearchProxy/sourceDocuments.ts new file mode 100644 index 0000000..3590b59 --- /dev/null +++ b/src/pages/api/elasticSearchProxy/sourceDocuments.ts @@ -0,0 +1,63 @@ +import type { NextApiRequest, NextApiResponse } from "next"; +import { client } from "@/config/elasticsearch"; + +export default async function handler( + req: NextApiRequest, + res: NextApiResponse +) { + if (req.method !== "POST") { + return res.status(405).json({ + error: + "Invalid request method. This endpoint only supports POST requests.", + }); + } + + const { domain, page = 1 } = req.body; + + if (!domain) { + return res.status(400).json({ + error: "Domain is required", + }); + } + + const size = 10; + const from = (page - 1) * size; + + try { + const result = await client.search({ + index: process.env.INDEX, + body: { + from, + size, + query: { + term: { "domain.keyword": domain }, + }, + _source: ["title", "url", "indexed_at"], + sort: [{ indexed_at: "desc" }], + }, + }); + + const documents = result.hits.hits.map((hit) => hit._source); + + // Handle both possible types of total + const total = + typeof result.hits.total === "number" + ? result.hits.total + : result.hits.total.value; + + return res.status(200).json({ + success: true, + data: { + documents, + total, + }, + }); + } catch (error) { + console.error(error); + return res.status(400).json({ + success: false, + message: + error.message || "An error occurred while fetching document details", + }); + } +} diff --git a/src/pages/api/elasticSearchProxy/sources.ts b/src/pages/api/elasticSearchProxy/sources.ts new file mode 100644 index 0000000..8c5ff7b --- /dev/null +++ b/src/pages/api/elasticSearchProxy/sources.ts @@ -0,0 +1,71 @@ +import type { NextApiRequest, NextApiResponse } from "next"; +import { client } from "@/config/elasticsearch"; + +interface DomainAggregationBucket { + key: string; + doc_count: number; + last_indexed: { + value: number; + }; +} + +export default async function handler( + req: NextApiRequest, + res: NextApiResponse +) { + if (req.method !== "POST") { + return res.status(405).json({ + error: + "Invalid request method. This endpoint only supports POST requests.", + }); + } + + try { + const result = await client.search({ + index: process.env.INDEX, + body: { + size: 0, + aggs: { + domains: { + terms: { + field: "domain.keyword", + size: 1000, // Adjust based on the expected number of unique domains + }, + aggs: { + last_indexed: { + max: { + field: "indexed_at", + }, + }, + }, + }, + }, + }, + }); + + const domainBuckets = ( + result.aggregations?.domains as { + buckets: DomainAggregationBucket[]; + } + ).buckets; + + const sources = domainBuckets.map((bucket) => ({ + domain: bucket.key, + documentCount: bucket.doc_count, + lastScraped: bucket.last_indexed.value || null, + })); + + return res.status(200).json({ + success: true, + data: { + result: sources, + }, + }); + } catch (error) { + console.error(error); + return res.status(400).json({ + success: false, + message: error.message || "An error occurred while fetching sources data", + }); + } +} diff --git a/src/pages/sources.tsx b/src/pages/sources.tsx new file mode 100644 index 0000000..1861d4c --- /dev/null +++ b/src/pages/sources.tsx @@ -0,0 +1,154 @@ +import React, { useState, useMemo } from "react"; + +import NavBar from "@/components/navBar/NavBar"; +import Footer from "@/components/footer/Footer"; +import Documents from "@/components/sources/Documents"; +import { useSources } from "@/hooks/useSources"; +import { formatTimeAgo } from "@/utils/dateUtils"; +import { Source } from "@/types"; + +const SourcesPage: React.FC = () => { + const { sources, isLoading, isError, error } = useSources(); + const [sortConfig, setSortConfig] = useState<{ + key: keyof Source; + direction: "ascending" | "descending"; + } | null>(null); + const [expandedSource, setExpandedSource] = useState(null); + + const toggleExpand = (domain: string) => { + setExpandedSource(expandedSource === domain ? null : domain); + }; + + const sortedSources = useMemo(() => { + if (!sources) return []; + const sortableItems = [...sources]; + if (sortConfig !== null) { + sortableItems.sort((a, b) => { + if (a[sortConfig.key] < b[sortConfig.key]) { + return sortConfig.direction === "ascending" ? -1 : 1; + } + if (a[sortConfig.key] > b[sortConfig.key]) { + return sortConfig.direction === "ascending" ? 1 : -1; + } + return 0; + }); + } + return sortableItems; + }, [sources, sortConfig]); + + const sortBy = (key: keyof Source) => { + let direction: "ascending" | "descending" = "ascending"; + if ( + sortConfig && + sortConfig.key === key && + sortConfig.direction === "ascending" + ) { + direction = "descending"; + } + setSortConfig({ key, direction }); + }; + + const getSortIndicator = (key: keyof Source) => { + if (sortConfig && sortConfig.key === key) { + return sortConfig.direction === "ascending" ? " ▲" : " ▼"; + } + return ""; + }; + + return ( +
+ +
+

Data Sources

+ {isLoading ? ( +
Loading...
+ ) : isError ? ( +
Error: {error.message}
+ ) : ( +
+ + + + + + + + + + + {sortedSources.map((source, index) => ( + + toggleExpand(source.domain)} + > + + + + + + {expandedSource === source.domain && ( + + + + )} + + ))} + +
sortBy("domain")} + > + Domain{getSortIndicator("domain")} + sortBy("lastScraped")} + > + Last Scraped{getSortIndicator("lastScraped")} + sortBy("documentCount")} + > + Document Count{getSortIndicator("documentCount")} +
+ + {expandedSource === source.domain ? "▼" : "▶"} + + + e.stopPropagation()} + > + {source.domain} + + +
+ {formatTimeAgo(source.lastScraped)} + + {new Date(source.lastScraped).toLocaleString()} + +
+
+ {source.documentCount} +
+ +
+
+ )} +
+
+
+ ); +}; + +export default SourcesPage; diff --git a/src/types.ts b/src/types.ts index 943af5f..83496af 100644 --- a/src/types.ts +++ b/src/types.ts @@ -60,3 +60,11 @@ export type EsSearchResponse = SearchResponse< unknown, Record >; + +export interface Source { + domain: string; + lastScraped: string; + documentCount: number; +} + +export type EsSourcesResponse = Source[]; diff --git a/src/utils/dateUtils.ts b/src/utils/dateUtils.ts new file mode 100644 index 0000000..4aa5b0e --- /dev/null +++ b/src/utils/dateUtils.ts @@ -0,0 +1,18 @@ +export const formatTimeAgo = (date: string | number) => { + const now = new Date(); + const past = new Date(date); + const diffTime = Math.abs(now.getTime() - past.getTime()); + const diffHours = Math.floor(diffTime / (1000 * 60 * 60)); + const diffDays = Math.floor(diffTime / (1000 * 60 * 60 * 24)); + + if (diffHours < 24) { + if (diffHours === 0) return "Less than an hour ago"; + if (diffHours === 1) return "1 hour ago"; + return `${diffHours} hours ago`; + } + if (diffDays === 1) return "Yesterday"; + if (diffDays < 7) return `${diffDays} days ago`; + if (diffDays < 30) return `${Math.floor(diffDays / 7)} weeks ago`; + if (diffDays < 365) return `${Math.floor(diffDays / 30)} months ago`; + return `${Math.floor(diffDays / 365)} years ago`; +};