addressed reviews, using boot autoconfig

elastic · Oct 25, 2024 · 58a32fb · 58a32fb
1 parent 9e8766c
commit 58a32fb
Show file tree

Hide file tree

Showing 5 changed files with 42 additions and 270 deletions.
diff --git a/examples/rag-spring-article/pom.xml b/examples/rag-spring-article/pom.xml
@@ -39,27 +39,29 @@
       <dependency>
         <groupId>org.springframework.ai</groupId>
         <artifactId>spring-ai-bom</artifactId>
-        <version>1.0.0-SNAPSHOT</version>
+        <version>1.0.0-M3</version>
         <type>pom</type>
         <scope>import</scope>
       </dependency>
     </dependencies>
   </dependencyManagement>
   <dependencies>
+<!--    Spring Boot autoconfiguration of the Spring AI beans -->
     <dependency>
       <groupId>org.springframework.ai</groupId>
-      <artifactId>spring-ai-elasticsearch-store</artifactId>
+      <artifactId>spring-ai-spring-boot-autoconfigure</artifactId>
       <version>1.0.0-SNAPSHOT</version>
     </dependency>
+
     <dependency>
-      <groupId>org.apache.tika</groupId>
-      <artifactId>tika-core</artifactId>
-      <version>2.9.2</version>
+      <groupId>org.springframework.ai</groupId>
+      <artifactId>spring-ai-elasticsearch-store</artifactId>
+      <version>1.0.0-SNAPSHOT</version>
     </dependency>
     <dependency>
-      <groupId>org.apache.tika</groupId>
-      <artifactId>tika-parser-pdf-module</artifactId>
-      <version>2.9.2</version>
+      <groupId>org.springframework.ai</groupId>
+      <artifactId>spring-ai-pdf-document-reader</artifactId>
+      <version>1.0.0-SNAPSHOT</version>
     </dependency>
 
     <dependency>

diff --git a/examples/rag-spring-article/src/main/java/co/elastic/clients/rag/article/Config.java b/examples/rag-spring-article/src/main/java/co/elastic/clients/rag/article/Config.java
diff --git a/...s/rag-spring-article/src/main/java/co/elastic/clients/rag/article/PageContentHandler.java b/...s/rag-spring-article/src/main/java/co/elastic/clients/rag/article/PageContentHandler.java
diff --git a/examples/rag-spring-article/src/main/java/co/elastic/clients/rag/article/RagService.java b/examples/rag-spring-article/src/main/java/co/elastic/clients/rag/article/RagService.java
@@ -18,79 +18,34 @@
  */
 package co.elastic.clients.rag.article;
 
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.pdf.PDFParserConfig;
-import org.springframework.ai.chat.messages.Message;
-import org.springframework.ai.chat.messages.UserMessage;
-import org.springframework.ai.chat.model.ChatModel;
-import org.springframework.ai.chat.model.ChatResponse;
-import org.springframework.ai.chat.prompt.Prompt;
-import org.springframework.ai.chat.prompt.SystemPromptTemplate;
+import org.springframework.ai.chat.client.ChatClient;
 import org.springframework.ai.document.Document;
+import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
 import org.springframework.ai.transformer.splitter.TokenTextSplitter;
 import org.springframework.ai.vectorstore.ElasticsearchVectorStore;
 import org.springframework.ai.vectorstore.SearchRequest;
-import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
-import org.xml.sax.SAXException;
 
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.stream.Collectors;
 
 @Service
 public class RagService {
 
+    // Vector store and ChatClient.Builder are injected by Spring Boot autoconfiguration
     private ElasticsearchVectorStore vectorStore;
-    private ChatModel chatModel;
+    private ChatClient chatClient;
 
-    @Autowired
-    public RagService(ElasticsearchVectorStore vectorStore, ChatModel model) {
+    public RagService(ElasticsearchVectorStore vectorStore, ChatClient.Builder clientBuilder) {
         this.vectorStore = vectorStore;
-        this.chatModel = model;
+        this.chatClient = clientBuilder.build();
     }
 
-    public void ingestPDF(String path) throws IOException, TikaException, SAXException {
-        // Initializing the PDF parser
-        // Keep in mind that AutoDetectParser is not thread safe
-        Parser parser = new AutoDetectParser();
-        // Using our custom single page handler class
-        PageContentHandler handler = new PageContentHandler();
+    public void ingestPDF(String path) {
 
-        // No need for any other specific PDF configuration
-        ParseContext parseContext = new ParseContext();
-        parseContext.set(PDFParserConfig.class, new PDFParserConfig());
-
-        // The metadata contain information such as creation date, creation tool used, etc... which we
-        // don't need
-        Metadata metadata = new Metadata();
-
-        // Reading the file
-        try (FileInputStream stream = new FileInputStream(path)) {
-            parser.parse(stream, handler, metadata, parseContext);
-        }
-
-        // Getting the result as a list of Strings with the content of the pages
-        List<String> allPages = handler.getPages();
-        List<Document> docbatch = new ArrayList<>();
-
-        // Converting pages to Documents
-        for (int i = 0; i < allPages.size(); i++) {
-            Map<String, Object> docMetadata = new HashMap<>();
-            // The page number will be used in the response
-            docMetadata.put("page", i + 1);
-
-            Document doc = new Document(allPages.get(i), docMetadata);
-            docbatch.add(doc);
-        }
+        // Spring AI utility class to read a PDF file page by page
+        PagePdfDocumentReader pdfReader = new PagePdfDocumentReader(path);
+        List<Document> docbatch = pdfReader.read();
 
         // Sending batch of documents to vector store
         // applying tokenizer
@@ -109,31 +64,31 @@ public String queryLLM(String question) {
             .map(Document::getContent)
             .collect(Collectors.joining(System.lineSeparator()));
 
-        // Setting the prompt
-        String basePrompt = """
+        // Setting the prompt with the context
+        String prompt = """
             You're assisting with providing the rules of the tabletop game Runewars.
-            Use the information from the DOCUMENTS section to provide accurate answers.
+            Use the information from the DOCUMENTS section to provide accurate answers to the
+            question in the QUESTION section. 
             If unsure, simply state that you don't know.
             
             DOCUMENTS:
-            {documents}
-            """;
-
-        // Preparing the question for the LLM
-        SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate(basePrompt);
-        Message systemMessage = systemPromptTemplate.createMessage(Map.of("documents", documents));
+            """ + documents
+            + """
+            QUESTION:
+            """ + question;
 
-        UserMessage userMessage = new UserMessage(question);
 
-        Prompt prompt = new Prompt(List.of(systemMessage, userMessage));
         // Calling the chat model with the question
-        ChatResponse response = chatModel.call(prompt);
+        String response = chatClient.prompt()
+            .user(prompt)
+            .call()
+            .content();
 
-        return response.getResult().getOutput().getContent() +
+        return response +
             System.lineSeparator() +
             "Found at page: " +
             // Retrieving the first ranked page number from the document metadata
-            vectorStoreResult.get(0).getMetadata().get("page") +
+            vectorStoreResult.get(0).getMetadata().get(PagePdfDocumentReader.METADATA_START_PAGE_NUMBER) +
             " of the manual";
     }
 }
diff --git a/examples/rag-spring-article/src/main/resources/application.properties b/examples/rag-spring-article/src/main/resources/application.properties
@@ -1 +1,9 @@
 spring.application.name=rag
+
+spring.ai.openai.api-key=${OPENAI_API_KEY}
+spring.ai.chat.client.enabled=true
+
+spring.elasticsearch.uris=${ES_SERVER_URL}
+spring.elasticsearch.username=${ES_USERNAME}
+spring.elasticsearch.password=${ES_PASSWORD}
+spring.ai.vectorstore.elasticsearch.initialize-schema=true