Skip to content

Commit

Permalink
addressed reviews, using boot autoconfig
Browse files Browse the repository at this point in the history
  • Loading branch information
l-trotta committed Oct 25, 2024
1 parent 9e8766c commit 58a32fb
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 270 deletions.
18 changes: 10 additions & 8 deletions examples/rag-spring-article/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -39,27 +39,29 @@
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-bom</artifactId>
<version>1.0.0-SNAPSHOT</version>
<version>1.0.0-M3</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<!-- Spring Boot auto-configuration of the Spring AI beans -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-elasticsearch-store</artifactId>
<artifactId>spring-ai-spring-boot-autoconfigure</artifactId>
<version>1.0.0-SNAPSHOT</version>
</dependency>

<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>2.9.2</version>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-elasticsearch-store</artifactId>
<version>1.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parser-pdf-module</artifactId>
<version>2.9.2</version>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-pdf-document-reader</artifactId>
<version>1.0.0-SNAPSHOT</version>
</dependency>

<dependency>
Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -18,79 +18,34 @@
*/
package co.elastic.clients.rag.article;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.springframework.ai.chat.messages.Message;
import org.springframework.ai.chat.messages.UserMessage;
import org.springframework.ai.chat.model.ChatModel;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.chat.prompt.SystemPromptTemplate;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.ElasticsearchVectorStore;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.xml.sax.SAXException;

import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

@Service
public class RagService {

// The vector store and the chat client builder are autowired from the default configuration
private ElasticsearchVectorStore vectorStore;
private ChatModel chatModel;
private ChatClient chatClient;

@Autowired
public RagService(ElasticsearchVectorStore vectorStore, ChatModel model) {
public RagService(ElasticsearchVectorStore vectorStore, ChatClient.Builder clientBuilder) {
this.vectorStore = vectorStore;
this.chatModel = model;
this.chatClient = clientBuilder.build();
}

public void ingestPDF(String path) throws IOException, TikaException, SAXException {
// Initializing the PDF parser
// Keep in mind that AutoDetectParser is not thread safe
Parser parser = new AutoDetectParser();
// Using our custom single page handler class
PageContentHandler handler = new PageContentHandler();
public void ingestPDF(String path) {

// No need for any other specific PDF configuration
ParseContext parseContext = new ParseContext();
parseContext.set(PDFParserConfig.class, new PDFParserConfig());

// The metadata contain information such as creation date, creation tool used, etc... which we
// don't need
Metadata metadata = new Metadata();

// Reading the file
try (FileInputStream stream = new FileInputStream(path)) {
parser.parse(stream, handler, metadata, parseContext);
}

// Getting the result as a list of Strings with the content of the pages
List<String> allPages = handler.getPages();
List<Document> docbatch = new ArrayList<>();

// Converting pages to Documents
for (int i = 0; i < allPages.size(); i++) {
Map<String, Object> docMetadata = new HashMap<>();
// The page number will be used in the response
docMetadata.put("page", i + 1);

Document doc = new Document(allPages.get(i), docMetadata);
docbatch.add(doc);
}
// Spring AI utility class to read a PDF file page by page
PagePdfDocumentReader pdfReader = new PagePdfDocumentReader(path);
List<Document> docbatch = pdfReader.read();

// Sending batch of documents to vector store
// applying tokenizer
Expand All @@ -109,31 +64,31 @@ public String queryLLM(String question) {
.map(Document::getContent)
.collect(Collectors.joining(System.lineSeparator()));

// Setting the prompt
String basePrompt = """
// Setting the prompt with the context
String prompt = """
You're assisting with providing the rules of the tabletop game Runewars.
Use the information from the DOCUMENTS section to provide accurate answers.
Use the information from the DOCUMENTS section to provide accurate answers to the
question in the QUESTION section.
If unsure, simply state that you don't know.
DOCUMENTS:
{documents}
""";

// Preparing the question for the LLM
SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate(basePrompt);
Message systemMessage = systemPromptTemplate.createMessage(Map.of("documents", documents));
""" + documents
+ """
QUESTION:
""" + question;

UserMessage userMessage = new UserMessage(question);

Prompt prompt = new Prompt(List.of(systemMessage, userMessage));
// Calling the chat model with the question
ChatResponse response = chatModel.call(prompt);
String response = chatClient.prompt()
.user(prompt)
.call()
.content();

return response.getResult().getOutput().getContent() +
return response +
System.lineSeparator() +
"Found at page: " +
// Retrieving the page number of the top-ranked document from its metadata
vectorStoreResult.get(0).getMetadata().get("page") +
vectorStoreResult.get(0).getMetadata().get(PagePdfDocumentReader.METADATA_START_PAGE_NUMBER) +
" of the manual";
}
}
Original file line number Diff line number Diff line change
@@ -1 +1,9 @@
spring.application.name=rag

spring.ai.openai.api-key=${OPENAI_API_KEY}
spring.ai.chat.client.enabled=true

spring.elasticsearch.uris=${ES_SERVER_URL}
spring.elasticsearch.username=${ES_USERNAME}
spring.elasticsearch.password=${ES_PASSWORD}
spring.ai.vectorstore.elasticsearch.initialize-schema=true

0 comments on commit 58a32fb

Please sign in to comment.