Add How to better configure your cache document (#450)
Signed-off-by: SimFG <[email protected]>
SimFG committed Jun 27, 2023
1 parent c663379 commit 38c946b
Showing 8 changed files with 571 additions and 21 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -162,7 +162,7 @@ cache.set_openai_key()
questions = [
"what's github",
"can you explain what GitHub is",
"can you tell me more about GitHub"
"can you tell me more about GitHub",
"what is the purpose of GitHub"
]

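The added comma is more than cosmetic: without it, Python concatenates the adjacent string literals, so the last two questions collapse into a single query. A minimal sketch of how the corrected list is typically consumed with the cache-enabled OpenAI adapter (the surrounding loop is an assumption; only `cache.set_openai_key()` and the list itself appear in this hunk):

```python
from gptcache.adapter import openai  # drop-in replacement for the openai client

for question in questions:
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": question}],
    )
    # repeated or similar questions should be answered from the cache
    print(response["choices"][0]["message"]["content"])
```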
6 changes: 3 additions & 3 deletions docs/bootcamp/langchain/question_answering.ipynb
@@ -113,7 +113,7 @@
"metadata": {},
"source": [
"## Prepare Data\n",
"First we [prepare the data](https://raw.githubusercontent.com/hwchase17/langchain/master/docs/modules/state_of_the_union.txt). For this example we do similarity search over a vector database, but these documents could be fetched in any manner (the point of this notebook to highlight what to do AFTER you fetch the documents). You can learn more detail about Milvus in Langchain refer to [it](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/milvus.html?highlight=milvus)."
"First we [prepare the data](https://raw.githubusercontent.com/hwchase17/langchain/master/docs/extras/modules/state_of_the_union.txt). For this example we do similarity search over a vector database, but these documents could be fetched in any manner (the point of this notebook to highlight what to do AFTER you fetch the documents). You can learn more detail about Milvus in Langchain refer to [it](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/milvus.html?highlight=milvus)."
]
},
{
@@ -386,7 +386,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
@@ -400,7 +400,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
"version": "3.8.8"
},
"vscode": {
"interpreter": {
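The updated link points at the relocated `state_of_the_union.txt`. For context, a minimal sketch of the "prepare data" step described above, loading the file, splitting it, and indexing it in Milvus through LangChain (assuming a local Milvus instance and the LangChain 0.0.x APIs this notebook targets):

```python
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Milvus

documents = TextLoader("state_of_the_union.txt").load()
docs = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(documents)

# index the chunks so the notebook can run similarity search over them
vector_db = Milvus.from_documents(
    docs,
    OpenAIEmbeddings(),  # requires OPENAI_API_KEY in the environment
    connection_args={"host": "127.0.0.1", "port": "19530"},
)
relevant_docs = vector_db.similarity_search("What did the president say about Ketanji Brown Jackson?")
```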
530 changes: 530 additions & 0 deletions docs/configure_it.md

Large diffs are not rendered by default.
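The new `docs/configure_it.md` (530 lines) is not rendered in this view, so its contents cannot be reproduced here. For orientation, a minimal sketch of the kind of configuration it covers, wiring an embedding model, scalar and vector storage, and a similarity evaluator into `cache.init` with GPTCache's standard public API (an illustration, not an excerpt from the new document):

```python
from gptcache import cache
from gptcache.embedding import Onnx
from gptcache.manager import CacheBase, VectorBase, get_data_manager
from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation

onnx = Onnx()  # local ONNX embedding model

# scalar storage in SQLite, vector index in FAISS sized to the embedding dimension
data_manager = get_data_manager(
    CacheBase("sqlite"),
    VectorBase("faiss", dimension=onnx.dimension),
)

cache.init(
    embedding_func=onnx.to_embeddings,
    data_manager=data_manager,
    similarity_evaluation=SearchDistanceEvaluation(),
)
cache.set_openai_key()
```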

1 change: 1 addition & 0 deletions docs/toc.bak
@@ -6,6 +6,7 @@

usage.md
feature.md
configure_it.md
release_note.md

.. toctree::
4 changes: 4 additions & 0 deletions gptcache/adapter/adapter.py
@@ -188,6 +188,7 @@ def post_process():
kwargs["cache_context"] = context
kwargs["cache_skip"] = cache_skip
kwargs["cache_factor"] = cache_factor
kwargs["search_only_flag"] = search_only_flag
llm_data = adapt(
llm_handler, cache_data_convert, update_cache_callback, *args, **kwargs
)
@@ -199,6 +200,9 @@ def post_process():
llm_handler, func_name="llm_request", report_func=chat_cache.report.llm
)(*args, **kwargs)

if not llm_data:
return None

if cache_enable:
try:
def update_cache_func(handled_llm_data, question=None):
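This hunk threads a `search_only_flag` through to `adapt` and makes the wrapper return `None` when no data comes back, so a lookup that misses no longer proceeds as if it had an answer. A caller-side sketch of the lookup-only pattern using the `api` helpers imported in the tests below (how `search_only_flag` itself is surfaced to callers is not shown in this hunk):

```python
from gptcache.adapter.api import init_similar_cache, put, get

init_similar_cache()  # similar cache with default components
put("what is github", "GitHub is a code hosting platform.")

answer = get("can you explain what GitHub is")  # cache lookup only, no LLM request
if answer is None:
    # nothing similar enough was cached; fall back to a real LLM call here
    pass
```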
6 changes: 3 additions & 3 deletions gptcache/manager/vector_data/manager.py
@@ -201,9 +201,9 @@ def get(name, **kwargs):
from gptcache.manager.vector_data.redis_vectorstore import RedisVectorStore
host = kwargs.get("host", "localhost")
port = kwargs.get("port", "6379")
user = kwargs.get("user")
password = kwargs.get("password")
namespace = kwargs.get("namespace")
user = kwargs.get("user", "")
password = kwargs.get("password", "")
namespace = kwargs.get("namespace", "")
dimension = kwargs.get("dimension", DIMENSION)
collection_name = kwargs.get("collection_name", COLLECTION_NAME)

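The Redis branch of the vector-store factory now defaults `user`, `password`, and `namespace` to empty strings instead of `None`, so an unauthenticated local Redis works without passing them explicitly. A minimal sketch of such a setup (parameter names are taken from this hunk; the rest of the data-manager wiring is assumed):

```python
from gptcache.manager import CacheBase, VectorBase, get_data_manager

# user/password/namespace can now be omitted for a local, unauthenticated Redis
data_manager = get_data_manager(
    CacheBase("sqlite"),
    VectorBase("redis", host="localhost", port="6379", dimension=128),
)
```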
35 changes: 27 additions & 8 deletions gptcache/processor/context/summarization_context.py
@@ -9,23 +9,33 @@

import transformers # pylint: disable=C0413


def summarize_to_length(summarizer, text, target_len, max_len=1024):
tokenizer = summarizer.tokenizer

def token_length(text):
return len(tokenizer.encode(text))

segment_len = max_len - 100
summary_result = text
while token_length(text) > target_len:
tokens = tokenizer.encode(text)
segments = [tokens[i:i+segment_len] for i in range(0, len(tokens), segment_len-1)]
segments = [
tokens[i : i + segment_len] for i in range(0, len(tokens), segment_len - 1)
]
summary_result = ""
for segment in segments:
len_seg = int(len(segment)/4)
summary = summarizer(tokenizer.decode(segment), min_length=max(len_seg-10, 1), max_length=len_seg)
len_seg = int(len(segment) / 4)
summary = summarizer(
tokenizer.decode(segment),
min_length=max(len_seg - 10, 1),
max_length=len_seg,
)
summary_result += summary[0]["summary_text"]
text = summary_result
return summary_result


class SummarizationContextProcess(ContextProcess):
"""A context processor for summarizing large amounts of text data using a summarizer model.
@@ -45,8 +55,10 @@ class SummarizationContextProcess(ContextProcess):
context_process = SummarizationContextProcess()
cache.init(pre_embedding_func=context_process.pre_process)
"""
def __init__(self, model_name="facebook/bart-large-cnn",
tokenizer=None, target_length=512):

def __init__(
self, model_name="facebook/bart-large-cnn", tokenizer=None, target_length=512
):
summarizer = transformers.pipeline(task="summarization", model=model_name)
self.summarizer = summarizer
self.target_length = target_length
@@ -64,7 +76,9 @@ def summarize_to_sentence(self, sentences, target_size=1000):
target_sentences = []
for sent, target_len in zip(sentences, target_lengths):
if len(self.tokenizer.tokenize(sent)) > target_len:
response = summarize_to_length(self.summarizer, sent, target_len, self.tokenizer.model_max_length)
response = summarize_to_length(
self.summarizer, sent, target_len, self.tokenizer.model_max_length
)
target_sentence = response
else:
target_sentence = sent
@@ -84,9 +98,14 @@ def process_all_content(self) -> (Any, Any):
def serialize_content(content):
ret = ""
for message in content:
ret += "[#RS]{}[#RE][#CS]{}[#CE]".format(message["role"], message["content"])
ret += "[#RS]{}[#RE][#CS]{}[#CE]".format(
message["role"], message["content"]
)
return ret
result = self.summarize_to_sentence([message["content"] for message in self.content], self.target_length)

result = self.summarize_to_sentence(
[message["content"] for message in self.content], self.target_length
)
save_content = serialize_content(self.content)
embedding_content = result
return save_content, embedding_content
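Beyond the reformatting, `summarize_to_length` splits the token stream into segments, summarizes each to roughly a quarter of its length, and repeats until the text fits the target. A standalone sketch of calling the helper directly, assuming the same `facebook/bart-large-cnn` pipeline the class constructs (the input file name is hypothetical):

```python
import transformers

summarizer = transformers.pipeline(task="summarization", model="facebook/bart-large-cnn")
long_text = open("transcript.txt").read()  # any text longer than the target length

short_text = summarize_to_length(
    summarizer,
    long_text,
    target_len=512,
    max_len=summarizer.tokenizer.model_max_length,
)
```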
8 changes: 2 additions & 6 deletions tests/unit_tests/adapter/test_langchain_models.py
@@ -7,7 +7,7 @@
from gptcache.adapter import openai
from gptcache.adapter.api import init_similar_cache, get
from gptcache.adapter.langchain_models import LangChainLLMs, LangChainChat, _cache_msg_data_convert
from gptcache.processor.pre import get_prompt, last_content_without_template
from gptcache.processor.pre import get_prompt, last_content_without_template, get_messages_last_content
from gptcache.utils import import_pydantic, import_langchain
from gptcache.utils.response import get_message_from_openai_answer

@@ -60,10 +60,6 @@ def test_langchain_llms():
assert expect_answer == answer


def get_msg_func(data, **_):
return data.get("messages")[-1].content


def test_langchain_chats():
question = [HumanMessage(content="test_langchain_chats")]
question2 = [HumanMessage(content="test_langchain_chats2")]
@@ -76,7 +72,7 @@ def test_langchain_chats():

llm_cache = Cache()
llm_cache.init(
pre_embedding_func=get_msg_func,
pre_embedding_func=get_messages_last_content,
)

os.environ["OPENAI_API_KEY"] = "API"
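The local `get_msg_func` helper is replaced by the library's own `get_messages_last_content` pre-processor. Presumably the two are equivalent, extracting the content of the last chat message for embedding, along the lines of this sketch (the library implementation itself is not shown in this diff):

```python
# rough equivalent of gptcache.processor.pre.get_messages_last_content,
# matching the helper removed above (actual implementation not shown here)
def get_messages_last_content(data, **_):
    return data.get("messages")[-1].content
```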
