From 8e1d6b675625d19c424fbaaf9357a07d009b4db4 Mon Sep 17 00:00:00 2001 From: Abdusshh Date: Tue, 5 Nov 2024 19:58:10 +0300 Subject: [PATCH] Update README --- README.md | 421 ++++++++---------------------------------------------- 1 file changed, 58 insertions(+), 363 deletions(-) diff --git a/README.md b/README.md index 6d33e5a..231a8a1 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ # Upstash Vector Python SDK -The Upstash Vector Python client > [!NOTE] > **This project is in GA Stage.** @@ -7,6 +6,19 @@ The Upstash Vector Python client > The Upstash Professional Support fully covers this project. It receives regular updates, and bug fixes. > The Upstash team is committed to maintaining and improving its functionality. +## What is Upstash Vector? + +Upstash Vector is a serverless vector database designed for managing and querying vector embeddings, ideal for AI and personalized data applications. It handles numeric representations of various objects (images, text, etc.) in multi-dimensional space, enabling similarity-based querying for tailored insights. + +### Core Features + +- **Serverless Architecture**: Operates without infrastructure management, with cost-effective billing based on the number of API calls. +- **High-Performance Queries**: Utilizes DiskANN for fast, high-recall queries, outperforming traditional exhaustive search methods. +- **Similarity Functions**: Supports Euclidean distance, Cosine similarity, and Dot Product for flexible similarity search. +- **Metadata Support and Filtering**: Attach metadata to vectors for added context and apply metadata-based filters to refine search results. + +For more details, see the [Upstash Vector documentation](https://upstash.com/docs/vector/overall/getstarted). 
+ ## Installation Install a released version from pip: @@ -14,411 +26,94 @@ Install a released version from pip: pip3 install upstash-vector ``` -## Usage -In order to use this client, head out to [Upstash Console](https://console.upstash.com) and create a vector database. -There, get the `UPSTASH_VECTOR_REST_URL` and the `UPSTASH_VECTOR_REST_TOKEN` from the dashboard. +## Quick Start + +1. **Create a Vector Database**: Head to the [Upstash Console](https://console.upstash.com) to create a vector database and obtain your `UPSTASH_VECTOR_REST_URL` and `UPSTASH_VECTOR_REST_TOKEN`. + +2. **Initialize the Index**: -### Initializing the Index ```python from upstash_vector import Index -index = Index(url=UPSTASH_VECTOR_REST_URL, token=UPSTASH_VECTOR_REST_TOKEN) +index = Index(url="your_rest_url", token="your_rest_token") ``` -or alternatively, initialize from the environment variables +Or use environment variables: ```bash -export UPSTASH_VECTOR_REST_URL [URL] -export UPSTASH_VECTOR_REST_TOKEN [TOKEN] +export UPSTASH_VECTOR_REST_URL=[URL] +export UPSTASH_VECTOR_REST_TOKEN=[TOKEN] ``` ```python from upstash_vector import Index - index = Index.from_env() ``` -### Upsert Vectors - -Vectors can be upserted(inserted or updated) into a namespace of an index -to be later queried or fetched. - -There are a couple of ways of doing upserts: +3. **Upsert and Query Vectors**: Insert vectors into namespaces and perform similarity queries to retrieve relevant data. 
```python -# as tuples, either of the form: -# - (id, vector, metadata, data) -# - (id, vector, metadata) -# - (id, vector) - index.upsert( vectors=[ + # Upserting with tuple format ("id1", [0.1, 0.2], {"metadata_field": "metadata_value"}, "data-value"), - ("id2", [0.2, 0.2], {"metadata_field": "metadata_value"}), - ("id3", [0.3, 0.4]), - ] -) -``` - -```python -# as dicts, either of the form: -# - {"id": id, "vector": vector, "metadata": metadata, "data": data) -# - {"id": id, "vector": vector, "metadata": metadata) -# - {"id": id, "vector": vector, "data": data) -# - {"id": id, "vector": vector} - -index.upsert( - vectors=[ - {"id": "id4", "vector": [0.1, 0.2], "metadata": {"field": "value"}, "data": "value"}, - {"id": "id5", "vector": [0.1, 0.2], "metadata": {"field": "value"}}, - {"id": "id6", "vector": [0.1, 0.2], "data": "value"}, - {"id": "id7", "vector": [0.5, 0.6]}, - ] -) -``` - -```python -from upstash_vector import Vector - -# as Vector objects - -index.upsert( - vectors=[ - Vector(id="id5", vector=[1, 2], metadata={"field": "value"}), - Vector(id="id6", vector=[1, 2], data="value"), - Vector(id="id7", vector=[6, 7]), - ] -) -``` - -If the index is created with an embedding model, raw string data can be upserted. -In this case, the `data` field of the vector will also be set to the `data` passed -below, so that it can be accessed later. 
- -```python -from upstash_vector import Data - -res = index.upsert( - vectors=[ - Data(id="id5", data="Goodbye World", metadata={"field": "value"}), - Data(id="id6", data="Hello World"), + ("id2", [0.2, 0.3], {"metadata_field": "another_value"}), # Without data + ("id3", [0.3, 0.4]), # Without metadata and data + + # Upserting with dictionary format + {"id": "id4", "vector": [0.4, 0.5], "metadata": {"field": "value"}, "data": "data-value"}, + {"id": "id5", "vector": [0.5, 0.6], "metadata": {"field": "another_value"}}, # Without data + {"id": "id6", "vector": [0.6, 0.7]}, # Without metadata and data + + # Upserting with Vector objects (requires `from upstash_vector import Vector`) + Vector(id="id7", vector=[0.7, 0.8], metadata={"field": "value"}, data="text-data"), + Vector(id="id8", vector=[0.8, 0.9], metadata={"field": "another_value"}), + Vector(id="id9", vector=[0.9, 1.0]) ] ) -``` - -Also, a namespace can be specified to upsert vectors into it. -When no namespace is provided, the default namespace is used. - -```python -index.upsert( - vectors=[ - ("id1", [0.1, 0.2]), - ("id2", [0.3,0.4]), - ], - namespace="ns", -) -``` - -### Query Vectors - -Some number of vectors that are approximately most similar to a given -query vector can be requested from a namespace of an index. - -```python -res = index.query( - vector=[0.6, 0.9], - top_k=5, - include_vectors=False, - include_metadata=True, - include_data=True, - filter="metadata_f = 'metadata_v'" -) - -# List of query results, sorted in the descending order of similarity -for r in res: - print( - r.id, # The id used while upserting the vector - r.score, # The similarity score of this vector to the query vector. Higher is more similar. - r.vector, # The value of the vector, if requested. - r.metadata, # The metadata of the vector, if requested and present. - r.data, # The data of the vector, if requested and present. - ) -``` - -If the index is created with an embedding model, raw string data can be queried. 
- -```python -res = index.query( - data="hello", - top_k=5, - include_vectors=False, - include_metadata=True, - include_data=True, -) -``` - -When a filter is provided, query results are further narrowed down based -on the vectors whose metadata matches with it. - -See [Metadata Filtering](https://upstash.com/docs/vector/features/filtering) documentation -for more information regarding the filter syntax. -Also, a namespace can be specified to query from. -When no namespace is provided, the default namespace is used. - -```python +# Query vectors that are most similar to a specified query vector res = index.query( - vector=[0.6, 0.9], - top_k=5, - namespace="ns", -) -``` - -### Fetch Vectors - -A set of vectors can be fetched from a namespace of an index. - -```python -res = index.fetch( - ids=["id3", "id4"], - include_vectors=False, - include_metadata=True, - include_data=True, + vector=[0.6, 0.9], # Query vector + top_k=5, # Number of closest matches to return + include_vectors=False, # Whether to include actual vector data in results + include_metadata=True, # Whether to include metadata in results + include_data=True, # Whether to include data in results + filter="metadata_field = 'metadata_value'" # Optional filter based on metadata ) -# List of fetch results, one for each id passed +# Iterate over and print the query results for r in res: - if not r: # Can be None, if there is no such vector with the given id - continue - - print( - r.id, # The id used while upserting the vector - r.vector, # The value of the vector, if requested. - r.metadata, # The metadata of the vector, if requested and present. - r.data, # The metadata of the vector, if requested and present. 
- ) -``` - -or, for singular fetch: - -```python -res = index.fetch( - "id1", - include_vectors=True, - include_metadata=True, - include_data=False, -) - -r = res[0] -if r: # Can be None, if there is no such vector with the given id - print( - r.id, # The id used while upserting the vector - r.vector, # The value of the vector, if requested. - r.metadata, # The metadata of the vector, if requested and present. - r.data, # The metadata of the vector, if requested and present. - ) -``` - -Also, a namespace can be specified to fetch from. -When no namespace is provided, the default namespace is used. - -```python -res = index.fetch( - ids=["id3", "id4"], - namespace="ns", -) -``` - -### Range Over Vectors - -The vectors upserted into a namespace of an index can be scanned -in a page by page fashion. - -```python -# Scans the vectors 100 vector at a time, -res = index.range( - cursor="", # Start the scan from the beginning - limit=100, - include_vectors=False, - include_metadata=True, - include_data=True, -) - -while res.next_cursor != "": - res = index.range( - cursor=res.next_cursor, - limit=100, - include_vectors=False, - include_metadata=True, - include_data=True, - ) - - for v in res.vectors: - print( - v.id, # The id used while upserting the vector - v.vector, # The value of the vector, if requested. - v.metadata, # The metadata of the vector, if requested and present. - v.data, # The data of the vector, if requested and present. - ) -``` - -Also, a namespace can be specified to range from. -When no namespace is provided, the default namespace is used. - -```python -res = index.range( - cursor="", - limit=100, - namespace="ns", -) -``` - -### Delete Vectors - -A list of vectors can be deleted from a namespace of index. -If no such vectors with the given ids exist, this is no-op. - -```python -res = index.delete( - ids=["id1", "id2"], -) - -print( - res.deleted, # How many vectors are deleted out of the given ids. 
-) -``` - -or, for singular deletion: - -```python -res = index.delete( - "id1", -) - -print(res) # A boolean indicating whether the vector is deleted or not. -``` - -Also, a namespace can be specified to delete from. -When no namespace is provided, the default namespace is used. - -```python -res = index.delete( - ids=["id1", "id2"], - namespace="ns", -) -``` - -### Update a Vector - -Either the vector value(or data for indexes created with an embedding model) or the metadata -can be updated without needing to set the other one. - -```python -res = index.update( - "id1", - metadata={"new_field": "new_value"}, -) - -print(res) # A boolean indicating whether the vector is updated or not. -``` - -Also, a namespace can be specified to update from. -When no namespace is provided, the default namespace is used. - -```python -res = index.update( - "id1", - metadata={"new_field": "new_value"}, - namespace="ns", -) -``` - -### Reset the Namespace - -All vectors can be removed from a namespace of an index. - -```python -index.reset() -``` - -Also, a namespace can be specified to reset. -When no namespace is provided, the default namespace is used. - -```python -index.reset( - namespace="ns", -) -``` - -All namespaces under the index can be reset with a single call -as well. - -```python -index.reset( - all=True, -) -``` - -### Index Info - -Some information regarding the status and type of the index can be requested. -This information also contains per-namespace status. 
- -```python -info = index.info() -print( - info.vector_count, # Total number of vectors across all namespaces - info.pending_vector_count, # Total number of vectors waiting to be indexed across all namespaces - info.index_size, # Total size of the index on disk in bytes - info.dimension, # Vector dimension - info.similarity_function, # Similarity function used -) - -for ns, ns_info in info.namespaces.items(): print( - ns, # Name of the namespace - ns_info.vector_count, # Total number of vectors in this namespaces - ns_info.pending_vector_count, # Total number of vectors waiting to be indexed in this namespaces + f"ID: {r.id}", # Unique identifier for the vector + f"Score: {r.score}", # Similarity score to the query vector + f"Vector: {r.vector if r.vector else 'N/A'}", # The vector data, if included + f"Metadata: {r.metadata if r.metadata else 'N/A'}", # Metadata associated with the vector + f"Data: {r.data if r.data else 'N/A'}" # Additional data, if included ) ``` -### List Namespaces +## Docs +For full usage details, including advanced options and examples, refer to the [Upstash Vector documentation](https://upstash.com/docs/vector/overall/getstarted). -All the names of active namespaces can be listed. +## Contributing -```python -namespaces = index.list_namespaces() -for ns in namespaces: - print(ns) # name of the namespace -``` +### Setup -### Delete a Namespace +This project uses [Poetry](https://python-poetry.org) for packaging and dependencies. After cloning the repository, install dependencies with: -A namespace can be deleted entirely. -If no such namespace exists, and exception is raised. -The default namespaces cannot be deleted. - -```python -index.delete_namespace(namespace="ns") +```shell +poetry install ``` -# Contributing - -## Preparing the environment -This project uses [Poetry](https://python-poetry.org) for packaging and dependency management. Make sure you are able to create the poetry shell with relevant dependencies. 
- You will also need a vector database on [Upstash](https://console.upstash.com/). -```commandline -poetry install -``` - -## Code Formatting +### Code Formatting ```bash poetry run ruff format . ``` -## Running tests +### Running Tests To run all the tests, make sure the poetry virtual environment activated with all the necessary dependencies.