feat: add Ollama component (#224)
Because

We want to integrate the Ollama client into our VDP pipeline platform.

This commit

Added the Ollama Component, which supports the following tasks:
TASK_TEXT_GENERATION_CHAT, TASK_TEXT_EMBEDDINGS

---------

Co-authored-by: Chang, Hui-Tang <[email protected]>
namwoam and donch1989 authored Jul 28, 2024
1 parent 7e16b2b commit 810f850
Showing 11 changed files with 1,888 additions and 0 deletions.
93 changes: 93 additions & 0 deletions ai/ollama/v0/README.mdx
@@ -0,0 +1,93 @@
---
title: "Ollama"
lang: "en-US"
draft: false
description: "Learn about how to set up a VDP Ollama component https://github.com/instill-ai/instill-core"
---

The Ollama component is an AI component that allows users to connect to AI models served with the Ollama library.
It can carry out the following tasks:

- [Text Generation Chat](#text-generation-chat)
- [Text Embeddings](#text-embeddings)



## Release Stage

`Alpha`



## Configuration

The component configuration is defined and maintained [here](https://github.com/instill-ai/component/blob/main/ai/ollama/v0/config/definition.json).




## Setup


| Field | Field ID | Type | Note |
| :--- | :--- | :--- | :--- |
| Endpoint (required) | `endpoint` | string | Fill in your Ollama hosting endpoint. ### WARNING ###: As of 2024-07-26, the Ollama component does not support authentication methods. To prevent unauthorized access to your Ollama serving resources, please implement additional security measures such as IP whitelisting. |
| Model Auto-Pull (required) | `auto-pull` | boolean | Automatically pull the requested models from the Ollama server if the model is not found in the local cache. |
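
Under the hood, these two setup fields map directly onto the constructor of the Ollama client added in this commit (`ai/ollama/v0/client.go`). Below is a minimal Go sketch, assuming the package is importable as `github.com/instill-ai/component/ai/ollama/v0` and that an Ollama server is reachable at the example endpoint:

```go
package main

import (
	"go.uber.org/zap"

	ollama "github.com/instill-ai/component/ai/ollama/v0"
)

func main() {
	// `endpoint` and `auto-pull` from the setup table correspond to the two
	// constructor arguments; the endpoint value here is illustrative.
	client := ollama.NewClient("http://localhost:11434", true, zap.NewNop())
	_ = client
}
```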




## Supported Tasks

### Text Generation Chat

Provide text outputs in response to text/image inputs.


| Input | ID | Type | Description |
| :--- | :--- | :--- | :--- |
| Task ID (required) | `task` | string | `TASK_TEXT_GENERATION_CHAT` |
| Model Name (required) | `model` | string | The open-source model to be used; see https://ollama.com/library for a list of available models |
| Prompt (required) | `prompt` | string | The prompt text |
| System message | `system-message` | string | The system message helps set the behavior of the assistant. For example, you can modify the personality of the assistant or provide specific instructions about how it should behave throughout the conversation. By default, the model’s behavior is set with the generic message "You are a helpful assistant." |
| Prompt Images | `prompt-images` | array[string] | The prompt images |
| Chat history | `chat-history` | array[object] | Incorporate external chat history, specifically previous messages within the conversation. Please note that the System Message will be ignored and will not have any effect when this field is populated. Each message should adhere to the format: \{"role": "The message role, i.e. 'system', 'user' or 'assistant'", "content": "message content"\} |
| Seed | `seed` | integer | The seed |
| Temperature | `temperature` | number | The temperature for sampling |
| Top K | `top-k` | integer | Top k for sampling |
| Max new tokens | `max-new-tokens` | integer | The maximum number of tokens for the model to generate |



| Output | ID | Type | Description |
| :--- | :--- | :--- | :--- |
| Text | `text` | string | Model Output |
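
For reference, a minimal Go sketch of how these fields map onto the `Chat` call in the client added by this commit: `model` becomes `ChatRequest.Model`, `prompt` and `chat-history` become `Messages`, and `seed`, `temperature` and `top-k` become `Options`. The import path, endpoint, and model name below are illustrative assumptions.

```go
package main

import (
	"fmt"
	"log"

	"go.uber.org/zap"

	ollama "github.com/instill-ai/component/ai/ollama/v0"
)

func main() {
	client := ollama.NewClient("http://localhost:11434", true, zap.NewNop())

	resp, err := client.Chat(ollama.ChatRequest{
		Model: "llama3", // any model listed at https://ollama.com/library
		Messages: []ollama.OllamaChatMessage{
			{Role: "system", Content: "You are a helpful assistant."},
			{Role: "user", Content: "Why is the sky blue?"},
		},
		Stream: false,
		Options: ollama.OllamaOptions{
			Temperature: 0.7,
			TopK:        40,
			Seed:        42,
		},
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Message.Content)
}
```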






### Text Embeddings

Turn text into a vector of numbers that capture its meaning, unlocking use cases like semantic search.


| Input | ID | Type | Description |
| :--- | :--- | :--- | :--- |
| Task ID (required) | `task` | string | `TASK_TEXT_EMBEDDINGS` |
| Model Name (required) | `model` | string | The open-source model to be used; see https://ollama.com/library for a list of available models |
| Text (required) | `text` | string | The text |



| Output | ID | Type | Description |
| :--- | :--- | :--- | :--- |
| Embedding | `embedding` | array[number] | Embedding of the input text |
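
For reference, a minimal Go sketch of how these fields map onto the `Embed` call in the client added by this commit: `model` becomes `EmbedRequest.Model` and `text` becomes `EmbedRequest.Prompt`, with the result returned as a `[]float32` vector. The import path, endpoint, and model name below are illustrative assumptions.

```go
package main

import (
	"fmt"
	"log"

	"go.uber.org/zap"

	ollama "github.com/instill-ai/component/ai/ollama/v0"
)

func main() {
	client := ollama.NewClient("http://localhost:11434", true, zap.NewNop())

	resp, err := client.Embed(ollama.EmbedRequest{
		Model:  "nomic-embed-text", // any embedding-capable model from https://ollama.com/library
		Prompt: "Turn text into a vector of numbers that captures its meaning.",
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("embedding dimension: %d\n", len(resp.Embedding))
}
```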







7 changes: 7 additions & 0 deletions ai/ollama/v0/assets/ollama.svg
176 changes: 176 additions & 0 deletions ai/ollama/v0/client.go
@@ -0,0 +1,176 @@
package ollama

import (
"fmt"
"slices"

"github.com/instill-ai/component/internal/util/httpclient"
"go.uber.org/zap"
)

// reference: https://github.com/ollama/ollama/blob/main/docs/api.md
// Ollama v0.2.5 on 2024-07-17

type errBody struct {
Error struct {
Message string `json:"message"`
} `json:"error"`
}

func (e errBody) Message() string {
return e.Error.Message
}

type OllamaClient struct {
httpClient *httpclient.Client
autoPull bool
}

// NewClient returns an Ollama client configured with the serving endpoint
// and the auto-pull behaviour of the component.
func NewClient(endpoint string, autoPull bool, logger *zap.Logger) *OllamaClient {
	c := httpclient.New("Ollama", endpoint, httpclient.WithLogger(logger),
		httpclient.WithEndUserError(new(errBody)))
	return &OllamaClient{httpClient: c, autoPull: autoPull}
}

// OllamaModelInfo describes a model entry returned by the Ollama
// /api/tags (list local models) endpoint.
type OllamaModelInfo struct {
	Name       string `json:"name"`
	ModifiedAt string `json:"modified_at"`
	Size       int    `json:"size"`
	Digest     string `json:"digest"`
	Details    struct {
		Format            string   `json:"format"`
		Family            string   `json:"family"`
		Families          []string `json:"families"`
		ParameterSize     string   `json:"parameter_size"`
		QuantizationLevel string   `json:"quantization_level"`
	} `json:"details"`
}

type ListLocalModelsRequest struct {
}

type ListLocalModelsResponse struct {
Models []OllamaModelInfo `json:"models"`
}

// CheckModelAvailability reports whether the given model is already present
// in the local Ollama cache; it returns false if the listing request fails.
func (c *OllamaClient) CheckModelAvailability(modelName string) bool {
	request := &ListLocalModelsRequest{}
	response := &ListLocalModelsResponse{}
	req := c.httpClient.R().SetResult(&response).SetBody(request)
	if _, err := req.Get("/api/tags"); err != nil {
		return false
	}
	localModels := []string{}
	for _, m := range response.Models {
		localModels = append(localModels, m.Name)
	}
	return slices.Contains(localModels, modelName)
}

type PullModelRequest struct {
Name string `json:"name"`
Stream bool `json:"stream"`
}

type PullModelResponse struct {
}

// Pull downloads the given model to the Ollama server via /api/pull,
// blocking until the pull completes (streaming is disabled).
func (c *OllamaClient) Pull(modelName string) error {
	request := &PullModelRequest{
		Name:   modelName,
		Stream: false,
	}
	response := &PullModelResponse{}
	req := c.httpClient.R().SetResult(&response).SetBody(request)
	if _, err := req.Post("/api/pull"); err != nil {
		return err
	}
	return nil
}

type OllamaChatMessage struct {
Role string `json:"role"`
Content string `json:"content"`
Images []string `json:"images,omitempty"`
}

type OllamaOptions struct {
Temperature float32 `json:"temperature,omitempty"`
TopK int `json:"top_k,omitempty"`
Seed int `json:"seed,omitempty"`
}

type ChatRequest struct {
Model string `json:"model"`
Messages []OllamaChatMessage `json:"messages"`
Stream bool `json:"stream"`
Options OllamaOptions `json:"options"`
}

type ChatResponse struct {
Model string `json:"model"`
CreatedAt string `json:"created_at"`
Message OllamaChatMessage `json:"message"`
Done bool `json:"done"`
DoneReason string `json:"done_reason"`
TotalDuration int `json:"total_duration"`
LoadDuration int `json:"load_duration"`
PromptEvalCount int `json:"prompt_eval_count"`
PromptEvalDuration int `json:"prompt_eval_duration"`
EvalCount int `json:"eval_count"`
EvalDuration int `json:"eval_duration"`
}

// Chat sends a non-streaming chat request to /api/chat, auto-pulling the
// model first when it is missing locally and auto-pull is enabled.
func (c *OllamaClient) Chat(request ChatRequest) (ChatResponse, error) {
	response := ChatResponse{}
	isAvailable := c.CheckModelAvailability(request.Model)

	if !isAvailable && !c.autoPull {
		return response, fmt.Errorf("model %s is not available", request.Model)
	}
	if !isAvailable {
		err := c.Pull(request.Model)
		if err != nil {
			return response, fmt.Errorf("error when auto-pulling model: %v", err)
		}
	}
	req := c.httpClient.R().SetResult(&response).SetBody(request)
	if _, err := req.Post("/api/chat"); err != nil {
		return response, fmt.Errorf("error when sending chat request: %v", err)
	}
	return response, nil
}

type EmbedRequest struct {
Model string `json:"model"`
Prompt string `json:"prompt"`
}

type EmbedResponse struct {
Embedding []float32 `json:"embedding"`
}

// Embed requests an embedding for the given prompt from /api/embeddings,
// auto-pulling the model first when it is missing locally and auto-pull is enabled.
func (c *OllamaClient) Embed(request EmbedRequest) (EmbedResponse, error) {
	response := EmbedResponse{}
	isAvailable := c.CheckModelAvailability(request.Model)

	if !isAvailable && !c.autoPull {
		return response, fmt.Errorf("model %s is not available", request.Model)
	}
	if !isAvailable {
		err := c.Pull(request.Model)
		if err != nil {
			return response, fmt.Errorf("error when auto-pulling model: %v", err)
		}
	}
	req := c.httpClient.R().SetResult(&response).SetBody(request)
	if _, err := req.Post("/api/embeddings"); err != nil {
		return response, fmt.Errorf("error when sending embeddings request: %v", err)
	}
	return response, nil
}

func (c *OllamaClient) IsAutoPull() bool {
return c.autoPull
}
