diff --git a/api/app-node/chain/v1alpha1/llmchain_types.go b/api/app-node/chain/v1alpha1/llmchain_types.go
index 6757fb78b..3c318f975 100644
--- a/api/app-node/chain/v1alpha1/llmchain_types.go
+++ b/api/app-node/chain/v1alpha1/llmchain_types.go
@@ -38,7 +38,7 @@ type CommonChainConfig struct {
 	// Usually this value is just empty
 	Model string `json:"model,omitempty"`
 	// MaxTokens is the maximum number of tokens to generate to use in a llm call.
-	// +kubebuilder:default=1024
+	// +kubebuilder:default=2048
 	MaxTokens int `json:"maxTokens,omitempty"`
 	// Temperature is the temperature for sampling to use in a llm call, between 0 and 1.
 	//+kubebuilder:validation:Minimum=0
@@ -57,8 +57,7 @@ type CommonChainConfig struct {
 	MinLength int `json:"minLength,omitempty"`
 	// MaxLength is the maximum length of the generated text in a llm call.
 	// +kubebuilder:validation:Minimum=10
-	// +kubebuilder:validation:Maximum=4096
-	// +kubebuilder:default=1024
+	// +kubebuilder:default=2048
 	MaxLength int `json:"maxLength,omitempty"`
 	// RepetitionPenalty is the repetition penalty for sampling in a llm call.
 	RepetitionPenalty float64 `json:"repetitionPenalty,omitempty"`
diff --git a/apiserver/pkg/application/application.go b/apiserver/pkg/application/application.go
index 32762577a..afa7d88f1 100644
--- a/apiserver/pkg/application/application.go
+++ b/apiserver/pkg/application/application.go
@@ -47,8 +47,8 @@ func addDefaultValue(gApp *generated.Application, app *v1alpha1.Application) {
 	gApp.NumDocuments = pointer.Int(5)
 	gApp.ScoreThreshold = pointer.Float64(0.3)
 	gApp.Temperature = pointer.Float64(0.7)
-	gApp.MaxLength = pointer.Int(1024)
-	gApp.MaxTokens = pointer.Int(1024)
+	gApp.MaxLength = pointer.Int(2048)
+	gApp.MaxTokens = pointer.Int(2048)
 	gApp.ConversionWindowSize = pointer.Int(5)
 }
 
diff --git a/apiserver/pkg/chat/chat.go b/apiserver/pkg/chat/chat.go
index 68112a649..1afcbc8d2 100644
--- a/apiserver/pkg/chat/chat.go
+++ b/apiserver/pkg/chat/chat.go
@@ -27,7 +27,6 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/runtime/schema"
-	"k8s.io/apimachinery/pkg/util/uuid"
 	"k8s.io/klog/v2"
 
 	"github.com/kubeagi/arcadia/api/base/v1alpha1"
@@ -43,7 +42,7 @@ var (
 	Conversations = map[string]Conversation{}
 )
 
-func AppRun(ctx context.Context, req ChatReqBody, respStream chan string) (*ChatRespBody, error) {
+func AppRun(ctx context.Context, req ChatReqBody, respStream chan string, messageID string) (*ChatRespBody, error) {
 	token := auth.ForOIDCToken(ctx)
 	c, err := client.GetClient(token)
 	if err != nil {
@@ -92,7 +91,7 @@ func AppRun(ctx context.Context, req ChatReqBody, respStream chan string) (*Chat
 			Debug:        req.Debug,
 		}
 	}
-	messageID := string(uuid.NewUUID())
+
 	conversation.Messages = append(conversation.Messages, Message{
 		ID:    messageID,
 		Query: req.Query,
diff --git a/apiserver/service/chat.go b/apiserver/service/chat.go
index 81c711275..e1b35499d 100644
--- a/apiserver/service/chat.go
+++ b/apiserver/service/chat.go
@@ -66,6 +66,7 @@ func chatHandler() gin.HandlerFunc {
 		if req.NewChat {
 			req.ConversationID = string(uuid.NewUUID())
 		}
+		messageID := string(uuid.NewUUID())
 		var response *chat.ChatRespBody
 		var err error
 
@@ -84,9 +85,10 @@ func chatHandler() gin.HandlerFunc {
 				}
 			}
 		}()
-		response, err = chat.AppRun(c.Request.Context(), req, respStream)
+		response, err = chat.AppRun(c.Request.Context(), req, respStream, messageID)
 		if err != nil {
 			c.SSEvent("error", chat.ChatRespBody{
+				MessageID:      messageID,
 				ConversationID: req.ConversationID,
 				Message:        err.Error(),
 				CreatedAt:      time.Now(),
@@ -132,6 +134,7 @@ func chatHandler() gin.HandlerFunc {
 		clientDisconnected := c.Stream(func(w io.Writer) bool {
 			if msg, ok := <-respStream; ok {
 				c.SSEvent("", chat.ChatRespBody{
+					MessageID:      messageID,
 					ConversationID: req.ConversationID,
 					Message:        msg,
 					CreatedAt:      time.Now(),
@@ -148,7 +151,7 @@ func chatHandler() gin.HandlerFunc {
 		klog.FromContext(c.Request.Context()).Info("end to receive messages")
 	} else {
 		// handle chat blocking mode
-		response, err = chat.AppRun(c.Request.Context(), req, nil)
+		response, err = chat.AppRun(c.Request.Context(), req, nil, messageID)
 		if err != nil {
 			c.JSON(http.StatusInternalServerError, chat.ErrorResp{Err: err.Error()})
 			klog.FromContext(c.Request.Context()).Error(err, "error resp")
diff --git a/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml b/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml
index b6e227ca9..51a1fc033 100644
--- a/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml
+++ b/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml
@@ -49,14 +49,13 @@ spec:
                 description: DisplayName defines datasource display name
                 type: string
               maxLength:
-                default: 1024
+                default: 2048
                 description: MaxLength is the maximum length of the generated text
                   in a llm call.
-                maximum: 4096
                 minimum: 10
                 type: integer
               maxTokens:
-                default: 1024
+                default: 2048
                 description: MaxTokens is the maximum number of tokens to generate
                   to use in a llm call.
                 type: integer
diff --git a/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml b/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml
index a4f46eac8..094f4ea93 100644
--- a/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml
+++ b/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml
@@ -45,14 +45,13 @@ spec:
                 description: DisplayName defines datasource display name
                 type: string
              maxLength:
-                default: 1024
+                default: 2048
                 description: MaxLength is the maximum length of the generated text
                   in a llm call.
-                maximum: 4096
                 minimum: 10
                 type: integer
              maxTokens:
-                default: 1024
+                default: 2048
                 description: MaxTokens is the maximum number of tokens to generate
                   to use in a llm call.
                 type: integer
diff --git a/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml b/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml
index 4c1935284..05d449dfb 100644
--- a/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml
+++ b/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml
@@ -48,14 +48,13 @@ spec:
                 description: DisplayName defines datasource display name
                 type: string
              maxLength:
-                default: 1024
+                default: 2048
                 description: MaxLength is the maximum length of the generated text
                   in a llm call.
-                maximum: 4096
                 minimum: 10
                 type: integer
              maxTokens:
-                default: 1024
+                default: 2048
                 description: MaxTokens is the maximum number of tokens to generate
                   to use in a llm call.
                 type: integer
diff --git a/deploy/charts/arcadia/Chart.yaml b/deploy/charts/arcadia/Chart.yaml
index 25a462ae7..e77a98e6a 100644
--- a/deploy/charts/arcadia/Chart.yaml
+++ b/deploy/charts/arcadia/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
 name: arcadia
 description: A Helm chart(KubeBB Component) for KubeAGI Arcadia
 type: application
-version: 0.2.17
+version: 0.2.18
 appVersion: "0.1.0"
 
 keywords:
diff --git a/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml b/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml
index b6e227ca9..51a1fc033 100644
--- a/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml
+++ b/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml
@@ -49,14 +49,13 @@ spec:
                 description: DisplayName defines datasource display name
                 type: string
              maxLength:
-                default: 1024
+                default: 2048
                 description: MaxLength is the maximum length of the generated text
                   in a llm call.
-                maximum: 4096
                 minimum: 10
                 type: integer
              maxTokens:
-                default: 1024
+                default: 2048
                 description: MaxTokens is the maximum number of tokens to generate
                   to use in a llm call.
                 type: integer
diff --git a/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml b/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml
index a4f46eac8..094f4ea93 100644
--- a/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml
+++ b/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml
@@ -45,14 +45,13 @@ spec:
                 description: DisplayName defines datasource display name
                 type: string
              maxLength:
-                default: 1024
+                default: 2048
                 description: MaxLength is the maximum length of the generated text
                   in a llm call.
-                maximum: 4096
                 minimum: 10
                 type: integer
              maxTokens:
-                default: 1024
+                default: 2048
                 description: MaxTokens is the maximum number of tokens to generate
                   to use in a llm call.
                 type: integer
diff --git a/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml b/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml
index 4c1935284..05d449dfb 100644
--- a/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml
+++ b/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml
@@ -48,14 +48,13 @@ spec:
                 description: DisplayName defines datasource display name
                 type: string
              maxLength:
-                default: 1024
+                default: 2048
                 description: MaxLength is the maximum length of the generated text
                   in a llm call.
-                maximum: 4096
                 minimum: 10
                 type: integer
              maxTokens:
-                default: 1024
+                default: 2048
                 description: MaxTokens is the maximum number of tokens to generate
                   to use in a llm call.
                 type: integer
diff --git a/pkg/appruntime/app_runtime.go b/pkg/appruntime/app_runtime.go
index 52bbef14e..5920de3f0 100644
--- a/pkg/appruntime/app_runtime.go
+++ b/pkg/appruntime/app_runtime.go
@@ -164,6 +164,7 @@ func (a *Application) Run(ctx context.Context, cli dynamic.Interface, respStream
 		"question":       input.Question,
 		"_answer_stream": respStream,
 		"_history":       input.History,
+		"context":        "",
 	}
 	visited := make(map[string]bool)
 	waitRunningNodes := list.New()
diff --git a/pkg/appruntime/chain/retrievalqachain.go b/pkg/appruntime/chain/retrievalqachain.go
index e00c9c052..a61aacda4 100644
--- a/pkg/appruntime/chain/retrievalqachain.go
+++ b/pkg/appruntime/chain/retrievalqachain.go
@@ -96,6 +96,9 @@ func (l *RetrievalQAChain) Run(ctx context.Context, cli dynamic.Interface, args
 	options := getChainOptions(instance.Spec.CommonChainConfig)
 
 	llmChain := chains.NewLLMChain(llm, prompt)
+	if history != nil {
+		llmChain.Memory = getMemory(llm, instance.Spec.Memory, history, "", "")
+	}
 	var baseChain chains.Chain
 	var stuffDocuments *appretriever.KnowledgeBaseStuffDocuments
 	if knowledgeBaseRetriever, ok := v3.(*appretriever.KnowledgeBaseRetriever); ok {
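The main behavioral change in this patch is that the message ID is now generated once in the HTTP handler (apiserver/service/chat.go) and threaded through chat.AppRun, so streamed chunks, the final response, and error events all carry the same MessageID. Below is a minimal standalone sketch of that pattern, not the repository's actual handler: it uses only the Go standard library, and the names newMessageID, appRun, and respBody are illustrative stand-ins for uuid.NewUUID, chat.AppRun, and chat.ChatRespBody.

package main

import (
	"crypto/rand"
	"encoding/hex"
	"encoding/json"
	"log"
	"net/http"
)

// respBody stands in for chat.ChatRespBody: every event carries the MessageID.
type respBody struct {
	MessageID      string `json:"message_id"`
	ConversationID string `json:"conversation_id,omitempty"`
	Message        string `json:"message"`
}

// newMessageID stands in for string(uuid.NewUUID()).
func newMessageID() string {
	b := make([]byte, 16)
	_, _ = rand.Read(b)
	return hex.EncodeToString(b)
}

// appRun mimics the new chat.AppRun signature: the caller supplies the
// messageID instead of the function generating one internally.
func appRun(query string, respStream chan<- string, messageID string) (*respBody, error) {
	defer close(respStream)
	respStream <- "partial answer for: " + query // streamed chunk
	return &respBody{MessageID: messageID, Message: "final answer"}, nil
}

func chatHandler(w http.ResponseWriter, r *http.Request) {
	// One ID per request, shared by streamed chunks, the final response,
	// and any error event.
	messageID := newMessageID()
	respStream := make(chan string, 8)

	enc := json.NewEncoder(w)
	resp, err := appRun(r.URL.Query().Get("query"), respStream, messageID)
	if err != nil {
		w.WriteHeader(http.StatusInternalServerError)
		_ = enc.Encode(respBody{MessageID: messageID, Message: err.Error()})
		return
	}
	for msg := range respStream {
		_ = enc.Encode(respBody{MessageID: messageID, Message: msg})
	}
	_ = enc.Encode(resp)
}

func main() {
	http.HandleFunc("/chat", chatHandler)
	log.Fatal(http.ListenAndServe(":8080", nil))
}

In the real handler the events are emitted as SSE via gin's c.SSEvent; plain JSON lines are used here only to keep the sketch dependency-free.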