diff --git a/api/app-node/chain/v1alpha1/llmchain_types.go b/api/app-node/chain/v1alpha1/llmchain_types.go index 16f766709..6757fb78b 100644 --- a/api/app-node/chain/v1alpha1/llmchain_types.go +++ b/api/app-node/chain/v1alpha1/llmchain_types.go @@ -38,6 +38,7 @@ type CommonChainConfig struct { // Usually this value is just empty Model string `json:"model,omitempty"` // MaxTokens is the maximum number of tokens to generate to use in a llm call. + // +kubebuilder:default=1024 MaxTokens int `json:"maxTokens,omitempty"` // Temperature is the temperature for sampling to use in a llm call, between 0 and 1. //+kubebuilder:validation:Minimum=0 diff --git a/apiserver/graph/generated/generated.go b/apiserver/graph/generated/generated.go index 4948de501..de3e74951 100644 --- a/apiserver/graph/generated/generated.go +++ b/apiserver/graph/generated/generated.go @@ -77,6 +77,7 @@ type ComplexityRoot struct { Knowledgebase func(childComplexity int) int Llm func(childComplexity int) int MaxLength func(childComplexity int) int + MaxTokens func(childComplexity int) int Metadata func(childComplexity int) int Model func(childComplexity int) int NumDocuments func(childComplexity int) int @@ -824,6 +825,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.Application.MaxLength(childComplexity), true + case "Application.maxTokens": + if e.complexity.Application.MaxTokens == nil { + break + } + + return e.complexity.Application.MaxTokens(childComplexity), true + case "Application.metadata": if e.complexity.Application.Metadata == nil { break @@ -3923,6 +3931,11 @@ type Application { """ maxLength: Int + """ + maxTokens 最大输出token + """ + maxTokens: Int + """ conversionWindowSize 对话轮次 """ @@ -4151,6 +4164,11 @@ input UpdateApplicationConfigInput { """ maxLength: Int + """ + maxTokens 最大输出token + """ + maxTokens: Int + """ conversionWindowSize 对话轮次 """ @@ -7658,6 +7676,47 @@ func (ec *executionContext) fieldContext_Application_maxLength(ctx context.Conte return fc, nil } +func (ec *executionContext) _Application_maxTokens(ctx context.Context, field graphql.CollectedField, obj *Application) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Application_maxTokens(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.MaxTokens, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + return graphql.Null + } + res := resTmp.(*int) + fc.Result = res + return ec.marshalOInt2ᚖint(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Application_maxTokens(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Application", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Int does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) _Application_conversionWindowSize(ctx context.Context, field graphql.CollectedField, obj *Application) (ret graphql.Marshaler) { fc, err := ec.fieldContext_Application_conversionWindowSize(ctx, field) if err != nil { @@ -8753,6 +8812,8 @@ func (ec *executionContext) fieldContext_ApplicationMutation_updateApplicationCo return ec.fieldContext_Application_temperature(ctx, field) case "maxLength": return ec.fieldContext_Application_maxLength(ctx, field) + case "maxTokens": + return ec.fieldContext_Application_maxTokens(ctx, field) case "conversionWindowSize": return ec.fieldContext_Application_conversionWindowSize(ctx, field) case "knowledgebase": @@ -8836,6 +8897,8 @@ func (ec *executionContext) fieldContext_ApplicationQuery_getApplication(ctx con return ec.fieldContext_Application_temperature(ctx, field) case "maxLength": return ec.fieldContext_Application_maxLength(ctx, field) + case "maxTokens": + return ec.fieldContext_Application_maxTokens(ctx, field) case "conversionWindowSize": return ec.fieldContext_Application_conversionWindowSize(ctx, field) case "knowledgebase": @@ -29967,7 +30030,7 @@ func (ec *executionContext) unmarshalInputUpdateApplicationConfigInput(ctx conte asMap[k] = v } - fieldsInOrder := [...]string{"name", "namespace", "prologue", "model", "llm", "temperature", "maxLength", "conversionWindowSize", "knowledgebase", "scoreThreshold", "numDocuments", "docNullReturn", "userPrompt", "showNextGuid"} + fieldsInOrder := [...]string{"name", "namespace", "prologue", "model", "llm", "temperature", "maxLength", "maxTokens", "conversionWindowSize", "knowledgebase", "scoreThreshold", "numDocuments", "docNullReturn", "userPrompt", "showNextGuid"} for _, k := range fieldsInOrder { v, ok := asMap[k] if !ok { @@ -30037,6 +30100,15 @@ func (ec *executionContext) unmarshalInputUpdateApplicationConfigInput(ctx conte return it, err } it.MaxLength = data + case "maxTokens": + var err error + + ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("maxTokens")) + data, err := ec.unmarshalOInt2ᚖint(ctx, v) + if err != nil { + return it, err + } + it.MaxTokens = data case "conversionWindowSize": var err error @@ -31209,6 +31281,8 @@ func (ec *executionContext) _Application(ctx context.Context, sel ast.SelectionS out.Values[i] = ec._Application_temperature(ctx, field, obj) case "maxLength": out.Values[i] = ec._Application_maxLength(ctx, field, obj) + case "maxTokens": + out.Values[i] = ec._Application_maxTokens(ctx, field, obj) case "conversionWindowSize": out.Values[i] = ec._Application_conversionWindowSize(ctx, field, obj) case "knowledgebase": diff --git a/apiserver/graph/generated/models_gen.go b/apiserver/graph/generated/models_gen.go index 339bbc922..cf8aa649e 100644 --- a/apiserver/graph/generated/models_gen.go +++ b/apiserver/graph/generated/models_gen.go @@ -50,6 +50,8 @@ type Application struct { Temperature *float64 `json:"temperature,omitempty"` // maxLength 最大响应长度 MaxLength *int `json:"maxLength,omitempty"` + // maxTokens 最大输出token + MaxTokens *int `json:"maxTokens,omitempty"` // conversionWindowSize 对话轮次 ConversionWindowSize *int `json:"conversionWindowSize,omitempty"` // knowledgebase 指当前知识库应用使用的知识库,即 Kind 为 KnowledgeBase 的 CR 的名称,目前一个应用只支持0或1个知识库 @@ -1198,6 +1200,8 @@ type UpdateApplicationConfigInput struct { Temperature *float64 `json:"temperature,omitempty"` // maxLength 最大响应长度 MaxLength *int `json:"maxLength,omitempty"` + // maxTokens 最大输出token + MaxTokens *int `json:"maxTokens,omitempty"` // conversionWindowSize 对话轮次 ConversionWindowSize *int `json:"conversionWindowSize,omitempty"` // knowledgebase 指当前知识库应用使用的知识库,即 Kind 为 KnowledgeBase 的 CR 的名称,目前一个应用只支持0或1个知识库 diff --git a/apiserver/graph/schema/application.gql b/apiserver/graph/schema/application.gql index 993aa0580..6a9088786 100644 --- a/apiserver/graph/schema/application.gql +++ b/apiserver/graph/schema/application.gql @@ -57,6 +57,7 @@ mutation updateApplicationConfig($input: UpdateApplicationConfigInput!){ llm temperature maxLength + maxTokens conversionWindowSize knowledgebase scoreThreshold @@ -91,6 +92,7 @@ query getApplication($name: String!, $namespace: String!){ llm temperature maxLength + maxTokens conversionWindowSize knowledgebase scoreThreshold diff --git a/apiserver/graph/schema/application.graphqls b/apiserver/graph/schema/application.graphqls index 8f2731390..fe340a483 100644 --- a/apiserver/graph/schema/application.graphqls +++ b/apiserver/graph/schema/application.graphqls @@ -49,6 +49,11 @@ type Application { """ maxLength: Int + """ + maxTokens 最大输出token + """ + maxTokens: Int + """ conversionWindowSize 对话轮次 """ @@ -277,6 +282,11 @@ input UpdateApplicationConfigInput { """ maxLength: Int + """ + maxTokens 最大输出token + """ + maxTokens: Int + """ conversionWindowSize 对话轮次 """ diff --git a/apiserver/pkg/application/application.go b/apiserver/pkg/application/application.go index 61f9ec00f..195a3cf86 100644 --- a/apiserver/pkg/application/application.go +++ b/apiserver/pkg/application/application.go @@ -48,6 +48,7 @@ func addDefaultValue(gApp *generated.Application, app *v1alpha1.Application) { gApp.ScoreThreshold = pointer.Float64(0.3) gApp.Temperature = pointer.Float64(0.7) gApp.MaxLength = pointer.Int(1024) + gApp.MaxTokens = pointer.Int(1024) gApp.ConversionWindowSize = pointer.Int(5) } @@ -85,6 +86,7 @@ func cr2app(prompt *apiprompt.Prompt, chainConfig *apichain.CommonChainConfig, r gApp.Model = pointer.String(chainConfig.Model) gApp.Temperature = pointer.Float64(chainConfig.Temperature) gApp.MaxLength = pointer.Int(chainConfig.MaxLength) + gApp.MaxTokens = pointer.Int(chainConfig.MaxTokens) gApp.ConversionWindowSize = pointer.Int(chainConfig.Memory.ConversionWindowSize) } for _, node := range app.Spec.Nodes { @@ -367,6 +369,7 @@ func UpdateApplicationConfig(ctx context.Context, c dynamic.Interface, input gen }, Model: pointer.StringDeref(input.Model, ""), MaxLength: pointer.IntDeref(input.MaxLength, 0), + MaxTokens: pointer.IntDeref(input.MaxTokens, 0), Temperature: pointer.Float64Deref(input.Temperature, 0), }, }, @@ -374,6 +377,7 @@ func UpdateApplicationConfig(ctx context.Context, c dynamic.Interface, input gen if err = createOrUpdateResource(ctx, c, common.SchemaOf(&common.ArcadiaAPIGroup, strings.ToLower(chainKind)), input.Namespace, input.Name, func() { qachain.Spec.Model = pointer.StringDeref(input.Model, qachain.Spec.Model) qachain.Spec.MaxLength = pointer.IntDeref(input.MaxLength, qachain.Spec.MaxLength) + qachain.Spec.MaxTokens = pointer.IntDeref(input.MaxTokens, qachain.Spec.MaxTokens) qachain.Spec.Temperature = pointer.Float64Deref(input.Temperature, qachain.Spec.Temperature) qachain.Spec.Memory.ConversionWindowSize = pointer.IntDeref(input.ConversionWindowSize, qachain.Spec.Memory.ConversionWindowSize) }, qachain); err != nil { @@ -401,6 +405,7 @@ func UpdateApplicationConfig(ctx context.Context, c dynamic.Interface, input gen }, Model: pointer.StringDeref(input.Model, ""), MaxLength: pointer.IntDeref(input.MaxLength, 0), + MaxTokens: pointer.IntDeref(input.MaxTokens, 0), Temperature: pointer.Float64Deref(input.Temperature, 0), }, }, @@ -408,6 +413,7 @@ func UpdateApplicationConfig(ctx context.Context, c dynamic.Interface, input gen if err = createOrUpdateResource(ctx, c, common.SchemaOf(&common.ArcadiaAPIGroup, strings.ToLower(chainKind)), input.Namespace, input.Name, func() { llmchain.Spec.Model = pointer.StringDeref(input.Model, llmchain.Spec.Model) llmchain.Spec.MaxLength = pointer.IntDeref(input.MaxLength, llmchain.Spec.MaxLength) + llmchain.Spec.MaxTokens = pointer.IntDeref(input.MaxTokens, llmchain.Spec.MaxTokens) llmchain.Spec.Temperature = pointer.Float64Deref(input.Temperature, llmchain.Spec.Temperature) llmchain.Spec.Memory.ConversionWindowSize = pointer.IntDeref(input.ConversionWindowSize, llmchain.Spec.Memory.ConversionWindowSize) }, llmchain); err != nil { diff --git a/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml b/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml index 62acca9f8..b6e227ca9 100644 --- a/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml +++ b/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml @@ -56,6 +56,7 @@ spec: minimum: 10 type: integer maxTokens: + default: 1024 description: MaxTokens is the maximum number of tokens to generate to use in a llm call. type: integer diff --git a/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml b/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml index 16474c02d..a4f46eac8 100644 --- a/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml +++ b/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml @@ -52,6 +52,7 @@ spec: minimum: 10 type: integer maxTokens: + default: 1024 description: MaxTokens is the maximum number of tokens to generate to use in a llm call. type: integer diff --git a/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml b/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml index 36f7a535a..4c1935284 100644 --- a/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml +++ b/config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml @@ -55,6 +55,7 @@ spec: minimum: 10 type: integer maxTokens: + default: 1024 description: MaxTokens is the maximum number of tokens to generate to use in a llm call. type: integer diff --git a/deploy/charts/arcadia/Chart.yaml b/deploy/charts/arcadia/Chart.yaml index f20742a51..9d8a3f96b 100644 --- a/deploy/charts/arcadia/Chart.yaml +++ b/deploy/charts/arcadia/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: arcadia description: A Helm chart(KubeBB Component) for KubeAGI Arcadia type: application -version: 0.2.13 +version: 0.2.14 appVersion: "0.1.0" keywords: diff --git a/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml b/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml index 62acca9f8..b6e227ca9 100644 --- a/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml +++ b/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml @@ -56,6 +56,7 @@ spec: minimum: 10 type: integer maxTokens: + default: 1024 description: MaxTokens is the maximum number of tokens to generate to use in a llm call. type: integer diff --git a/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml b/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml index 16474c02d..a4f46eac8 100644 --- a/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml +++ b/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml @@ -52,6 +52,7 @@ spec: minimum: 10 type: integer maxTokens: + default: 1024 description: MaxTokens is the maximum number of tokens to generate to use in a llm call. type: integer diff --git a/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml b/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml index 36f7a535a..4c1935284 100644 --- a/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml +++ b/deploy/charts/arcadia/crds/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml @@ -55,6 +55,7 @@ spec: minimum: 10 type: integer maxTokens: + default: 1024 description: MaxTokens is the maximum number of tokens to generate to use in a llm call. type: integer