diff --git a/plugins/wasm-go/extensions/ai-proxy/README.md b/plugins/wasm-go/extensions/ai-proxy/README.md index bf54fda467..196ea1f0f9 100644 --- a/plugins/wasm-go/extensions/ai-proxy/README.md +++ b/plugins/wasm-go/extensions/ai-proxy/README.md @@ -81,22 +81,27 @@ custom-setting会遵循如下表格,根据`name`和协议来替换对应的字 `failover` 的配置字段说明如下: -| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | -|------------------|--------|-----------------|-------|-----------------------------| -| enabled | bool | 非必填 | false | 是否启用 apiToken 的 failover 机制 | -| failureThreshold | int | 非必填 | 3 | 触发 failover 连续请求失败的阈值(次数) | -| successThreshold | int | 非必填 | 1 | 健康检测的成功阈值(次数) | -| healthCheckInterval | int | 非必填 | 5000 | 健康检测的间隔时间,单位毫秒 | -| healthCheckTimeout | int | 非必填 | 5000 | 健康检测的超时时间,单位毫秒 | -| healthCheckModel | string | 启用 failover 时必填 | | 健康检测使用的模型 | +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------------------|--------|-----------------|-------|-----------------------------------| +| enabled | bool | 非必填 | false | 是否启用 apiToken 的 failover 机制 | +| failureThreshold | int | 非必填 | 3 | 触发 failover 连续请求失败的阈值(次数) | +| successThreshold | int | 非必填 | 1 | 健康检测的成功阈值(次数) | +| healthCheckInterval | int | 非必填 | 5000 | 健康检测的间隔时间,单位毫秒 | +| healthCheckTimeout | int | 非必填 | 5000 | 健康检测的超时时间,单位毫秒 | +| healthCheckModel | string | 启用 failover 时必填 | | 健康检测使用的模型 | +| failoverOnStatus | array of string | 非必填 | ["4.*", "5.*"] | 需要进行 failover 的原始请求的状态码,支持正则表达式匹配 | `retryOnFailure` 的配置字段说明如下: -| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | -|------------------|--------|-----------------|-------|-------------| -| enabled | bool | 非必填 | false | 是否启用失败请求重试 | -| maxRetries | int | 非必填 | 1 | 最大重试次数 | -| retryTimeout | int | 非必填 | 30000 | 重试超时时间,单位毫秒 | +目前仅支持对非流式请求进行重试。 + + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------------------|--------|--------|-------|---------------------------| +| enabled | bool | 非必填 | false | 是否启用失败请求重试 | +| maxRetries | int | 非必填 | 1 | 最大重试次数 | +| retryTimeout | int | 非必填 | 30000 | 重试超时时间,单位毫秒 | +| retryOnStatus | array of string | 非必填 | 
["4.*", "5.*"] | 需要进行重试的原始请求的状态码,支持正则表达式匹配 | ### 提供商特有配置 diff --git a/plugins/wasm-go/extensions/ai-proxy/go.mod b/plugins/wasm-go/extensions/ai-proxy/go.mod index 4480d61aa9..3a9baaa2e0 100644 --- a/plugins/wasm-go/extensions/ai-proxy/go.mod +++ b/plugins/wasm-go/extensions/ai-proxy/go.mod @@ -13,11 +13,17 @@ require ( github.com/tidwall/gjson v1.17.3 ) +require ( + github.com/tetratelabs/wazero v1.7.2 // indirect + github.com/wasilibs/go-re2 v1.6.0 // indirect + golang.org/x/sys v0.21.0 // indirect +) + require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/google/uuid v1.3.0 github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect - github.com/magefile/mage v1.14.0 // indirect + github.com/magefile/mage v1.15.1-0.20230912152418-9f54e0f83e2a // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect diff --git a/plugins/wasm-go/extensions/ai-proxy/go.sum b/plugins/wasm-go/extensions/ai-proxy/go.sum index 3ce5811665..b1b7172ac7 100644 --- a/plugins/wasm-go/extensions/ai-proxy/go.sum +++ b/plugins/wasm-go/extensions/ai-proxy/go.sum @@ -8,10 +8,14 @@ github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKE github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0= github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo= github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A= +github.com/magefile/mage v1.15.1-0.20230912152418-9f54e0f83e2a h1:tdPcGgyiH0K+SbsJBBm2oPyEIOTAvLBwD9TuUwVtZho= +github.com/magefile/mage v1.15.1-0.20230912152418-9f54e0f83e2a/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/testify v1.8.4 
h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/tetratelabs/wazero v1.7.2 h1:1+z5nXJNwMLPAWaTePFi49SSTL0IMx/i3Fg8Yc25GDc= +github.com/tetratelabs/wazero v1.7.2/go.mod h1:ytl6Zuh20R/eROuyDaGPkp82O9C/DJfXAwJfQ3X6/7Y= github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/gjson v1.17.3 h1:bwWLZU7icoKRG+C+0PNwIKC6FCJO/Q3p2pZvuP0jN94= github.com/tidwall/gjson v1.17.3/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= @@ -23,6 +27,10 @@ github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE= github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0= github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= +github.com/wasilibs/go-re2 v1.6.0 h1:CLlhDebt38wtl/zz4ww+hkXBMcxjrKFvTDXzFW2VOz8= +github.com/wasilibs/go-re2 v1.6.0/go.mod h1:prArCyErsypRBI/jFAFJEbzyHzjABKqkzlidF0SNA04= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/plugins/wasm-go/extensions/ai-proxy/main.go b/plugins/wasm-go/extensions/ai-proxy/main.go index 35d06b9502..3e9a555b4e 100644 --- a/plugins/wasm-go/extensions/ai-proxy/main.go +++ b/plugins/wasm-go/extensions/ai-proxy/main.go @@ -189,7 +189,7 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo log.Errorf("unable to load :status header from response: %v", err) } ctx.DontReadResponseBody() - return 
providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, apiTokens, log) + return providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, apiTokens, status, log) } // Reset ctxApiTokenRequestFailureCount if the request is successful, diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/failover.go b/plugins/wasm-go/extensions/ai-proxy/provider/failover.go index 9644693f5e..1431572c13 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/failover.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/failover.go @@ -30,6 +30,8 @@ type failover struct { healthCheckTimeout int64 `required:"false" yaml:"healthCheckTimeout" json:"healthCheckTimeout"` // @Title zh-CN 健康检测使用的模型 healthCheckModel string `required:"false" yaml:"healthCheckModel" json:"healthCheckModel"` + // @Title zh-CN 需要进行 failover 的原始请求的状态码,支持正则表达式匹配 + failoverOnStatus []string `required:"false" yaml:"failoverOnStatus" json:"failoverOnStatus"` // @Title zh-CN 本次请求使用的 apiToken ctxApiTokenInUse string // @Title zh-CN 记录本次请求时所有可用的 apiToken @@ -92,6 +94,14 @@ func (f *failover) FromJson(json gjson.Result) { f.healthCheckTimeout = 5000 } f.healthCheckModel = json.Get("healthCheckModel").String() + + for _, status := range json.Get("failoverOnStatus").Array() { + f.failoverOnStatus = append(f.failoverOnStatus, status.String()) + } + // If failoverOnStatus is empty, default to retry on 4xx and 5xx + if len(f.failoverOnStatus) == 0 { + f.failoverOnStatus = []string{"4.*", "5.*"} + } } func (f *failover) Validate() error { @@ -557,17 +567,21 @@ func (c *ProviderConfig) resetSharedData() { _ = proxywasm.SetSharedData(c.failover.ctxApiTokenRequestFailureCount, nil, 0) } -func (c *ProviderConfig) OnRequestFailed(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, apiTokens []string, log wrapper.Log) types.Action { - if c.isFailoverEnabled() { +func (c *ProviderConfig) OnRequestFailed(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, apiTokens 
[]string, status string, log wrapper.Log) types.Action { + if c.isFailoverEnabled() && util.MatchStatus(status, c.failover.failoverOnStatus) { c.handleUnavailableApiToken(ctx, apiTokenInUse, log) } - if c.isRetryOnFailureEnabled() && ctx.GetContext(ctxKeyIsStreaming) != nil && !ctx.GetContext(ctxKeyIsStreaming).(bool) { + if c.isRetryOnFailureEnabled() && util.MatchStatus(status, c.retryOnFailure.retryOnStatus) && isNotStreamingResponse(ctx) { c.retryFailedRequest(activeProvider, ctx, apiTokenInUse, apiTokens, log) return types.HeaderStopAllIterationAndWatermark } return types.ActionContinue } +func isNotStreamingResponse(ctx wrapper.HttpContext) bool { + return ctx.GetContext(ctxKeyIsStreaming) != nil && !ctx.GetContext(ctxKeyIsStreaming).(bool) +} + func (c *ProviderConfig) GetApiTokenInUse(ctx wrapper.HttpContext) string { token, _ := ctx.GetContext(c.failover.ctxApiTokenInUse).(string) return token diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/retry.go b/plugins/wasm-go/extensions/ai-proxy/provider/retry.go index 59691d855f..5eaec63254 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/retry.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/retry.go @@ -22,6 +22,8 @@ type retryOnFailure struct { maxRetries int64 `required:"false" yaml:"maxRetries" json:"maxRetries"` // @Title zh-CN 重试超时时间 retryTimeout int64 `required:"false" yaml:"retryTimeout" json:"retryTimeout"` + // @Title zh-CN 需要进行重试的原始请求的状态码,支持正则表达式匹配 + retryOnStatus []string `required:"false" yaml:"retryOnStatus" json:"retryOnStatus"` } func (r *retryOnFailure) FromJson(json gjson.Result) { @@ -34,6 +36,13 @@ func (r *retryOnFailure) FromJson(json gjson.Result) { if r.retryTimeout == 0 { r.retryTimeout = 30 * 1000 } + for _, status := range json.Get("retryOnStatus").Array() { + r.retryOnStatus = append(r.retryOnStatus, status.String()) + } + // If retryOnStatus is empty, default to retry on 4xx and 5xx + if len(r.retryOnStatus) == 0 { + r.retryOnStatus = []string{"4.*", "5.*"} + } 
// StripPrefix returns s without the leading prefix. If prefix is empty or s
// does not start with prefix, s is returned unchanged.
func StripPrefix(s string, prefix string) string {
	n := len(prefix)
	if n == 0 || len(s) < n || s[:n] != prefix {
		return s
	}
	return s[n:]
}

// MatchStatus reports whether status fully matches at least one of the
// regular expressions in patterns.
//
// Each pattern is anchored to the whole status string, so "4.*" matches "404"
// but not "304" or "204", and "5.*" does not match "405". Without the anchors
// a pattern would match any status that merely contains the digit somewhere.
//
// An empty or nil patterns slice never matches.
func MatchStatus(status string, patterns []string) bool {
	for _, pattern := range patterns {
		// Wrap in a non-capturing group so alternations such as "429|5.*"
		// are anchored as a whole rather than per branch.
		matched, err := regexp.MatchString("^(?:"+pattern+")$", status)
		if err != nil {
			// A pattern that fails to compile cannot match anything; skip it
			// so one bad entry does not disable the remaining patterns.
			continue
		}
		if matched {
			return true
		}
	}
	return false
}