From 29ad64567a8c0fd6ecca6c312289ef395d07a1cf Mon Sep 17 00:00:00 2001 From: "Han-Ru Chen (Future-Outlier)" Date: Wed, 22 Jan 2025 08:55:47 +0800 Subject: [PATCH] [flyteagent] Default Service Config Using Round Robin Mechanism (#6179) * Add FlyteAgent Default Service Config Using Round Robin Mechanism Signed-off-by: Future-Outlier * update dns endpoint to k8s endpoint Signed-off-by: Future-Outlier * update Signed-off-by: Future-Outlier --------- Signed-off-by: Future-Outlier --- charts/flyte-binary/README.md | 2 +- charts/flyte-binary/values.yaml | 2 +- charts/flyte-core/README.md | 6 +++--- charts/flyte-core/values.yaml | 2 +- .../manifests/complete-agent.yaml | 8 ++++---- .../sandbox-bundled/manifests/complete.yaml | 4 ++-- docker/sandbox-bundled/manifests/dev.yaml | 4 ++-- .../flyte_agents/developing_agents.md | 2 +- .../go/tasks/plugins/webapi/agent/config.go | 7 ++++--- .../tasks/plugins/webapi/agent/config_test.go | 20 +++++++++++++++++++ 10 files changed, 39 insertions(+), 18 deletions(-) diff --git a/charts/flyte-binary/README.md b/charts/flyte-binary/README.md index 42f6a151ba..58dd369722 100644 --- a/charts/flyte-binary/README.md +++ b/charts/flyte-binary/README.md @@ -22,7 +22,7 @@ Chart for basic single Flyte executable deployment | commonAnnotations | object | `{}` | | | commonLabels | object | `{}` | | | configuration.agentService.defaultAgent.defaultTimeout | string | `"10s"` | | -| configuration.agentService.defaultAgent.endpoint | string | `"dns:///flyteagent.flyte.svc.cluster.local:8000"` | | +| configuration.agentService.defaultAgent.endpoint | string | `"k8s://flyteagent.flyte:8000"` | | | configuration.agentService.defaultAgent.insecure | bool | `true` | | | configuration.agentService.defaultAgent.timeouts.GetTask | string | `"10s"` | | | configuration.annotations | object | `{}` | | diff --git a/charts/flyte-binary/values.yaml b/charts/flyte-binary/values.yaml index 9ad5d2d2d8..7f6d657105 100644 --- a/charts/flyte-binary/values.yaml +++ b/charts/flyte-binary/values.yaml @@ -171,7 +171,7 @@ configuration: # agentService Flyte Agent configuration agentService: defaultAgent: - endpoint: "dns:///flyteagent.flyte.svc.cluster.local:8000" + endpoint: "k8s://flyteagent.flyte:8000" insecure: true timeouts: GetTask: 10s diff --git a/charts/flyte-core/README.md b/charts/flyte-core/README.md index e3f5dd2b6f..b873129129 100644 --- a/charts/flyte-core/README.md +++ b/charts/flyte-core/README.md @@ -211,9 +211,9 @@ helm install gateway bitnami/contour -n flyte | flyteadmin.serviceMonitor.scrapeTimeout | string | `"30s"` | Sets the timeout after which request to scrape metrics will time out | | flyteadmin.tolerations | list | `[]` | tolerations for Flyteadmin deployment | | flyteagent.enabled | bool | `false` | | -| flyteagent.plugin_config.plugins.agent-service | object | `{"defaultAgent":{"endpoint":"dns:///flyteagent.flyte.svc.cluster.local:8000","insecure":true},"supportedTaskTypes":[]}` | Agent service configuration for propeller. | -| flyteagent.plugin_config.plugins.agent-service.defaultAgent | object | `{"endpoint":"dns:///flyteagent.flyte.svc.cluster.local:8000","insecure":true}` | The default agent service to use for plugin tasks. | -| flyteagent.plugin_config.plugins.agent-service.defaultAgent.endpoint | string | `"dns:///flyteagent.flyte.svc.cluster.local:8000"` | The agent service endpoint propeller should connect to. | +| flyteagent.plugin_config.plugins.agent-service | object | `{"defaultAgent":{"endpoint":"k8s://flyteagent.flyte:8000","insecure":true},"supportedTaskTypes":[]}` | Agent service configuration for propeller. | +| flyteagent.plugin_config.plugins.agent-service.defaultAgent | object | `{"endpoint":"k8s://flyteagent.flyte:8000","insecure":true}` | The default agent service to use for plugin tasks. | +| flyteagent.plugin_config.plugins.agent-service.defaultAgent.endpoint | string | `"k8s://flyteagent.flyte:8000"` | The agent service endpoint propeller should connect to. | | flyteagent.plugin_config.plugins.agent-service.defaultAgent.insecure | bool | `true` | Whether the connection from propeller to the agent service should use TLS. | | flyteagent.plugin_config.plugins.agent-service.supportedTaskTypes | list | `[]` | The task types supported by the default agent. As of #5460 these are discovered automatically and don't need to be configured. | | flyteagent.podLabels | object | `{}` | Labels for flyteagent pods | diff --git a/charts/flyte-core/values.yaml b/charts/flyte-core/values.yaml index 8be6cb20e6..5bda29fba0 100755 --- a/charts/flyte-core/values.yaml +++ b/charts/flyte-core/values.yaml @@ -288,7 +288,7 @@ flyteagent: # -- The default agent service to use for plugin tasks. defaultAgent: # -- The agent service endpoint propeller should connect to. - endpoint: "dns:///flyteagent.flyte.svc.cluster.local:8000" + endpoint: "k8s://flyteagent.flyte:8000" # -- Whether the connection from propeller to the agent service should use TLS. insecure: true # -- The task types supported by the default agent. As of #5460 these are discovered automatically and don't diff --git a/docker/sandbox-bundled/manifests/complete-agent.yaml b/docker/sandbox-bundled/manifests/complete-agent.yaml index 0785282576..deba4976e8 100644 --- a/docker/sandbox-bundled/manifests/complete-agent.yaml +++ b/docker/sandbox-bundled/manifests/complete-agent.yaml @@ -486,7 +486,7 @@ data: agent-service: defaultAgent: defaultTimeout: 10s - endpoint: dns:///flyteagent.flyte.svc.cluster.local:8000 + endpoint: k8s://flyteagent.flyte:8000 insecure: true timeouts: GetTask: 10s @@ -823,7 +823,7 @@ type: Opaque --- apiVersion: v1 data: - haSharedSecret: UnZJZHEzUExzbkJsOW1wYw== + haSharedSecret: bjlDbkZsVU1UVmpLdGU4TQ== proxyPassword: "" proxyUsername: "" kind: Secret @@ -1254,7 +1254,7 @@ spec: metadata: annotations: checksum/cluster-resource-templates: 6fd9b172465e3089fcc59f738b92b8dc4d8939360c19de8ee65f68b0e7422035 - checksum/configuration: 5a537c05dbd27a7f2884eb78f4e762205c3bcc3248ab9e509ab7074c7e5f953d + checksum/configuration: 7841a55b7d0bd6a6d44f37ccf05297fb7c3338c1ebd9c2608d499e4f8c817383 checksum/configuration-secret: 09216ffaa3d29e14f88b1f30af580d02a2a5e014de4d750b7f275cc07ed4e914 labels: app.kubernetes.io/component: flyte-binary @@ -1420,7 +1420,7 @@ spec: metadata: annotations: checksum/config: 8f50e768255a87f078ba8b9879a0c174c3e045ffb46ac8723d2eedbe293c8d81 - checksum/secret: ce172103045f4215e361b4c109776a78fe06660a4ade01c7351ea07212e7cfb9 + checksum/secret: fd8767b33da59722977568a2a3f4cee7ef850b02f579d8d5e381c038d5a20d42 labels: app: docker-registry release: flyte-sandbox diff --git a/docker/sandbox-bundled/manifests/complete.yaml b/docker/sandbox-bundled/manifests/complete.yaml index 420b69536a..cd32c00996 100644 --- a/docker/sandbox-bundled/manifests/complete.yaml +++ b/docker/sandbox-bundled/manifests/complete.yaml @@ -805,7 +805,7 @@ type: Opaque --- apiVersion: v1 data: - haSharedSecret: dDFiem04NjFzb29ZWHFtNA== + haSharedSecret: ZzdHam5sWUQ2RGlIVHBOSw== proxyPassword: "" proxyUsername: "" kind: Secret @@ -1369,7 +1369,7 @@ spec: metadata: annotations: checksum/config: 8f50e768255a87f078ba8b9879a0c174c3e045ffb46ac8723d2eedbe293c8d81 - checksum/secret: 529d34a9c4d3c82b9eec5028fcc30f26e923fa77a57eb29c4705d28c85355963 + checksum/secret: 3ce2fdef0e59a7eba350423dd49173ed960af107bc4c0873e80ea1cf5895e160 labels: app: docker-registry release: flyte-sandbox diff --git a/docker/sandbox-bundled/manifests/dev.yaml b/docker/sandbox-bundled/manifests/dev.yaml index 1969c1b3a3..3c16005b5a 100644 --- a/docker/sandbox-bundled/manifests/dev.yaml +++ b/docker/sandbox-bundled/manifests/dev.yaml @@ -499,7 +499,7 @@ metadata: --- apiVersion: v1 data: - haSharedSecret: Y1V1RU03eGVhUDFFc1pSdQ== + haSharedSecret: ckNscjBXTmJMdmF6QXJ5aw== proxyPassword: "" proxyUsername: "" kind: Secret @@ -934,7 +934,7 @@ spec: metadata: annotations: checksum/config: 8f50e768255a87f078ba8b9879a0c174c3e045ffb46ac8723d2eedbe293c8d81 - checksum/secret: 66507f448be8010226a1ad2c741fb2866ef4372b68e61287c7500b47fae05572 + checksum/secret: ee0ad692a622e9665348e44e2168eb06e24ac7683fe357f9ca92ea77283fa4d7 labels: app: docker-registry release: flyte-sandbox diff --git a/docs/user_guide/flyte_agents/developing_agents.md b/docs/user_guide/flyte_agents/developing_agents.md index a8b2050b01..2e238afc79 100644 --- a/docs/user_guide/flyte_agents/developing_agents.md +++ b/docs/user_guide/flyte_agents/developing_agents.md @@ -198,7 +198,7 @@ plugins: agent-service: # By default, all requests will be sent to the default agent. defaultAgent: - endpoint: "dns:///flyteagent.flyte.svc.cluster.local:8000" + endpoint: "k8s://flyteagent.flyte:8000" insecure: true timeouts: # CreateTask, GetTask and DeleteTask are for async agents. diff --git a/flyteplugins/go/tasks/plugins/webapi/agent/config.go b/flyteplugins/go/tasks/plugins/webapi/agent/config.go index f26499f320..3f9c7bb5fe 100644 --- a/flyteplugins/go/tasks/plugins/webapi/agent/config.go +++ b/flyteplugins/go/tasks/plugins/webapi/agent/config.go @@ -40,9 +40,10 @@ var ( }, }, DefaultAgent: Deployment{ - Endpoint: "", - Insecure: true, - DefaultTimeout: config.Duration{Duration: 10 * time.Second}, + Endpoint: "", + Insecure: true, + DefaultTimeout: config.Duration{Duration: 10 * time.Second}, + DefaultServiceConfig: `{"loadBalancingConfig": [{"round_robin":{}}]}`, }, // AsyncPlugin should be registered to at least one task type. // Reference: https://github.com/flyteorg/flyte/blob/master/flyteplugins/go/tasks/pluginmachinery/registry.go#L27 diff --git a/flyteplugins/go/tasks/plugins/webapi/agent/config_test.go b/flyteplugins/go/tasks/plugins/webapi/agent/config_test.go index 09abffb83c..27a17746dc 100644 --- a/flyteplugins/go/tasks/plugins/webapi/agent/config_test.go +++ b/flyteplugins/go/tasks/plugins/webapi/agent/config_test.go @@ -39,3 +39,23 @@ func TestGetAndSetConfig(t *testing.T) { assert.NoError(t, err) assert.Equal(t, &cfg, GetConfig()) } + +func TestDefaultAgentConfig(t *testing.T) { + cfg := defaultConfig + + assert.Equal(t, "", cfg.DefaultAgent.Endpoint) + assert.True(t, cfg.DefaultAgent.Insecure) + assert.Equal(t, 10*time.Second, cfg.DefaultAgent.DefaultTimeout.Duration) + assert.Equal(t, `{"loadBalancingConfig": [{"round_robin":{}}]}`, cfg.DefaultAgent.DefaultServiceConfig) + + assert.Empty(t, cfg.DefaultAgent.Timeouts) + + expectedTaskTypes := []string{"task_type_1", "task_type_2"} + assert.Equal(t, expectedTaskTypes, cfg.SupportedTaskTypes) + + assert.Empty(t, cfg.AgentDeployments) + + assert.Empty(t, cfg.AgentForTaskTypes) + + assert.Equal(t, 10*time.Second, cfg.PollInterval.Duration) +}