From 576f47644469f9be9f1c5caf8628f6b1398e7a5a Mon Sep 17 00:00:00 2001 From: Paul Lorenz Date: Fri, 19 Jan 2024 18:15:47 -0500 Subject: [PATCH] Add link chaos test. Fixes #1702 --- ziti/cmd/edge/login.go | 2 +- ziti/cmd/fabric/validate_router_links.go | 2 +- zititest/go.mod | 4 +- zititest/go.sum | 4 + zititest/models/links-test/main.go | 39 +++-- zititest/models/links-test/validation.go | 204 ++++++++++++++++++++++ zititest/zitilab/chaos/chaos.go | 20 +++ zititest/zitilab/zitirest/clients.go | 211 +++++++++++++++++++++++ 8 files changed, 465 insertions(+), 21 deletions(-) create mode 100644 zititest/models/links-test/validation.go create mode 100644 zititest/zitilab/zitirest/clients.go diff --git a/ziti/cmd/edge/login.go b/ziti/cmd/edge/login.go index 3147c72cc..78d6fe43a 100644 --- a/ziti/cmd/edge/login.go +++ b/ziti/cmd/edge/login.go @@ -359,7 +359,7 @@ func login(o *LoginOptions, url string, authentication string) (*gabs.Container, } resp, err := client. - SetTimeout(time.Duration(time.Duration(timeout)*time.Second)). + SetTimeout(time.Duration(timeout)*time.Second). SetDebug(verbose). R(). SetQueryParam("method", method). 
diff --git a/ziti/cmd/fabric/validate_router_links.go b/ziti/cmd/fabric/validate_router_links.go index 719bc561a..5db007da0 100644 --- a/ziti/cmd/fabric/validate_router_links.go +++ b/ziti/cmd/fabric/validate_router_links.go @@ -107,7 +107,7 @@ func (self *validateRouterLinksAction) validateRouterLinks(_ *cobra.Command, arg case routerDetail := <-self.eventNotify: result := "validation successful" if !routerDetail.ValidateSuccess { - result = fmt.Sprintf("error: unable to validation (%s)", routerDetail.Message) + result = fmt.Sprintf("error: unable to validate (%s)", routerDetail.Message) errCount++ } fmt.Printf("routerId: %s, routerName: %v, links: %v, %s\n", diff --git a/zititest/go.mod b/zititest/go.mod index 8bb9d6a61..aa5950a47 100644 --- a/zititest/go.mod +++ b/zititest/go.mod @@ -12,7 +12,7 @@ require ( github.com/michaelquigley/pfxlog v0.6.10 github.com/openziti/agent v1.0.16 github.com/openziti/channel/v2 v2.0.116 - github.com/openziti/fablab v0.5.38 + github.com/openziti/fablab v0.5.42 github.com/openziti/foundation/v2 v2.0.36 github.com/openziti/identity v1.0.69 github.com/openziti/sdk-golang v0.22.17 @@ -189,7 +189,7 @@ require ( golang.org/x/image v0.13.0 // indirect golang.org/x/mod v0.14.0 // indirect golang.org/x/oauth2 v0.16.0 // indirect - golang.org/x/sync v0.5.0 // indirect + golang.org/x/sync v0.6.0 // indirect golang.org/x/sys v0.16.0 // indirect golang.org/x/term v0.16.0 // indirect golang.org/x/text v0.14.0 // indirect diff --git a/zititest/go.sum b/zititest/go.sum index 77e7fc6ca..ae5fbf901 100644 --- a/zititest/go.sum +++ b/zititest/go.sum @@ -595,6 +595,8 @@ github.com/openziti/edge-api v0.26.8 h1:W1iHwVrb4hVKXWhfN8g/dl1d0RGdus6nOYbhM5ix github.com/openziti/edge-api v0.26.8/go.mod h1:Tm2Qn1BC0zF+F261Y8FTyPtS/UkeUtUBYoT6ueCjVbA= github.com/openziti/fablab v0.5.38 h1:G8ieax/d4LGeRPuMT2XHTOc18jtZTaXtPmjA+5CVO3U= github.com/openziti/fablab v0.5.38/go.mod h1:LstfQixYgv82aUBR8ranX2Hc9KHohFC1G5/AeaeTMwg= +github.com/openziti/fablab v0.5.42 
h1:vENJKfEba2T4sSLwlKDL/IzBYfY8iHnhc4umf6IESiY= +github.com/openziti/fablab v0.5.42/go.mod h1:HDT06y1QX8kO8ZQrgHvZmJsvc8iRybESGtlDLDII4ks= github.com/openziti/foundation/v2 v2.0.36 h1:ogEIvsWur8/9mUzf9NOB4hRUyx372Uy6AmnHRcurIkY= github.com/openziti/foundation/v2 v2.0.36/go.mod h1:MdK2oAJSwo7iCfvVdG16ZGz47qP7nG97ovnqEdXW2kQ= github.com/openziti/identity v1.0.69 h1:wNgQomnv8ar2S1wge9jQK1jpqE2virOKKG8GyfTiHMQ= @@ -1025,6 +1027,8 @@ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180606202747-9527bec2660b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= diff --git a/zititest/models/links-test/main.go b/zititest/models/links-test/main.go index a59fa8c76..c08f1b3e6 100644 --- a/zititest/models/links-test/main.go +++ b/zititest/models/links-test/main.go @@ -1,9 +1,24 @@ +/* + Copyright NetFoundry Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + package main import ( "embed" _ "embed" - "fmt" "github.com/openziti/fablab" "github.com/openziti/fablab/kernel/lib/actions" "github.com/openziti/fablab/kernel/lib/actions/component" @@ -217,7 +232,7 @@ var m = &model.Model{ workflow.AddAction(edge.Login("#ctrl1")) workflow.AddAction(component.StopInParallel(models.RouterTag, 50)) - workflow.AddAction(edge.InitEdgeRouters(models.RouterTag, 2)) + workflow.AddAction(edge.InitEdgeRouters(models.RouterTag, 10)) return workflow }), @@ -229,6 +244,10 @@ var m = &model.Model{ "login2": model.Bind(edge.Login("#ctrl2")), "login3": model.Bind(edge.Login("#ctrl3")), "sowChaos": model.Bind(model.ActionFunc(sowChaos)), + "validateUp": model.Bind(model.ActionFunc(func(run model.Run) error { + return chaos.ValidateUp(run, "*", 50, 15*time.Second) + })), + "validateLinks": model.Bind(model.ActionFunc(validateLinks)), }, Infrastructure: model.Stages{ @@ -252,22 +271,8 @@ var m = &model.Model{ }, } -func sowChaos(run model.Run) error { - controllers, err := chaos.SelectRandom(run, ".ctrl", chaos.RandomOfTotal()) - if err != nil { - return err - } - routers, err := chaos.SelectRandom(run, ".router", chaos.Percentage(15)) - if err != nil { - return err - } - toRestart := append(routers, controllers...) - fmt.Printf("restarting %v controllers and %v routers\n", len(controllers), len(routers)) - return chaos.RestartSelected(run, toRestart, 50) -} - func main() { - m.AddActivationActions("stop", "bootstrap") + m.AddActivationActions("bootstrap") model.AddBootstrapExtension(binding.AwsCredentialsLoader) model.AddBootstrapExtension(aws_ssh_key.KeyManager) diff --git a/zititest/models/links-test/validation.go b/zititest/models/links-test/validation.go new file mode 100644 index 000000000..9f683cfe7 --- /dev/null +++ b/zititest/models/links-test/validation.go @@ -0,0 +1,204 @@ +/* + Copyright NetFoundry Inc. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package main + +import ( + "context" + "fmt" + "github.com/michaelquigley/pfxlog" + "github.com/openziti/channel/v2" + "github.com/openziti/channel/v2/protobufs" + "github.com/openziti/fablab/kernel/model" + "github.com/openziti/ziti/common/pb/mgmt_pb" + "github.com/openziti/ziti/controller/rest_client/link" + "github.com/openziti/ziti/zititest/zitilab/chaos" + "github.com/openziti/ziti/zititest/zitilab/zitirest" + "google.golang.org/protobuf/proto" + "time" +) + +func sowChaos(run model.Run) error { + controllers, err := chaos.SelectRandom(run, ".ctrl", chaos.RandomOfTotal()) + if err != nil { + return err + } + routers, err := chaos.SelectRandom(run, ".router", chaos.Percentage(15)) + if err != nil { + return err + } + toRestart := append(routers, controllers...) 
+	fmt.Printf("restarting %v controllers and %v routers\n", len(controllers), len(routers))
+	return chaos.RestartSelected(run, toRestart, 50)
+}
+
+func validateLinks(run model.Run) error { // validates every ".ctrl" component concurrently; returns the first non-nil error received
+	ctrls := run.GetModel().SelectComponents(".ctrl")
+	errC := make(chan error, len(ctrls)) // buffered so workers never block after an early return below
+	deadline := time.Now().Add(15 * time.Minute)
+	for _, ctrl := range ctrls {
+		ctrlComponent := ctrl
+		go validateLinksForCtrlWithChan(ctrlComponent, deadline, errC)
+	}
+
+	for i := 0; i < len(ctrls); i++ {
+		err := <-errC
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func validateLinksForCtrlWithChan(c *model.Component, deadline time.Time, errC chan<- error) { // goroutine adapter: forwards the result to errC
+	errC <- validateLinksForCtrl(c, deadline)
+}
+
+func validateLinksForCtrl(c *model.Component, deadline time.Time) error { // polls the link count until it is 79800 or deadline passes, then runs validateRouterLinks
+	username := c.MustStringVariable("credentials.edge.username")
+	password := c.MustStringVariable("credentials.edge.password")
+	edgeApiBaseUrl := c.Host.PublicIp + ":1280"
+
+	clients, err := zitirest.NewManagementClients(edgeApiBaseUrl)
+	if err != nil {
+		return err
+	}
+	if err = clients.Authenticate(username, password); err != nil {
+		return err
+	}
+
+	allLinksPresent := false
+	start := time.Now()
+
+	logger := pfxlog.Logger().WithField("ctrl", c.Id)
+	var lastLog time.Time // zero value, so the first iteration always logs
+	for time.Now().Before(deadline) && !allLinksPresent {
+		linkCount, err := getLinkCount(clients)
+		if err != nil {
+			return fmt.Errorf("unable to get link count from controller %v: %w", c.Id, err) // returning nil here would report success without validating anything
+		}
+		if linkCount == 79800 {
+			allLinksPresent = true
+		} else {
+			time.Sleep(5 * time.Second)
+		}
+		if time.Since(lastLog) > time.Minute {
+			logger.Infof("current link count: %v, elapsed time: %v", linkCount, time.Since(start))
+			lastLog = time.Now()
+		}
+	}
+
+	if allLinksPresent {
+		logger.Infof("all links present, elapsed time: %v", time.Since(start))
+	} else {
+		return fmt.Errorf("fail to reach expected link count of 79800 on controller %v", c.Id)
+	}
+
+	return validateRouterLinks(c.Id, clients)
+}
+
+func getLinkCount(clients *zitirest.Clients) (int64, error) { // returns the total link count reported by the fabric ListLinks endpoint
+	ctx, cancelF := 
context.WithTimeout(context.Background(), 15*time.Second)
+	defer cancelF()
+
+	filter := "limit 1" // one row is enough; only the pagination total is read
+	result, err := clients.Fabric.Link.ListLinks(&link.ListLinksParams{
+		Filter:  &filter,
+		Context: ctx,
+	})
+
+	if err != nil {
+		return 0, err
+	}
+	linkCount := *result.Payload.Meta.Pagination.TotalCount
+	return linkCount, nil
+}
+
+func validateRouterLinks(id string, clients *zitirest.Clients) error { // asks the controller to validate router links over a websocket mgmt channel; fails on the first invalid router or link
+	logger := pfxlog.Logger().WithField("ctrl", id)
+
+	closeNotify := make(chan struct{})
+	eventNotify := make(chan *mgmt_pb.RouterLinkDetails, 1)
+
+	handleLinkResults := func(msg *channel.Message, _ channel.Channel) {
+		detail := &mgmt_pb.RouterLinkDetails{}
+		if err := proto.Unmarshal(msg.Body, detail); err != nil {
+			pfxlog.Logger().WithError(err).Error("unable to unmarshal router link details")
+			return
+		}
+		eventNotify <- detail
+	}
+
+	bindHandler := func(binding channel.Binding) error {
+		binding.AddReceiveHandlerF(int32(mgmt_pb.ContentType_ValidateRouterLinksResultType), handleLinkResults)
+		binding.AddCloseHandler(channel.CloseHandlerF(func(ch channel.Channel) {
+			close(closeNotify)
+		}))
+		return nil
+	}
+
+	ch, err := clients.NewWsMgmtChannel(channel.BindHandlerF(bindHandler))
+	if err != nil {
+		return err
+	}
+	defer func() { _ = ch.Close() }() // release the websocket on every return path; the close error is not actionable here
+	request := &mgmt_pb.ValidateRouterLinksRequest{
+		Filter: "limit none",
+	}
+	responseMsg, err := protobufs.MarshalTyped(request).WithTimeout(10 * time.Second).SendForReply(ch)
+
+	response := &mgmt_pb.ValidateRouterLinksResponse{}
+	if err = protobufs.TypedResponse(response).Unmarshall(responseMsg, err); err != nil { // the send error is passed through Unmarshall and returned from here
+		return err
+	}
+
+	if !response.Success {
+		return fmt.Errorf("failed to start link validation: %s", response.Message)
+	}
+
+	logger.Infof("started validation of %v routers", response.RouterCount)
+
+	expected := response.RouterCount
+
+	for expected > 0 {
+		select {
+		case <-closeNotify:
+			fmt.Printf("channel closed, exiting")
+			return fmt.Errorf("mgmt channel closed with %v routers still unvalidated on controller %s", expected, id) // a premature close must not count as success
+		case routerDetail := <-eventNotify:
+			result := "validation successful"
+			if 
!routerDetail.ValidateSuccess {
+				return fmt.Errorf("error: unable to validate on controller %s (%s)", id, routerDetail.Message) // args ordered to match the format: controller id first, then the reason
+			}
+
+			for _, linkDetail := range routerDetail.LinkDetails {
+
+				if !linkDetail.IsValid {
+					fmt.Printf("routerId: %s, routerName: %v, links: %v, %s\n",
+						routerDetail.RouterId, routerDetail.RouterName, len(routerDetail.LinkDetails), result)
+					fmt.Printf("\tlinkId: %s, destConnected: %v, ctrlState: %v, routerState: %v, dest: %v, dialed: %v \n",
+						linkDetail.LinkId, linkDetail.DestConnected, linkDetail.CtrlState, linkDetail.RouterState.String(),
+						linkDetail.DestRouterId, linkDetail.Dialed)
+					return fmt.Errorf("router link validation error on %s", id)
+				}
+			}
+			expected-- // one router fully checked
+		}
+	}
+	logger.Infof("link validation of %v routers successful", response.RouterCount)
+	return nil
+}
diff --git a/zititest/zitilab/chaos/chaos.go b/zititest/zitilab/chaos/chaos.go
index 524a8559d..a9bc75ff0 100644
--- a/zititest/zitilab/chaos/chaos.go
+++ b/zititest/zitilab/chaos/chaos.go
@@ -20,6 +20,7 @@ import (
 	"fmt"
 	"github.com/openziti/fablab/kernel/model"
 	"math/rand"
+	"time"
 )
 
 func StaticNumber(val int) func(int) int {
@@ -73,3 +74,22 @@ func RestartSelected(run model.Run, list []*model.Component, concurrency int) er
 		return fmt.Errorf("component %v isn't of ServerComponent type, is of type %T", c, c.Type)
 	})
 }
+
+func ValidateUp(run model.Run, spec string, concurrency int, timeout time.Duration) error { // ValidateUp polls each component matching spec once a second until IsRunning reports true or timeout elapses.
+	start := time.Now() // shared by all components: timeout is measured from this call, not per component
+	return run.GetModel().ForEachComponent(spec, concurrency, func(c *model.Component) error {
+		for {
+			isRunning, err := c.IsRunning(run)
+			if err != nil {
+				return err
+			}
+			if isRunning {
+				return nil
+			}
+			if time.Since(start) > timeout {
+				return fmt.Errorf("timed out waiting for component %s to be running", c.Id)
+			}
+			time.Sleep(time.Second)
+		}
+	})
+}
diff --git a/zititest/zitilab/zitirest/clients.go b/zititest/zitilab/zitirest/clients.go
new file mode 100644
index 000000000..b40c9446d
--- /dev/null
+++ 
b/zititest/zitilab/zitirest/clients.go
@@ -0,0 +1,211 @@
+/*
+	Copyright NetFoundry Inc.
+
+	Licensed under the Apache License, Version 2.0 (the "License");
+	you may not use this file except in compliance with the License.
+	You may obtain a copy of the License at
+
+	https://www.apache.org/licenses/LICENSE-2.0
+
+	Unless required by applicable law or agreed to in writing, software
+	distributed under the License is distributed on an "AS IS" BASIS,
+	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	See the License for the specific language governing permissions and
+	limitations under the License.
+*/
+
+package zitirest
+
+import (
+	"context"
+	"crypto/tls"
+	"crypto/x509"
+	"encoding/json"
+	"fmt"
+	httptransport "github.com/go-openapi/runtime/client"
+	"github.com/gorilla/websocket"
+	"github.com/michaelquigley/pfxlog"
+	"github.com/openziti/channel/v2"
+	"github.com/openziti/channel/v2/websockets"
+	"github.com/openziti/edge-api/rest_management_api_client"
+	"github.com/openziti/edge-api/rest_management_api_client/authentication"
+	"github.com/openziti/edge-api/rest_model"
+	"github.com/openziti/identity"
+	"github.com/openziti/ziti/controller/env"
+	fabric_rest_client "github.com/openziti/ziti/controller/rest_client"
+	"github.com/openziti/ziti/ziti/util"
+	"github.com/pkg/errors"
+	"io"
+	"net"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+)
+
+type Clients struct { // Clients bundles the fabric and edge management REST clients for one controller, plus the state needed to authenticate them.
+	host           string // base URL; NewManagementClients and LoadWellKnownCerts prepend "https://" when it lacks an "http" prefix
+	wellKnownCerts []byte // PEM data stored by LoadWellKnownCerts and consumed by NewTlsClientConfig
+	token          string // session token stored by Authenticate
+	Fabric         *fabric_rest_client.ZitiFabric
+	Edge           *rest_management_api_client.ZitiEdgeManagement
+
+	FabricRuntime *httptransport.Runtime
+	EdgeRuntime   *httptransport.Runtime
+}
+
+func (self *Clients) NewTlsClientConfig() *tls.Config { // NewTlsClientConfig returns a tls.Config whose only root CAs are the stored well-known certs.
+	rootCaPool := x509.NewCertPool()
+	rootCaPool.AppendCertsFromPEM(self.wellKnownCerts) // the boolean result is not checked, so unparsable PEM yields an empty pool
+
+	return &tls.Config{
+		RootCAs: rootCaPool,
+	}
+}
+
+func (self *Clients) Authenticate(user, password string) error { // Authenticate logs in with the "password" method and installs the resulting token on both runtimes.
+	ctx, cancelF := context.WithTimeout(context.Background(), 15*time.Second)
+	
defer cancelF()
+
+	result, err := self.Edge.Authentication.Authenticate(&authentication.AuthenticateParams{
+		Auth: &rest_model.Authenticate{
+			Username: rest_model.Username(user),
+			Password: rest_model.Password(password),
+		},
+		Method:  "password",
+		Context: ctx,
+	})
+	if err != nil {
+		var authErr util.ApiErrorPayload
+		if errors.As(err, &authErr) {
+			out, _ := json.Marshal(authErr) // best-effort diagnostic; a marshal failure just prints an empty line
+			fmt.Println(string(out))
+		}
+		return err
+	}
+	self.token = *result.Payload.Data.Token
+	pfxlog.Logger().WithField("token", self.token).Info("authenticated successfully") // NOTE(review): writes the session token to the log; confirm this is acceptable outside test tooling
+	self.FabricRuntime.DefaultAuthentication = &util.EdgeManagementAuth{
+		Token: self.token,
+	}
+
+	self.EdgeRuntime.DefaultAuthentication = &util.EdgeManagementAuth{
+		Token: self.token,
+	}
+
+	return nil
+}
+
+func (self *Clients) NewWsMgmtChannel(bindHandler channel.BindHandler) (channel.Channel, error) { // NewWsMgmtChannel dials the fabric websocket API with the stored session token and wraps the connection in a channel.
+	log := pfxlog.Logger()
+
+	baseUrl := self.host + "/" + string(util.FabricAPI)
+	wsUrl := "ws" + strings.TrimPrefix(baseUrl, "http") + "/v1/ws-api" // swap only the scheme (http->ws, https->wss); ReplaceAll would also rewrite "http" inside the host or path
+	dialer := &websocket.Dialer{
+		Proxy:            http.ProxyFromEnvironment,
+		TLSClientConfig:  self.NewTlsClientConfig(),
+		HandshakeTimeout: 10 * time.Second,
+	}
+
+	result := http.Header{}
+	result.Set(env.ZitiSession, self.token)
+
+	conn, resp, err := dialer.Dial(wsUrl, result)
+	if err != nil {
+		if resp != nil {
+			if body, rerr := io.ReadAll(resp.Body); rerr == nil {
+				log.WithError(err).Errorf("response body [%v]", string(body))
+			}
+		} else {
+			log.WithError(err).Error("websocket dial returned error")
+		}
+		return nil, err
+	}
+
+	id := &identity.TokenId{Token: "mgmt"}
+	underlayFactory := websockets.NewUnderlayFactory(id, conn, nil)
+
+	ch, err := channel.NewChannel("mgmt", underlayFactory, bindHandler, nil)
+	if err != nil {
+		return nil, err
+	}
+	return ch, nil
+}
+
+func (self *Clients) LoadWellKnownCerts() error { // LoadWellKnownCerts fetches the controller's well-known certs and stores them only if util.AreCertsTrusted accepts them.
+	if !strings.HasPrefix(self.host, "http") {
+		self.host = "https://" + self.host
+	}
+
+	wellKnownCerts, _, err := 
util.GetWellKnownCerts(self.host)
+	if err != nil {
+		return errors.Wrapf(err, "unable to retrieve server certificate authority from %v", self.host)
+	}
+
+	certsTrusted, err := util.AreCertsTrusted(self.host, wellKnownCerts)
+	if err != nil {
+		return errors.Wrapf(err, "unable to verify well known certs for host %v", self.host)
+	}
+
+	if !certsTrusted {
+		return errors.New("server supplied certs not trusted by server, unable to continue")
+	}
+
+	self.wellKnownCerts = wellKnownCerts // stored only after the trust check passes
+	return nil
+}
+
+func (self *Clients) newRestClientTransport() *http.Client { // builds the HTTP client shared by both REST runtimes; TLS trusts only the stored well-known certs
+	httpClientTransport := &http.Transport{
+		Proxy: http.ProxyFromEnvironment,
+		DialContext: (&net.Dialer{
+			Timeout:   10 * time.Second,
+			KeepAlive: 10 * time.Second,
+		}).DialContext,
+
+		ForceAttemptHTTP2:     true,
+		MaxIdleConns:          10,
+		IdleConnTimeout:       10 * time.Second,
+		TLSHandshakeTimeout:   10 * time.Second,
+		ExpectContinueTimeout: 1 * time.Second,
+		TLSClientConfig:       self.NewTlsClientConfig(),
+	}
+
+	httpClient := &http.Client{
+		Transport: httpClientTransport,
+		Timeout:   10 * time.Second, // whole-request cap applied to every REST call made through this client
+	}
+	return httpClient
+}
+
+func NewManagementClients(host string) (*Clients, error) { // NewManagementClients loads the controller's well-known certs and builds unauthenticated fabric and edge clients; call Authenticate before use.
+	if !strings.HasPrefix(host, "http") {
+		host = "https://" + host
+	}
+
+	clients := &Clients{
+		host: host,
+	}
+
+	if err := clients.LoadWellKnownCerts(); err != nil {
+		return nil, err
+	}
+
+	httpClient := clients.newRestClientTransport() // must follow LoadWellKnownCerts: the TLS config is built from the stored certs
+
+	parsedHost, err := url.Parse(host)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to parse host URL '%v'", host)
+	}
+
+	clients.FabricRuntime = httptransport.NewWithClient(parsedHost.Host,
+		fabric_rest_client.DefaultBasePath, fabric_rest_client.DefaultSchemes, httpClient)
+
+	clients.EdgeRuntime = httptransport.NewWithClient(parsedHost.Host,
+		rest_management_api_client.DefaultBasePath, rest_management_api_client.DefaultSchemes, httpClient)
+
+	clients.Fabric = fabric_rest_client.New(clients.FabricRuntime, nil)
+	clients.Edge = 
rest_management_api_client.New(clients.EdgeRuntime, nil) + + return clients, nil +}