Skip to content

Commit

Permalink
pd: setup more helper methods we'll use
Browse files Browse the repository at this point in the history
  • Loading branch information
adamdecaf committed Oct 4, 2024
1 parent fe0b901 commit b7d3689
Show file tree
Hide file tree
Showing 8 changed files with 225 additions and 5 deletions.
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ module github.com/adamdecaf/deadcheck

go 1.23.2

replace github.com/PagerDuty/go-pagerduty v1.8.0 => github.com/adamdecaf/go-pagerduty v0.0.0-20241004210059-8b8b6c17a79a

require (
github.com/PagerDuty/go-pagerduty v1.8.0
github.com/gorilla/mux v1.8.1
Expand Down Expand Up @@ -31,6 +33,7 @@ require (
github.com/subosito/gotenv v1.6.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 // indirect
golang.org/x/oauth2 v0.23.0 // indirect
golang.org/x/sys v0.25.0 // indirect
golang.org/x/text v0.18.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
Expand Down
6 changes: 4 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
github.com/PagerDuty/go-pagerduty v1.8.0 h1:MTFqTffIcAervB83U7Bx6HERzLbyaSPL/+oxH3zyluI=
github.com/PagerDuty/go-pagerduty v1.8.0/go.mod h1:nzIeAqyFSJAFkjWKvMzug0JtwDg+V+UoCWjFrfFH5mI=
github.com/adamdecaf/go-pagerduty v0.0.0-20241004210059-8b8b6c17a79a h1:5ZBCLAwwKWdxQJ1ayipucLXmAC6eoP5Zi1El2iRMfQY=
github.com/adamdecaf/go-pagerduty v0.0.0-20241004210059-8b8b6c17a79a/go.mod h1:ilimTqwHSBjmvKeYA/yayDBZvzf/CX4Pwa9Qbhekzok=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down Expand Up @@ -62,6 +62,8 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk=
golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY=
golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs=
golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224=
Expand Down
4 changes: 1 addition & 3 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,7 @@ type PagerDuty struct {
EscalationPolicy string `yaml:"escalationPolicy"`

// From is an email address of a valid user associated with the account making the request
// From string `yaml:"from"`

// TODO(adam): Read 'serviceID' to lookup service, rather than by name
From string `yaml:"from"`

RoutingKey string
}
21 changes: 21 additions & 0 deletions internal/pd/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ import (
"github.com/stretchr/testify/require"
)

// adamUserID is presumably the PagerDuty user ID of the maintainer's test
// account (inferred from the name; verify before relying on it).
var adamUserID = "P1F29KL"

// defaultEscalationPolicy is the ID of the escalation policy the integration
// tests look up when they need an existing policy to attach incidents to.
var defaultEscalationPolicy = "POHSZE0"

func newTestClient(t *testing.T) *client {
t.Helper()

Expand All @@ -44,9 +50,15 @@ func newTestClient(t *testing.T) *client {
t.Skip("no DEADCHECK_ESCALATION_POLICY specified, skipping test...")
}

from := os.Getenv("DEADCHECK_PAGERDUTY_FROM")
if from == "" {
t.Skip("no DEADCHECK_PAGERDUTY_FROM specified, skipping test...")
}

cc, err := NewClient(&config.PagerDuty{
ApiKey: apiKey,
EscalationPolicy: escPolicy,
From: from,
RoutingKey: os.Getenv("DEADCHECK_ROUTING_KEY"),
})
require.NoError(t, err)
Expand All @@ -57,6 +69,15 @@ func newTestClient(t *testing.T) *client {
return cl
}

// skipInCI skips the calling test when the GITHUB_ACTIONS environment variable
// is set to a non-empty value. Outside of that environment it is a no-op.
func skipInCI(t *testing.T) {
	t.Helper()

	if os.Getenv("GITHUB_ACTIONS") == "" {
		return
	}
	t.Skip("not running test in GITHUB_ACTIONS")
}

func TestClient(t *testing.T) {
pdc := newTestClient(t)
require.NoError(t, pdc.ping())
Expand Down
59 changes: 59 additions & 0 deletions internal/pd/escalation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package pd

import (
"context"
"fmt"
"strings"

"github.com/PagerDuty/go-pagerduty"
)

// escalationPolicySetup describes the escalation policy that
// findEscalationPolicy should locate or, failing that, create.
type escalationPolicySetup struct {
	// id and name identify an existing policy; name is also given to a
	// newly created policy.
	id, name string

	// userIDs lists the users targeted by the escalation rules of a newly
	// created policy (one rule per user).
	userIDs []string
}

// findEscalationPolicy returns the escalation policy described by setup,
// creating one when no existing policy matches.
//
// Every page of the account's escalation policies is inspected. A policy whose
// ID equals setup.id is returned as soon as it is seen. Otherwise the first
// policy whose name equals setup.name (case-insensitively) is returned. Empty
// id or name values never match anything.
//
// When nothing matches, a policy named setup.name is created with one
// escalation rule per entry in setup.userIDs. An error is returned instead if
// setup.name is empty, because there would be no name to create the policy
// under.
func (c *client) findEscalationPolicy(ctx context.Context, setup escalationPolicySetup) (*pagerduty.EscalationPolicy, error) {
	opts := pagerduty.ListEscalationPoliciesOptions{
		Limit: 100,
	}

	// byName remembers the first name match so a later exact ID match can
	// still take precedence over it.
	var byName *pagerduty.EscalationPolicy

	for {
		eps, err := c.underlying.ListEscalationPoliciesWithContext(ctx, opts)
		if err != nil {
			return nil, fmt.Errorf("listing escalation policies: %w", err)
		}
		for i := range eps.EscalationPolicies {
			ep := &eps.EscalationPolicies[i]
			if setup.id != "" && ep.ID == setup.id {
				return ep, nil
			}
			if byName == nil && setup.name != "" && strings.EqualFold(ep.Name, setup.name) {
				byName = ep
			}
		}

		// Without an ID to look for, the first name match is final.
		if byName != nil && setup.id == "" {
			return byName, nil
		}

		// Stop when the API reports no further pages; the empty-page check
		// protects against looping forever on an unexpected response.
		if !eps.More || len(eps.EscalationPolicies) == 0 {
			break
		}
		opts.Offset += uint(len(eps.EscalationPolicies))
	}
	if byName != nil {
		return byName, nil
	}

	// Can't find it so create one
	if setup.name == "" {
		return nil, fmt.Errorf("creating escalation policy: no policy found with id %q and no name provided", setup.id)
	}
	req := pagerduty.EscalationPolicy{
		Name:        setup.name,
		Description: "managed by deadcheck, DO NOT MODIFY",
	}
	// Add one escalation rule per user, each targeting only that user.
	for _, userID := range setup.userIDs {
		rule := pagerduty.EscalationRule{
			Delay: 1, // NOTE(review): go-pagerduty documents this as the escalation delay in minutes — confirm
			Targets: []pagerduty.APIObject{
				{
					Type: "user_reference",
					ID:   userID,
				},
			},
		}
		req.EscalationRules = append(req.EscalationRules, rule)
	}

	created, err := c.underlying.CreateEscalationPolicyWithContext(ctx, req)
	if err != nil {
		return nil, fmt.Errorf("creating escalation policy: %w", err)
	}
	return created, nil
}
81 changes: 81 additions & 0 deletions internal/pd/incidents.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package pd

import (
"context"
"fmt"
"time"

"github.com/PagerDuty/go-pagerduty"
)

// setupInitialIncident creates the low-urgency incident on service that
// deadcheck keeps open and updates as check-ins arrive. The incident is
// assigned to the escalation policy ep and is created on behalf of the
// configured From user.
//
// It returns an error if service or ep is nil, or if the PagerDuty API call
// fails.
func (c *client) setupInitialIncident(ctx context.Context, service *pagerduty.Service, ep *pagerduty.EscalationPolicy) (*pagerduty.Incident, error) {
	if service == nil {
		return nil, fmt.Errorf("creating incident: nil service")
	}
	if ep == nil {
		return nil, fmt.Errorf("creating incident: nil escalation policy")
	}

	req := &pagerduty.CreateIncidentOptions{
		Title: fmt.Sprintf("Creating ongoing incident for %s", service.Name),
		Body: &pagerduty.APIDetails{
			Details: "This incident will be active and used by deadcheck to alert you when check-ins do not occur as expected. Deadcheck will update this incident to reflect the current status of check-in.",
		},
		Urgency: "low",
		// NOTE(review): PagerDuty's API reference lists "escalation_policy_reference"
		// and "service_reference" as the reference types — confirm the short
		// forms below are accepted.
		EscalationPolicy: &pagerduty.APIReference{
			ID:   ep.ID,
			Type: "escalation_policy",
		},
		Service: &pagerduty.APIReference{
			ID:   service.ID,
			Type: "service",
		},
	}
	inc, err := c.underlying.CreateIncidentWithContext(ctx, c.pdConfig.From, req)
	if err != nil {
		return nil, fmt.Errorf("creating incident: %w", err)
	}
	return inc, nil
}

// snoozeIncident acknowledges inc, snoozes it for the given duration, and then
// rewrites its title and urgency so that anyone who sees it later knows which
// check-in was missed.
//
// snooze is truncated to whole seconds and must be at least one second. An
// error is returned if inc or service is nil, if snooze is too short, or if
// any of the three PagerDuty API calls fails. Later steps are not attempted
// after a failure.
func (c *client) snoozeIncident(ctx context.Context, inc *pagerduty.Incident, service *pagerduty.Service, snooze time.Duration) error {
	if inc == nil {
		return fmt.Errorf("snoozing incident: nil incident")
	}
	if service == nil {
		return fmt.Errorf("snoozing incident: nil service")
	}
	// Converting a zero or negative duration to uint would send a nonsensical
	// snooze length, so reject it up front.
	if snooze < time.Second {
		return fmt.Errorf("snoozing incident: duration %v must be at least one second", snooze)
	}
	// Capture the ID once so every step acts on the caller's incident
	// regardless of what the API responses contain.
	incidentID := inc.ID
	seconds := uint(snooze / time.Second)

	// Ack the incident
	ack := []pagerduty.ManageIncidentsOptions{
		{
			ID:     incidentID,
			Status: "acknowledged",
		},
	}
	if _, err := c.underlying.ManageIncidentsWithContext(ctx, c.pdConfig.From, ack); err != nil {
		return fmt.Errorf("acknowledging incident: %w", err)
	}

	// Snooze the incident
	if _, err := c.underlying.SnoozeIncidentWithContext(ctx, incidentID, c.pdConfig.From, seconds); err != nil {
		return fmt.Errorf("snoozing incident: %w", err)
	}

	// Update the incident details for humans to read
	expectedCheckin := time.Now().In(time.UTC).Add(snooze).Format("2006-01-02 15:04 UTC")
	update := []pagerduty.ManageIncidentsOptions{
		{
			ID:      incidentID,
			Title:   fmt.Sprintf("%s did not check-in, expected check-in at %v", service.Name, expectedCheckin),
			Urgency: "high",
		},
	}
	if _, err := c.underlying.ManageIncidentsWithContext(ctx, c.pdConfig.From, update); err != nil {
		return fmt.Errorf("updating incident after snooze: %w", err)
	}
	return nil
}

// resolveIncident marks inc as resolved in PagerDuty, acting as the configured
// From user. Any API failure is returned wrapped with context.
func (c *client) resolveIncident(ctx context.Context, inc *pagerduty.Incident) error {
	resolved := pagerduty.ManageIncidentsOptions{
		ID:     inc.ID,
		Status: "resolved",
	}
	changes := []pagerduty.ManageIncidentsOptions{resolved}

	if _, err := c.underlying.ManageIncidentsWithContext(ctx, c.pdConfig.From, changes); err != nil {
		return fmt.Errorf("resolving incident: %w", err)
	}
	return nil
}
2 changes: 2 additions & 0 deletions internal/pd/maintenance.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,8 @@ func (c *client) UpdateMaintenanceWindow(ctx context.Context, maintWindow *pager
maintWindow.StartTime = start.Format(maintWindowTimeFormat)
maintWindow.EndTime = end.Format(maintWindowTimeFormat)

// TODO(adam): need to calculate the next start/end date for the MW

_, err := c.underlying.UpdateMaintenanceWindowWithContext(ctx, *maintWindow)
return err
}
54 changes: 54 additions & 0 deletions internal/pd/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,60 @@ func TestService__Setup(t *testing.T) {
require.Equal(t, "17:32", end.In(loc).Format("15:04"))
}

// TODO(adam): V2Event triggers created during a MW are ignored by PD, so how are we going to prompt it to alert right as a MW ends?
// TODO(adam): create an incident during the MW? Can we set a future dated start time?

// Can we create an incident during the MW
// Then snooze it for the MW duration?

// Do we even need MW windows?
// SnoozeIncidentWithContext(ctx context.Context, id string, duration uint) (*Incident, error)
// can check .PendingActions
//
// Create the incident with an empty EscalationPolicy
// Then snooze it for the MW time, and reassign to escalation policy?
//
// On check-in snooze again for however long

// TestService_SnoozedIncident exercises the incident lifecycle against a real
// PagerDuty account: it sets up a service, opens an incident on the default
// escalation policy, snoozes it for an hour, and resolves it.
//
// The incident is resolved from a cleanup hook so that a failed assertion
// part-way through does not leave a live incident behind.
func TestService_SnoozedIncident(t *testing.T) {
	skipInCI(t) // This test creates real alerts, so don't run it in CI

	ctx := context.Background()

	conf := config.Check{
		ID:   base.ID(),
		Name: makeServiceName(t),
	}
	pdc := newTestClient(t)

	service, err := pdc.Setup(ctx, conf)
	require.NoError(t, err)
	t.Cleanup(func() {
		pdc.deleteService(service)
	})

	t.Logf("setup service %v named %v", service.ID, service.Name)

	// Look up the default escalation policy by ID
	ep, err := pdc.findEscalationPolicy(ctx, escalationPolicySetup{
		id: defaultEscalationPolicy,
	})
	require.NoError(t, err)

	// Create an incident
	inc, err := pdc.setupInitialIncident(ctx, service, ep)
	require.NoError(t, err)

	// Resolve the incident even when a later step fails. Cleanups run in
	// reverse registration order, so this happens before the service is deleted.
	t.Cleanup(func() {
		if err := pdc.resolveIncident(ctx, inc); err != nil {
			t.Errorf("resolving incident %v: %v", inc.ID, err)
		}
	})

	t.Logf("created incident %v escalating to %v", inc.ID, ep.Name)

	err = pdc.snoozeIncident(ctx, inc, service, time.Hour)
	require.NoError(t, err)
}

func makeServiceName(t *testing.T) string {
return fmt.Sprintf("%s_%d", t.Name(), time.Now().In(time.UTC).Unix())
}

0 comments on commit b7d3689

Please sign in to comment.