Skip to content

Commit

Permalink
feat(oohelperd): first stab at adding more metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
bassosimone committed Dec 21, 2023
1 parent fa486a4 commit 5cdfb83
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 1 deletion.
7 changes: 7 additions & 0 deletions internal/oohelperd/dns.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,18 @@ func dnsDo(ctx context.Context, config *dnsConfig) {
reso := config.NewResolver(config.Logger)
defer reso.CloseIdleConnections()

// take the time before running this micro-measurement
started := time.Now()

// perform and log the actual DNS lookup
ol := logx.NewOperationLogger(config.Logger, "DNSLookup %s", config.Domain)
addrs, err := reso.LookupHost(ctx, config.Domain)
ol.Stop(err)

// publish the time required for running this micro-measurement
elapsed := time.Since(started)
metricDNSTaskDurationSeconds.Observe(elapsed.Seconds())

// make sure we return an empty slice on failure because this
// is what the legacy TH would have done.
if addrs == nil {
Expand Down
2 changes: 1 addition & 1 deletion internal/oohelperd/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
elapsed := time.Since(started)

// track the time required to produce a response
metricWCTaskDurationSeconds.Observe(float64(elapsed.Seconds()))
metricWCTaskDurationSeconds.Observe(elapsed.Seconds())

// handle the case of fundamental failure
if err != nil {
Expand Down
11 changes: 11 additions & 0 deletions internal/oohelperd/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ func httpDo(ctx context.Context, config *httpConfig) {
ol.Stop(err)
return
}

// The original test helper failed with extra headers while here
// we're implementing (for now?) a more liberal approach.
for k, vs := range config.Headers {
Expand All @@ -85,9 +86,19 @@ func httpDo(ctx context.Context, config *httpConfig) {
}
}
}

clnt := config.NewClient(config.Logger)
defer clnt.CloseIdleConnections()

// take the time before starting the HTTP task
t0 := time.Now()

resp, err := clnt.Do(req)

// publish the elapsed time required for measuring HTTP
elapsed := time.Since(t0)
metricHTTPTaskDurationSeconds.Observe(elapsed.Seconds())

if err != nil {
// fix: emit -1 like the old test helper does
config.Out <- ctrlHTTPResponse{
Expand Down
68 changes: 68 additions & 0 deletions internal/oohelperd/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,72 @@ var (
0.99: 0.001, // 0.989 <= φ <= 0.991
},
})

// metricDNSTaskDurationSeconds summarizes the duration of the DNS task.
metricDNSTaskDurationSeconds = promauto.NewSummary(prometheus.SummaryOpts{
Name: "oohelperd_dnstask_duration_seconds",
Help: "Summarizes the time to complete the DNS measurement task (in seconds)",
// See https://grafana.com/blog/2022/03/01/how-summary-metrics-work-in-prometheus/
//
// TODO(bassosimone,FedericoCeratto): investigate whether using
// a shorter-than-10m observation interval is better for us
Objectives: map[float64]float64{
0.25: 0.010, // 0.240 <= φ <= 0.260
0.5: 0.010, // 0.490 <= φ <= 0.510
0.75: 0.010, // 0.740 <= φ <= 0.760
0.9: 0.010, // 0.899 <= φ <= 0.901
0.99: 0.001, // 0.989 <= φ <= 0.991
},
})

// metricTCPTaskDurationSeconds summarizes the duration of the TCP task.
metricTCPTaskDurationSeconds = promauto.NewSummary(prometheus.SummaryOpts{
Name: "oohelperd_tcptask_duration_seconds",
Help: "Summarizes the time to complete the TCP measurement task (in seconds)",
// See https://grafana.com/blog/2022/03/01/how-summary-metrics-work-in-prometheus/
//
// TODO(bassosimone,FedericoCeratto): investigate whether using
// a shorter-than-10m observation interval is better for us
Objectives: map[float64]float64{
0.25: 0.010, // 0.240 <= φ <= 0.260
0.5: 0.010, // 0.490 <= φ <= 0.510
0.75: 0.010, // 0.740 <= φ <= 0.760
0.9: 0.010, // 0.899 <= φ <= 0.901
0.99: 0.001, // 0.989 <= φ <= 0.991
},
})

// metricTLSTaskDurationSeconds summarizes the duration of the TLS task.
metricTLSTaskDurationSeconds = promauto.NewSummary(prometheus.SummaryOpts{
Name: "oohelperd_tlstask_duration_seconds",
Help: "Summarizes the time to complete the TLS measurement task (in seconds)",
// See https://grafana.com/blog/2022/03/01/how-summary-metrics-work-in-prometheus/
//
// TODO(bassosimone,FedericoCeratto): investigate whether using
// a shorter-than-10m observation interval is better for us
Objectives: map[float64]float64{
0.25: 0.010, // 0.240 <= φ <= 0.260
0.5: 0.010, // 0.490 <= φ <= 0.510
0.75: 0.010, // 0.740 <= φ <= 0.760
0.9: 0.010, // 0.899 <= φ <= 0.901
0.99: 0.001, // 0.989 <= φ <= 0.991
},
})

// metricHTTPTaskDurationSeconds summarizes the duration of the HTTP task.
metricHTTPTaskDurationSeconds = promauto.NewSummary(prometheus.SummaryOpts{
Name: "oohelperd_httptask_duration_seconds",
Help: "Summarizes the time to complete the HTTP measurement task (in seconds)",
// See https://grafana.com/blog/2022/03/01/how-summary-metrics-work-in-prometheus/
//
// TODO(bassosimone,FedericoCeratto): investigate whether using
// a shorter-than-10m observation interval is better for us
Objectives: map[float64]float64{
0.25: 0.010, // 0.240 <= φ <= 0.260
0.5: 0.010, // 0.490 <= φ <= 0.510
0.75: 0.010, // 0.740 <= φ <= 0.760
0.9: 0.010, // 0.899 <= φ <= 0.901
0.99: 0.001, // 0.989 <= φ <= 0.991
},
})
)
33 changes: 33 additions & 0 deletions internal/oohelperd/tcptls.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,16 @@ type tcpTLSConfig struct {

// tcpTLSDo performs the TCP and (possibly) TLS checks.
func tcpTLSDo(ctx context.Context, config *tcpTLSConfig) {

// add an overall timeout for this task
const timeout = 15 * time.Second
ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()

// tell the parent that we're done
defer config.Wg.Done()

// assemble result and arrange for it to be written back
out := &tcpResultPair{
Address: config.Address,
Endpoint: config.Endpoint,
Expand All @@ -82,23 +88,40 @@ func tcpTLSDo(ctx context.Context, config *tcpTLSConfig) {
defer func() {
config.Out <- out
}()

// 1: TCP dial

ol := logx.NewOperationLogger(
config.Logger,
"TCPConnect %s EnableTLS=%v SNI=%s",
config.Endpoint,
config.EnableTLS,
config.URLHostname,
)

dialer := config.NewDialer(config.Logger)
defer dialer.CloseIdleConnections()

// save the time before we start connecting
tcpT0 := time.Now()

conn, err := dialer.DialContext(ctx, "tcp", config.Endpoint)

// publish the time required to connect
tcpElapsed := time.Since(tcpT0)
metricTCPTaskDurationSeconds.Observe(tcpElapsed.Seconds())

out.TCP.Failure = tcpMapFailure(newfailure(err))
out.TCP.Status = err == nil
defer measurexlite.MaybeClose(conn)

if err != nil || !config.EnableTLS {
ol.Stop(err)
return
}

// 2: TLS handshake (if needed)

// See https://github.com/ooni/probe/issues/2413 to understand
// why we're using nil to force netxlite to use the cached
// default Mozilla cert pool.
Expand All @@ -107,8 +130,18 @@ func tcpTLSDo(ctx context.Context, config *tcpTLSConfig) {
RootCAs: nil,
ServerName: config.URLHostname,
}

thx := config.NewTSLHandshaker(config.Logger)

// save time before handshake
tlsT0 := time.Now()

tlsConn, err := thx.Handshake(ctx, conn, tlsConfig)

// publish time required to handshake
tlsElapsed := time.Since(tlsT0)
metricTLSTaskDurationSeconds.Observe(tlsElapsed.Seconds())

ol.Stop(err)
out.TLS = &ctrlTLSResult{
ServerName: config.URLHostname,
Expand Down

0 comments on commit 5cdfb83

Please sign in to comment.