Metrics for test cases #5

Merged 8 commits on Dec 19, 2024

Changes from 5 commits
108 changes: 83 additions & 25 deletions README.md
@@ -7,9 +7,11 @@ This simple CLI, written in Go, sends jUnit metrics to a back-end using [Op
> Inspired by https://github.com/axw/test2otlp, which sends traces and spans for `go test` JSON events as they occur.

## Background

Because jUnit is the de-facto standard for test results across programming languages, this tool consumes the XML files produced by the test runner (or by a tool converting to xUnit format) and sends metrics to one or more open-source or commercial back-ends via OpenTelemetry.
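A jUnit report is plain XML, so consuming it boils down to unmarshalling a few well-known elements. The following is a minimal, self-contained sketch using only the Go standard library; the struct fields cover just a subset of the format (real reports may also carry properties, system-out, and nested suites), and `parseSuite` is a hypothetical helper, not the tool's actual parser:

```go
package main

import (
	"encoding/xml"
	"fmt"
)

// Minimal jUnit structures; real reports may carry more fields
// (properties, system-out, nested suites).
type TestSuite struct {
	XMLName  xml.Name   `xml:"testsuite"`
	Name     string     `xml:"name,attr"`
	Tests    int        `xml:"tests,attr"`
	Failures int        `xml:"failures,attr"`
	Cases    []TestCase `xml:"testcase"`
}

type TestCase struct {
	Name      string   `xml:"name,attr"`
	Classname string   `xml:"classname,attr"`
	Time      float64  `xml:"time,attr"`
	Failure   *Failure `xml:"failure"`
}

type Failure struct {
	Message string `xml:"message,attr"`
}

// parseSuite unmarshals one <testsuite> element from raw XML bytes.
func parseSuite(data []byte) (*TestSuite, error) {
	var s TestSuite
	if err := xml.Unmarshal(data, &s); err != nil {
		return nil, err
	}
	return &s, nil
}

func main() {
	report := []byte(`<testsuite name="demo" tests="2" failures="1">
  <testcase name="ok" classname="pkg.T" time="0.01"/>
  <testcase name="bad" classname="pkg.T" time="0.20">
    <failure message="boom"/>
  </testcase>
</testsuite>`)
	s, err := parseSuite(report)
	if err != nil {
		panic(err)
	}
	fmt.Println(s.Name, s.Tests, s.Failures, len(s.Cases))
}
```

Counters and durations derived from these fields are what the tool exports as metrics.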

## Supported CI runners

This tool works in the context of a CI runner, such as a GitHub Action, a Jenkins job, a GitLab runner, or even a local execution. This is important because it uses the context of the CI execution to infer the attributes to be added to the OpenTelemetry traces and spans.

In particular the order of evaluation to detect the right execution context is the following:
@@ -19,6 +21,7 @@ In particular the order of evaluation to detect the right execution context is t
```

### Local execution

It reads the environment variables that are available in the context of a local execution, representing the fallback if no context is discovered:

```golang
@@ -73,6 +76,7 @@ func FromGithub() *ScmContext {
```

### Jenkins multibranch pipelines

It reads the environment variables that are available in the context of a Jenkins multibranch pipeline execution:

```golang
@@ -109,6 +113,7 @@ func FromJenkins() *ScmContext {
```

### Gitlab Runners

It reads the environment variables that are available in the context of a GitLab runner execution:

```golang
@@ -137,6 +142,7 @@ func FromGitlab() *ScmContext {
```

## OpenTelemetry configuration

This tool is able to override the following attributes:

| Attribute | Flag | Default value | Description |
@@ -153,47 +159,59 @@ For using this tool in a distributed tracing scenario, where there is a parent t

For further reference on environment variables in the OpenTelemetry SDK, please read the [official specification](https://opentelemetry.io/docs/reference/specification/sdk-environment-variables/)

## Traces

Traces are sent to the OpenTelemetry collector, representing the test execution. Each run of the tool creates a root trace that contains one span per suite, and each suite span contains one span per test case.

In addition to the common attributes, the tool adds trace-specific attributes to the trace document representing the test execution.

## Metrics

For each test execution, represented by a test report file, the tool adds the following metrics to the document, including them in the trace representing the test execution.

| Metric | Description |
| --------- | ----------- |
| `tests.suite.failed` | Number of failed tests in the test execution |
| `tests.suite.error` | Number of errored tests in the test execution |
| `tests.suite.passed` | Number of passed tests in the test execution |
| `tests.suite.skipped` | Number of skipped tests in the test execution |
| `tests.suite.total` | Total number of tests in the test execution |
| `tests.suite.duration` | Duration of the test execution |
| `tests.suite.duration.histogram` | Histogram of the test execution duration |
| `tests.case.failed` | The test failed |
| `tests.case.error` | The test errored |
| `tests.case.skipped` | The test was skipped |
| `tests.case.passed` | The test passed |
| `tests.case.duration` | Duration of the test case |
| `tests.case.duration.histogram` | Histogram of the test case duration |

## OpenTelemetry Attributes

This tool parses the XML report produced by jUnit (or by any other tool converting to that format) and adds attributes grouped into the following categories:

- Runtime attributes
- Ownership attributes
- Report properties
- Test suite attributes
- Test case attributes

### Runtime attributes

Runtime attributes are added to the root trace, spans, and metrics sent by the tool.

| Attribute | Description |
| --------- | ----------- |
| `host.arch` | Architecture of the host where the test execution is processed |
| `os.name` | Name of the OS where the test execution is processed |
| `service.name` | Name of the service where the test execution is processed |
| `service.version` | Version of the service where the test execution is processed |
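The host and OS attributes above can be derived directly from the Go runtime. A hedged sketch (the `runtimeAttributes` helper and the exact value mapping are illustrative assumptions; only the attribute names come from the tables in this README):

```go
package main

import (
	"fmt"
	"runtime"
)

// runtimeAttributes sketches how host.arch and os.name style attributes
// can be derived from the Go runtime; attribute names follow the tables
// in this README.
func runtimeAttributes(serviceName, serviceVersion string) map[string]string {
	return map[string]string{
		"host.arch":       runtime.GOARCH, // e.g. amd64, arm64
		"os.name":         runtime.GOOS,   // e.g. linux, darwin
		"service.name":    serviceName,
		"service.version": serviceVersion,
	}
}

func main() {
	attrs := runtimeAttributes("junit2otlp", "dev")
	fmt.Println(attrs["os.name"], attrs["host.arch"])
}
```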

### Ownership attributes
These attributes are added to the traces, spans and metrics, identifying the owner (or owners) of the test suite and trying to correlate a test failure with one or more authors. To identify the owner, the tool inspects the SCM repository for the project.

#### SCM attributes

Because the XML test report is evaluated for a project **in an SCM repository**, the tool will add the following attributes to each trace and span:

| Attribute | Description |
@@ -207,7 +225,8 @@ Because the XML test report is evaluated for a project **in an SCM repository**,
| `scm.type` | Type of the SCM (e.g. git, svn, mercurial). At this moment the tool only supports Git repositories. |

#### Change request attributes
The tool will add the following attributes to each trace and span if and only if the XML test report is evaluated in the context of a change requests **for a Git repository**:

The tool will add the following attributes to each trace, span, and metric if and only if the XML test report is evaluated in the context of a change request **for a Git repository**:

| Attribute | Description |
| --------- | ----------- |
@@ -219,7 +238,41 @@ The tool will add the following attributes to each trace, span, and metric if and only if

A changeset is calculated from the HEAD commit and the first common ancestor between HEAD and the branch the changeset is submitted against.
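The "first common ancestor" computation is essentially `git merge-base`. A simplified, self-contained illustration of the idea over a toy commit graph (a linear parent map; real Git history has merge commits and the tool naturally uses Git itself, not this code):

```go
package main

import "fmt"

// firstCommonAncestor is a simplified merge-base: collect every
// ancestor of HEAD, then walk the target branch's ancestry and return
// the first commit reachable from both.
func firstCommonAncestor(parents map[string]string, head, branch string) string {
	seen := map[string]bool{}
	for c := head; c != ""; c = parents[c] {
		seen[c] = true
	}
	for c := branch; c != ""; c = parents[c] {
		if seen[c] {
			return c
		}
	}
	return ""
}

func main() {
	// main: A -> B; feature branched at B: B -> C -> D (HEAD);
	// another branch head E also forked at B.
	parents := map[string]string{"B": "A", "C": "B", "D": "C", "E": "B"}
	// The changeset for HEAD=D against branch head E is everything after B.
	fmt.Println(firstCommonAncestor(parents, "D", "E"))
}
```

Everything between that ancestor and HEAD forms the changeset the tool attributes to the change request.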

### Report properties

The jUnit XML report can contain properties at different levels. The tool will add the properties to the testsuite and testcase spans automatically. If the `--properties-allowed` flag is set, only the properties listed in the flag will be added to the spans.
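The allow-list behaviour can be pictured as a simple map filter. This sketch is an assumption about the semantics, not the tool's code: `filterProperties` is a hypothetical helper, and treating a nil allow-list as "keep everything" mirrors the described default when `--properties-allowed` is not set:

```go
package main

import "fmt"

// filterProperties keeps only allowed keys; a nil allow-list means
// "keep everything", an assumed default for when the flag is unset.
func filterProperties(props map[string]string, allowed []string) map[string]string {
	if allowed == nil {
		return props
	}
	allow := map[string]bool{}
	for _, k := range allowed {
		allow[k] = true
	}
	out := map[string]string{}
	for k, v := range props {
		if allow[k] {
			out[k] = v
		}
	}
	return out
}

func main() {
	props := map[string]string{"browser": "firefox", "secret": "s3cr3t"}
	fmt.Println(filterProperties(props, []string{"browser"}))
}
```

Filtering like this keeps sensitive or noisy report properties out of the exported spans.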

### Test suite attributes

For each test suite in the test execution, the tool will add the following attributes to the span document representing the test suite:

| Attribute | Spans | Metrics | Description |
| --------- | ----- | ------- | ----------- |
| `code.namespace` | x | x | Class/module of the test suite |
| `tests.suite.suitename` | x | x | Name of the test suite |
| `tests.suite.duration` | x | | Duration of the test suite |
| `tests.suite.systemerr` | x | | Log produced by Systemerr |
| `tests.suite.systemout` | x | | Log produced by Systemout |

### Test case attributes

For each test case in the test execution, the tool will add the following attributes to the span document representing the test case:

| Attribute | Spans | Metrics | Description |
| --------- | ----- | ------- | ----------- |
| `code.namespace` | x | x | Class/module of the test suite |
| `code.function` | x | x | Function or method of the test case |
| `tests.suite.suitename` | x | x | Name of the test suite |
| `tests.case.classname` | x | x | Classname or file for the test case |
| `tests.case.duration` | x | | Duration of the test case |
| `tests.case.error` | x | | Error message of the test case |
| `tests.case.message` | x | | Message of the test case |
| `tests.case.status` | x | | Status of the test case |
| `tests.case.systemerr` | x | | Log produced by Systemerr |
| `tests.case.systemout` | x | | Log produced by Systemout |

## Docker image

It's possible to run the binary as a Docker image. To build and use the image:

1. First build the Docker image using this Make goal:
@@ -242,6 +295,7 @@ cat TEST-sample3.xml | docker run --rm -i --network elastic_junit2otlp --volume
- We are passing command line flags to the container, setting the service name (_DOCKERFOO_) and the trace name (_TRACEBAR_).

## Demos

To demonstrate how traces and metrics are sent to different back-ends, we provide the following demos:

- Elastic
@@ -250,6 +304,7 @@ To demonstrate how traces and metrics are sent to different back-ends, we pr
- Zipkin

### Elastic

It will use the Elastic Stack as back-end, sending the traces, spans and metrics through the APM Server, storing them in Elasticsearch and finally using Kibana as the visualisation layer.

```shell
@@ -262,6 +317,7 @@ open http://localhost:5601/app/apm/services?rangeFrom=now-15m&rangeTo=now&compar
```

### Jaeger

It will use Jaeger as back-end, sending the traces, spans and metrics through the OpenTelemetry collector, storing them in memory.

```shell
@@ -274,6 +330,7 @@ open http://localhost:16686
```

### Prometheus

It will use Prometheus as back-end, sending the traces, spans and metrics through the OpenTelemetry collector, storing them in memory.

```shell
@@ -286,6 +343,7 @@ open http://localhost:9090
```

### Zipkin

It will use Zipkin as back-end, sending the traces, spans and metrics through the OpenTelemetry collector, storing them in memory.

```shell
105 changes: 77 additions & 28 deletions main.go
@@ -69,6 +69,11 @@ func createIntCounter(meter metric.Meter, name string, description string) metri
return counter
}

func createFloat64Histogram(meter metric.Meter, name string, description string, unit string) metric.Float64Histogram {
histogram, _ := meter.Float64Histogram(name, metric.WithDescription(description), metric.WithUnit(unit))
return histogram
}

func createTracesAndSpans(ctx context.Context, srvName string, tracesProvides *sdktrace.TracerProvider, suites []junit.Suite) error {
tracer := tracesProvides.Tracer(srvName)
meter := otel.Meter(srvName)
@@ -79,60 +84,104 @@ func createTracesAndSpans(ctx context.Context, srvName string, tracesProvides *s
runtimeAttributes = append(runtimeAttributes, scmAttributes...)
}

// test suite metrics
suiteErrorCounter := createIntCounter(meter, ErrorTestsCount, "Total number of errored tests")
suiteFailedCounter := createIntCounter(meter, FailedTestsCount, "Total number of failed tests")
suiteSkippedCounter := createIntCounter(meter, SkippedTestsCount, "Total number of skipped tests")
suitePassedCounter := createIntCounter(meter, PassedTestsCount, "Total number of passed tests")
suiteTestsCounter := createIntCounter(meter, TotalTestsCount, "Total number of executed tests")
suiteDurationCounter := createIntCounter(meter, TestsDuration, "Duration of the tests")
suiteDurationHistogram := createFloat64Histogram(meter, TestsDurationHist, "Duration of the tests", "s")

// test case metrics
caseFailedCounter := createIntCounter(meter, CaseFailedCount, "Total number of failed tests")
caseErrorCounter := createIntCounter(meter, CaseErrorCount, "Total number of error tests")
casePassedCounter := createIntCounter(meter, CasePassedCount, "Total number of passed tests")
caseSkippedCounter := createIntCounter(meter, CaseSkippedCount, "Total number of skipped tests")
caseDurationCounter := createIntCounter(meter, CaseDuration, "Duration of the tests")
caseDurationHistogram := createFloat64Histogram(meter, CaseDurationHist, "Duration of the tests", "s")

// outer span for the whole report
ctx, outerSpan := tracer.Start(ctx, traceNameFlag, trace.WithAttributes(runtimeAttributes...),
trace.WithSpanKind(trace.SpanKindServer))
defer outerSpan.End()

for _, suite := range suites {
totals := suite.Totals

// attributes for the suite that are common for metrics and spans
suiteAttributes := []attribute.KeyValue{
semconv.CodeNamespaceKey.String(suite.Package),
attribute.Key(TestsSuiteName).String(suite.Name),
}

suiteAttributes = append(suiteAttributes, runtimeAttributes...)
suiteAttributes = append(suiteAttributes, propsToLabels(suite.Properties)...)

metricAttributes := metric.WithAttributes(suiteAttributes...)

// metrics for the suite
suiteErrorCounter.Add(ctx, int64(totals.Error), metricAttributes)
suiteFailedCounter.Add(ctx, int64(totals.Failed), metricAttributes)
suiteSkippedCounter.Add(ctx, int64(totals.Skipped), metricAttributes)
suitePassedCounter.Add(ctx, int64(totals.Passed), metricAttributes)
suiteTestsCounter.Add(ctx, int64(totals.Tests), metricAttributes)

suiteDurationCounter.Add(ctx, totals.Duration.Milliseconds(), metricAttributes)
suiteDurationHistogram.Record(ctx, totals.Duration.Seconds(), metricAttributes)

// attributes for the suite span
suiteSpanAttributes := append([]attribute.KeyValue{}, suiteAttributes...)
suiteSpanAttributes = append(suiteSpanAttributes,
attribute.Key(TestsDuration).Int64(suite.Totals.Duration.Milliseconds()),
attribute.Key(TestsSystemErr).String(suite.SystemErr),
attribute.Key(TestsSystemOut).String(suite.SystemOut),
)

// start the suite span
ctx, suiteSpan := tracer.Start(ctx, suite.Name, trace.WithAttributes(suiteSpanAttributes...))

// iterate tests and add metrics and spans
for _, test := range suite.Tests {
// attributes for the test case that are common for metrics and spans
testAttributes := []attribute.KeyValue{
semconv.CodeFunctionKey.String(test.Name),
attribute.Key(TestClassName).String(test.Classname),
}
testAttributes = append(testAttributes, suiteAttributes...)
testAttributes = append(testAttributes, runtimeAttributes...)
testAttributes = append(testAttributes, propsToLabels(test.Properties)...)

metricAttributes := metric.WithAttributes(testAttributes...)

// metrics for the test case
if test.Status == junit.StatusError {
caseErrorCounter.Add(ctx, 1, metricAttributes)
} else if test.Status == junit.StatusFailed {
caseFailedCounter.Add(ctx, 1, metricAttributes)
} else if test.Status == junit.StatusPassed {
casePassedCounter.Add(ctx, 1, metricAttributes)
} else if test.Status == junit.StatusSkipped {
caseSkippedCounter.Add(ctx, 1, metricAttributes)
}

caseDurationCounter.Add(ctx, test.Duration.Milliseconds(), metricAttributes)
caseDurationHistogram.Record(ctx, test.Duration.Seconds(), metricAttributes)

// attributes for the test span
testSpanAttributes := append([]attribute.KeyValue{}, testAttributes...)
testSpanAttributes = append(testSpanAttributes,
attribute.Key(TestDuration).Int64(test.Duration.Milliseconds()),
attribute.Key(TestMessage).String(test.Message),
attribute.Key(TestStatus).String(string(test.Status)),
attribute.Key(TestSystemErr).String(test.SystemErr),
attribute.Key(TestSystemOut).String(test.SystemOut),
)

if test.Error != nil {
testSpanAttributes = append(testSpanAttributes, attribute.Key(TestError).String(test.Error.Error()))
}

_, testSpan := tracer.Start(ctx, test.Name, trace.WithAttributes(testSpanAttributes...))
testSpan.End()
}

17 changes: 16 additions & 1 deletion main_test.go
@@ -326,7 +326,6 @@ func Test_Main_SampleXML(t *testing.T) {
assertStringValueInAttribute(t, srvVersionAttribute.Value, "")

instrumentationLibrarySpans := resourceSpans.InstrumentationLibrarySpans[0]

assert.Equal(t, "jaeger-srv-test", instrumentationLibrarySpans.InstrumentationLibrary.Name)

spans := instrumentationLibrarySpans.Spans
@@ -355,6 +354,22 @@ func Test_Main_SampleXML(t *testing.T) {
// last span is server type
aTestCase = spans[expectedSpansCount-1]
assert.Equal(t, "SPAN_KIND_SERVER", aTestCase.Kind)

// metrics
resourceMetrics := testReport.resourceMetrics.Metrics[0]
metrics := resourceMetrics.InstrumentationLibraryMetrics[0].Metrics

assert.Equal(t, "tests.suite.error", metrics[0].Name)
assert.Equal(t, "tests.suite.failed", metrics[1].Name)
assert.Equal(t, "tests.suite.skipped", metrics[2].Name)
assert.Equal(t, "tests.suite.passed", metrics[3].Name)
assert.Equal(t, "tests.suite.total", metrics[4].Name)
assert.Equal(t, "tests.suite.duration", metrics[5].Name)
assert.Equal(t, "tests.suite.duration.histogram", metrics[6].Name)

assert.Equal(t, "tests.case.passed", metrics[7].Name)
assert.Equal(t, "tests.case.duration", metrics[8].Name)
assert.Equal(t, "tests.case.duration.histogram", metrics[9].Name)
}

func Test_GetServiceVariable(t *testing.T) {