prune-junit-xml: fix false positives for benchmarks

Because Go emits a "run" event for a benchmark, but no "passed" (golang/go#66825 (comment)), gotestsum treats the benchmark as failed (gotestyourself/gotestsum#413 (comment) -2172063128) because the events for the test are incomplete. Discussions on how to handle this in gotestsum haven't led to a conclusion and drag on. In the meantime, scheduler_perf jobs keep being treated as failed because of this. By identifying the false positives and removing them, prune-junit-xml can solve the situation for us.
pohly · Nov 8, 2024 · 510a39d · 510a39d
1 parent e273349
commit 510a39d
Show file tree

Hide file tree

Showing 2 changed files with 88 additions and 0 deletions.
diff --git a/cmd/prune-junit-xml/prunexml.go b/cmd/prune-junit-xml/prunexml.go
@@ -33,6 +33,8 @@ func main() {
 	maxTextSize := flag.Int("max-text-size", 1, "maximum size of attribute or text (in MB)")
 	pruneTests := flag.Bool("prune-tests", true,
 		"prune's xml files to display only top level tests and failed sub-tests")
+	fixBenchmarks := flag.Bool("fix-benchmarks", true,
+		"Go reports starting benchmarks, but not that they passed (https://github.com/golang/go/issues/66825#issuecomment-2343229005). This causes gotestsum to report the benchmark as a failure (https://github.com/gotestyourself/gotestsum/issues/413#issuecomment-2172063128). This option controls a heuristic which suppresses such false positives.")
 	flag.Parse()
 	for _, path := range flag.Args() {
 		fmt.Printf("processing junit xml file : %s\n", path)
@@ -46,6 +48,9 @@ func main() {
 			panic(err)
 		}
 
+		if *fixBenchmarks {
+			pruneFalsePositiveBenchmarks(suites)
+		}
 		pruneXML(suites, *maxTextSize*1e6) // convert MB into bytes (roughly!)
 		if *pruneTests {
 			pruneTESTS(suites)
@@ -64,6 +69,42 @@ func main() {
 	}
 }
 
+// pruneFalsePositiveBenchmarks identifies all benchmarks with a false
+// positive and removes their failure message, adjusting the overall suite
+// summary accordingly.
+//
+// False positives are test cases where the failure message doesn't include
+// a "FAIL: <benchmark name>". Because of this check, benchmarks which mark
+// themselves as failed through e.g. b.Errorf are still listed as failure.
+//
+// See https://github.com/golang/go/issues/66825#issuecomment-2343229005
+// and https://github.com/gotestyourself/gotestsum/issues/413#issuecomment-2172063128
+func pruneFalsePositiveBenchmarks(suites *junitxml.JUnitTestSuites) {
+	for i, suite := range suites.Suites {
+		if suite.Failures == 0 {
+			continue
+		}
+		for j, testcase := range suite.TestCases {
+			if testcase.Failure == nil {
+				continue
+			}
+			if !strings.HasPrefix(testcase.Name, "Benchmark") {
+				// Not a benchmark, leave it alone.
+				continue
+			}
+			if strings.Contains(testcase.Failure.Contents, "FAIL: "+testcase.Name) {
+				// Looks genuine, "go test" reported a failure.
+				continue
+			}
+			// Strip out the false positive failure.
+			testcase.Failure = nil
+			suite.TestCases[j] = testcase
+			suite.Failures--
+		}
+		suites.Suites[i] = suite
+	}
+}
+
 func pruneXML(suites *junitxml.JUnitTestSuites, maxBytes int) {
 	for _, suite := range suites.Suites {
 		for i := range suite.TestCases {

diff --git a/cmd/prune-junit-xml/prunexml_test.go b/cmd/prune-junit-xml/prunexml_test.go
@@ -123,3 +123,50 @@ func TestPruneTESTS(t *testing.T) {
 	_ = writer.Flush()
 	assert.Equal(t, outputXML, output.String(), "tests in xml was not pruned correctly")
 }
+
+func TestPruneFalsePositiveBenchmarks(t *testing.T) {
+	sourceXML := `<?xml version="1.0" encoding="UTF-8"?>
+<testsuites>
+	<testsuite tests="1" failures="1" time="5.50000" name="k8s.io/kubernetes/cluster/gce/cos" timestamp="">
+		<properties>
+			<property name="go.version" value="go1.18 linux/amd64"></property>
+		</properties>
+		<testcase classname="k8s.io/kubernetes/cluster/gce/cos" name="BenchmarkA" time="0.950000">
+                   <failure message="Failed" type="">=== RUN   BenchmarkA&#xA;BenchmarkA&#xA;BenchmarkA-36    &#x9;    4222&#x9;    256516 ns/op&#xA;</failure>
+                </testcase>
+	</testsuite>
+	<testsuite tests="1" failures="1" time="5.50000" name="k8s.io/kubernetes/cluster/gce/cos2" timestamp="">
+		<properties>
+			<property name="go.version" value="go1.18 linux/amd64"></property>
+		</properties>
+		<testcase classname="k8s.io/kubernetes/cluster/gce/cos2" name="BenchmarkB" time="0.950000">
+			<failure message="Failed" type="">=== RUN   BenchmarkB&#xA;BenchmarkB&#xA;BenchmarkB-36    &#x9;    4222&#x9;    256516 ns/op&#xA;    matching_test.go:675: fake error&#xA;--- FAIL: BenchmarkB&#xA;</failure>
+                </testcase>
+	</testsuite>
+</testsuites>`
+
+	outputXML := `<?xml version="1.0" encoding="UTF-8"?>
+<testsuites>
+	<testsuite tests="1" failures="0" time="5.50000" name="k8s.io/kubernetes/cluster/gce/cos" timestamp="">
+		<properties>
+			<property name="go.version" value="go1.18 linux/amd64"></property>
+		</properties>
+		<testcase classname="k8s.io/kubernetes/cluster/gce/cos" name="BenchmarkA" time="0.950000"></testcase>
+	</testsuite>
+	<testsuite tests="1" failures="1" time="5.50000" name="k8s.io/kubernetes/cluster/gce/cos2" timestamp="">
+		<properties>
+			<property name="go.version" value="go1.18 linux/amd64"></property>
+		</properties>
+		<testcase classname="k8s.io/kubernetes/cluster/gce/cos2" name="BenchmarkB" time="0.950000">
+			<failure message="Failed" type="">=== RUN   BenchmarkB&#xA;BenchmarkB&#xA;BenchmarkB-36    &#x9;    4222&#x9;    256516 ns/op&#xA;    matching_test.go:675: fake error&#xA;--- FAIL: BenchmarkB&#xA;</failure>
+		</testcase>
+	</testsuite>
+</testsuites>`
+	suites, _ := fetchXML(strings.NewReader(sourceXML))
+	pruneFalsePositiveBenchmarks(suites)
+	var output bytes.Buffer
+	writer := bufio.NewWriter(&output)
+	_ = streamXML(writer, suites)
+	_ = writer.Flush()
+	assert.Equal(t, outputXML, output.String(), "tests in xml was not pruned correctly")
+}