Make benchmarks more consistent (#3295)

michel-laterman · web-flow · commit afb472982971 · 2024-03-05T15:15:08.000+01:00
Call b.ResetTimer() directly before the main benchmarks are run in order to ignore any overhead in setting up the benchmarks.
Restructure some of the benchmarks for more consistent performance.
Use a nop logger in some benchmarks.
Run benchstat after the individual benchmark jobs to get a summary of the benchmarks for the PR/main.
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
@@ -46,6 +46,7 @@ steps:
         command: ".buildkite/scripts/run_benchmark.sh pr"
         artifact_paths:
           - build/next.out
+          - build/next.stat
         agents:
           provider: "gcp"
           machineType: "c2-standard-8"
@@ -55,6 +56,7 @@ steps:
         command: ".buildkite/scripts/run_benchmark.sh base"
         artifact_paths:
           - build/base.out
+          - build/base.stat
         agents:
           provider: "gcp"
           machineType: "c2-standard-8"
diff --git a/.buildkite/scripts/run_benchmark.sh b/.buildkite/scripts/run_benchmark.sh
@@ -15,6 +15,7 @@ export BENCHMARK_ARGS="-count=8 -benchmem"
 if [[ ${TYPE} == "pr" ]]; then
     echo "Starting the go benchmark for the pull request"
     BENCH_BASE=next.out make benchmark
+    BENCH_BASE=next.out make benchstat | tee build/next.stat
     BENCH=$(cat build/next.out)
     buildkite-agent annotate --style 'info' --context "gobench_pr" --append << _EOF_
 #### Benchmark for pull request
@@ -34,6 +35,7 @@ if [[ ${TYPE} == "base" ]]; then
     echo "Starting the go benchmark for the pull request"
     git checkout ${BUILDKITE_PULL_REQUEST_BASE_BRANCH}
     BENCH_BASE=base.out make benchmark
+    BENCH_BASE=base.out make benchstat | tee build/base.stat
     BENCH=$(cat build/base.out)
     buildkite-agent annotate --style 'info' --context "gobench_base" --append << _EOF_
 #### Benchmark for the ${BUILDKITE_PULL_REQUEST_BASE_BRANCH}
diff --git a/Makefile b/Makefile
@@ -18,7 +18,7 @@ BUILDER_IMAGE=docker.elastic.co/beats-dev/golang-crossbuild:${GO_VERSION}-main-d
 #Benchmark related targets
 BENCH_BASE ?= benchmark-$(COMMIT).out
 BENCH_NEXT ?=
-BENCHMARK_ARGS := -count=8 -benchmem
+BENCHMARK_ARGS := -count=10 -benchtime=3s -benchmem
 BENCHMARK_PACKAGE ?= ./...
 BENCHMARK_FILTER ?= Bench
 
diff --git a/internal/pkg/api/handleAck_test.go b/internal/pkg/api/handleAck_test.go
@@ -28,19 +28,19 @@ import (
 )
 
 func BenchmarkMakeUpdatePolicyBody(b *testing.B) {
-	b.ReportAllocs()
-
 	const policyID = "ed110be4-c2a0-42b8-adc0-94c2f0569207"
 	const newRev = 2
 	const coord = 1
 
+	b.ResetTimer()
+	b.ReportAllocs()
+
 	for n := 0; n < b.N; n++ {
 		makeUpdatePolicyBody(policyID, newRev, coord)
 	}
 }
 
 func TestMakeUpdatePolicyBody(t *testing.T) {
-
 	const policyID = "ed110be4-c2a0-42b8-adc0-94c2f0569207"
 	const newRev = 2
 	const coord = 1
diff --git a/internal/pkg/api/handleCheckin_test.go b/internal/pkg/api/handleCheckin_test.go
@@ -29,6 +29,7 @@ import (
 	testlog "github.com/elastic/fleet-server/v7/internal/pkg/testing/log"
 
 	"github.com/hashicorp/go-version"
+	"github.com/rs/zerolog"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/mock"
 	"github.com/stretchr/testify/require"
@@ -631,7 +632,7 @@ func Benchmark_CheckinT_writeResponse(b *testing.B) {
 	}
 	ct := NewCheckinT(verCon, cfg, nil, nil, nil, nil, nil, nil, ftesting.NewMockBulk())
 
-	logger := testlog.SetLogger(b)
+	logger := zerolog.Nop()
 	req := &http.Request{
 		Header: http.Header{
 			"Accept-Encoding": []string{"gzip"},
@@ -657,7 +658,7 @@ func BenchmarkParallel_CheckinT_writeResponse(b *testing.B) {
 	}
 	ct := NewCheckinT(verCon, cfg, nil, nil, nil, nil, nil, nil, ftesting.NewMockBulk())
 
-	logger := testlog.SetLogger(b)
+	logger := zerolog.Nop()
 	req := &http.Request{
 		Header: http.Header{
 			"Accept-Encoding": []string{"gzip"},
diff --git a/internal/pkg/bulk/bulk_integration_test.go b/internal/pkg/bulk/bulk_integration_test.go
@@ -380,11 +380,9 @@ func benchmarkCreate(n int, b *testing.B) {
 }
 
 func BenchmarkCreate(b *testing.B) {
-
 	benchmarks := []int{1, 64, 8192, 16384, 32768, 65536}
 
 	for _, n := range benchmarks {
-
 		bindFunc := func(n int) func(b *testing.B) {
 			return func(b *testing.B) {
 				benchmarkCreate(n, b)
@@ -398,8 +396,6 @@ func BenchmarkCreate(b *testing.B) {
 // Not a particularly useful benchmark, but gives some idea of memory overhead.
 
 func benchmarkCRUD(n int, b *testing.B) {
-	b.ReportAllocs()
-
 	ctx, cn := context.WithCancel(context.Background())
 	defer cn()
 	ctx = testlog.SetLogger(b).WithContext(ctx)
@@ -415,8 +411,11 @@ func benchmarkCRUD(n int, b *testing.B) {
 	ch := make(chan error, n)
 	var wait sync.WaitGroup
 	wait.Add(n)
-	for i := 0; i < n; i++ {
 
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < n; i++ {
 		go func() {
 			defer wait.Done()
 
@@ -466,11 +465,9 @@ func benchmarkCRUD(n int, b *testing.B) {
 }
 
 func BenchmarkCRUD(b *testing.B) {
-
 	benchmarks := []int{1, 64, 8192, 16384, 32768, 65536}
 
 	for _, n := range benchmarks {
-
 		bindFunc := func(n int) func(b *testing.B) {
 			return func(b *testing.B) {
 				benchmarkCRUD(n, b)
diff --git a/internal/pkg/bulk/bulk_test.go b/internal/pkg/bulk/bulk_test.go
@@ -16,7 +16,6 @@ import (
 	"testing"
 
 	"github.com/elastic/fleet-server/v7/internal/pkg/apikey"
-	testlog "github.com/elastic/fleet-server/v7/internal/pkg/testing/log"
 	"github.com/rs/zerolog"
 )
 
@@ -305,12 +304,10 @@ func TestCancelCtxChildBulker(t *testing.T) {
 }
 
 func benchmarkMockBulk(b *testing.B, samples [][]byte) {
-	b.ReportAllocs()
 	mock := &mockBulkTransport{}
 
 	ctx, cancelF := context.WithCancel(context.Background())
 	defer cancelF()
-	ctx = testlog.SetLogger(b).WithContext(ctx)
 
 	n := len(samples)
 	bulker := NewBulker(mock, nil, WithFlushThresholdCount(n))
@@ -334,8 +331,10 @@ func benchmarkMockBulk(b *testing.B, samples [][]byte) {
 
 	var wait sync.WaitGroup
 	wait.Add(n)
-	for i := 0; i < n; i++ {
 
+	b.ResetTimer()
+	b.ReportAllocs()
+	for i := 0; i < n; i++ {
 		go func(sampleData []byte) {
 			defer wait.Done()
 
@@ -360,7 +359,6 @@ func benchmarkMockBulk(b *testing.B, samples [][]byte) {
 				// Delete
 				err = bulker.Delete(ctx, index, id)
 				if err != nil {
-					b.Logf("Delete failed index: %s id: %s", index, id)
 					b.Error(err)
 				}
 			}
@@ -373,7 +371,6 @@ func benchmarkMockBulk(b *testing.B, samples [][]byte) {
 }
 
 func BenchmarkMockBulk(b *testing.B) {
-
 	benchmarks := []int{1, 8, 64, 4096, 32768}
 
 	// Create the samples outside the loop to avoid accounting
@@ -391,7 +388,6 @@ func BenchmarkMockBulk(b *testing.B) {
 	}
 
 	for _, n := range benchmarks {
-
 		bindFunc := func(n int) func(b *testing.B) {
 			return func(b *testing.B) {
 				benchmarkMockBulk(b, samples[:n])
diff --git a/internal/pkg/bulk/opMulti_integration_test.go b/internal/pkg/bulk/opMulti_integration_test.go
@@ -15,12 +15,9 @@ import (
 	testlog "github.com/elastic/fleet-server/v7/internal/pkg/testing/log"
 )
 
-// This runs a series of CRUD operations through elastic.
+// benchmarkMultiUpdate runs a series of CRUD operations through elastic.
 // Not a particularly useful benchmark, but gives some idea of memory overhead.
-
 func benchmarkMultiUpdate(n int, b *testing.B) {
-	b.ReportAllocs()
-
 	ctx, cn := context.WithCancel(context.Background())
 	defer cn()
 	ctx = testlog.SetLogger(b).WithContext(ctx)
@@ -42,6 +39,9 @@ func benchmarkMultiUpdate(n int, b *testing.B) {
 		b.Fatal(err)
 	}
 
+	b.ResetTimer()
+	b.ReportAllocs()
+
 	for j := 0; j < b.N; j++ {
 		fields := UpdateFields{
 			"dateval": time.Now().Format(time.RFC3339),
@@ -64,8 +64,9 @@ func benchmarkMultiUpdate(n int, b *testing.B) {
 	}
 }
 
-func BenchmarkMultiUpdate(b *testing.B) {
-
+// BenchmarkMultiUpdateIntegration runs a benchmark for CRUD operations on a live ES instance
+// The results may be inconsistent due to the ES requirement.
+func BenchmarkMultiUpdateIntegration(b *testing.B) {
 	benchmarks := []int{1, 64, 8192, 37268, 131072}
 
 	for _, n := range benchmarks {
diff --git a/internal/pkg/bulk/opMulti_test.go b/internal/pkg/bulk/opMulti_test.go
@@ -50,8 +50,9 @@ func BenchmarkMultiUpdateMock(b *testing.B) {
 
 	for _, n := range benchmarks {
 		b.Run(strconv.Itoa(n), func(b *testing.B) {
-			b.ReportAllocs()
 			ctx := testlog.SetLogger(b).WithContext(context.Background())
+			b.ResetTimer()
+			b.ReportAllocs()
 			for i := 0; i < b.N; i++ {
 				if _, err := bulker.MUpdate(ctx, ops[:n]); err != nil {
 					b.Fatal(err)
diff --git a/internal/pkg/checkin/bulk_test.go b/internal/pkg/checkin/bulk_test.go
@@ -205,13 +205,8 @@ func validateTimestamp(tb testing.TB, start time.Time, ts string) {
 	}
 }
 
-func benchmarkBulk(n int, flush bool, b *testing.B) {
-	ctx := testlog.SetLogger(b).WithContext(context.Background())
-	b.ReportAllocs()
-
+func benchmarkBulk(n int, b *testing.B) {
 	mockBulk := ftesting.NewMockBulk()
-	mockBulk.On("MUpdate", mock.Anything, mock.Anything, []bulk.Opt(nil)).Return([]bulk.BulkIndexerResponseItem{}, nil)
-
 	bc := NewBulk(mockBulk)
 
 	ids := make([]string, 0, n)
@@ -220,34 +215,61 @@ func benchmarkBulk(n int, flush bool, b *testing.B) {
 		ids = append(ids, id)
 	}
 
+	b.ResetTimer()
+	b.ReportAllocs()
 	for i := 0; i < b.N; i++ {
-
 		for _, id := range ids {
 			err := bc.CheckIn(id, "", "", nil, nil, nil, "")
 			if err != nil {
 				b.Fatal(err)
 			}
 		}
+	}
+}
 
-		if flush {
-			err := bc.flush(ctx)
+func benchmarkFlush(n int, b *testing.B) {
+	ctx := context.Background()
+	mockBulk := ftesting.NewMockBulk()
+	mockBulk.On("MUpdate", mock.Anything, mock.Anything, []bulk.Opt(nil)).Return([]bulk.BulkIndexerResponseItem{}, nil)
+	bc := NewBulk(mockBulk)
+
+	ids := make([]string, 0, n)
+	for i := 0; i < n; i++ {
+		id := xid.New().String()
+		ids = append(ids, id)
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		b.StopTimer()
+		for _, id := range ids {
+			err := bc.CheckIn(id, "", "", nil, nil, nil, "")
 			if err != nil {
 				b.Fatal(err)
 			}
 		}
+		b.StartTimer()
+
+		err := bc.flush(ctx)
+		if err != nil {
+			b.Fatal(err)
+		}
 	}
+
 }
 
-func BenchmarkBulk_1(b *testing.B)      { benchmarkBulk(1, false, b) }
-func BenchmarkBulk_64(b *testing.B)     { benchmarkBulk(64, false, b) }
-func BenchmarkBulk_8192(b *testing.B)   { benchmarkBulk(8192, false, b) }
-func BenchmarkBulk_37268(b *testing.B)  { benchmarkBulk(37268, false, b) }
-func BenchmarkBulk_131072(b *testing.B) { benchmarkBulk(131072, false, b) }
-func BenchmarkBulk_262144(b *testing.B) { benchmarkBulk(262144, false, b) }
-
-func BenchmarkBulkFlush_1(b *testing.B)      { benchmarkBulk(1, true, b) }
-func BenchmarkBulkFlush_64(b *testing.B)     { benchmarkBulk(64, true, b) }
-func BenchmarkBulkFlush_8192(b *testing.B)   { benchmarkBulk(8192, true, b) }
-func BenchmarkBulkFlush_37268(b *testing.B)  { benchmarkBulk(37268, true, b) }
-func BenchmarkBulkFlush_131072(b *testing.B) { benchmarkBulk(131072, true, b) }
-func BenchmarkBulkFlush_262144(b *testing.B) { benchmarkBulk(262144, true, b) }
+func BenchmarkBulk_1(b *testing.B)      { benchmarkBulk(1, b) }
+func BenchmarkBulk_64(b *testing.B)     { benchmarkBulk(64, b) }
+func BenchmarkBulk_8192(b *testing.B)   { benchmarkBulk(8192, b) }
+func BenchmarkBulk_37268(b *testing.B)  { benchmarkBulk(37268, b) }
+func BenchmarkBulk_131072(b *testing.B) { benchmarkBulk(131072, b) }
+func BenchmarkBulk_262144(b *testing.B) { benchmarkBulk(262144, b) }
+
+func BenchmarkFlush_1(b *testing.B)      { benchmarkFlush(1, b) }
+func BenchmarkFlush_64(b *testing.B)     { benchmarkFlush(64, b) }
+func BenchmarkFlush_8192(b *testing.B)   { benchmarkFlush(8192, b) }
+func BenchmarkFlush_37268(b *testing.B)  { benchmarkFlush(37268, b) }
+func BenchmarkFlush_131072(b *testing.B) { benchmarkFlush(131072, b) }
+func BenchmarkFlush_262144(b *testing.B) { benchmarkFlush(262144, b) }
diff --git a/internal/pkg/dsl/tmpl_test.go b/internal/pkg/dsl/tmpl_test.go
diff --git a/internal/pkg/policy/sub_test.go b/internal/pkg/policy/sub_test.go