Skip to content

Commit d5e4452

Browse files
authored
Add WithAllocBufferColStrProvider string column allocator for batch insert performance boost (#1181)
* add ColStrProvider for column_gen * add test for WithAllocBufferColStrProvider
1 parent ae82686 commit d5e4452

File tree

3 files changed

+240
-1
lines changed

3 files changed

+240
-1
lines changed

lib/column/column_gen.go

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/column/column_gen_option.go

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Licensed to ClickHouse, Inc. under one or more contributor
2+
// license agreements. See the NOTICE file distributed with
3+
// this work for additional information regarding copyright
4+
// ownership. ClickHouse, Inc. licenses this file to you under
5+
// the Apache License, Version 2.0 (the "License"); you may
6+
// not use this file except in compliance with the License.
7+
// You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package column
19+
20+
import "github.com/ClickHouse/ch-go/proto"
21+
22+
// ColStrProvider is the signature of a factory function that takes no
// arguments and returns a proto.ColStr value.
23+
type ColStrProvider func() proto.ColStr
24+
25+
// colStrProvider is the package-level provider that supplies the proto.ColStr
// used by Column() when the column type is String. It is initialised to
// defaultColStrProvider and is reassigned by WithAllocBufferColStrProvider and
// WithColStrProvider.
26+
var colStrProvider ColStrProvider = defaultColStrProvider
27+
28+
// defaultColStrProvider defines sample provider for proto.ColStr
29+
func defaultColStrProvider() proto.ColStr {
30+
return proto.ColStr{}
31+
}
32+
33+
// issue: https://github.com/ClickHouse/clickhouse-go/issues/1164
34+
// WithAllocBufferColStrProvider allow pre alloc buffer cap for proto.ColStr
35+
// It is more suitable for scenarios where a lot of data is written in batches
36+
func WithAllocBufferColStrProvider(cap int) {
37+
colStrProvider = func() proto.ColStr {
38+
return proto.ColStr{Buf: make([]byte, 0, cap)}
39+
}
40+
}
41+
42+
// WithColStrProvider more flexible than WithAllocBufferColStrProvider, such as use sync.Pool
43+
func WithColStrProvider(provider ColStrProvider) {
44+
colStrProvider = provider
45+
}

tests/issues/1164_test.go

+194
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
package issues
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"github.com/ClickHouse/clickhouse-go/v2"
7+
"github.com/ClickHouse/clickhouse-go/v2/lib/column"
8+
clickhouse_tests "github.com/ClickHouse/clickhouse-go/v2/tests"
9+
"github.com/stretchr/testify/require"
10+
"testing"
11+
)
12+
13+
func TestIssue1164(t *testing.T) {
14+
var (
15+
conn, err = clickhouse_tests.GetConnection("issues", clickhouse.Settings{
16+
"max_execution_time": 60,
17+
"allow_experimental_object_type": true,
18+
}, nil, &clickhouse.Compression{
19+
Method: clickhouse.CompressionLZ4,
20+
})
21+
)
22+
ctx := context.Background()
23+
require.NoError(t, err)
24+
const ddl = "CREATE TABLE test_1164 (Col1 String) Engine MergeTree() ORDER BY tuple()"
25+
err = conn.Exec(ctx, ddl)
26+
require.NoError(t, err)
27+
defer func() {
28+
conn.Exec(ctx, "DROP TABLE IF EXISTS test_1164")
29+
}()
30+
31+
column.WithAllocBufferColStrProvider(4096)
32+
33+
batch, err := conn.PrepareBatch(ctx, "INSERT INTO test_1164")
34+
require.NoError(t, err)
35+
36+
for i := 0; i < 10000; i++ {
37+
appendErr := batch.Append(fmt.Sprintf("some_text_%d", i))
38+
require.NoError(t, appendErr)
39+
}
40+
41+
err = batch.Send()
42+
require.NoError(t, err)
43+
}
44+
45+
func BenchmarkIssue1164(b *testing.B) {
46+
// result:
47+
//cpu: Intel(R) Xeon(R) CPU E5-26xx v4
48+
//BenchmarkIssue1164
49+
//BenchmarkIssue1164/default-10000
50+
//BenchmarkIssue1164/default-10000-8 100 11533744 ns/op 1992731 B/op 40129 allocs/op
51+
//BenchmarkIssue1164/preAlloc-10000
52+
//BenchmarkIssue1164/preAlloc-10000-8 104 11136623 ns/op 1991154 B/op 40110 allocs/op
53+
//BenchmarkIssue1164/default-50000
54+
//BenchmarkIssue1164/default-50000-8 22 49932579 ns/op 11592053 B/op 200150 allocs/op
55+
//BenchmarkIssue1164/preAlloc-50000
56+
//BenchmarkIssue1164/preAlloc-50000-8 24 49687163 ns/op 11573934 B/op 200148 allocs/op
57+
b.Run("default-10000", func(b *testing.B) {
58+
var (
59+
conn, err = clickhouse_tests.GetConnection("issues", clickhouse.Settings{
60+
"max_execution_time": 60,
61+
"allow_experimental_object_type": true,
62+
}, nil, &clickhouse.Compression{
63+
Method: clickhouse.CompressionLZ4,
64+
})
65+
)
66+
ctx := context.Background()
67+
require.NoError(b, err)
68+
const ddl = "CREATE TABLE test_1164 (Col1 String) Engine MergeTree() ORDER BY tuple()"
69+
err = conn.Exec(ctx, ddl)
70+
require.NoError(b, err)
71+
defer func() {
72+
conn.Exec(ctx, "DROP TABLE IF EXISTS test_1164")
73+
}()
74+
75+
b.ReportAllocs()
76+
for k := 0; k < b.N; k++ {
77+
batch, err := conn.PrepareBatch(ctx, "INSERT INTO test_1164")
78+
require.NoError(b, err)
79+
80+
for i := 0; i < 10000; i++ {
81+
appendErr := batch.Append(fmt.Sprintf("some_text_%d", i))
82+
require.NoError(b, appendErr)
83+
}
84+
85+
err = batch.Send()
86+
require.NoError(b, err)
87+
}
88+
89+
})
90+
b.Run("preAlloc-10000", func(b *testing.B) {
91+
var (
92+
conn, err = clickhouse_tests.GetConnection("issues", clickhouse.Settings{
93+
"max_execution_time": 60,
94+
"allow_experimental_object_type": true,
95+
}, nil, &clickhouse.Compression{
96+
Method: clickhouse.CompressionLZ4,
97+
})
98+
)
99+
ctx := context.Background()
100+
require.NoError(b, err)
101+
const ddl = "CREATE TABLE test_1164 (Col1 String) Engine MergeTree() ORDER BY tuple()"
102+
err = conn.Exec(ctx, ddl)
103+
require.NoError(b, err)
104+
defer func() {
105+
conn.Exec(ctx, "DROP TABLE IF EXISTS test_1164")
106+
}()
107+
108+
column.WithAllocBufferColStrProvider(4096)
109+
110+
b.ReportAllocs()
111+
for k := 0; k < b.N; k++ {
112+
batch, err := conn.PrepareBatch(ctx, "INSERT INTO test_1164")
113+
require.NoError(b, err)
114+
115+
for i := 0; i < 10000; i++ {
116+
appendErr := batch.Append(fmt.Sprintf("some_text_%d", i))
117+
require.NoError(b, appendErr)
118+
}
119+
120+
err = batch.Send()
121+
require.NoError(b, err)
122+
}
123+
124+
})
125+
b.Run("default-50000", func(b *testing.B) {
126+
var (
127+
conn, err = clickhouse_tests.GetConnection("issues", clickhouse.Settings{
128+
"max_execution_time": 60,
129+
"allow_experimental_object_type": true,
130+
}, nil, &clickhouse.Compression{
131+
Method: clickhouse.CompressionLZ4,
132+
})
133+
)
134+
ctx := context.Background()
135+
require.NoError(b, err)
136+
const ddl = "CREATE TABLE test_1164 (Col1 String) Engine MergeTree() ORDER BY tuple()"
137+
err = conn.Exec(ctx, ddl)
138+
require.NoError(b, err)
139+
defer func() {
140+
conn.Exec(ctx, "DROP TABLE IF EXISTS test_1164")
141+
}()
142+
143+
b.ReportAllocs()
144+
for k := 0; k < b.N; k++ {
145+
batch, err := conn.PrepareBatch(ctx, "INSERT INTO test_1164")
146+
require.NoError(b, err)
147+
148+
for i := 0; i < 50000; i++ {
149+
appendErr := batch.Append(fmt.Sprintf("some_text_%d", i))
150+
require.NoError(b, appendErr)
151+
}
152+
153+
err = batch.Send()
154+
require.NoError(b, err)
155+
}
156+
157+
})
158+
b.Run("preAlloc-50000", func(b *testing.B) {
159+
var (
160+
conn, err = clickhouse_tests.GetConnection("issues", clickhouse.Settings{
161+
"max_execution_time": 60,
162+
"allow_experimental_object_type": true,
163+
}, nil, &clickhouse.Compression{
164+
Method: clickhouse.CompressionLZ4,
165+
})
166+
)
167+
ctx := context.Background()
168+
require.NoError(b, err)
169+
const ddl = "CREATE TABLE test_1164 (Col1 String) Engine MergeTree() ORDER BY tuple()"
170+
err = conn.Exec(ctx, ddl)
171+
require.NoError(b, err)
172+
defer func() {
173+
conn.Exec(ctx, "DROP TABLE IF EXISTS test_1164")
174+
}()
175+
176+
column.WithAllocBufferColStrProvider(4096)
177+
178+
b.ReportAllocs()
179+
for k := 0; k < b.N; k++ {
180+
batch, err := conn.PrepareBatch(ctx, "INSERT INTO test_1164")
181+
require.NoError(b, err)
182+
183+
for i := 0; i < 50000; i++ {
184+
appendErr := batch.Append(fmt.Sprintf("some_text_%d", i))
185+
require.NoError(b, appendErr)
186+
}
187+
188+
err = batch.Send()
189+
require.NoError(b, err)
190+
}
191+
192+
})
193+
194+
}

0 commit comments

Comments
 (0)