Skip to content

Commit c8c2b0e

Browse files
Merge pull request #1426 from mdonkers/insert-performance-improvements
[improvement] Some performance related changes to evaluate
2 parents 577ea49 + d0b8c0e commit c8c2b0e

File tree

6 files changed

+93
-16
lines changed

6 files changed

+93
-16
lines changed

lib/column/column_gen.go

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/column/column_gen_option.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@ package column
2020
import "github.com/ClickHouse/ch-go/proto"
2121

2222
// ColStrProvider defines provider of proto.ColStr
23-
type ColStrProvider func() proto.ColStr
23+
type ColStrProvider func(name string) proto.ColStr
2424

2525
// colStrProvider provide proto.ColStr for Column() when type is String
2626
var colStrProvider ColStrProvider = defaultColStrProvider
2727

2828
// defaultColStrProvider defines sample provider for proto.ColStr
29-
func defaultColStrProvider() proto.ColStr {
29+
func defaultColStrProvider(string) proto.ColStr {
3030
return proto.ColStr{}
3131
}
3232

@@ -35,7 +35,7 @@ func defaultColStrProvider() proto.ColStr {
3535
//
3636
// It is more suitable for scenarios where a lot of data is written in batches
3737
func WithAllocBufferColStrProvider(cap int) {
38-
colStrProvider = func() proto.ColStr {
38+
colStrProvider = func(string) proto.ColStr {
3939
return proto.ColStr{Buf: make([]byte, 0, cap)}
4040
}
4141
}

lib/column/enum.go

+15-2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ func Enum(chType Type, name string) (Interface, error) {
4646
v := int8(indexes[i])
4747
enum.iv[values[i]] = proto.Enum8(v)
4848
enum.vi[proto.Enum8(v)] = values[i]
49+
50+
enum.enumValuesBitset[uint8(v)>>6] |= 1 << (v & 63)
4951
}
5052
return &enum, nil
5153
}
@@ -54,12 +56,23 @@ func Enum(chType Type, name string) (Interface, error) {
5456
vi: make(map[proto.Enum16]string, len(values)),
5557
chType: chType,
5658
name: name,
59+
// to be updated below, when ranging over all index/enum values
60+
minEnum: math.MaxInt16,
61+
maxEnum: math.MinInt16,
5762
}
5863

5964
for i := range values {
60-
enum.iv[values[i]] = proto.Enum16(indexes[i])
61-
enum.vi[proto.Enum16(indexes[i])] = values[i]
65+
k := int16(indexes[i])
66+
enum.iv[values[i]] = proto.Enum16(k)
67+
enum.vi[proto.Enum16(k)] = values[i]
68+
if k < enum.minEnum {
69+
enum.minEnum = k
70+
}
71+
if k > enum.maxEnum {
72+
enum.maxEnum = k
73+
}
6274
}
75+
enum.continuous = (enum.maxEnum-enum.minEnum)+1 == int16(len(enum.vi))
6376
return &enum, nil
6477
}
6578

lib/column/enum16.go

+14-2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ type Enum16 struct {
3131
chType Type
3232
col proto.ColEnum16
3333
name string
34+
35+
continuous bool
36+
minEnum int16
37+
maxEnum int16
3438
}
3539

3640
func (col *Enum16) Reset() {
@@ -179,9 +183,17 @@ func (col *Enum16) Append(v any) (nulls []uint8, err error) {
179183
func (col *Enum16) AppendRow(elem any) error {
180184
switch elem := elem.(type) {
181185
case int16:
182-
return col.AppendRow(int(elem))
186+
if col.continuous && elem >= col.minEnum && elem <= col.maxEnum {
187+
col.col.Append(proto.Enum16(elem))
188+
} else {
189+
return col.AppendRow(int(elem))
190+
}
183191
case *int16:
184-
return col.AppendRow(int(*elem))
192+
if col.continuous && *elem >= col.minEnum && *elem <= col.maxEnum {
193+
col.col.Append(proto.Enum16(*elem))
194+
} else {
195+
return col.AppendRow(int(*elem))
196+
}
185197
case int:
186198
v := proto.Enum16(elem)
187199
_, ok := col.vi[v]

lib/column/enum8.go

+10-8
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ type Enum8 struct {
3131
chType Type
3232
name string
3333
col proto.ColEnum8
34+
35+
// Encoding of the enums that have been specified by the user.
36+
// Used when appending rows, to validate that the enum value is valid.
37+
enumValuesBitset [4]uint64
3438
}
3539

3640
func (col *Enum8) Reset() {
@@ -183,27 +187,25 @@ func (col *Enum8) AppendRow(elem any) error {
183187
case *int8:
184188
return col.AppendRow(int(*elem))
185189
case int:
186-
v := proto.Enum8(elem)
187-
_, ok := col.vi[v]
188-
if !ok {
190+
// Check if the enum value is defined
191+
if col.enumValuesBitset[uint8(elem)>>6]&(1<<(elem&63)) == 0 {
189192
return &Error{
190193
Err: fmt.Errorf("unknown element %v", elem),
191194
ColumnType: string(col.chType),
192195
}
193196
}
194-
col.col.Append(v)
197+
col.col.Append(proto.Enum8(elem))
195198
case *int:
196199
switch {
197200
case elem != nil:
198-
v := proto.Enum8(*elem)
199-
_, ok := col.vi[v]
200-
if !ok {
201+
// Check if the enum value is defined
202+
if col.enumValuesBitset[uint8(*elem)>>6]&(1<<(*elem&63)) == 0 {
201203
return &Error{
202204
Err: fmt.Errorf("unknown element %v", *elem),
203205
ColumnType: string(col.chType),
204206
}
205207
}
206-
col.col.Append(v)
208+
col.col.Append(proto.Enum8(*elem))
207209
default:
208210
col.col.Append(0)
209211
}

lib/column/enum_test.go

+50
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package column
22

33
import (
4+
"slices"
45
"testing"
56

67
"github.com/stretchr/testify/assert"
@@ -155,3 +156,52 @@ func TestExtractEnumNamedValues(t *testing.T) {
155156
})
156157
}
157158
}
159+
160+
func TestEnumValuesBoundsChecks(t *testing.T) {
161+
tests := []struct {
162+
name string
163+
enumType string
164+
validEnums []int
165+
}{
166+
{
167+
name: "Simple enum range",
168+
enumType: "Enum8('-2'=-2,'-1'=-1,'0'=0,'1'=1,'2'=2)",
169+
validEnums: createValidEnumsRange(-2, 2),
170+
},
171+
{
172+
name: "Full enum range",
173+
enumType: "Enum8('-128'=-128,'-127'=-127,'-126'=-126,'-125'=-125,'-124'=-124,'-123'=-123,'-122'=-122,'-121'=-121,'-120'=-120,'-119'=-119,'-118'=-118,'-117'=-117,'-116'=-116,'-115'=-115,'-114'=-114,'-113'=-113,'-112'=-112,'-111'=-111,'-110'=-110,'-109'=-109,'-108'=-108,'-107'=-107,'-106'=-106,'-105'=-105,'-104'=-104,'-103'=-103,'-102'=-102,'-101'=-101,'-100'=-100,'-99'=-99,'-98'=-98,'-97'=-97,'-96'=-96,'-95'=-95,'-94'=-94,'-93'=-93,'-92'=-92,'-91'=-91,'-90'=-90,'-89'=-89,'-88'=-88,'-87'=-87,'-86'=-86,'-85'=-85,'-84'=-84,'-83'=-83,'-82'=-82,'-81'=-81,'-80'=-80,'-79'=-79,'-78'=-78,'-77'=-77,'-76'=-76,'-75'=-75,'-74'=-74,'-73'=-73,'-72'=-72,'-71'=-71,'-70'=-70,'-69'=-69,'-68'=-68,'-67'=-67,'-66'=-66,'-65'=-65,'-64'=-64,'-63'=-63,'-62'=-62,'-61'=-61,'-60'=-60,'-59'=-59,'-58'=-58,'-57'=-57,'-56'=-56,'-55'=-55,'-54'=-54,'-53'=-53,'-52'=-52,'-51'=-51,'-50'=-50,'-49'=-49,'-48'=-48,'-47'=-47,'-46'=-46,'-45'=-45,'-44'=-44,'-43'=-43,'-42'=-42,'-41'=-41,'-40'=-40,'-39'=-39,'-38'=-38,'-37'=-37,'-36'=-36,'-35'=-35,'-34'=-34,'-33'=-33,'-32'=-32,'-31'=-31,'-30'=-30,'-29'=-29,'-28'=-28,'-27'=-27,'-26'=-26,'-25'=-25,'-24'=-24,'-23'=-23,'-22'=-22,'-21'=-21,'-20'=-20,'-19'=-19,'-18'=-18,'-17'=-17,'-16'=-16,'-15'=-15,'-14'=-14,'-13'=-13,'-12'=-12,'-11'=-11,'-10'=-10,'-9'=-9,'-8'=-8,'-7'=-7,'-6'=-6,'-5'=-5,'-4'=-4,'-3'=-3,'-2'=-2,'-1'=-1,'0'=0,'1'=1,'2'=2,'3'=3,'4'=4,'5'=5,'6'=6,'7'=7,'8'=8,'9'=9,'10'=10,'11'=11,'12'=12,'13'=13,'14'=14,'15'=15,'16'=16,'17'=17,'18'=18,'19'=19,'20'=20,'21'=21,'22'=22,'23'=23,'24'=24,'25'=25,'26'=26,'27'=27,'28'=28,'29'=29,'30'=30,'31'=31,'32'=32,'33'=33,'34'=34,'35'=35,'36'=36,'37'=37,'38'=38,'39'=39,'40'=40,'41'=41,'42'=42,'43'=43,'44'=44,'45'=45,'46'=46,'47'=47,'48'=48,'49'=49,'50'=50,'51'=51,'52'=52,'53'=53,'54'=54,'55'=55,'56'=56,'57'=57,'58'=58,'59'=59,'60'=60,'61'=61,'62'=62,'63'=63,'64'=64,'65'=65,'66'=66,'67'=67,'68'=68,'69'=69,'70'=70,'71'=71,'72'=72,'73'=73,'74'=74,'75'=75,'76'=76,'77'=77,'78'=78,'79'=79,'80'=80,'81'=81,'82'=82,'83'=83,'84'=84,'85'=85,'86'=86,'87'=87,'88'=88,'89'=89,'90'=90,'91'=91,'92'=92,'93'=93,'94'=94,'95'=95,'96'=96,'97'=97,'98'=98,'99'=99,'100'=100,'101'=101,'102'=102,'103'=103,'104'=104,'105'=105,'106'=106,'107'=107,'108'=108,'109'=109,'110'=110,'111'=111,'112'=112,'113'=113,'114'=114,'115'=115,'116'=116,'117'=117,'118'=118,'119'=119,'120'=120,'121'=121,'122'=122,'123'=123,'124'=124,'125'=125,'126'=126,'127'=127)",
174+
validEnums: createValidEnumsRange(-128, 127),
175+
},
176+
{
177+
name: "Enum range with gaps",
178+
enumType: "Enum8('-10'=-10,'-5'=-5,'0'=0,'1'=1,'5'=5,'10'=10)",
179+
validEnums: []int{-10, -5, 0, 1, 5, 10},
180+
},
181+
}
182+
for _, tt := range tests {
183+
t.Run(tt.name, func(t *testing.T) {
184+
e, err := Enum(Type(tt.enumType), tt.name)
185+
assert.NoError(t, err)
186+
187+
// Try appending the full enum8 range. If the value is in the validEnums slice it should not error
188+
for i := -128; i < 128; i++ {
189+
valid := e.AppendRow(i)
190+
191+
if slices.Contains(tt.validEnums, i) {
192+
assert.NoError(t, valid)
193+
} else {
194+
assert.Error(t, valid)
195+
}
196+
}
197+
})
198+
}
199+
}
200+
201+
func createValidEnumsRange(min, max int) []int {
202+
resultRange := make([]int, 0, max-min+1)
203+
for i := min; i <= max; i++ {
204+
resultRange = append(resultRange, i)
205+
}
206+
return resultRange
207+
}

0 commit comments

Comments
 (0)