Skip to content

Commit 859a174

Browse files
authored
Support slow-start feature to allow a backend instance to gradually recover its weight (bfenetworks#692)
1 parent 6767c89 commit 859a174

File tree

13 files changed

+290
-28
lines changed

13 files changed

+290
-28
lines changed

bfe_balance/backend/bfe_backend.go

+15
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ type BfeBackend struct {
4141
succNum int // number of consecutive successes of health-check request
4242

4343
closeChan chan bool // tell health-check to stop
44+
45+
restarted bool // indicate if this backend is new bring-up by health-check
4446
}
4547

4648
func NewBfeBackend() *BfeBackend {
@@ -90,6 +92,19 @@ func (back *BfeBackend) setAvail(avail bool) {
9092
}
9193
}
9294

95+
func (back *BfeBackend) SetRestart(restart bool) {
96+
back.Lock()
97+
back.restarted = restart
98+
back.Unlock()
99+
}
100+
101+
func (back *BfeBackend) GetRestart() bool {
102+
back.RLock()
103+
restart := back.restarted
104+
back.RUnlock()
105+
return restart
106+
}
107+
93108
func (back *BfeBackend) ConnNum() int {
94109
back.RLock()
95110
connNum := back.connNum

bfe_balance/backend/health_check.go

+1
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ loop:
9696
}
9797

9898
log.Logger.Info("backend %s back to Normal", backend.Name)
99+
backend.SetRestart(true)
99100
backend.SetAvail(true)
100101
break loop
101102
}

bfe_balance/bal_gslb/bal_gslb.go

+10
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,16 @@ func (bal *BalanceGslb) SetGslbBasic(gslbBasic cluster_conf.GslbBasicConf) {
8989
bal.lock.Unlock()
9090
}
9191

92+
func (bal *BalanceGslb) SetSlowStart(backendConf cluster_conf.BackendBasic) {
93+
bal.lock.Lock()
94+
95+
for _, sub := range bal.subClusters {
96+
sub.setSlowStart(*backendConf.SlowStartTime)
97+
}
98+
99+
bal.lock.Unlock()
100+
}
101+
92102
// Init initializes gslb cluster with config
93103
func (bal *BalanceGslb) Init(gslbConf gslb_conf.GslbClusterConf) error {
94104
totalWeight := 0

bfe_balance/bal_gslb/bal_gslb_test.go

+62
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,65 @@ func SetReqHeader(req *bfe_basic.Request, key string) {
133133
req.HttpRequest.Header.Set(key, "val")
134134
}
135135
}
136+
137+
func TestSlowStart(t *testing.T) {
138+
t.Logf("bal_gslb_test: TestSlowStart")
139+
var c cluster_table_conf.ClusterBackend
140+
var gb cluster_conf.GslbBasicConf
141+
var g gslb_conf.GslbClusterConf
142+
var err error
143+
144+
loadJson("testdata/cluster1", &c)
145+
loadJson("testdata/gb", &gb)
146+
loadJson("testdata/g1", &g)
147+
t.Logf("%v %v %v\n", c, gb, g)
148+
149+
bal := NewBalanceGslb("cluster_dumi")
150+
if err := bal.Init(g); err != nil {
151+
t.Errorf("init error %s", err)
152+
}
153+
t.Logf("%+v\n", bal)
154+
if bal.totalWeight != 100 || !bal.single || bal.subClusters[bal.avail].Name != "light.example.wt" || bal.retryMax != 3 || bal.crossRetry != 1 {
155+
t.Errorf("init error")
156+
}
157+
158+
if len(bal.subClusters) != 3 {
159+
t.Errorf("cluster len error")
160+
}
161+
162+
t.Logf("%+v", bal.subClusters[0])
163+
t.Logf("%+v", bal.subClusters[1])
164+
t.Logf("%+v", bal.subClusters[2])
165+
166+
var c1 cluster_table_conf.ClusterBackend
167+
var gb1 cluster_conf.GslbBasicConf
168+
var g1 gslb_conf.GslbClusterConf
169+
loadJson("testdata/cluster2", &c1)
170+
loadJson("testdata/gb2", &gb1)
171+
loadJson("testdata/g2", &g1)
172+
173+
err = cluster_conf.GslbBasicConfCheck(&gb1)
174+
if err != nil {
175+
t.Errorf("GslbBasicConfCheck err %s", err)
176+
}
177+
t.Logf("%v %v %v\n", c1, gb1, g1)
178+
if err := bal.Reload(g1); err != nil {
179+
t.Errorf("reload error %s", err)
180+
}
181+
182+
bal.SetGslbBasic(gb1)
183+
184+
var backendConf cluster_conf.BackendBasic
185+
err = cluster_conf.BackendBasicCheck(&backendConf)
186+
if err != nil {
187+
t.Errorf("BackendBasicCheck err %s", err)
188+
}
189+
var ssTime = 30
190+
backendConf.SlowStartTime = &ssTime
191+
bal.SetSlowStart(backendConf)
192+
193+
t.Logf("%+v\n", bal)
194+
t.Logf("%+v", bal.subClusters[0])
195+
t.Logf("%+v", bal.subClusters[1])
196+
t.Logf("%+v", bal.subClusters[2])
197+
}

bfe_balance/bal_gslb/sub_cluster.go

+4
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ func (sub *SubCluster) balance(algor int, key []byte) (*backend.BfeBackend, erro
8585
return sub.backends.Balance(algor, key)
8686
}
8787

88+
func (sub *SubCluster) setSlowStart(slowStartTime int) {
89+
sub.backends.SetSlowStart(slowStartTime)
90+
}
91+
8892
// SubClusterList is a list of subcluster.
8993
type SubClusterList []*SubCluster
9094

bfe_balance/bal_slb/backend_rr.go

+50-6
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,27 @@
1616

1717
package bal_slb
1818

19+
import (
20+
"time"
21+
)
22+
1923
import (
2024
"github.com/bfenetworks/bfe/bfe_balance/backend"
2125
"github.com/bfenetworks/bfe/bfe_config/bfe_cluster_conf/cluster_table_conf"
2226
)
2327

28+
// WeightSS holds the slow-start parameters of a single backend.
type WeightSS struct {
	// final is the target weight the backend reaches once slow-start ends.
	final int
	// slowStartTime is how long, in seconds, the backend takes to ramp up
	// to its full weight.
	slowStartTime int
	// startTime is the moment the slow-start ramp began.
	startTime time.Time
}
33+
2434
type BackendRR struct {
25-
weight int // weight of this backend
26-
current int // current weight
27-
backend *backend.BfeBackend // point to BfeBackend
35+
weight int // weight of this backend
36+
current int // current weight
37+
backend *backend.BfeBackend // point to BfeBackend
38+
inSlowStart bool // indicate if in slow-start phase
39+
weightSS WeightSS // slow_start related parameters
2840
}
2941

3042
func NewBackendRR() *BackendRR {
@@ -36,15 +48,17 @@ func NewBackendRR() *BackendRR {
3648

3749
// Init initialize BackendRR with BackendConf
3850
func (backRR *BackendRR) Init(subClusterName string, conf *cluster_table_conf.BackendConf) {
39-
backRR.weight = *conf.Weight
40-
backRR.current = *conf.Weight
51+
// scale up 100 times from conf file
52+
backRR.weight = *conf.Weight * 100
53+
backRR.current = backRR.weight
54+
backRR.weightSS.final = backRR.weight
4155

4256
back := backRR.backend
4357
back.Init(subClusterName, conf)
4458
}
4559

4660
func (backRR *BackendRR) UpdateWeight(weight int) {
47-
backRR.weight = weight
61+
backRR.weight = weight * 100
4862

4963
// if weight > 0, don't touch backRR.current
5064
if weight <= 0 {
@@ -60,3 +74,33 @@ func (backRR *BackendRR) MatchAddrPort(addr string, port int) bool {
6074
back := backRR.backend
6175
return back.Addr == addr && back.Port == port
6276
}
77+
78+
func (backRR *BackendRR) initSlowStart(ssTime int) {
79+
backRR.weightSS.slowStartTime = ssTime
80+
if backRR.weightSS.slowStartTime == 0 {
81+
backRR.inSlowStart = false
82+
} else {
83+
backRR.weightSS.startTime = time.Now()
84+
backRR.inSlowStart = true
85+
86+
// set weight/current to 1, to avoid no traffic allowed at the beginning of start
87+
backRR.weight = 1
88+
backRR.current = 1
89+
}
90+
}
91+
92+
func (backRR *BackendRR) updateSlowStart() {
93+
if backRR.inSlowStart {
94+
current := time.Duration(backRR.weightSS.final) * time.Since(backRR.weightSS.startTime)
95+
if backRR.weightSS.slowStartTime != 0 {
96+
current /= time.Duration(backRR.weightSS.slowStartTime) * time.Second
97+
backRR.weight = int(current)
98+
} else {
99+
backRR.weight = backRR.weightSS.final
100+
}
101+
if backRR.weight >= backRR.weightSS.final {
102+
backRR.weight = backRR.weightSS.final
103+
backRR.inSlowStart = false
104+
}
105+
}
106+
}

bfe_balance/bal_slb/backend_rr_test.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,12 @@ func TestBackendRRInit_case1(t *testing.T) {
3636
backendRR := NewBackendRR()
3737
backendRR.Init("example.cluster", &conf)
3838

39-
if backendRR.weight != 10 {
40-
t.Error("backend.weight should be 10")
39+
if backendRR.weight != 10 * 100 {
40+
t.Error("backend.weight should be 10 * 100")
4141
}
4242

43-
if backendRR.current != 10 {
44-
t.Error("backend.current should be 10")
43+
if backendRR.current != 10 * 100 {
44+
t.Error("backend.current should be 10 * 100")
4545
}
4646

4747
backend := backendRR.backend

bfe_balance/bal_slb/bal_rr.go

+34-4
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,13 @@ func (s BackendListSorter) Less(i, j int) bool {
8989

9090
type BalanceRR struct {
9191
sync.Mutex
92-
Name string
93-
backends BackendList // list of BackendRR
94-
sorted bool // list of BackeneRR sorted or not
95-
next int // next backend to schedule
92+
Name string
93+
backends BackendList // list of BackendRR
94+
sorted bool // list of BackeneRR sorted or not
95+
next int // next backend to schedule
96+
97+
slowStartNum int // number of backends in slow_start phase
98+
slowStartTime int // time for backend increases the weight to the full value, in seconds
9699
}
97100

98101
func NewBalanceRR(name string) *BalanceRR {
@@ -113,6 +116,27 @@ func (brr *BalanceRR) Init(conf cluster_table_conf.SubClusterBackend) {
113116
brr.next = 0
114117
}
115118

119+
func (brr *BalanceRR) SetSlowStart(ssTime int) {
120+
brr.Lock()
121+
brr.slowStartTime = ssTime
122+
brr.Unlock()
123+
}
124+
125+
func (brr *BalanceRR) checkSlowStart() {
126+
brr.Lock()
127+
defer brr.Unlock()
128+
if brr.slowStartTime > 0 {
129+
for _, backendRR := range brr.backends {
130+
backend := backendRR.backend
131+
if backend.GetRestart() {
132+
backend.SetRestart(false)
133+
backendRR.initSlowStart(brr.slowStartTime)
134+
}
135+
backendRR.updateSlowStart()
136+
}
137+
}
138+
}
139+
116140
// Release releases backend list.
117141
func (brr *BalanceRR) Release() {
118142
for _, back := range brr.backends {
@@ -162,6 +186,8 @@ func (brr *BalanceRR) Update(conf cluster_table_conf.SubClusterBackend) {
162186
for _, bkConf := range confMap {
163187
backendRR := NewBackendRR()
164188
backendRR.Init(brr.Name, bkConf)
189+
backend := backendRR.backend
190+
backend.SetRestart(true)
165191
// add to backendsNew
166192
backendsNew = append(backendsNew, backendRR)
167193
}
@@ -195,6 +221,10 @@ func (brr *BalanceRR) ensureSortedUnlocked() {
195221

196222
// Balance select one backend from sub cluster in round robin manner.
197223
func (brr *BalanceRR) Balance(algor int, key []byte) (*backend.BfeBackend, error) {
224+
// Slow start is not supported when session sticky is enabled
225+
if algor != WrrSticky {
226+
brr.checkSlowStart()
227+
}
198228
switch algor {
199229
case WrrSimple:
200230
return brr.simpleBalance()

0 commit comments

Comments
 (0)