Skip to content

Commit dd451d0

Browse files
committed
1. Refactor: change "TigLabs raft Authors" to "tiglabs raft Authors".
2. Refactor: add a tiglabs "Modified work" copyright line to source files originally written by the etcd Authors.
3. Fix: when a raft member becomes leader, it must apply entries from applyID to commitID.
4. Refactor: add a raft heartbeat check; if there is no leader, log a warning.
5. Fix: truncate corrupt raft data.
6. Refactor: the raft heartbeat may block when a peer's host is down.
Signed-off-by: awzhgw <guowl18702995996@gmail.com>
1 parent 45667fc commit dd451d0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+284
-177
lines changed

config.go

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Copyright 2015 The etcd Authors
2+
// Modified work copyright 2018 The tiglabs Authors.
23
//
34
// Licensed under the Apache License, Version 2.0 (the "License");
45
// you may not use this file except in compliance with the License.

errors.go

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Copyright 2015 The etcd Authors
2+
// Modified work copyright 2018 The tiglabs Authors.
23
//
34
// Licensed under the Apache License, Version 2.0 (the "License");
45
// you may not use this file except in compliance with the License.

future.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2018 The TigLabs raft Authors.
1+
// Copyright 2018 The tiglabs raft Authors.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.

logger/logger.go

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Copyright 2015 The etcd Authors
2+
// Modified work copyright 2018 The tiglabs Authors.
23
//
34
// Licensed under the Apache License, Version 2.0 (the "License");
45
// you may not use this file except in compliance with the License.

pool.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2018 The TigLabs raft Authors.
1+
// Copyright 2018 The tiglabs raft Authors.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.

proto/codec.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2018 The TigLabs raft Authors.
1+
// Copyright 2018 The tiglabs raft Authors.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.

proto/pool.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2018 The TigLabs raft Authors.
1+
// Copyright 2018 The tiglabs raft Authors.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.

proto/proto.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2018 The TigLabs raft Authors.
1+
// Copyright 2018 The tiglabs raft Authors.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -115,6 +115,12 @@ type Message struct {
115115
Snapshot Snapshot // No need for codec
116116
}
117117

118+
func (m *Message) ToString() (mesg string) {
119+
return fmt.Sprintf("Mesg:[%v] type(%v) ForceVote(%v) Reject(%v) RejectIndex(%v) "+
120+
"From(%v) To(%v) Term(%v) LogTrem(%v) Index(%v) Commit(%v)", m.ID, m.Type.String(), m.ForceVote,
121+
m.Reject, m.RejectIndex, m.From, m.To, m.Term, m.LogTerm, m.Index, m.Commit)
122+
}
123+
118124
type ConfChange struct {
119125
Type ConfChangeType
120126
Peer Peer

raft.go

+60
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Copyright 2015 The etcd Authors
2+
// Modified work copyright 2018 The tiglabs Authors.
23
//
34
// Licensed under the Apache License, Version 2.0 (the "License");
45
// you may not use this file except in compliance with the License.
@@ -19,6 +20,7 @@ import (
1920
"runtime"
2021
"sync"
2122
"sync/atomic"
23+
"time"
2224
"unsafe"
2325

2426
"github.com/tiglabs/raft/logger"
@@ -58,6 +60,11 @@ type peerState struct {
5860
mu sync.RWMutex
5961
}
6062

63+
type monitorStatus struct {
64+
conErrCount uint8
65+
replicasErrCnt map[uint64]uint8
66+
}
67+
6168
func (s *peerState) change(c *proto.ConfChange) {
6269
s.mu.Lock()
6370
switch c.Type {
@@ -102,6 +109,7 @@ type raft struct {
102109
peerState peerState
103110
pending map[uint64]*Future
104111
snapping map[uint64]*snapshotStatus
112+
mStatus *monitorStatus
105113
propc chan *proposal
106114
applyc chan *apply
107115
recvc chan *proto.Message
@@ -130,10 +138,15 @@ func newRaft(config *Config, raftConfig *RaftConfig) (*raft, error) {
130138
return nil, err
131139
}
132140

141+
mStatus := &monitorStatus{
142+
conErrCount: 0,
143+
replicasErrCnt: make(map[uint64]uint8),
144+
}
133145
raft := &raft{
134146
raftFsm: r,
135147
config: config,
136148
raftConfig: raftConfig,
149+
mStatus: mStatus,
137150
pending: make(map[uint64]*Future),
138151
snapping: make(map[uint64]*snapshotStatus),
139152
recvc: make(chan *proto.Message, config.ReqBufferSize),
@@ -155,6 +168,7 @@ func newRaft(config *Config, raftConfig *RaftConfig) (*raft, error) {
155168

156169
util.RunWorker(raft.runApply, raft.handlePanic)
157170
util.RunWorker(raft.run, raft.handlePanic)
171+
util.RunWorker(raft.monitor, raft.handlePanic)
158172
return raft, nil
159173
}
160174

@@ -388,6 +402,51 @@ func (s *raft) run() {
388402
}
389403
}
390404

405+
func (s *raft) monitor() {
406+
statusTicker := time.NewTicker(5 * time.Second)
407+
leaderTicker := time.NewTicker(1 * time.Minute)
408+
for {
409+
select {
410+
case <-s.stopc:
411+
statusTicker.Stop()
412+
return
413+
414+
case <-statusTicker.C:
415+
if s.raftFsm.leader == NoLeader || s.raftFsm.state == stateCandidate {
416+
s.mStatus.conErrCount++
417+
} else {
418+
s.mStatus.conErrCount = 0
419+
}
420+
if s.mStatus.conErrCount > 5 {
421+
errMsg := fmt.Sprintf("raft status not health partitionID[%d]_nodeID[%d]_leader[%v]_state[%v]_replicas[%v]",
422+
s.raftFsm.id, s.raftFsm.config.NodeID, s.raftFsm.leader, s.raftFsm.state, s.raftFsm.peers())
423+
logger.Error(errMsg)
424+
425+
s.mStatus.conErrCount = 0
426+
}
427+
case <-leaderTicker.C:
428+
if s.raftFsm.state == stateLeader {
429+
for id, p := range s.raftFsm.replicas {
430+
if id == s.raftFsm.config.NodeID {
431+
continue
432+
}
433+
if p.active == false {
434+
s.mStatus.replicasErrCnt[id]++
435+
} else {
436+
s.mStatus.replicasErrCnt[id] = 0
437+
}
438+
if s.mStatus.replicasErrCnt[id] > 5 {
439+
errMsg := fmt.Sprintf("raft partitionID[%d] replicaID[%v] not active peer[%v]",
440+
s.raftFsm.id, id, p.peer)
441+
logger.Error(errMsg)
442+
s.mStatus.replicasErrCnt[id] = 0
443+
}
444+
}
445+
}
446+
}
447+
}
448+
}
449+
391450
func (s *raft) tick() {
392451
if s.restoringSnapshot.Get() {
393452
return
@@ -446,6 +505,7 @@ func (s *raft) reciveMessage(m *proto.Message) {
446505
case <-s.stopc:
447506
case s.recvc <- m:
448507
default:
508+
logger.Warn(fmt.Sprintf("raft[%v] discard message(%v)", s.raftConfig.ID, m.ToString()))
449509
return
450510
}
451511
}

raft_fsm.go

+21-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Copyright 2015 The etcd Authors
2+
// Modified work copyright 2018 The tiglabs Authors.
23
//
34
// Licensed under the Apache License, Version 2.0 (the "License");
45
// you may not use this file except in compliance with the License.
@@ -21,6 +22,7 @@ import (
2122

2223
"github.com/tiglabs/raft/logger"
2324
"github.com/tiglabs/raft/proto"
25+
"time"
2426
)
2527

2628
// NoLeader is a placeholder nodeID used when there is no leader.
@@ -130,9 +132,20 @@ func newRaftFsm(config *Config, raftConfig *RaftConfig) (*raftFsm, error) {
130132
logger.Debug("newRaft[%v] [peers: [%s], term: %d, commit: %d, applied: %d, lastindex: %d, lastterm: %d]",
131133
r.id, strings.Join(peerStrs, ","), r.term, r.raftLog.committed, r.raftLog.applied, r.raftLog.lastIndex(), r.raftLog.lastTerm())
132134
}
135+
go r.doRandomSeed()
133136
return r, nil
134137
}
135138

139+
func (r *raftFsm) doRandomSeed() {
140+
ticker := time.Tick(time.Duration(rand.Intn(5)) * time.Second)
141+
for {
142+
select {
143+
case <-ticker:
144+
r.rand.Seed(time.Now().UnixNano())
145+
}
146+
}
147+
}
148+
136149
// raft main method
137150
func (r *raftFsm) Step(m *proto.Message) {
138151
if m.Type == proto.LocalMsgHup {
@@ -207,7 +220,7 @@ func (r *raftFsm) loadState(state proto.HardState) error {
207220
}
208221

209222
func (r *raftFsm) recoverCommit() error {
210-
for r.raftLog.applied < r.raftLog.committed {
223+
for r.raftLog.applied <= r.raftLog.committed {
211224
committedEntries := r.raftLog.nextEnts(64 * MB)
212225
for _, entry := range committedEntries {
213226
r.raftLog.appliedTo(entry.Index)
@@ -230,6 +243,9 @@ func (r *raftFsm) recoverCommit() error {
230243
r.applyConfChange(cc)
231244
}
232245
}
246+
if r.raftLog.applied == r.raftLog.committed {
247+
break
248+
}
233249
}
234250
return nil
235251
}
@@ -337,7 +353,10 @@ func (r *raftFsm) reset(term, lasti uint64, isLeader bool) {
337353
}
338354

339355
func (r *raftFsm) resetRandomizedElectionTimeout() {
340-
r.randElectionTick = r.config.ElectionTick + r.rand.Intn(r.config.ElectionTick)
356+
randTick := r.rand.Intn(r.config.ElectionTick)
357+
r.randElectionTick = r.config.ElectionTick + randTick
358+
logger.Debug("raft[%v] random election timeout randElectionTick=%v, config.ElectionTick=%v, randTick=%v", r.id,
359+
r.randElectionTick, r.config.ElectionTick, randTick)
341360
}
342361

343362
func (r *raftFsm) pastElectionTimeout() bool {

raft_fsm_candidate.go

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Copyright 2015 The etcd Authors
2+
// Modified work copyright 2018 The tiglabs Authors.
23
//
34
// Licensed under the Apache License, Version 2.0 (the "License");
45
// you may not use this file except in compliance with the License.

raft_fsm_follower.go

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Copyright 2015 The etcd Authors
2+
// Modified work copyright 2018 The tiglabs Authors.
23
//
34
// Licensed under the Apache License, Version 2.0 (the "License");
45
// you may not use this file except in compliance with the License.

raft_fsm_leader.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Copyright 2015 The etcd Authors
2+
// Modified work copyright 2018 The tiglabs Authors.
23
//
34
// Licensed under the Apache License, Version 2.0 (the "License");
45
// you may not use this file except in compliance with the License.
@@ -28,7 +29,7 @@ func (r *raftFsm) becomeLeader() {
2829
if r.state == stateFollower {
2930
panic(AppPanicError(fmt.Sprintf("[raft->becomeLeader][%v] invalid transition [follower -> leader].", r.id)))
3031
}
31-
32+
r.recoverCommit()
3233
lasti := r.raftLog.lastIndex()
3334
r.step = stepLeader
3435
r.reset(r.term, lasti, true)

raft_fsm_state.go

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Copyright 2015 The etcd Authors
2+
// Modified work copyright 2018 The tiglabs Authors.
23
//
34
// Licensed under the Apache License, Version 2.0 (the "License");
45
// you may not use this file except in compliance with the License.

raft_log.go

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Copyright 2015 The etcd Authors
2+
// Modified work copyright 2018 The tiglabs Authors.
23
//
34
// Licensed under the Apache License, Version 2.0 (the "License");
45
// you may not use this file except in compliance with the License.

0 commit comments

Comments
 (0)