3B要实现的是log功能,要完成这一部分只看论文的 Figure2 部分已经不够了,最好把整个 Section5 仔细看一遍。
AppendEntries
根据 Figure2 不难将之前实现的 AppendEntries 进一步补充完整。这里要注意的是当 rf.lastApplied < rf.commitIndex 时,要向 applyCh 发送新提交的log,为了避免阻塞 AppendEntries 返回,这里我选择创建一个新的协程来实现这一功能。
// AppendEntries is the RPC handler for log replication and heartbeats
// (Raft paper, Figure 2, "AppendEntries RPC / Receiver implementation").
//
// args carries the leader's term, the index/term of the entry preceding the
// new ones, the new entries themselves, and the leader's commit index.
// reply.Term is set to this server's term and reply.Success reports whether
// the entries were accepted. The whole handler runs under rf.mu.
func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) {
	rf.mu.Lock()
	defer rf.mu.Unlock()

	// Reply false if term < currentTerm.
	if args.Term < rf.currentTerm {
		reply.Term = rf.currentTerm
		reply.Success = false
		DPrintf("%d server in term %d reject AppendEntries request from %d in term %d", rf.me, rf.currentTerm, args.LeaderId, args.Term)
		return
	}

	// Past the check above args.Term >= rf.currentTerm always holds, so the
	// sender is the legitimate leader of this term: step down unconditionally.
	rf.convertToFollower(args.Term)

	// Reset the election timer as soon as the leader is recognised, not only
	// on success. Otherwise a follower whose log mismatches keeps rejecting
	// while nextIndex is backed up and may time out and start a needless
	// election against a perfectly healthy leader.
	rf.lastHeartbeat = time.Now()

	reply.Term = rf.currentTerm
	reply.Success = false

	// Reply false if the log doesn't contain an entry at prevLogIndex whose
	// term matches prevLogTerm. Index 0 is skipped, as in the original check.
	if args.PrevLogIndex > 0 {
		if len(rf.log)-1 < args.PrevLogIndex {
			return
		}
		if rf.log[args.PrevLogIndex].Term != args.PrevLogTerm {
			DPrintf("%d server log term mismatch at index %d", rf.me, args.PrevLogIndex)
			return
		}
	}

	// Walk the incoming entries. Entries already present with the same term
	// are kept; at the first missing or conflicting position the local tail
	// is dropped (if any) and the remainder of args.Entries is appended.
	for k := range args.Entries {
		idx := args.PrevLogIndex + 1 + k
		if idx < len(rf.log) && rf.log[idx].Term == args.Entries[k].Term {
			continue
		}
		if idx < len(rf.log) {
			// Delete the existing entry that conflicts and all that follow it.
			rf.log = rf.log[:idx]
			rf.log = append(rf.log, args.Entries[k:]...)
			DPrintf("%d server deleted conflicting entries and appended at %d from leader %d", rf.me, idx, args.LeaderId)
		} else {
			rf.log = append(rf.log, args.Entries[k:]...)
		}
		break
	}

	// If leaderCommit > commitIndex, set commitIndex =
	// min(leaderCommit, index of last new entry). Using the local log length
	// instead would let a short or stale request commit a local suffix that
	// was never verified against the leader's log. The explicit
	// "> rf.commitIndex" test keeps commitIndex from ever moving backwards.
	lastNewIndex := args.PrevLogIndex + len(args.Entries)
	if newCommit := min(args.LeaderCommit, lastNewIndex); newCommit > rf.commitIndex {
		rf.commitIndex = newCommit
		DPrintf("%d server update commitIndex = %d", rf.me, rf.commitIndex)
	}

	// Deliver newly committed entries from a separate goroutine so this
	// handler can return without waiting on applyCh.
	if rf.lastApplied < rf.commitIndex {
		DPrintf("%d server lastApplied: %d, commitIndex: %d", rf.me, rf.lastApplied, rf.commitIndex)
		go rf.ApplyLogEntries()
	}

	reply.Success = true
}
ApplyLogEntries
不管是 follower 还是 leader 都可以通过 ApplyLogEntries 来向 applyCh 发送新的log
// ApplyLogEntries delivers every committed-but-unapplied log entry to
// rf.applyCh, in index order, advancing rf.lastApplied after each send.
// It is used by followers and the leader alike.
//
// NOTE(review): rf.mu stays locked for the whole loop, including the channel
// sends, so a slow receiver on applyCh stalls every other user of rf.mu.
// Confirm this is acceptable for the consumers of applyCh.
func (rf *Raft) ApplyLogEntries() {
	rf.mu.Lock()
	defer rf.mu.Unlock()

	DPrintf("%d server apply log entries, lastApplied %d, commitIndex %d", rf.me, rf.lastApplied, rf.commitIndex)

	for next := rf.lastApplied + 1; next <= rf.commitIndex; next++ {
		rf.applyCh <- raftapi.ApplyMsg{
			CommandValid: true,
			Command:      rf.log[next].Command,
			CommandIndex: next,
		}
		// Record progress only after the message has been handed over.
		rf.lastApplied = next
	}

	DPrintf("%d server finished applying log entries", rf.me)
}
通过以上两个函数 follower 就可以接收并应用log了,接下来是更复杂的leader部分。
Start
Start 函数负责接收客户端请求,将 command 追加到 log 中。该 command 被 committed 之后,会通过 applyCh 通知上层服务。向 follower 发送新日志、达成共识的过程通过异步方式实现,以确保 Start 函数能够立即返回。
// Start is called by the service built on Raft (e.g. a k/v server) to begin
// agreement on the next command to be appended to the log.
//
// If this server does not believe it is the leader, Start returns
// (-1, -1, false) right away. Otherwise it appends the command to the local
// log, launches replication in the background, and returns immediately.
// There is no guarantee the command will ever be committed, since the leader
// may fail or lose an election.
//
// Return values, in order:
//   - the index the command will occupy if it is ever committed,
//   - the current term,
//   - whether this server believes it is the leader.
func (rf *Raft) Start(command interface{}) (int, int, bool) {
	rf.mu.Lock()
	if rf.state != leader {
		rf.mu.Unlock()
		return -1, -1, false
	}

	// Append the command to the local log under the lock.
	rf.log = append(rf.log, LogEntry{Command: command, Term: rf.currentTerm})
	index, term := len(rf.log)-1, rf.currentTerm
	rf.mu.Unlock()

	// Per-call bookkeeping shared by the waiter and the senders, guarded by
	// the condition variable's mutex. Both counters start at 1 to account
	// for this server itself.
	var waitMu sync.Mutex
	cond := sync.NewCond(&waitMu)
	acked, done := 1, 1

	go rf.WaitForCommit(index, cond, &acked, &done)
	for peer := range rf.peers {
		if peer != rf.me {
			go rf.SendNewLogEntries(peer, term, cond, &acked, &done)
		}
	}

	return index, term, true
}
WaitForCommit 函数负责等待并更新 commitIndex。由于需要反复等待并检查是否已完成 commit,这里使用条件变量来更优雅地实现等待,避免空转。commitIndex 的更新遵照 Figure2 的规则,从而确保 commitIndex 及其之前的 log 都是 committed。这里贴一下 Figure2 的原文:If there exists an N such that N > commitIndex, a majority of matchIndex[i] ≥ N, and log[N].term == currentTerm: set commitIndex = N
// WaitForCommit blocks on cond until either more than half of the peers have
// acknowledged (*count) or every peer's attempt has ended (*finished), then
// tries to advance rf.commitIndex to index.
//
// The commit only happens when index is still inside the log, is above the
// current commitIndex, is matched by more than half of the peers according
// to rf.matchIndex (this server counted implicitly), and the entry at index
// carries rf.currentTerm. On commit, applying is started in a new goroutine.
//
// NOTE(review): rf.state is not consulted here, while Figure 2 states this
// rule for leaders only — confirm whether a leadership guard is wanted.
func (rf *Raft) WaitForCommit(index int, cond *sync.Cond, count *int, finished *int) {
	cond.L.Lock()
	for !(*count > len(rf.peers)/2 || *finished >= len(rf.peers)) {
		cond.Wait()
	}
	cond.L.Unlock()

	rf.mu.Lock()
	defer rf.mu.Unlock()

	// Nothing to do if the entry is gone or is already committed.
	if index > len(rf.log)-1 || rf.commitIndex >= index {
		return
	}

	// Count this server plus every peer known to hold the entry.
	replicas := 1
	for peer, matched := range rf.matchIndex {
		if peer != rf.me && matched >= index {
			replicas++
		}
	}

	if replicas <= len(rf.peers)/2 || rf.log[index].Term != rf.currentTerm {
		return
	}

	rf.commitIndex = index
	DPrintf("%d leader update commitIndex = %d", rf.me, rf.commitIndex)
	go rf.ApplyLogEntries()
}
SendNewLogEntries 函数负责向 follower 发送 log:根据 nextIndex 确定要发送的日志,发送成功后更新 nextIndex 和 matchIndex,失败则将 nextIndex 减 1 后重试。要注意的是 reply 失败有两种情况:如果是因为 follower 的 term 已经大于当前的 term,那么要转变为 follower 并不再重试;否则说明日志不匹配,这时才需要回退 nextIndex 并重试。
// SendNewLogEntries sends one AppendEntries RPC to server containing every
// log entry from rf.nextIndex[server] onward, then processes the reply.
//
// term is the term in which the originating Start call ran; the RPC is only
// sent, and its reply only acted upon, while this server is still leader in
// that same term. cond, count and finished are the per-Start bookkeeping
// shared with WaitForCommit: *count is the number of servers that accepted
// the entries, *finished the number of peers whose attempt has ended. Both
// are guarded by cond.L.
//
// Outcomes:
//   - RPC not delivered, or no longer leader in term: the attempt is finished.
//   - reply carries a higher term: step down to follower, no retry.
//   - success: advance nextIndex/matchIndex and count the acknowledgement.
//   - rejection in the same term: back nextIndex up by one and retry in a
//     new goroutine (the attempt is not counted as finished).
func (rf *Raft) SendNewLogEntries(server int, term int, cond *sync.Cond, count *int, finished *int) {
	// markFinished records the end of this peer's attempt and wakes waiters.
	markFinished := func() {
		cond.L.Lock()
		*finished += 1
		cond.Broadcast()
		cond.L.Unlock()
	}

	args := &AppendEntriesArgs{}
	reply := &AppendEntriesReply{}

	rf.mu.Lock()
	// A retry chain started in an earlier term must not keep sending once
	// the term has moved on, even if this server is leader again.
	if rf.state != leader || rf.currentTerm != term {
		rf.mu.Unlock()
		markFinished()
		return
	}
	next := rf.nextIndex[server]
	args.Term = term
	args.LeaderId = rf.me
	args.LeaderCommit = rf.commitIndex
	args.PrevLogIndex = next - 1
	if args.PrevLogIndex > 0 {
		args.PrevLogTerm = rf.log[args.PrevLogIndex].Term
	} else {
		args.PrevLogTerm = 0
	}
	// Copy the entries instead of aliasing rf.log: the RPC layer reads
	// args.Entries after rf.mu is released, and a later truncate-and-append
	// on rf.log would otherwise race with it. Appending to a zero-capacity
	// slice forces a fresh backing array.
	args.Entries = append(rf.log[:0:0], rf.log[next:]...)
	rf.mu.Unlock()

	if !rf.sendAppendEntries(server, args, reply) {
		markFinished()
		return
	}

	// Lock order is cond.L then rf.mu; the deferred unlocks run in reverse.
	cond.L.Lock()
	defer cond.L.Unlock()
	rf.mu.Lock()
	defer rf.mu.Unlock()

	*finished += 1
	switch {
	case reply.Term > rf.currentTerm:
		// A newer term exists: step down and do not retry.
		rf.lastHeartbeat = time.Now()
		rf.convertToFollower(reply.Term)

	case rf.state != leader || rf.currentTerm != term:
		// Stale reply from an earlier leadership; it must not touch
		// nextIndex/matchIndex of the current one.

	case reply.Success:
		// Both indexes only ever move forward, so a reordered older reply
		// cannot undo progress recorded by a newer one.
		matched := args.PrevLogIndex + len(args.Entries)
		rf.matchIndex[server] = max(rf.matchIndex[server], matched)
		rf.nextIndex[server] = max(rf.nextIndex[server], matched+1)
		*count += 1

	default:
		// Log mismatch on the follower: back up nextIndex and try again.
		// matchIndex is left untouched; it records entries known to be
		// replicated and increases monotonically (Figure 2).
		rf.nextIndex[server] = max(1, rf.nextIndex[server]-1)
		*finished -= 1
		DPrintf("%d leader decrease %d server nextIndex %d, matchIndex %d", rf.me, server, rf.nextIndex[server], rf.matchIndex[server])
		go rf.SendNewLogEntries(server, term, cond, count, finished)
	}
	cond.Broadcast()
}
至此,leader的Start函数就完全实现了。
心跳
随着log功能的引入,leader的心跳也要包含log。和上面的SendNewLogEntries相似,心跳传递nextIndex和commitIndex间的log,在reply失败时根据情况选择转变为follower或减少nextIndex再重试。
选举
上面的内容基本实现了log功能,但想要完全通过3B测试,还需要完善选举部分。因为引入了log,所以选举投票时还需要考虑log的影响。只有当 candidate 的 log 比 follower 更“新”时,follower 才会投票给 candidate,在论文的 5.4.1 Election restriction 有详细的论述。那么怎么比较两个log哪个更“新”呢?论文给出了具体的判断依据:如果日志的最后条目具有不同的任期,那么任期较晚的日志更“新”;如果日志以相同的任期结束,则拥有较长日志的更“新”。
// RequestVote is the RPC handler for vote requests from candidates.
//
// The vote is refused when the candidate's term is older than ours, when we
// have already voted for someone else, or when the candidate's log is not at
// least as up-to-date as ours (a later last term wins; with equal last
// terms, the log that is at least as long wins). A request with a newer
// term first moves this server to follower in that term. Granting a vote
// records it, resets the election timer, and persists state.
func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) {
	rf.mu.Lock()
	defer rf.mu.Unlock()

	// Every path except the grant at the bottom answers "no".
	reply.VoteGranted = false

	// Stale candidate.
	if args.Term < rf.currentTerm {
		reply.Term = rf.currentTerm
		return
	}

	// Newer term seen: adopt it as a follower before deciding on the vote.
	if args.Term > rf.currentTerm {
		DPrintf("%d server update term from %d to %d", rf.me, rf.currentTerm, args.Term)
		rf.convertToFollower(args.Term)
	}
	reply.Term = rf.currentTerm

	// Already voted for a different candidate.
	if rf.votedFor != -1 && rf.votedFor != args.CandidateId {
		return
	}

	// Election restriction: compare the candidate's last entry with ours.
	myLastIndex := len(rf.log) - 1
	myLastTerm := rf.log[myLastIndex].Term
	newerTerm := args.LastLogTerm > myLastTerm
	sameTermNotShorter := args.LastLogTerm == myLastTerm && args.LastLogIndex >= myLastIndex
	if !newerTerm && !sameTermNotShorter {
		return
	}

	rf.votedFor = args.CandidateId
	reply.VoteGranted = true
	// The election timer is refreshed only when the vote is actually granted.
	rf.lastHeartbeat = time.Now()
	rf.persist()
	DPrintf("%d server voted for %d in term %d, lastLogIndex %d, lastLogTerm %d", rf.me, args.CandidateId, rf.currentTerm, len(rf.log)-1, rf.log[len(rf.log)-1].Term)
}

2164

被折叠的 条评论
为什么被折叠?



