6.824 Lab3-Raft Part 3B

原创已于 2026-02-06 15:05:29 修改 · 369 阅读

10 ·

本内容遵循CC 4.0 BY-SA版权协议

GEO检测

标签

#分布式 #golang #Raft

于 2026-02-02 18:12:55 首次发布

3B要实现的是log功能，要完成这一部分只看论文的 Figure2 部分已经不够了，最好把整个 Section5 仔细看一遍。

AppendEntries

根据 Figure2 不难将之前实现的 AppendEntries 进一步补充完整。这里要注意的是当 rf.lastApplied < rf.commitIndex 时，要向 applyCh 发送新提交的log，为了避免阻塞 AppendEntries 返回，这里我选择创建一个新的协程来实现这一功能。

// AppendEntries is the receiver-side handler of the AppendEntries RPC, used
// both for log replication and as a heartbeat (Raft paper, Figure 2).
//
// A request whose term is older than rf.currentTerm is rejected outright.
// Otherwise the server calls convertToFollower with args.Term, restarts its
// election timer, checks that its log has an entry at args.PrevLogIndex with
// term args.PrevLogTerm, reconciles its log with args.Entries and advances
// commitIndex. reply.Term is always the receiver's current term;
// reply.Success is true only when the consistency check passed.
func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) {
	rf.mu.Lock()
	defer rf.mu.Unlock()

	// Reply false if term < currentTerm.
	if args.Term < rf.currentTerm {
		reply.Term = rf.currentTerm
		reply.Success = false
		DPrintf("%d server in term %d reject AppendEntries request from %d in term %d", rf.me, rf.currentTerm, args.LeaderId, args.Term)
		return
	}

	// args.Term >= currentTerm is guaranteed here, so the sender is the
	// leader of that term: step down unconditionally.
	rf.convertToFollower(args.Term)

	// Restart the election timer for every RPC accepted from the current
	// leader, including ones rejected below for a log mismatch. Otherwise a
	// lagging follower would time out and disrupt a healthy leader while its
	// log is still being repaired.
	rf.lastHeartbeat = time.Now()

	reply.Term = rf.currentTerm
	reply.Success = false

	// Reply false if the log doesn't contain an entry at prevLogIndex whose
	// term matches prevLogTerm. PrevLogIndex == 0 is skipped: the code treats
	// index 0 as always matching.
	if args.PrevLogIndex > 0 {
		if len(rf.log)-1 < args.PrevLogIndex {
			return
		}
		if rf.log[args.PrevLogIndex].Term != args.PrevLogTerm {
			DPrintf("%d server log term mismatch at index %d", rf.me, args.PrevLogIndex)
			return
		}
	}

	// Walk the incoming entries. Entries already present with the same term
	// are left alone; the log is only cut at the first real conflict, so a
	// delayed RPC carrying a shorter prefix never truncates newer entries.
	for i := args.PrevLogIndex + 1; i <= args.PrevLogIndex+len(args.Entries); i++ {
		offset := i - args.PrevLogIndex - 1
		if len(rf.log)-1 < i {
			// Past the end of the local log: append the remainder.
			rf.log = append(rf.log, args.Entries[offset:]...)
			break
		}
		if rf.log[i].Term != args.Entries[offset].Term {
			// Conflict: drop this entry and everything after it, then
			// append the leader's entries from here on.
			rf.log = rf.log[:i]
			rf.log = append(rf.log, args.Entries[offset:]...)
			DPrintf("%d server deleted conflicting entries and appended at %d from leader %d", rf.me, i, args.LeaderId)
			break
		}
	}

	// commitIndex = min(leaderCommit, index of last new entry). Only entries
	// up to PrevLogIndex+len(Entries) are known to match the leader; the
	// local log may hold a longer, unverified tail, so len(rf.log)-1 must
	// not be used as the bound. commitIndex never moves backwards.
	lastNewIndex := args.PrevLogIndex + len(args.Entries)
	if newCommit := min(args.LeaderCommit, lastNewIndex); newCommit > rf.commitIndex {
		rf.commitIndex = newCommit
		DPrintf("%d server update commitIndex = %d", rf.me, rf.commitIndex)
	}

	// Deliver newly committed entries on a separate goroutine so that a slow
	// applyCh consumer cannot delay the RPC reply.
	if rf.lastApplied < rf.commitIndex {
		DPrintf("%d server lastApplied: %d, commitIndex: %d", rf.me, rf.lastApplied, rf.commitIndex)
		go rf.ApplyLogEntries()
	}

	reply.Success = true
}

ApplyLogEntries

不管是 follower 还是 leader 都可以通过 ApplyLogEntries 来向 applyCh 发送新的log

// ApplyLogEntries sends every committed-but-unapplied log entry to applyCh in
// index order, bumping lastApplied after each successful send.
//
// rf.mu is held for the whole call, including the channel sends, so
// concurrent invocations are serialized and entries are delivered in order.
func (rf *Raft) ApplyLogEntries() {
	rf.mu.Lock()
	defer rf.mu.Unlock()

	DPrintf("%d server apply log entries, lastApplied %d, commitIndex %d", rf.me, rf.lastApplied, rf.commitIndex)
	for next := rf.lastApplied + 1; next <= rf.commitIndex; next++ {
		rf.applyCh <- raftapi.ApplyMsg{
			CommandValid: true,
			Command:      rf.log[next].Command,
			CommandIndex: next,
		}
		// Record progress only after the entry has actually been handed over.
		rf.lastApplied = next
	}
	DPrintf("%d server finished applying log entries", rf.me)
}

通过以上两个函数 follower 就可以接收并应用log了，接下来是更复杂的leader部分。

Start

Start函数负责接收客户端请求，将 command 存在 log 中。在 committed 后，会通过 applyCh 通知。向 follower 发送新日志达成共识的过程通过异步实现，确保 Start 函数能够立即返回。

// Start is called by the service built on top of Raft (e.g. a k/v server) to
// begin agreement on the next command to be appended to the log.
//
// If this server does not believe it is the leader, Start returns
// (-1, -1, false) immediately. Otherwise it appends the command to its own
// log, kicks off replication in background goroutines and returns right
// away. There is no guarantee the command will ever be committed, since the
// leader may fail or lose an election.
//
// Return values, in order: the index the command will occupy if it is ever
// committed, the current term, and whether this server believes it is the
// leader.
func (rf *Raft) Start(command interface{}) (int, int, bool) {
	rf.mu.Lock()
	if rf.state != leader {
		rf.mu.Unlock()
		return -1, -1, false
	}

	// Record the command locally, stamped with the current term.
	rf.log = append(rf.log, LogEntry{Command: command, Term: rf.currentTerm})
	index, term := len(rf.log)-1, rf.currentTerm
	rf.mu.Unlock()

	// Progress shared by the replication goroutines for this entry. Both
	// counters start at 1 to account for the leader itself.
	var progressMu sync.Mutex
	progress := sync.NewCond(&progressMu)
	acks, done := 1, 1

	go rf.WaitForCommit(index, progress, &acks, &done)
	for peer := range rf.peers {
		if peer != rf.me {
			go rf.SendNewLogEntries(peer, term, progress, &acks, &done)
		}
	}

	return index, term, true
}

WaitForCommit 函数负责等待并更新commitIndex。由于需要循环等待并检查是否已完成commit，这里使用条件变量来更优雅地实现循环等待，避免空转。这里commitIndex的更新遵照 Figure2 的规则，从而确保commitIndex之前的log都是committed。这里贴一下 Figure2 的原文：If there exists an N such that N > commitIndex, a majority of matchIndex[i] ≥ N, and log[N].term == currentTerm: set commitIndex = N

// WaitForCommit blocks on cond until either more than half of the peers have
// acknowledged (*count) or every peer's send attempt has finished
// (*finished), then tries to advance commitIndex to index.
//
// commitIndex is only moved when index is still inside the log, is above the
// current commitIndex, is matched by a majority according to matchIndex, and
// the entry at index carries the current term (the Figure 2 commit rule).
// On success a goroutine is started to apply the newly committed entries.
func (rf *Raft) WaitForCommit(index int, cond *sync.Cond, count *int, finished *int) {
	cond.L.Lock()
	for {
		if *count > len(rf.peers)/2 || *finished >= len(rf.peers) {
			break
		}
		cond.Wait()
	}
	cond.L.Unlock()

	rf.mu.Lock()
	defer rf.mu.Unlock()

	// Nothing to do if the entry is no longer in the log or is already
	// covered by commitIndex.
	if index >= len(rf.log) || rf.commitIndex >= index {
		return
	}

	// The leader always counts itself; add every other peer whose
	// matchIndex has reached index.
	replicas := 1
	for peer, matched := range rf.matchIndex {
		if peer == rf.me {
			continue
		}
		if matched >= index {
			replicas++
		}
	}

	if replicas <= len(rf.peers)/2 || rf.log[index].Term != rf.currentTerm {
		return
	}

	rf.commitIndex = index
	DPrintf("%d leader update commitIndex = %d", rf.me, rf.commitIndex)
	go rf.ApplyLogEntries()
}

SendNewLogEntries 函数负责向 follower 发送 log，根据 nextIndex 来确定要发送的日志：发送成功要更新 nextIndex 和 matchIndex，失败则将 nextIndex 减 1 后重试。要注意的是 reply 失败有两种情况：如果是因为日志不一致（follower 在 prevLogIndex 处没有任期匹配的条目），就递减 nextIndex 并重试；如果是因为 follower 的 term 已经大于了当前的 term，那么要转变为 follower 并不再重试。

// SendNewLogEntries sends one AppendEntries RPC to server carrying every log
// entry from nextIndex[server] onward, and processes the reply.
//
// term is the term in which the caller was leader. The RPC is not sent, and
// a reply is not applied, once this server is no longer leader in that term.
// cond, count and finished are the shared progress state of one replication
// round: *count is incremented on an accepted success, *finished when this
// peer's attempt is over, and cond is broadcast whenever either changes. All
// three are accessed only while holding cond.L.
//
// Reply handling:
//   - reply.Term > currentTerm: step down via convertToFollower.
//   - stale reply (term changed or no longer leader): ignored.
//   - success: advance matchIndex/nextIndex for server.
//   - rejection in the same term (log mismatch): decrement nextIndex and
//     retry in a new goroutine; the attempt is not counted as finished.
func (rf *Raft) SendNewLogEntries(server int, term int, cond *sync.Cond, count *int, finished *int) {
	args := &AppendEntriesArgs{}
	reply := &AppendEntriesReply{}

	// markFinished records that this peer's attempt is over and wakes waiters.
	markFinished := func() {
		cond.L.Lock()
		*finished += 1
		cond.Broadcast()
		cond.L.Unlock()
	}

	rf.mu.Lock()
	// Only the leader of the term this round was started in may send.
	if rf.state != leader || rf.currentTerm != term {
		rf.mu.Unlock()
		markFinished()
		return
	}

	args.Term = term
	args.LeaderId = rf.me
	args.LeaderCommit = rf.commitIndex
	args.PrevLogIndex = rf.nextIndex[server] - 1
	if args.PrevLogIndex > 0 {
		args.PrevLogTerm = rf.log[args.PrevLogIndex].Term
	}
	// Copy the entries instead of aliasing rf.log: the RPC layer reads
	// args.Entries after rf.mu is released, and the log's backing array may
	// be overwritten concurrently (e.g. after stepping down).
	args.Entries = append(args.Entries, rf.log[rf.nextIndex[server]:]...)
	rf.mu.Unlock()

	if !rf.sendAppendEntries(server, args, reply) {
		markFinished()
		return
	}

	// Lock order is cond.L then rf.mu.
	cond.L.Lock()
	defer cond.L.Unlock()
	rf.mu.Lock()
	defer rf.mu.Unlock()

	switch {
	case reply.Term > rf.currentTerm:
		// A newer term exists: step down and do not retry.
		rf.lastHeartbeat = time.Now()
		rf.convertToFollower(reply.Term)

	case rf.state != leader || rf.currentTerm != term:
		// Stale reply from an earlier leadership; nextIndex/matchIndex may
		// belong to a different term by now, so leave them untouched.

	case reply.Success:
		// The follower now matches our log up to the last entry we sent.
		// Both indices only move forward so that a delayed reply cannot
		// undo the effect of a newer one.
		matched := args.PrevLogIndex + len(args.Entries)
		rf.matchIndex[server] = max(rf.matchIndex[server], matched)
		rf.nextIndex[server] = max(rf.nextIndex[server], matched+1)
		*count += 1

	default:
		// Log inconsistency: back nextIndex off by one and try again.
		// matchIndex is left alone; it records what is known to be
		// replicated and a rejection does not invalidate that.
		rf.nextIndex[server] = max(1, rf.nextIndex[server]-1)
		DPrintf("%d leader decrease %d server nextIndex %d, matchIndex %d", rf.me, server, rf.nextIndex[server], rf.matchIndex[server])
		go rf.SendNewLogEntries(server, term, cond, count, finished)
		return
	}

	*finished += 1
	cond.Broadcast()
}

至此，leader的Start函数就完全实现了。

心跳

随着log功能的引入，leader的心跳也要包含log。和上面的SendNewLogEntries相似，心跳传递nextIndex和commitIndex间的log，在reply失败时根据情况选择转变为follower或减少nextIndex再重试。

选举

上面的内容基本实现了log功能，但想要完全通过3B测试，还需要完善选举部分。因为引入了log，所以选举投票时还需要考虑log的影响。只有当 candidate 的 log 比 follower 更“新”时，follower 才会投票给 candidate，在论文的 5.4.1 Election restriction 有详细的论述。那么怎么比较两个log哪个更“新”呢？论文给出了具体的判断依据：如果日志的最后条目具有不同的任期，那么任期较晚的日志更“新”；如果日志以相同的任期结束，则拥有较长日志的更“新”。

// RequestVote is the receiver-side handler of the RequestVote RPC.
//
// The vote is refused when the candidate's term is older than ours, when we
// have already voted for a different candidate, or when the candidate's log
// is not at least as up-to-date as ours (later last term wins; on equal last
// terms the longer log wins). Granting a vote records votedFor, refreshes
// lastHeartbeat and calls persist. reply.Term is always our current term.
func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) {
	rf.mu.Lock()
	defer rf.mu.Unlock()

	// Default to refusing; only the final path below flips this.
	reply.VoteGranted = false

	// Stale candidate.
	if args.Term < rf.currentTerm {
		reply.Term = rf.currentTerm
		return
	}

	// Newer term seen: step down before deciding on the vote.
	if args.Term > rf.currentTerm {
		DPrintf("%d server update term from %d to %d", rf.me, rf.currentTerm, args.Term)
		rf.convertToFollower(args.Term)
	}
	reply.Term = rf.currentTerm

	// Already voted for a different candidate.
	if rf.votedFor != -1 && rf.votedFor != args.CandidateId {
		return
	}

	// Election restriction: the candidate's log must be at least as
	// up-to-date as ours.
	myLastIndex := len(rf.log) - 1
	myLastTerm := rf.log[myLastIndex].Term

	upToDate := false
	switch {
	case args.LastLogTerm > myLastTerm:
		upToDate = true
	case args.LastLogTerm == myLastTerm:
		upToDate = args.LastLogIndex >= myLastIndex
	}
	if !upToDate {
		return
	}

	rf.votedFor = args.CandidateId
	reply.VoteGranted = true
	// The election timer is refreshed only when the vote is granted.
	rf.lastHeartbeat = time.Now()
	rf.persist()
	DPrintf("%d server voted for %d in term %d, lastLogIndex %d, lastLogTerm %d", rf.me, args.CandidateId, rf.currentTerm, myLastIndex, myLastTerm)
}