123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248 |
- // Copyright 2015 The etcd Authors
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package rafthttp
- import (
- "encoding/binary"
- "fmt"
- "io"
- "time"
- "github.com/coreos/etcd/etcdserver/stats"
- "github.com/coreos/etcd/pkg/pbutil"
- "github.com/coreos/etcd/pkg/types"
- "github.com/coreos/etcd/raft/raftpb"
- )
- const (
- msgTypeLinkHeartbeat uint8 = 0
- msgTypeAppEntries uint8 = 1
- msgTypeApp uint8 = 2
- msgAppV2BufSize = 1024 * 1024
- )
- // msgappv2 stream sends three types of message: linkHeartbeatMessage,
- // AppEntries and MsgApp. AppEntries is the MsgApp that is sent in
- // replicate state in raft, whose index and term are fully predictable.
- //
- // Data format of linkHeartbeatMessage:
- // | offset | bytes | description |
- // +--------+-------+-------------+
- // | 0 | 1 | \x00 |
- //
- // Data format of AppEntries:
- // | offset | bytes | description |
- // +--------+-------+-------------+
- // | 0 | 1 | \x01 |
- // | 1 | 8 | length of entries |
- // | 9 | 8 | length of first entry |
- // | 17 | n1 | first entry |
- // ...
- // | x | 8 | length of k-th entry data |
- // | x+8 | nk | k-th entry data |
- // | x+8+nk | 8 | commit index |
- //
- // Data format of MsgApp:
- // | offset | bytes | description |
- // +--------+-------+-------------+
- // | 0 | 1 | \x02 |
- // | 1 | 8 | length of encoded message |
- // | 9 | n | encoded message |
- type msgAppV2Encoder struct {
- w io.Writer
- fs *stats.FollowerStats
- term uint64
- index uint64
- buf []byte
- uint64buf []byte
- uint8buf []byte
- }
- func newMsgAppV2Encoder(w io.Writer, fs *stats.FollowerStats) *msgAppV2Encoder {
- return &msgAppV2Encoder{
- w: w,
- fs: fs,
- buf: make([]byte, msgAppV2BufSize),
- uint64buf: make([]byte, 8),
- uint8buf: make([]byte, 1),
- }
- }
- func (enc *msgAppV2Encoder) encode(m raftpb.Message) error {
- start := time.Now()
- switch {
- case isLinkHeartbeatMessage(m):
- enc.uint8buf[0] = byte(msgTypeLinkHeartbeat)
- if _, err := enc.w.Write(enc.uint8buf); err != nil {
- return err
- }
- case enc.index == m.Index && enc.term == m.LogTerm && m.LogTerm == m.Term:
- enc.uint8buf[0] = byte(msgTypeAppEntries)
- if _, err := enc.w.Write(enc.uint8buf); err != nil {
- return err
- }
- // write length of entries
- binary.BigEndian.PutUint64(enc.uint64buf, uint64(len(m.Entries)))
- if _, err := enc.w.Write(enc.uint64buf); err != nil {
- return err
- }
- for i := 0; i < len(m.Entries); i++ {
- // write length of entry
- binary.BigEndian.PutUint64(enc.uint64buf, uint64(m.Entries[i].Size()))
- if _, err := enc.w.Write(enc.uint64buf); err != nil {
- return err
- }
- if n := m.Entries[i].Size(); n < msgAppV2BufSize {
- if _, err := m.Entries[i].MarshalTo(enc.buf); err != nil {
- return err
- }
- if _, err := enc.w.Write(enc.buf[:n]); err != nil {
- return err
- }
- } else {
- if _, err := enc.w.Write(pbutil.MustMarshal(&m.Entries[i])); err != nil {
- return err
- }
- }
- enc.index++
- }
- // write commit index
- binary.BigEndian.PutUint64(enc.uint64buf, m.Commit)
- if _, err := enc.w.Write(enc.uint64buf); err != nil {
- return err
- }
- enc.fs.Succ(time.Since(start))
- default:
- if err := binary.Write(enc.w, binary.BigEndian, msgTypeApp); err != nil {
- return err
- }
- // write size of message
- if err := binary.Write(enc.w, binary.BigEndian, uint64(m.Size())); err != nil {
- return err
- }
- // write message
- if _, err := enc.w.Write(pbutil.MustMarshal(&m)); err != nil {
- return err
- }
- enc.term = m.Term
- enc.index = m.Index
- if l := len(m.Entries); l > 0 {
- enc.index = m.Entries[l-1].Index
- }
- enc.fs.Succ(time.Since(start))
- }
- return nil
- }
- type msgAppV2Decoder struct {
- r io.Reader
- local, remote types.ID
- term uint64
- index uint64
- buf []byte
- uint64buf []byte
- uint8buf []byte
- }
- func newMsgAppV2Decoder(r io.Reader, local, remote types.ID) *msgAppV2Decoder {
- return &msgAppV2Decoder{
- r: r,
- local: local,
- remote: remote,
- buf: make([]byte, msgAppV2BufSize),
- uint64buf: make([]byte, 8),
- uint8buf: make([]byte, 1),
- }
- }
- func (dec *msgAppV2Decoder) decode() (raftpb.Message, error) {
- var (
- m raftpb.Message
- typ uint8
- )
- if _, err := io.ReadFull(dec.r, dec.uint8buf); err != nil {
- return m, err
- }
- typ = uint8(dec.uint8buf[0])
- switch typ {
- case msgTypeLinkHeartbeat:
- return linkHeartbeatMessage, nil
- case msgTypeAppEntries:
- m = raftpb.Message{
- Type: raftpb.MsgApp,
- From: uint64(dec.remote),
- To: uint64(dec.local),
- Term: dec.term,
- LogTerm: dec.term,
- Index: dec.index,
- }
- // decode entries
- if _, err := io.ReadFull(dec.r, dec.uint64buf); err != nil {
- return m, err
- }
- l := binary.BigEndian.Uint64(dec.uint64buf)
- m.Entries = make([]raftpb.Entry, int(l))
- for i := 0; i < int(l); i++ {
- if _, err := io.ReadFull(dec.r, dec.uint64buf); err != nil {
- return m, err
- }
- size := binary.BigEndian.Uint64(dec.uint64buf)
- var buf []byte
- if size < msgAppV2BufSize {
- buf = dec.buf[:size]
- if _, err := io.ReadFull(dec.r, buf); err != nil {
- return m, err
- }
- } else {
- buf = make([]byte, int(size))
- if _, err := io.ReadFull(dec.r, buf); err != nil {
- return m, err
- }
- }
- dec.index++
- // 1 alloc
- pbutil.MustUnmarshal(&m.Entries[i], buf)
- }
- // decode commit index
- if _, err := io.ReadFull(dec.r, dec.uint64buf); err != nil {
- return m, err
- }
- m.Commit = binary.BigEndian.Uint64(dec.uint64buf)
- case msgTypeApp:
- var size uint64
- if err := binary.Read(dec.r, binary.BigEndian, &size); err != nil {
- return m, err
- }
- buf := make([]byte, int(size))
- if _, err := io.ReadFull(dec.r, buf); err != nil {
- return m, err
- }
- pbutil.MustUnmarshal(&m, buf)
- dec.term = m.Term
- dec.index = m.Index
- if l := len(m.Entries); l > 0 {
- dec.index = m.Entries[l-1].Index
- }
- default:
- return m, fmt.Errorf("failed to parse type %d in msgappv2 stream", typ)
- }
- return m, nil
- }
|