peer.go 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. /*
  2. Copyright 2014 CoreOS, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package rafthttp
  14. import (
  15. "bytes"
  16. "fmt"
  17. "log"
  18. "net/http"
  19. "sync"
  20. "time"
  21. "github.com/coreos/etcd/etcdserver/stats"
  22. "github.com/coreos/etcd/pkg/pbutil"
  23. "github.com/coreos/etcd/pkg/types"
  24. "github.com/coreos/etcd/raft/raftpb"
  25. )
  // Tunables for the HTTP sender that backs each peer.
  const (
  	// ConnReadTimeout and ConnWriteTimeout are exported five-second
  	// connection deadlines. Nothing in this file reads them.
  	ConnReadTimeout  = 5 * time.Second
  	ConnWriteTimeout = 5 * time.Second

  	// connPerSender is the number of handle goroutines NewPeer starts
  	// for every peer.
  	connPerSender = 4

  	// senderBufSize is the capacity of a peer's outgoing message queue.
  	// The buffer absorbs short bursts of network latency: it is sized so
  	// that, on the good path, no message is dropped while the network is
  	// unavailable for less than one second.
  	senderBufSize = 64

  	// appRespBatchMs and propBatchMs are batching windows in
  	// milliseconds; NewPeer multiplies them by time.Millisecond when it
  	// builds the peer's Batcher and ProposalBatcher.
  	appRespBatchMs = 50
  	propBatchMs    = 10
  )
  // peer is the sending side of the connection to one remote raft member.
  // Outgoing messages are queued on q and POSTed to the remote URL by the
  // handle goroutines; some message kinds are batched or written to stream
  // instead (see Send).
  type peer struct {
  	// id is the remote member's ID; it is used in log output and handed to
  	// stream.open.
  	id types.ID
  	// cid is the cluster ID sent in the X-Etcd-Cluster-ID request header.
  	cid types.ID
  	// tr performs the HTTP round trips issued by post.
  	tr http.RoundTripper
  	// r is passed through to stream.open.
  	r Raft
  	// fs records the outcome of posted MsgApp messages (Fail / Succ).
  	fs *stats.FollowerStats
  	// shouldstop gets a non-blocking signal when the remote answers a post
  	// with 412 Precondition Failed or 403 Forbidden.
  	shouldstop chan struct{}
  	// batcher is consulted by Send for batchable messages while the stream
  	// is open.
  	batcher *Batcher
  	// propBatcher collects MsgProp messages handed to Send.
  	propBatcher *ProposalBatcher
  	// q is the buffered queue (capacity senderBufSize) drained by the
  	// handle goroutines.
  	q chan *raftpb.Message

  	stream *stream

  	// wait for the handling routines
  	wg sync.WaitGroup

  	// mu is held by the methods of this type when they access the fields
  	// declared below it.
  	mu sync.RWMutex
  	u string // the url this sender post to
  	// if the last send was successful, this sender is active.
  	// Or it is inactive
  	active bool
  	// errored is the most recent post error that was logged; handle resets
  	// it to nil when the peer becomes active again.
  	errored error
  	// paused makes Send drop every message and return nil; see Pause and
  	// Resume.
  	paused bool
  }
  59. func NewPeer(tr http.RoundTripper, u string, id types.ID, cid types.ID, r Raft, fs *stats.FollowerStats, shouldstop chan struct{}) *peer {
  60. p := &peer{
  61. id: id,
  62. active: true,
  63. tr: tr,
  64. u: u,
  65. cid: cid,
  66. r: r,
  67. fs: fs,
  68. stream: &stream{},
  69. shouldstop: shouldstop,
  70. batcher: NewBatcher(100, appRespBatchMs*time.Millisecond),
  71. propBatcher: NewProposalBatcher(100, propBatchMs*time.Millisecond),
  72. q: make(chan *raftpb.Message, senderBufSize),
  73. }
  74. p.wg.Add(connPerSender)
  75. for i := 0; i < connPerSender; i++ {
  76. go p.handle()
  77. }
  78. return p
  79. }
  80. func (p *peer) Update(u string) {
  81. p.mu.Lock()
  82. defer p.mu.Unlock()
  83. p.u = u
  84. }
  85. // Send sends the data to the remote node. It is always non-blocking.
  86. // It may be fail to send data if it returns nil error.
  87. // TODO (xiangli): reasonable retry logic
  88. func (p *peer) Send(m raftpb.Message) error {
  89. p.mu.RLock()
  90. pause := p.paused
  91. p.mu.RUnlock()
  92. if pause {
  93. return nil
  94. }
  95. // move all the stream related stuff into stream
  96. p.stream.invalidate(m.Term)
  97. if shouldInitStream(m) && !p.stream.isOpen() {
  98. p.mu.Lock()
  99. u := p.u
  100. p.mu.Unlock()
  101. p.stream.open(types.ID(m.From), p.id, p.cid, m.Term, p.tr, u, p.r)
  102. p.batcher.Reset(time.Now())
  103. }
  104. var err error
  105. switch {
  106. case isProposal(m):
  107. p.propBatcher.Batch(m)
  108. case canBatch(m) && p.stream.isOpen():
  109. if !p.batcher.ShouldBatch(time.Now()) {
  110. err = p.send(m)
  111. }
  112. case canUseStream(m):
  113. if ok := p.stream.write(m); !ok {
  114. err = p.send(m)
  115. }
  116. default:
  117. err = p.send(m)
  118. }
  119. // send out batched MsgProp if needed
  120. // TODO: it is triggered by all outcoming send now, and it needs
  121. // more clear solution. Either use separate goroutine to trigger it
  122. // or use streaming.
  123. if !p.propBatcher.IsEmpty() {
  124. t := time.Now()
  125. if !p.propBatcher.ShouldBatch(t) {
  126. p.send(p.propBatcher.Message)
  127. p.propBatcher.Reset(t)
  128. }
  129. }
  130. return err
  131. }
  132. func (p *peer) send(m raftpb.Message) error {
  133. // TODO: don't block. we should be able to have 1000s
  134. // of messages out at a time.
  135. select {
  136. case p.q <- &m:
  137. return nil
  138. default:
  139. log.Printf("sender: dropping %s because maximal number %d of sender buffer entries to %s has been reached",
  140. m.Type, senderBufSize, p.u)
  141. return fmt.Errorf("reach maximal serving")
  142. }
  143. }
  // Stop performs any necessary finalization and terminates the peer
  // gracefully: it closes the outgoing queue, waits for every handle goroutine
  // to finish draining it and return, and finally stops the stream.
  //
  // Stop closes p.q, so it must be called at most once, and no message may be
  // sent through the peer afterwards (closing a closed channel, or sending on
  // one, panics).
  func (p *peer) Stop() {
  	// Closing q ends the range loop in each handle goroutine.
  	close(p.q)
  	p.wg.Wait()
  	p.stream.stop()
  }
  151. func (p *peer) handle() {
  152. defer p.wg.Done()
  153. for m := range p.q {
  154. start := time.Now()
  155. err := p.post(pbutil.MustMarshal(m))
  156. end := time.Now()
  157. p.mu.Lock()
  158. if err != nil {
  159. if p.errored == nil || p.errored.Error() != err.Error() {
  160. log.Printf("sender: error posting to %s: %v", p.id, err)
  161. p.errored = err
  162. }
  163. if p.active {
  164. log.Printf("sender: the connection with %s becomes inactive", p.id)
  165. p.active = false
  166. }
  167. if m.Type == raftpb.MsgApp {
  168. p.fs.Fail()
  169. }
  170. } else {
  171. if !p.active {
  172. log.Printf("sender: the connection with %s becomes active", p.id)
  173. p.active = true
  174. p.errored = nil
  175. }
  176. if m.Type == raftpb.MsgApp {
  177. p.fs.Succ(end.Sub(start))
  178. }
  179. }
  180. p.mu.Unlock()
  181. }
  182. }
  183. // post POSTs a data payload to a url. Returns nil if the POST succeeds,
  184. // error on any failure.
  185. func (p *peer) post(data []byte) error {
  186. p.mu.RLock()
  187. req, err := http.NewRequest("POST", p.u, bytes.NewBuffer(data))
  188. p.mu.RUnlock()
  189. if err != nil {
  190. return err
  191. }
  192. req.Header.Set("Content-Type", "application/protobuf")
  193. req.Header.Set("X-Etcd-Cluster-ID", p.cid.String())
  194. resp, err := p.tr.RoundTrip(req)
  195. if err != nil {
  196. return err
  197. }
  198. resp.Body.Close()
  199. switch resp.StatusCode {
  200. case http.StatusPreconditionFailed:
  201. select {
  202. case p.shouldstop <- struct{}{}:
  203. default:
  204. }
  205. log.Printf("rafthttp: conflicting cluster ID with the target cluster (%s != %s)", resp.Header.Get("X-Etcd-Cluster-ID"), p.cid)
  206. return nil
  207. case http.StatusForbidden:
  208. select {
  209. case p.shouldstop <- struct{}{}:
  210. default:
  211. }
  212. log.Println("rafthttp: this member has been permanently removed from the cluster")
  213. log.Println("rafthttp: the data-dir used by this member must be removed so that this host can be re-added with a new member ID")
  214. return nil
  215. case http.StatusNoContent:
  216. return nil
  217. default:
  218. return fmt.Errorf("unexpected http status %s while posting to %q", http.StatusText(resp.StatusCode), req.URL.String())
  219. }
  220. }
  // attachStream attaches a streamServer to the peer. It first points the
  // server's fs at this peer's FollowerStats, then hands the server to the
  // peer's stream and returns whatever stream.attach reports.
  func (p *peer) attachStream(server *streamServer) error {
  	server.fs = p.fs
  	return p.stream.attach(server)
  }
  226. // Pause pauses the peer. The peer will simply drops all incoming
  227. // messages without retruning an error.
  228. func (p *peer) Pause() {
  229. p.mu.Lock()
  230. defer p.mu.Unlock()
  231. p.paused = true
  232. }
  233. // Resume resumes a paused peer.
  234. func (p *peer) Resume() {
  235. p.mu.Lock()
  236. defer p.mu.Unlock()
  237. p.paused = false
  238. }
  239. func isProposal(m raftpb.Message) bool { return m.Type == raftpb.MsgProp }