peer.go 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. // Copyright 2015 CoreOS, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package rafthttp
  15. import (
  16. "bytes"
  17. "errors"
  18. "fmt"
  19. "log"
  20. "net/http"
  21. "sync"
  22. "time"
  23. "github.com/coreos/etcd/etcdserver/stats"
  24. "github.com/coreos/etcd/pkg/pbutil"
  25. "github.com/coreos/etcd/pkg/types"
  26. "github.com/coreos/etcd/raft/raftpb"
  27. )
  28. const (
  29. connPerSender = 4
  30. // senderBufSize is the size of sender buffer, which helps hold the
  31. // temporary network latency.
  32. // The size ensures that sender does not drop messages when the network
  33. // is out of work for less than 1 second in good path.
  34. senderBufSize = 64
  35. appRespBatchMs = 50
  36. propBatchMs = 10
  37. ConnReadTimeout = 5 * time.Second
  38. ConnWriteTimeout = 5 * time.Second
  39. )
  40. type peer struct {
  41. sync.Mutex
  42. id types.ID
  43. cid types.ID
  44. tr http.RoundTripper
  45. r Raft
  46. fs *stats.FollowerStats
  47. errorc chan error
  48. batcher *Batcher
  49. propBatcher *ProposalBatcher
  50. q chan *raftpb.Message
  51. stream *stream
  52. // wait for the handling routines
  53. wg sync.WaitGroup
  54. // the url this sender post to
  55. u string
  56. // if the last send was successful, the sender is active.
  57. // Or it is inactive
  58. active bool
  59. errored error
  60. paused bool
  61. stopped bool
  62. }
  63. func NewPeer(tr http.RoundTripper, u string, id types.ID, cid types.ID, r Raft, fs *stats.FollowerStats, errorc chan error) *peer {
  64. p := &peer{
  65. id: id,
  66. active: true,
  67. tr: tr,
  68. u: u,
  69. cid: cid,
  70. r: r,
  71. fs: fs,
  72. stream: &stream{},
  73. errorc: errorc,
  74. batcher: NewBatcher(100, appRespBatchMs*time.Millisecond),
  75. propBatcher: NewProposalBatcher(100, propBatchMs*time.Millisecond),
  76. q: make(chan *raftpb.Message, senderBufSize),
  77. }
  78. p.wg.Add(connPerSender)
  79. for i := 0; i < connPerSender; i++ {
  80. go p.handle()
  81. }
  82. return p
  83. }
  84. func (p *peer) Update(u string) {
  85. p.Lock()
  86. defer p.Unlock()
  87. if p.stopped {
  88. // TODO: not panic here?
  89. panic("peer: update a stopped peer")
  90. }
  91. p.u = u
  92. }
  93. // Send sends the data to the remote node. It is always non-blocking.
  94. // It may be fail to send data if it returns nil error.
  95. // TODO (xiangli): reasonable retry logic
  96. func (p *peer) Send(m raftpb.Message) error {
  97. p.Lock()
  98. defer p.Unlock()
  99. if p.stopped {
  100. return errors.New("peer: stopped")
  101. }
  102. if p.paused {
  103. return nil
  104. }
  105. // move all the stream related stuff into stream
  106. p.stream.invalidate(m.Term)
  107. if shouldInitStream(m) && !p.stream.isOpen() {
  108. u := p.u
  109. // todo: steam open should not block.
  110. p.stream.open(types.ID(m.From), p.id, p.cid, m.Term, p.tr, u, p.r)
  111. p.batcher.Reset(time.Now())
  112. }
  113. var err error
  114. switch {
  115. case isProposal(m):
  116. p.propBatcher.Batch(m)
  117. case canBatch(m) && p.stream.isOpen():
  118. if !p.batcher.ShouldBatch(time.Now()) {
  119. err = p.send(m)
  120. }
  121. case canUseStream(m):
  122. if ok := p.stream.write(m); !ok {
  123. err = p.send(m)
  124. }
  125. default:
  126. err = p.send(m)
  127. }
  128. // send out batched MsgProp if needed
  129. // TODO: it is triggered by all outcoming send now, and it needs
  130. // more clear solution. Either use separate goroutine to trigger it
  131. // or use streaming.
  132. if !p.propBatcher.IsEmpty() {
  133. t := time.Now()
  134. if !p.propBatcher.ShouldBatch(t) {
  135. p.send(p.propBatcher.Message)
  136. p.propBatcher.Reset(t)
  137. }
  138. }
  139. return err
  140. }
  141. func (p *peer) send(m raftpb.Message) error {
  142. // TODO: don't block. we should be able to have 1000s
  143. // of messages out at a time.
  144. select {
  145. case p.q <- &m:
  146. return nil
  147. default:
  148. log.Printf("sender: dropping %s because maximal number %d of sender buffer entries to %s has been reached",
  149. m.Type, senderBufSize, p.u)
  150. return fmt.Errorf("reach maximal serving")
  151. }
  152. }
  153. // Stop performs any necessary finalization and terminates the peer
  154. // elegantly.
  155. func (p *peer) Stop() {
  156. close(p.q)
  157. p.wg.Wait()
  158. p.Lock()
  159. defer p.Unlock()
  160. p.stream.stop()
  161. p.stopped = true
  162. }
  163. func (p *peer) handle() {
  164. defer p.wg.Done()
  165. for m := range p.q {
  166. start := time.Now()
  167. err := p.post(pbutil.MustMarshal(m))
  168. end := time.Now()
  169. p.Lock()
  170. if err != nil {
  171. if p.errored == nil || p.errored.Error() != err.Error() {
  172. log.Printf("sender: error posting to %s: %v", p.id, err)
  173. p.errored = err
  174. }
  175. if p.active {
  176. log.Printf("sender: the connection with %s became inactive", p.id)
  177. p.active = false
  178. }
  179. if m.Type == raftpb.MsgApp {
  180. p.fs.Fail()
  181. }
  182. } else {
  183. if !p.active {
  184. log.Printf("sender: the connection with %s became active", p.id)
  185. p.active = true
  186. p.errored = nil
  187. }
  188. if m.Type == raftpb.MsgApp {
  189. p.fs.Succ(end.Sub(start))
  190. }
  191. }
  192. p.Unlock()
  193. }
  194. }
  195. // post POSTs a data payload to a url. Returns nil if the POST succeeds,
  196. // error on any failure.
  197. func (p *peer) post(data []byte) error {
  198. p.Lock()
  199. req, err := http.NewRequest("POST", p.u, bytes.NewBuffer(data))
  200. p.Unlock()
  201. if err != nil {
  202. return err
  203. }
  204. req.Header.Set("Content-Type", "application/protobuf")
  205. req.Header.Set("X-Etcd-Cluster-ID", p.cid.String())
  206. resp, err := p.tr.RoundTrip(req)
  207. if err != nil {
  208. return err
  209. }
  210. resp.Body.Close()
  211. switch resp.StatusCode {
  212. case http.StatusPreconditionFailed:
  213. err := fmt.Errorf("conflicting cluster ID with the target cluster (%s != %s)", resp.Header.Get("X-Etcd-Cluster-ID"), p.cid)
  214. select {
  215. case p.errorc <- err:
  216. default:
  217. }
  218. return nil
  219. case http.StatusForbidden:
  220. err := fmt.Errorf("the member has been permanently removed from the cluster")
  221. select {
  222. case p.errorc <- err:
  223. default:
  224. }
  225. return nil
  226. case http.StatusNoContent:
  227. return nil
  228. default:
  229. return fmt.Errorf("unexpected http status %s while posting to %q", http.StatusText(resp.StatusCode), req.URL.String())
  230. }
  231. }
  232. // attachStream attaches a streamSever to the peer.
  233. func (p *peer) attachStream(sw *streamWriter) error {
  234. p.Lock()
  235. defer p.Unlock()
  236. if p.stopped {
  237. return errors.New("peer: stopped")
  238. }
  239. sw.fs = p.fs
  240. return p.stream.attach(sw)
  241. }
  242. // Pause pauses the peer. The peer will simply drops all incoming
  243. // messages without retruning an error.
  244. func (p *peer) Pause() {
  245. p.Lock()
  246. defer p.Unlock()
  247. p.paused = true
  248. }
  249. // Resume resumes a paused peer.
  250. func (p *peer) Resume() {
  251. p.Lock()
  252. defer p.Unlock()
  253. p.paused = false
  254. }
  255. func isProposal(m raftpb.Message) bool { return m.Type == raftpb.MsgProp }