peer.go 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. /*
  2. Copyright 2014 CoreOS, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package rafthttp
  14. import (
  15. "bytes"
  16. "errors"
  17. "fmt"
  18. "log"
  19. "net/http"
  20. "sync"
  21. "time"
  22. "github.com/coreos/etcd/etcdserver/stats"
  23. "github.com/coreos/etcd/pkg/pbutil"
  24. "github.com/coreos/etcd/pkg/types"
  25. "github.com/coreos/etcd/raft/raftpb"
  26. )
  const (
  	// connPerSender is the number of handle goroutines NewPeer starts for
  	// each peer; every one of them drains the peer's queue and posts the
  	// messages it receives.
  	connPerSender = 4

  	// senderBufSize is the capacity of the per-peer message queue. The
  	// buffer absorbs short network stalls: it is sized so that, on the
  	// good path, the sender drops nothing while the network is unavailable
  	// for less than one second.
  	senderBufSize = 64

  	// appRespBatchMs is the window, in milliseconds, that NewPeer hands to
  	// NewBatcher.
  	appRespBatchMs = 50
  	// propBatchMs is the window, in milliseconds, that NewPeer hands to
  	// NewProposalBatcher.
  	propBatchMs = 10

  	// ConnReadTimeout and ConnWriteTimeout are not referenced in this part
  	// of the file; presumably they bound reads and writes on peer
  	// connections -- TODO confirm against their users.
  	ConnReadTimeout  = 5 * time.Second
  	ConnWriteTimeout = 5 * time.Second
  )
  39. type peer struct {
  40. sync.Mutex
  41. id types.ID
  42. cid types.ID
  43. tr http.RoundTripper
  44. r Raft
  45. fs *stats.FollowerStats
  46. errorc chan error
  47. batcher *Batcher
  48. propBatcher *ProposalBatcher
  49. q chan *raftpb.Message
  50. stream *stream
  51. // wait for the handling routines
  52. wg sync.WaitGroup
  53. // the url this sender post to
  54. u string
  55. // if the last send was successful, the sender is active.
  56. // Or it is inactive
  57. active bool
  58. errored error
  59. paused bool
  60. stopped bool
  61. }
  62. func NewPeer(tr http.RoundTripper, u string, id types.ID, cid types.ID, r Raft, fs *stats.FollowerStats, errorc chan error) *peer {
  63. p := &peer{
  64. id: id,
  65. active: true,
  66. tr: tr,
  67. u: u,
  68. cid: cid,
  69. r: r,
  70. fs: fs,
  71. stream: &stream{},
  72. errorc: errorc,
  73. batcher: NewBatcher(100, appRespBatchMs*time.Millisecond),
  74. propBatcher: NewProposalBatcher(100, propBatchMs*time.Millisecond),
  75. q: make(chan *raftpb.Message, senderBufSize),
  76. }
  77. p.wg.Add(connPerSender)
  78. for i := 0; i < connPerSender; i++ {
  79. go p.handle()
  80. }
  81. return p
  82. }
  83. func (p *peer) Update(u string) {
  84. p.Lock()
  85. defer p.Unlock()
  86. if p.stopped {
  87. // TODO: not panic here?
  88. panic("peer: update a stopped peer")
  89. }
  90. p.u = u
  91. }
  92. // Send sends the data to the remote node. It is always non-blocking.
  93. // It may be fail to send data if it returns nil error.
  94. // TODO (xiangli): reasonable retry logic
  95. func (p *peer) Send(m raftpb.Message) error {
  96. p.Lock()
  97. defer p.Unlock()
  98. if p.stopped {
  99. return errors.New("peer: stopped")
  100. }
  101. if p.paused {
  102. return nil
  103. }
  104. // move all the stream related stuff into stream
  105. p.stream.invalidate(m.Term)
  106. if shouldInitStream(m) && !p.stream.isOpen() {
  107. u := p.u
  108. // todo: steam open should not block.
  109. p.stream.open(types.ID(m.From), p.id, p.cid, m.Term, p.tr, u, p.r)
  110. p.batcher.Reset(time.Now())
  111. }
  112. var err error
  113. switch {
  114. case isProposal(m):
  115. p.propBatcher.Batch(m)
  116. case canBatch(m) && p.stream.isOpen():
  117. if !p.batcher.ShouldBatch(time.Now()) {
  118. err = p.send(m)
  119. }
  120. case canUseStream(m):
  121. if ok := p.stream.write(m); !ok {
  122. err = p.send(m)
  123. }
  124. default:
  125. err = p.send(m)
  126. }
  127. // send out batched MsgProp if needed
  128. // TODO: it is triggered by all outcoming send now, and it needs
  129. // more clear solution. Either use separate goroutine to trigger it
  130. // or use streaming.
  131. if !p.propBatcher.IsEmpty() {
  132. t := time.Now()
  133. if !p.propBatcher.ShouldBatch(t) {
  134. p.send(p.propBatcher.Message)
  135. p.propBatcher.Reset(t)
  136. }
  137. }
  138. return err
  139. }
  140. func (p *peer) send(m raftpb.Message) error {
  141. // TODO: don't block. we should be able to have 1000s
  142. // of messages out at a time.
  143. select {
  144. case p.q <- &m:
  145. return nil
  146. default:
  147. log.Printf("sender: dropping %s because maximal number %d of sender buffer entries to %s has been reached",
  148. m.Type, senderBufSize, p.u)
  149. return fmt.Errorf("reach maximal serving")
  150. }
  151. }
  152. // Stop performs any necessary finalization and terminates the peer
  153. // elegantly.
  154. func (p *peer) Stop() {
  155. close(p.q)
  156. p.wg.Wait()
  157. p.Lock()
  158. defer p.Unlock()
  159. p.stream.stop()
  160. p.stopped = true
  161. }
  162. func (p *peer) handle() {
  163. defer p.wg.Done()
  164. for m := range p.q {
  165. start := time.Now()
  166. err := p.post(pbutil.MustMarshal(m))
  167. end := time.Now()
  168. p.Lock()
  169. if err != nil {
  170. if p.errored == nil || p.errored.Error() != err.Error() {
  171. log.Printf("sender: error posting to %s: %v", p.id, err)
  172. p.errored = err
  173. }
  174. if p.active {
  175. log.Printf("sender: the connection with %s becomes inactive", p.id)
  176. p.active = false
  177. }
  178. if m.Type == raftpb.MsgApp {
  179. p.fs.Fail()
  180. }
  181. } else {
  182. if !p.active {
  183. log.Printf("sender: the connection with %s becomes active", p.id)
  184. p.active = true
  185. p.errored = nil
  186. }
  187. if m.Type == raftpb.MsgApp {
  188. p.fs.Succ(end.Sub(start))
  189. }
  190. }
  191. p.Unlock()
  192. }
  193. }
  194. // post POSTs a data payload to a url. Returns nil if the POST succeeds,
  195. // error on any failure.
  196. func (p *peer) post(data []byte) error {
  197. p.Lock()
  198. req, err := http.NewRequest("POST", p.u, bytes.NewBuffer(data))
  199. p.Unlock()
  200. if err != nil {
  201. return err
  202. }
  203. req.Header.Set("Content-Type", "application/protobuf")
  204. req.Header.Set("X-Etcd-Cluster-ID", p.cid.String())
  205. resp, err := p.tr.RoundTrip(req)
  206. if err != nil {
  207. return err
  208. }
  209. resp.Body.Close()
  210. switch resp.StatusCode {
  211. case http.StatusPreconditionFailed:
  212. err := fmt.Errorf("conflicting cluster ID with the target cluster (%s != %s)", resp.Header.Get("X-Etcd-Cluster-ID"), p.cid)
  213. select {
  214. case p.errorc <- err:
  215. default:
  216. }
  217. return nil
  218. case http.StatusForbidden:
  219. err := fmt.Errorf("the member has been permanently removed from the cluster")
  220. select {
  221. case p.errorc <- err:
  222. default:
  223. }
  224. return nil
  225. case http.StatusNoContent:
  226. return nil
  227. default:
  228. return fmt.Errorf("unexpected http status %s while posting to %q", http.StatusText(resp.StatusCode), req.URL.String())
  229. }
  230. }
  231. // attachStream attaches a streamSever to the peer.
  232. func (p *peer) attachStream(sw *streamWriter) error {
  233. p.Lock()
  234. defer p.Unlock()
  235. if p.stopped {
  236. return errors.New("peer: stopped")
  237. }
  238. sw.fs = p.fs
  239. return p.stream.attach(sw)
  240. }
  241. // Pause pauses the peer. The peer will simply drops all incoming
  242. // messages without retruning an error.
  243. func (p *peer) Pause() {
  244. p.Lock()
  245. defer p.Unlock()
  246. p.paused = true
  247. }
  248. // Resume resumes a paused peer.
  249. func (p *peer) Resume() {
  250. p.Lock()
  251. defer p.Unlock()
  252. p.paused = false
  253. }
  254. func isProposal(m raftpb.Message) bool { return m.Type == raftpb.MsgProp }