peer.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. /*
  2. Copyright 2014 CoreOS, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package rafthttp
  14. import (
  15. "bytes"
  16. "fmt"
  17. "log"
  18. "net/http"
  19. "sync"
  20. "time"
  21. "github.com/coreos/etcd/etcdserver/stats"
  22. "github.com/coreos/etcd/pkg/pbutil"
  23. "github.com/coreos/etcd/pkg/types"
  24. "github.com/coreos/etcd/raft/raftpb"
  25. )
  26. const (
  27. connPerSender = 4
  28. // senderBufSize is the size of sender buffer, which helps hold the
  29. // temporary network latency.
  30. // The size ensures that sender does not drop messages when the network
  31. // is out of work for less than 1 second in good path.
  32. senderBufSize = 64
  33. appRespBatchMs = 50
  34. propBatchMs = 10
  35. ConnReadTimeout = 5 * time.Second
  36. ConnWriteTimeout = 5 * time.Second
  37. )
  38. func NewPeer(tr http.RoundTripper, u string, id types.ID, cid types.ID, r Raft, fs *stats.FollowerStats, shouldstop chan struct{}) *peer {
  39. p := &peer{
  40. id: id,
  41. active: true,
  42. tr: tr,
  43. u: u,
  44. cid: cid,
  45. r: r,
  46. fs: fs,
  47. shouldstop: shouldstop,
  48. batcher: NewBatcher(100, appRespBatchMs*time.Millisecond),
  49. propBatcher: NewProposalBatcher(100, propBatchMs*time.Millisecond),
  50. q: make(chan *raftpb.Message, senderBufSize),
  51. }
  52. p.wg.Add(connPerSender)
  53. for i := 0; i < connPerSender; i++ {
  54. go p.handle()
  55. }
  56. return p
  57. }
  58. type peer struct {
  59. id types.ID
  60. cid types.ID
  61. tr http.RoundTripper
  62. r Raft
  63. fs *stats.FollowerStats
  64. shouldstop chan struct{}
  65. strmCln *streamClient
  66. batcher *Batcher
  67. propBatcher *ProposalBatcher
  68. q chan *raftpb.Message
  69. strmSrvMu sync.Mutex
  70. strmSrv *streamServer
  71. // wait for the handling routines
  72. wg sync.WaitGroup
  73. mu sync.RWMutex
  74. u string // the url this sender post to
  75. // if the last send was successful, thi sender is active.
  76. // Or it is inactive
  77. active bool
  78. errored error
  79. paused bool
  80. }
  81. // StartStreaming enables streaming in the peer using the given writer,
  82. // which provides a fast and efficient way to send appendEntry messages.
  83. func (p *peer) StartStreaming(w WriteFlusher, to types.ID, term uint64) (<-chan struct{}, error) {
  84. p.strmSrvMu.Lock()
  85. defer p.strmSrvMu.Unlock()
  86. if p.strmSrv != nil {
  87. // ignore lower-term streaming request
  88. if term < p.strmSrv.term {
  89. return nil, fmt.Errorf("out of data streaming request: term %d, request term %d", term, p.strmSrv.term)
  90. }
  91. // stop the existing one
  92. p.strmSrv.stop()
  93. p.strmSrv = nil
  94. }
  95. p.strmSrv = startStreamServer(w, to, term, p.fs)
  96. return p.strmSrv.stopNotify(), nil
  97. }
  98. func (p *peer) Update(u string) {
  99. p.mu.Lock()
  100. defer p.mu.Unlock()
  101. p.u = u
  102. }
  103. // Send sends the data to the remote node. It is always non-blocking.
  104. // It may be fail to send data if it returns nil error.
  105. // TODO (xiangli): reasonable retry logic
  106. func (p *peer) Send(m raftpb.Message) error {
  107. p.mu.RLock()
  108. pause := p.paused
  109. p.mu.RUnlock()
  110. if pause {
  111. return nil
  112. }
  113. p.maybeStopStream(m.Term)
  114. if shouldInitStream(m) && !p.hasStreamClient() {
  115. p.initStream(types.ID(m.From), types.ID(m.To), m.Term)
  116. p.batcher.Reset(time.Now())
  117. }
  118. var err error
  119. switch {
  120. case isProposal(m):
  121. p.propBatcher.Batch(m)
  122. case canBatch(m) && p.hasStreamClient():
  123. if !p.batcher.ShouldBatch(time.Now()) {
  124. err = p.send(m)
  125. }
  126. case canUseStream(m):
  127. if ok := p.tryStream(m); !ok {
  128. err = p.send(m)
  129. }
  130. default:
  131. err = p.send(m)
  132. }
  133. // send out batched MsgProp if needed
  134. // TODO: it is triggered by all outcoming send now, and it needs
  135. // more clear solution. Either use separate goroutine to trigger it
  136. // or use streaming.
  137. if !p.propBatcher.IsEmpty() {
  138. t := time.Now()
  139. if !p.propBatcher.ShouldBatch(t) {
  140. p.send(p.propBatcher.Message)
  141. p.propBatcher.Reset(t)
  142. }
  143. }
  144. return err
  145. }
  146. func (p *peer) send(m raftpb.Message) error {
  147. // TODO: don't block. we should be able to have 1000s
  148. // of messages out at a time.
  149. select {
  150. case p.q <- &m:
  151. return nil
  152. default:
  153. log.Printf("sender: dropping %s because maximal number %d of sender buffer entries to %s has been reached",
  154. m.Type, senderBufSize, p.u)
  155. return fmt.Errorf("reach maximal serving")
  156. }
  157. }
  158. // Stop performs any necessary finalization and terminates the peer
  159. // elegantly.
  160. func (p *peer) Stop() {
  161. close(p.q)
  162. p.wg.Wait()
  163. p.strmSrvMu.Lock()
  164. if p.strmSrv != nil {
  165. p.strmSrv.stop()
  166. p.strmSrv = nil
  167. }
  168. p.strmSrvMu.Unlock()
  169. if p.strmCln != nil {
  170. p.strmCln.stop()
  171. }
  172. }
  173. // Pause pauses the peer. The peer will simply drops all incoming
  174. // messages without retruning an error.
  175. func (p *peer) Pause() {
  176. p.mu.Lock()
  177. defer p.mu.Unlock()
  178. p.paused = true
  179. }
  180. // Resume resumes a paused peer.
  181. func (p *peer) Resume() {
  182. p.mu.Lock()
  183. defer p.mu.Unlock()
  184. p.paused = false
  185. }
  186. func (p *peer) maybeStopStream(term uint64) {
  187. if p.strmCln != nil && term > p.strmCln.term {
  188. p.strmCln.stop()
  189. p.strmCln = nil
  190. }
  191. p.strmSrvMu.Lock()
  192. defer p.strmSrvMu.Unlock()
  193. if p.strmSrv != nil && term > p.strmSrv.term {
  194. p.strmSrv.stop()
  195. p.strmSrv = nil
  196. }
  197. }
  198. func (p *peer) hasStreamClient() bool {
  199. return p.strmCln != nil && !p.strmCln.isStopped()
  200. }
  201. func (p *peer) initStream(from, to types.ID, term uint64) {
  202. strmCln := newStreamClient(from, to, term, p.r)
  203. p.mu.Lock()
  204. u := p.u
  205. p.mu.Unlock()
  206. if err := strmCln.start(p.tr, u, p.cid); err != nil {
  207. log.Printf("rafthttp: start stream client error: %v", err)
  208. return
  209. }
  210. p.strmCln = strmCln
  211. }
  212. func (p *peer) tryStream(m raftpb.Message) bool {
  213. p.strmSrvMu.Lock()
  214. defer p.strmSrvMu.Unlock()
  215. if p.strmSrv == nil || m.Term != p.strmSrv.term {
  216. return false
  217. }
  218. if err := p.strmSrv.send(m.Entries); err != nil {
  219. log.Printf("rafthttp: send stream message error: %v", err)
  220. p.strmSrv.stop()
  221. p.strmSrv = nil
  222. return false
  223. }
  224. return true
  225. }
  226. func (p *peer) handle() {
  227. defer p.wg.Done()
  228. for m := range p.q {
  229. start := time.Now()
  230. err := p.post(pbutil.MustMarshal(m))
  231. end := time.Now()
  232. p.mu.Lock()
  233. if err != nil {
  234. if p.errored == nil || p.errored.Error() != err.Error() {
  235. log.Printf("sender: error posting to %s: %v", p.id, err)
  236. p.errored = err
  237. }
  238. if p.active {
  239. log.Printf("sender: the connection with %s becomes inactive", p.id)
  240. p.active = false
  241. }
  242. if m.Type == raftpb.MsgApp {
  243. p.fs.Fail()
  244. }
  245. } else {
  246. if !p.active {
  247. log.Printf("sender: the connection with %s becomes active", p.id)
  248. p.active = true
  249. p.errored = nil
  250. }
  251. if m.Type == raftpb.MsgApp {
  252. p.fs.Succ(end.Sub(start))
  253. }
  254. }
  255. p.mu.Unlock()
  256. }
  257. }
  258. // post POSTs a data payload to a url. Returns nil if the POST succeeds,
  259. // error on any failure.
  260. func (p *peer) post(data []byte) error {
  261. p.mu.RLock()
  262. req, err := http.NewRequest("POST", p.u, bytes.NewBuffer(data))
  263. p.mu.RUnlock()
  264. if err != nil {
  265. return err
  266. }
  267. req.Header.Set("Content-Type", "application/protobuf")
  268. req.Header.Set("X-Etcd-Cluster-ID", p.cid.String())
  269. resp, err := p.tr.RoundTrip(req)
  270. if err != nil {
  271. return err
  272. }
  273. resp.Body.Close()
  274. switch resp.StatusCode {
  275. case http.StatusPreconditionFailed:
  276. select {
  277. case p.shouldstop <- struct{}{}:
  278. default:
  279. }
  280. log.Printf("rafthttp: conflicting cluster ID with the target cluster (%s != %s)", resp.Header.Get("X-Etcd-Cluster-ID"), p.cid)
  281. return nil
  282. case http.StatusForbidden:
  283. select {
  284. case p.shouldstop <- struct{}{}:
  285. default:
  286. }
  287. log.Println("rafthttp: this member has been permanently removed from the cluster")
  288. log.Println("rafthttp: the data-dir used by this member must be removed so that this host can be re-added with a new member ID")
  289. return nil
  290. case http.StatusNoContent:
  291. return nil
  292. default:
  293. return fmt.Errorf("unexpected http status %s while posting to %q", http.StatusText(resp.StatusCode), req.URL.String())
  294. }
  295. }
  296. func isProposal(m raftpb.Message) bool { return m.Type == raftpb.MsgProp }