sender.go 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. /*
  2. Copyright 2014 CoreOS, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package rafthttp
  import (
  	"bytes"
  	"fmt"
  	"io"
  	"io/ioutil"
  	"log"
  	"net/http"
  	"sync"
  	"time"

  	"github.com/coreos/etcd/etcdserver/stats"
  	"github.com/coreos/etcd/pkg/pbutil"
  	"github.com/coreos/etcd/pkg/types"
  	"github.com/coreos/etcd/raft/raftpb"
  )
  // Sizing of a single sender.
  const (
  	// connPerSender is the number of handle goroutines NewSender starts
  	// for each sender; each one posts messages taken from the queue.
  	connPerSender = 4

  	// senderBufSize is the size of sender buffer, which helps hold the
  	// temporary network latency.
  	// The size ensures that sender does not drop messages when the network
  	// is out of work for less than 1 second in good path.
  	senderBufSize = 64
  )

  // Batching windows, in milliseconds. NewSender multiplies them by
  // time.Millisecond before handing them to the batchers.
  const (
  	// appRespBatchMs is the window given to the batcher created by NewBatcher.
  	appRespBatchMs = 50
  	// propBatchMs is the window given to the proposal batcher.
  	propBatchMs = 10
  )

  // Connection timeouts. They are not referenced in this file.
  // NOTE(review): presumably applied to peer connections by callers; confirm.
  const (
  	ConnReadTimeout  = 5 * time.Second
  	ConnWriteTimeout = 5 * time.Second
  )
  38. type Sender interface {
  39. // StartStreaming enables streaming in the sender using the given writer,
  40. // which provides a fast and efficient way to send appendEntry messages.
  41. StartStreaming(w WriteFlusher, to types.ID, term uint64) (done <-chan struct{}, err error)
  42. Update(u string)
  43. // Send sends the data to the remote node. It is always non-blocking.
  44. // It may be fail to send data if it returns nil error.
  45. Send(m raftpb.Message) error
  46. // Stop performs any necessary finalization and terminates the Sender
  47. // elegantly.
  48. Stop()
  49. // Pause pauses the sender. The sender will simply drops all incoming
  50. // messages without retruning an error.
  51. Pause()
  52. // Resume resumes a paused sender.
  53. Resume()
  54. }
  55. func NewSender(tr http.RoundTripper, u string, id types.ID, cid types.ID, p Processor, fs *stats.FollowerStats, shouldstop chan struct{}) *sender {
  56. s := &sender{
  57. id: id,
  58. active: true,
  59. tr: tr,
  60. u: u,
  61. cid: cid,
  62. p: p,
  63. fs: fs,
  64. shouldstop: shouldstop,
  65. batcher: NewBatcher(100, appRespBatchMs*time.Millisecond),
  66. propBatcher: NewProposalBatcher(100, propBatchMs*time.Millisecond),
  67. q: make(chan *raftpb.Message, senderBufSize),
  68. }
  69. s.wg.Add(connPerSender)
  70. for i := 0; i < connPerSender; i++ {
  71. go s.handle()
  72. }
  73. return s
  74. }
  75. type sender struct {
  76. id types.ID
  77. cid types.ID
  78. tr http.RoundTripper
  79. p Processor
  80. fs *stats.FollowerStats
  81. shouldstop chan struct{}
  82. strmCln *streamClient
  83. batcher *Batcher
  84. propBatcher *ProposalBatcher
  85. q chan *raftpb.Message
  86. strmSrvMu sync.Mutex
  87. strmSrv *streamServer
  88. // wait for the handling routines
  89. wg sync.WaitGroup
  90. mu sync.RWMutex
  91. u string // the url this sender post to
  92. // if the last send was successful, thi sender is active.
  93. // Or it is inactive
  94. active bool
  95. errored error
  96. paused bool
  97. }
  98. func (s *sender) StartStreaming(w WriteFlusher, to types.ID, term uint64) (<-chan struct{}, error) {
  99. s.strmSrvMu.Lock()
  100. defer s.strmSrvMu.Unlock()
  101. if s.strmSrv != nil {
  102. // ignore lower-term streaming request
  103. if term < s.strmSrv.term {
  104. return nil, fmt.Errorf("out of data streaming request: term %d, request term %d", term, s.strmSrv.term)
  105. }
  106. // stop the existing one
  107. s.strmSrv.stop()
  108. s.strmSrv = nil
  109. }
  110. s.strmSrv = startStreamServer(w, to, term, s.fs)
  111. return s.strmSrv.stopNotify(), nil
  112. }
  113. func (s *sender) Update(u string) {
  114. s.mu.Lock()
  115. defer s.mu.Unlock()
  116. s.u = u
  117. }
  118. // TODO (xiangli): reasonable retry logic
  119. func (s *sender) Send(m raftpb.Message) error {
  120. s.mu.RLock()
  121. pause := s.paused
  122. s.mu.RUnlock()
  123. if pause {
  124. return nil
  125. }
  126. s.maybeStopStream(m.Term)
  127. if shouldInitStream(m) && !s.hasStreamClient() {
  128. s.initStream(types.ID(m.From), types.ID(m.To), m.Term)
  129. s.batcher.Reset(time.Now())
  130. }
  131. var err error
  132. switch {
  133. case isProposal(m):
  134. s.propBatcher.Batch(m)
  135. case canBatch(m) && s.hasStreamClient():
  136. if !s.batcher.ShouldBatch(time.Now()) {
  137. err = s.send(m)
  138. }
  139. case canUseStream(m):
  140. if ok := s.tryStream(m); !ok {
  141. err = s.send(m)
  142. }
  143. default:
  144. err = s.send(m)
  145. }
  146. // send out batched MsgProp if needed
  147. // TODO: it is triggered by all outcoming send now, and it needs
  148. // more clear solution. Either use separate goroutine to trigger it
  149. // or use streaming.
  150. if !s.propBatcher.IsEmpty() {
  151. t := time.Now()
  152. if !s.propBatcher.ShouldBatch(t) {
  153. s.send(s.propBatcher.Message)
  154. s.propBatcher.Reset(t)
  155. }
  156. }
  157. return err
  158. }
  159. func (s *sender) send(m raftpb.Message) error {
  160. // TODO: don't block. we should be able to have 1000s
  161. // of messages out at a time.
  162. select {
  163. case s.q <- &m:
  164. return nil
  165. default:
  166. log.Printf("sender: dropping %s because maximal number %d of sender buffer entries to %s has been reached",
  167. m.Type, senderBufSize, s.u)
  168. return fmt.Errorf("reach maximal serving")
  169. }
  170. }
  171. func (s *sender) Stop() {
  172. close(s.q)
  173. s.wg.Wait()
  174. s.strmSrvMu.Lock()
  175. if s.strmSrv != nil {
  176. s.strmSrv.stop()
  177. s.strmSrv = nil
  178. }
  179. s.strmSrvMu.Unlock()
  180. if s.strmCln != nil {
  181. s.strmCln.stop()
  182. }
  183. }
  184. func (s *sender) Pause() {
  185. s.mu.Lock()
  186. defer s.mu.Unlock()
  187. s.paused = true
  188. }
  189. func (s *sender) Resume() {
  190. s.mu.Lock()
  191. defer s.mu.Unlock()
  192. s.paused = false
  193. }
  194. func (s *sender) maybeStopStream(term uint64) {
  195. if s.strmCln != nil && term > s.strmCln.term {
  196. s.strmCln.stop()
  197. s.strmCln = nil
  198. }
  199. s.strmSrvMu.Lock()
  200. defer s.strmSrvMu.Unlock()
  201. if s.strmSrv != nil && term > s.strmSrv.term {
  202. s.strmSrv.stop()
  203. s.strmSrv = nil
  204. }
  205. }
  206. func (s *sender) hasStreamClient() bool {
  207. return s.strmCln != nil && !s.strmCln.isStopped()
  208. }
  209. func (s *sender) initStream(from, to types.ID, term uint64) {
  210. strmCln := newStreamClient(from, to, term, s.p)
  211. s.mu.Lock()
  212. u := s.u
  213. s.mu.Unlock()
  214. if err := strmCln.start(s.tr, u, s.cid); err != nil {
  215. log.Printf("rafthttp: start stream client error: %v", err)
  216. return
  217. }
  218. s.strmCln = strmCln
  219. }
  220. func (s *sender) tryStream(m raftpb.Message) bool {
  221. s.strmSrvMu.Lock()
  222. defer s.strmSrvMu.Unlock()
  223. if s.strmSrv == nil || m.Term != s.strmSrv.term {
  224. return false
  225. }
  226. if err := s.strmSrv.send(m.Entries); err != nil {
  227. log.Printf("rafthttp: send stream message error: %v", err)
  228. s.strmSrv.stop()
  229. s.strmSrv = nil
  230. return false
  231. }
  232. return true
  233. }
  234. func (s *sender) handle() {
  235. defer s.wg.Done()
  236. for m := range s.q {
  237. start := time.Now()
  238. err := s.post(pbutil.MustMarshal(m))
  239. end := time.Now()
  240. s.mu.Lock()
  241. if err != nil {
  242. if s.errored == nil || s.errored.Error() != err.Error() {
  243. log.Printf("sender: error posting to %s: %v", s.id, err)
  244. s.errored = err
  245. }
  246. if s.active {
  247. log.Printf("sender: the connection with %s becomes inactive", s.id)
  248. s.active = false
  249. }
  250. if m.Type == raftpb.MsgApp {
  251. s.fs.Fail()
  252. }
  253. } else {
  254. if !s.active {
  255. log.Printf("sender: the connection with %s becomes active", s.id)
  256. s.active = true
  257. s.errored = nil
  258. }
  259. if m.Type == raftpb.MsgApp {
  260. s.fs.Succ(end.Sub(start))
  261. }
  262. }
  263. s.mu.Unlock()
  264. }
  265. }
  266. // post POSTs a data payload to a url. Returns nil if the POST succeeds,
  267. // error on any failure.
  268. func (s *sender) post(data []byte) error {
  269. s.mu.RLock()
  270. req, err := http.NewRequest("POST", s.u, bytes.NewBuffer(data))
  271. s.mu.RUnlock()
  272. if err != nil {
  273. return err
  274. }
  275. req.Header.Set("Content-Type", "application/protobuf")
  276. req.Header.Set("X-Etcd-Cluster-ID", s.cid.String())
  277. resp, err := s.tr.RoundTrip(req)
  278. if err != nil {
  279. return err
  280. }
  281. resp.Body.Close()
  282. switch resp.StatusCode {
  283. case http.StatusPreconditionFailed:
  284. select {
  285. case s.shouldstop <- struct{}{}:
  286. default:
  287. }
  288. log.Printf("rafthttp: conflicting cluster ID with the target cluster (%s != %s)", resp.Header.Get("X-Etcd-Cluster-ID"), s.cid)
  289. return nil
  290. case http.StatusForbidden:
  291. select {
  292. case s.shouldstop <- struct{}{}:
  293. default:
  294. }
  295. log.Println("rafthttp: this member has been permanently removed from the cluster")
  296. log.Println("rafthttp: the data-dir used by this member must be removed so that this host can be re-added with a new member ID")
  297. return nil
  298. case http.StatusNoContent:
  299. return nil
  300. default:
  301. return fmt.Errorf("unexpected http status %s while posting to %q", http.StatusText(resp.StatusCode), req.URL.String())
  302. }
  303. }
  304. func isProposal(m raftpb.Message) bool { return m.Type == raftpb.MsgProp }