transport.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package rafthttp
  15. import (
  16. "context"
  17. "net/http"
  18. "sync"
  19. "time"
  20. "github.com/coreos/etcd/etcdserver/stats"
  21. "github.com/coreos/etcd/pkg/logutil"
  22. "github.com/coreos/etcd/pkg/transport"
  23. "github.com/coreos/etcd/pkg/types"
  24. "github.com/coreos/etcd/raft"
  25. "github.com/coreos/etcd/raft/raftpb"
  26. "github.com/coreos/etcd/snap"
  27. "github.com/coreos/pkg/capnslog"
  28. "github.com/xiang90/probing"
  29. "golang.org/x/time/rate"
  30. )
  31. var plog = logutil.NewMergeLogger(capnslog.NewPackageLogger("github.com/coreos/etcd", "rafthttp"))
  32. type Raft interface {
  33. Process(ctx context.Context, m raftpb.Message) error
  34. IsIDRemoved(id uint64) bool
  35. ReportUnreachable(id uint64)
  36. ReportSnapshot(id uint64, status raft.SnapshotStatus)
  37. }
  38. type Transporter interface {
  39. // Start starts the given Transporter.
  40. // Start MUST be called before calling other functions in the interface.
  41. Start() error
  42. // Handler returns the HTTP handler of the transporter.
  43. // A transporter HTTP handler handles the HTTP requests
  44. // from remote peers.
  45. // The handler MUST be used to handle RaftPrefix(/raft)
  46. // endpoint.
  47. Handler() http.Handler
  48. // Send sends out the given messages to the remote peers.
  49. // Each message has a To field, which is an id that maps
  50. // to an existing peer in the transport.
  51. // If the id cannot be found in the transport, the message
  52. // will be ignored.
  53. Send(m []raftpb.Message)
  54. // SendSnapshot sends out the given snapshot message to a remote peer.
  55. // The behavior of SendSnapshot is similar to Send.
  56. SendSnapshot(m snap.Message)
  57. // AddRemote adds a remote with given peer urls into the transport.
  58. // A remote helps newly joined member to catch up the progress of cluster,
  59. // and will not be used after that.
  60. // It is the caller's responsibility to ensure the urls are all valid,
  61. // or it panics.
  62. AddRemote(id types.ID, urls []string)
  63. // AddPeer adds a peer with given peer urls into the transport.
  64. // It is the caller's responsibility to ensure the urls are all valid,
  65. // or it panics.
  66. // Peer urls are used to connect to the remote peer.
  67. AddPeer(id types.ID, urls []string)
  68. // RemovePeer removes the peer with given id.
  69. RemovePeer(id types.ID)
  70. // RemoveAllPeers removes all the existing peers in the transport.
  71. RemoveAllPeers()
  72. // UpdatePeer updates the peer urls of the peer with the given id.
  73. // It is the caller's responsibility to ensure the urls are all valid,
  74. // or it panics.
  75. UpdatePeer(id types.ID, urls []string)
  76. // ActiveSince returns the time that the connection with the peer
  77. // of the given id becomes active.
  78. // If the connection is active since peer was added, it returns the adding time.
  79. // If the connection is currently inactive, it returns zero time.
  80. ActiveSince(id types.ID) time.Time
  81. // Stop closes the connections and stops the transporter.
  82. Stop()
  83. }
  84. // Transport implements Transporter interface. It provides the functionality
  85. // to send raft messages to peers, and receive raft messages from peers.
  86. // User should call Handler method to get a handler to serve requests
  87. // received from peerURLs.
  88. // User needs to call Start before calling other functions, and call
  89. // Stop when the Transport is no longer used.
  90. type Transport struct {
  91. DialTimeout time.Duration // maximum duration before timing out dial of the request
  92. // DialRetryFrequency defines the frequency of streamReader dial retrial attempts;
  93. // a distinct rate limiter is created per every peer (default value: 10 events/sec)
  94. DialRetryFrequency rate.Limit
  95. TLSInfo transport.TLSInfo // TLS information used when creating connection
  96. ID types.ID // local member ID
  97. URLs types.URLs // local peer URLs
  98. ClusterID types.ID // raft cluster ID for request validation
  99. Raft Raft // raft state machine, to which the Transport forwards received messages and reports status
  100. Snapshotter *snap.Snapshotter
  101. ServerStats *stats.ServerStats // used to record general transportation statistics
  102. // used to record transportation statistics with followers when
  103. // performing as leader in raft protocol
  104. LeaderStats *stats.LeaderStats
  105. // ErrorC is used to report detected critical errors, e.g.,
  106. // the member has been permanently removed from the cluster
  107. // When an error is received from ErrorC, user should stop raft state
  108. // machine and thus stop the Transport.
  109. ErrorC chan error
  110. streamRt http.RoundTripper // roundTripper used by streams
  111. pipelineRt http.RoundTripper // roundTripper used by pipelines
  112. mu sync.RWMutex // protect the remote and peer map
  113. remotes map[types.ID]*remote // remotes map that helps newly joined member to catch up
  114. peers map[types.ID]Peer // peers map
  115. prober probing.Prober
  116. }
  117. func (t *Transport) Start() error {
  118. var err error
  119. t.streamRt, err = newStreamRoundTripper(t.TLSInfo, t.DialTimeout)
  120. if err != nil {
  121. return err
  122. }
  123. t.pipelineRt, err = NewRoundTripper(t.TLSInfo, t.DialTimeout)
  124. if err != nil {
  125. return err
  126. }
  127. t.remotes = make(map[types.ID]*remote)
  128. t.peers = make(map[types.ID]Peer)
  129. t.prober = probing.NewProber(t.pipelineRt)
  130. // If client didn't provide dial retry frequency, use the default
  131. // (100ms backoff between attempts to create a new stream),
  132. // so it doesn't bring too much overhead when retry.
  133. if t.DialRetryFrequency == 0 {
  134. t.DialRetryFrequency = rate.Every(100 * time.Millisecond)
  135. }
  136. return nil
  137. }
  138. func (t *Transport) Handler() http.Handler {
  139. pipelineHandler := newPipelineHandler(t, t.Raft, t.ClusterID)
  140. streamHandler := newStreamHandler(t, t, t.Raft, t.ID, t.ClusterID)
  141. snapHandler := newSnapshotHandler(t, t.Raft, t.Snapshotter, t.ClusterID)
  142. mux := http.NewServeMux()
  143. mux.Handle(RaftPrefix, pipelineHandler)
  144. mux.Handle(RaftStreamPrefix+"/", streamHandler)
  145. mux.Handle(RaftSnapshotPrefix, snapHandler)
  146. mux.Handle(ProbingPrefix, probing.NewHandler())
  147. return mux
  148. }
  149. func (t *Transport) Get(id types.ID) Peer {
  150. t.mu.RLock()
  151. defer t.mu.RUnlock()
  152. return t.peers[id]
  153. }
  154. func (t *Transport) Send(msgs []raftpb.Message) {
  155. for _, m := range msgs {
  156. if m.To == 0 {
  157. // ignore intentionally dropped message
  158. continue
  159. }
  160. to := types.ID(m.To)
  161. t.mu.RLock()
  162. p, pok := t.peers[to]
  163. g, rok := t.remotes[to]
  164. t.mu.RUnlock()
  165. if pok {
  166. if m.Type == raftpb.MsgApp {
  167. t.ServerStats.SendAppendReq(m.Size())
  168. }
  169. p.send(m)
  170. continue
  171. }
  172. if rok {
  173. g.send(m)
  174. continue
  175. }
  176. plog.Debugf("ignored message %s (sent to unknown peer %s)", m.Type, to)
  177. }
  178. }
  179. func (t *Transport) Stop() {
  180. t.mu.Lock()
  181. defer t.mu.Unlock()
  182. for _, r := range t.remotes {
  183. r.stop()
  184. }
  185. for _, p := range t.peers {
  186. p.stop()
  187. }
  188. t.prober.RemoveAll()
  189. if tr, ok := t.streamRt.(*http.Transport); ok {
  190. tr.CloseIdleConnections()
  191. }
  192. if tr, ok := t.pipelineRt.(*http.Transport); ok {
  193. tr.CloseIdleConnections()
  194. }
  195. t.peers = nil
  196. t.remotes = nil
  197. }
  198. // CutPeer drops messages to the specified peer.
  199. func (t *Transport) CutPeer(id types.ID) {
  200. t.mu.RLock()
  201. p, pok := t.peers[id]
  202. g, gok := t.remotes[id]
  203. t.mu.RUnlock()
  204. if pok {
  205. p.(Pausable).Pause()
  206. }
  207. if gok {
  208. g.Pause()
  209. }
  210. }
  211. // MendPeer recovers the message dropping behavior of the given peer.
  212. func (t *Transport) MendPeer(id types.ID) {
  213. t.mu.RLock()
  214. p, pok := t.peers[id]
  215. g, gok := t.remotes[id]
  216. t.mu.RUnlock()
  217. if pok {
  218. p.(Pausable).Resume()
  219. }
  220. if gok {
  221. g.Resume()
  222. }
  223. }
  224. func (t *Transport) AddRemote(id types.ID, us []string) {
  225. t.mu.Lock()
  226. defer t.mu.Unlock()
  227. if t.remotes == nil {
  228. // there's no clean way to shutdown the golang http server
  229. // (see: https://github.com/golang/go/issues/4674) before
  230. // stopping the transport; ignore any new connections.
  231. return
  232. }
  233. if _, ok := t.peers[id]; ok {
  234. return
  235. }
  236. if _, ok := t.remotes[id]; ok {
  237. return
  238. }
  239. urls, err := types.NewURLs(us)
  240. if err != nil {
  241. plog.Panicf("newURLs %+v should never fail: %+v", us, err)
  242. }
  243. t.remotes[id] = startRemote(t, urls, id)
  244. }
  245. func (t *Transport) AddPeer(id types.ID, us []string) {
  246. t.mu.Lock()
  247. defer t.mu.Unlock()
  248. if t.peers == nil {
  249. panic("transport stopped")
  250. }
  251. if _, ok := t.peers[id]; ok {
  252. return
  253. }
  254. urls, err := types.NewURLs(us)
  255. if err != nil {
  256. plog.Panicf("newURLs %+v should never fail: %+v", us, err)
  257. }
  258. fs := t.LeaderStats.Follower(id.String())
  259. t.peers[id] = startPeer(t, urls, id, fs)
  260. addPeerToProber(t.prober, id.String(), us)
  261. plog.Infof("added peer %s", id)
  262. }
  263. func (t *Transport) RemovePeer(id types.ID) {
  264. t.mu.Lock()
  265. defer t.mu.Unlock()
  266. t.removePeer(id)
  267. }
  268. func (t *Transport) RemoveAllPeers() {
  269. t.mu.Lock()
  270. defer t.mu.Unlock()
  271. for id := range t.peers {
  272. t.removePeer(id)
  273. }
  274. }
  275. // the caller of this function must have the peers mutex.
  276. func (t *Transport) removePeer(id types.ID) {
  277. if peer, ok := t.peers[id]; ok {
  278. peer.stop()
  279. } else {
  280. plog.Panicf("unexpected removal of unknown peer '%d'", id)
  281. }
  282. delete(t.peers, id)
  283. delete(t.LeaderStats.Followers, id.String())
  284. t.prober.Remove(id.String())
  285. plog.Infof("removed peer %s", id)
  286. }
  287. func (t *Transport) UpdatePeer(id types.ID, us []string) {
  288. t.mu.Lock()
  289. defer t.mu.Unlock()
  290. // TODO: return error or just panic?
  291. if _, ok := t.peers[id]; !ok {
  292. return
  293. }
  294. urls, err := types.NewURLs(us)
  295. if err != nil {
  296. plog.Panicf("newURLs %+v should never fail: %+v", us, err)
  297. }
  298. t.peers[id].update(urls)
  299. t.prober.Remove(id.String())
  300. addPeerToProber(t.prober, id.String(), us)
  301. plog.Infof("updated peer %s", id)
  302. }
  303. func (t *Transport) ActiveSince(id types.ID) time.Time {
  304. t.mu.Lock()
  305. defer t.mu.Unlock()
  306. if p, ok := t.peers[id]; ok {
  307. return p.activeSince()
  308. }
  309. return time.Time{}
  310. }
  311. func (t *Transport) SendSnapshot(m snap.Message) {
  312. t.mu.Lock()
  313. defer t.mu.Unlock()
  314. p := t.peers[types.ID(m.To)]
  315. if p == nil {
  316. m.CloseWithError(errMemberNotFound)
  317. return
  318. }
  319. p.sendSnap(m)
  320. }
  // Pausable is a testing interface for pausing transport traffic.
  type Pausable interface {
  	// Pause suspends traffic on the implementing value.
  	Pause()
  	// Resume undoes a previous Pause.
  	Resume()
  }
  326. func (t *Transport) Pause() {
  327. for _, p := range t.peers {
  328. p.(Pausable).Pause()
  329. }
  330. }
  331. func (t *Transport) Resume() {
  332. for _, p := range t.peers {
  333. p.(Pausable).Resume()
  334. }
  335. }
  336. type nopTransporter struct{}
  337. func NewNopTransporter() Transporter {
  338. return &nopTransporter{}
  339. }
  340. func (s *nopTransporter) Start() error { return nil }
  341. func (s *nopTransporter) Handler() http.Handler { return nil }
  342. func (s *nopTransporter) Send(m []raftpb.Message) {}
  343. func (s *nopTransporter) SendSnapshot(m snap.Message) {}
  344. func (s *nopTransporter) AddRemote(id types.ID, us []string) {}
  345. func (s *nopTransporter) AddPeer(id types.ID, us []string) {}
  346. func (s *nopTransporter) RemovePeer(id types.ID) {}
  347. func (s *nopTransporter) RemoveAllPeers() {}
  348. func (s *nopTransporter) UpdatePeer(id types.ID, us []string) {}
  349. func (s *nopTransporter) ActiveSince(id types.ID) time.Time { return time.Time{} }
  350. func (s *nopTransporter) Stop() {}
  351. func (s *nopTransporter) Pause() {}
  352. func (s *nopTransporter) Resume() {}
  353. type snapTransporter struct {
  354. nopTransporter
  355. snapDoneC chan snap.Message
  356. snapDir string
  357. }
  358. func NewSnapTransporter(snapDir string) (Transporter, <-chan snap.Message) {
  359. ch := make(chan snap.Message, 1)
  360. tr := &snapTransporter{snapDoneC: ch, snapDir: snapDir}
  361. return tr, ch
  362. }
  363. func (s *snapTransporter) SendSnapshot(m snap.Message) {
  364. ss := snap.New(s.snapDir)
  365. ss.SaveDBFrom(m.ReadCloser, m.Snapshot.Metadata.Index+1)
  366. m.CloseWithError(nil)
  367. s.snapDoneC <- m
  368. }