transport.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package rafthttp
  15. import (
  16. "context"
  17. "net/http"
  18. "sync"
  19. "time"
  20. "go.etcd.io/etcd/etcdserver/api/snap"
  21. stats "go.etcd.io/etcd/etcdserver/api/v2stats"
  22. "go.etcd.io/etcd/pkg/logutil"
  23. "go.etcd.io/etcd/pkg/transport"
  24. "go.etcd.io/etcd/pkg/types"
  25. "go.etcd.io/etcd/raft"
  26. "go.etcd.io/etcd/raft/raftpb"
  27. "github.com/coreos/pkg/capnslog"
  28. "github.com/xiang90/probing"
  29. "go.uber.org/zap"
  30. "golang.org/x/time/rate"
  31. )
  32. var plog = logutil.NewMergeLogger(capnslog.NewPackageLogger("go.etcd.io/etcd", "rafthttp"))
  33. type Raft interface {
  34. Process(ctx context.Context, m raftpb.Message) error
  35. IsIDRemoved(id uint64) bool
  36. ReportUnreachable(id uint64)
  37. ReportSnapshot(id uint64, status raft.SnapshotStatus)
  38. }
  39. type Transporter interface {
  40. // Start starts the given Transporter.
  41. // Start MUST be called before calling other functions in the interface.
  42. Start() error
  43. // Handler returns the HTTP handler of the transporter.
  44. // A transporter HTTP handler handles the HTTP requests
  45. // from remote peers.
  46. // The handler MUST be used to handle RaftPrefix(/raft)
  47. // endpoint.
  48. Handler() http.Handler
  49. // Send sends out the given messages to the remote peers.
  50. // Each message has a To field, which is an id that maps
  51. // to an existing peer in the transport.
  52. // If the id cannot be found in the transport, the message
  53. // will be ignored.
  54. Send(m []raftpb.Message)
  55. // SendSnapshot sends out the given snapshot message to a remote peer.
  56. // The behavior of SendSnapshot is similar to Send.
  57. SendSnapshot(m snap.Message)
  58. // AddRemote adds a remote with given peer urls into the transport.
  59. // A remote helps newly joined member to catch up the progress of cluster,
  60. // and will not be used after that.
  61. // It is the caller's responsibility to ensure the urls are all valid,
  62. // or it panics.
  63. AddRemote(id types.ID, urls []string)
  64. // AddPeer adds a peer with given peer urls into the transport.
  65. // It is the caller's responsibility to ensure the urls are all valid,
  66. // or it panics.
  67. // Peer urls are used to connect to the remote peer.
  68. AddPeer(id types.ID, urls []string)
  69. // RemovePeer removes the peer with given id.
  70. RemovePeer(id types.ID)
  71. // RemoveAllPeers removes all the existing peers in the transport.
  72. RemoveAllPeers()
  73. // UpdatePeer updates the peer urls of the peer with the given id.
  74. // It is the caller's responsibility to ensure the urls are all valid,
  75. // or it panics.
  76. UpdatePeer(id types.ID, urls []string)
  77. // ActiveSince returns the time that the connection with the peer
  78. // of the given id becomes active.
  79. // If the connection is active since peer was added, it returns the adding time.
  80. // If the connection is currently inactive, it returns zero time.
  81. ActiveSince(id types.ID) time.Time
  82. // ActivePeers returns the number of active peers.
  83. ActivePeers() int
  84. // Stop closes the connections and stops the transporter.
  85. Stop()
  86. }
  87. // Transport implements Transporter interface. It provides the functionality
  88. // to send raft messages to peers, and receive raft messages from peers.
  89. // User should call Handler method to get a handler to serve requests
  90. // received from peerURLs.
  91. // User needs to call Start before calling other functions, and call
  92. // Stop when the Transport is no longer used.
  93. type Transport struct {
  94. Logger *zap.Logger
  95. DialTimeout time.Duration // maximum duration before timing out dial of the request
  96. // DialRetryFrequency defines the frequency of streamReader dial retrial attempts;
  97. // a distinct rate limiter is created per every peer (default value: 10 events/sec)
  98. DialRetryFrequency rate.Limit
  99. TLSInfo transport.TLSInfo // TLS information used when creating connection
  100. ID types.ID // local member ID
  101. URLs types.URLs // local peer URLs
  102. ClusterID types.ID // raft cluster ID for request validation
  103. Raft Raft // raft state machine, to which the Transport forwards received messages and reports status
  104. Snapshotter *snap.Snapshotter
  105. ServerStats *stats.ServerStats // used to record general transportation statistics
  106. // used to record transportation statistics with followers when
  107. // performing as leader in raft protocol
  108. LeaderStats *stats.LeaderStats
  109. // ErrorC is used to report detected critical errors, e.g.,
  110. // the member has been permanently removed from the cluster
  111. // When an error is received from ErrorC, user should stop raft state
  112. // machine and thus stop the Transport.
  113. ErrorC chan error
  114. streamRt http.RoundTripper // roundTripper used by streams
  115. pipelineRt http.RoundTripper // roundTripper used by pipelines
  116. mu sync.RWMutex // protect the remote and peer map
  117. remotes map[types.ID]*remote // remotes map that helps newly joined member to catch up
  118. peers map[types.ID]Peer // peers map
  119. pipelineProber probing.Prober
  120. streamProber probing.Prober
  121. }
  122. func (t *Transport) Start() error {
  123. var err error
  124. t.streamRt, err = newStreamRoundTripper(t.TLSInfo, t.DialTimeout)
  125. if err != nil {
  126. return err
  127. }
  128. t.pipelineRt, err = NewRoundTripper(t.TLSInfo, t.DialTimeout)
  129. if err != nil {
  130. return err
  131. }
  132. t.remotes = make(map[types.ID]*remote)
  133. t.peers = make(map[types.ID]Peer)
  134. t.pipelineProber = probing.NewProber(t.pipelineRt)
  135. t.streamProber = probing.NewProber(t.streamRt)
  136. // If client didn't provide dial retry frequency, use the default
  137. // (100ms backoff between attempts to create a new stream),
  138. // so it doesn't bring too much overhead when retry.
  139. if t.DialRetryFrequency == 0 {
  140. t.DialRetryFrequency = rate.Every(100 * time.Millisecond)
  141. }
  142. return nil
  143. }
  144. func (t *Transport) Handler() http.Handler {
  145. pipelineHandler := newPipelineHandler(t, t.Raft, t.ClusterID)
  146. streamHandler := newStreamHandler(t, t, t.Raft, t.ID, t.ClusterID)
  147. snapHandler := newSnapshotHandler(t, t.Raft, t.Snapshotter, t.ClusterID)
  148. mux := http.NewServeMux()
  149. mux.Handle(RaftPrefix, pipelineHandler)
  150. mux.Handle(RaftStreamPrefix+"/", streamHandler)
  151. mux.Handle(RaftSnapshotPrefix, snapHandler)
  152. mux.Handle(ProbingPrefix, probing.NewHandler())
  153. return mux
  154. }
  155. func (t *Transport) Get(id types.ID) Peer {
  156. t.mu.RLock()
  157. defer t.mu.RUnlock()
  158. return t.peers[id]
  159. }
  160. func (t *Transport) Send(msgs []raftpb.Message) {
  161. for _, m := range msgs {
  162. if m.To == 0 {
  163. // ignore intentionally dropped message
  164. continue
  165. }
  166. to := types.ID(m.To)
  167. t.mu.RLock()
  168. p, pok := t.peers[to]
  169. g, rok := t.remotes[to]
  170. t.mu.RUnlock()
  171. if pok {
  172. if m.Type == raftpb.MsgApp {
  173. t.ServerStats.SendAppendReq(m.Size())
  174. }
  175. p.send(m)
  176. continue
  177. }
  178. if rok {
  179. g.send(m)
  180. continue
  181. }
  182. if t.Logger != nil {
  183. t.Logger.Debug(
  184. "ignored message send request; unknown remote peer target",
  185. zap.String("type", m.Type.String()),
  186. zap.String("unknown-target-peer-id", to.String()),
  187. )
  188. } else {
  189. plog.Debugf("ignored message %s (sent to unknown peer %s)", m.Type, to)
  190. }
  191. }
  192. }
  193. func (t *Transport) Stop() {
  194. t.mu.Lock()
  195. defer t.mu.Unlock()
  196. for _, r := range t.remotes {
  197. r.stop()
  198. }
  199. for _, p := range t.peers {
  200. p.stop()
  201. }
  202. t.pipelineProber.RemoveAll()
  203. t.streamProber.RemoveAll()
  204. if tr, ok := t.streamRt.(*http.Transport); ok {
  205. tr.CloseIdleConnections()
  206. }
  207. if tr, ok := t.pipelineRt.(*http.Transport); ok {
  208. tr.CloseIdleConnections()
  209. }
  210. t.peers = nil
  211. t.remotes = nil
  212. }
  213. // CutPeer drops messages to the specified peer.
  214. func (t *Transport) CutPeer(id types.ID) {
  215. t.mu.RLock()
  216. p, pok := t.peers[id]
  217. g, gok := t.remotes[id]
  218. t.mu.RUnlock()
  219. if pok {
  220. p.(Pausable).Pause()
  221. }
  222. if gok {
  223. g.Pause()
  224. }
  225. }
  226. // MendPeer recovers the message dropping behavior of the given peer.
  227. func (t *Transport) MendPeer(id types.ID) {
  228. t.mu.RLock()
  229. p, pok := t.peers[id]
  230. g, gok := t.remotes[id]
  231. t.mu.RUnlock()
  232. if pok {
  233. p.(Pausable).Resume()
  234. }
  235. if gok {
  236. g.Resume()
  237. }
  238. }
  239. func (t *Transport) AddRemote(id types.ID, us []string) {
  240. t.mu.Lock()
  241. defer t.mu.Unlock()
  242. if t.remotes == nil {
  243. // there's no clean way to shutdown the golang http server
  244. // (see: https://github.com/golang/go/issues/4674) before
  245. // stopping the transport; ignore any new connections.
  246. return
  247. }
  248. if _, ok := t.peers[id]; ok {
  249. return
  250. }
  251. if _, ok := t.remotes[id]; ok {
  252. return
  253. }
  254. urls, err := types.NewURLs(us)
  255. if err != nil {
  256. if t.Logger != nil {
  257. t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
  258. } else {
  259. plog.Panicf("newURLs %+v should never fail: %+v", us, err)
  260. }
  261. }
  262. t.remotes[id] = startRemote(t, urls, id)
  263. if t.Logger != nil {
  264. t.Logger.Info(
  265. "added new remote peer",
  266. zap.String("local-member-id", t.ID.String()),
  267. zap.String("remote-peer-id", id.String()),
  268. zap.Strings("remote-peer-urls", us),
  269. )
  270. }
  271. }
  272. func (t *Transport) AddPeer(id types.ID, us []string) {
  273. t.mu.Lock()
  274. defer t.mu.Unlock()
  275. if t.peers == nil {
  276. panic("transport stopped")
  277. }
  278. if _, ok := t.peers[id]; ok {
  279. return
  280. }
  281. urls, err := types.NewURLs(us)
  282. if err != nil {
  283. if t.Logger != nil {
  284. t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
  285. } else {
  286. plog.Panicf("newURLs %+v should never fail: %+v", us, err)
  287. }
  288. }
  289. fs := t.LeaderStats.Follower(id.String())
  290. t.peers[id] = startPeer(t, urls, id, fs)
  291. addPeerToProber(t.Logger, t.pipelineProber, id.String(), us, RoundTripperNameSnapshot, rttSec)
  292. addPeerToProber(t.Logger, t.streamProber, id.String(), us, RoundTripperNameRaftMessage, rttSec)
  293. if t.Logger != nil {
  294. t.Logger.Info(
  295. "added remote peer",
  296. zap.String("local-member-id", t.ID.String()),
  297. zap.String("remote-peer-id", id.String()),
  298. zap.Strings("remote-peer-urls", us),
  299. )
  300. } else {
  301. plog.Infof("added peer %s", id)
  302. }
  303. }
  304. func (t *Transport) RemovePeer(id types.ID) {
  305. t.mu.Lock()
  306. defer t.mu.Unlock()
  307. t.removePeer(id)
  308. }
  309. func (t *Transport) RemoveAllPeers() {
  310. t.mu.Lock()
  311. defer t.mu.Unlock()
  312. for id := range t.peers {
  313. t.removePeer(id)
  314. }
  315. }
  316. // the caller of this function must have the peers mutex.
  317. func (t *Transport) removePeer(id types.ID) {
  318. if peer, ok := t.peers[id]; ok {
  319. peer.stop()
  320. } else {
  321. if t.Logger != nil {
  322. t.Logger.Panic("unexpected removal of unknown remote peer", zap.String("remote-peer-id", id.String()))
  323. } else {
  324. plog.Panicf("unexpected removal of unknown peer '%d'", id)
  325. }
  326. }
  327. delete(t.peers, id)
  328. delete(t.LeaderStats.Followers, id.String())
  329. t.pipelineProber.Remove(id.String())
  330. t.streamProber.Remove(id.String())
  331. if t.Logger != nil {
  332. t.Logger.Info(
  333. "removed remote peer",
  334. zap.String("local-member-id", t.ID.String()),
  335. zap.String("removed-remote-peer-id", id.String()),
  336. )
  337. } else {
  338. plog.Infof("removed peer %s", id)
  339. }
  340. }
  341. func (t *Transport) UpdatePeer(id types.ID, us []string) {
  342. t.mu.Lock()
  343. defer t.mu.Unlock()
  344. // TODO: return error or just panic?
  345. if _, ok := t.peers[id]; !ok {
  346. return
  347. }
  348. urls, err := types.NewURLs(us)
  349. if err != nil {
  350. if t.Logger != nil {
  351. t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
  352. } else {
  353. plog.Panicf("newURLs %+v should never fail: %+v", us, err)
  354. }
  355. }
  356. t.peers[id].update(urls)
  357. t.pipelineProber.Remove(id.String())
  358. addPeerToProber(t.Logger, t.pipelineProber, id.String(), us, RoundTripperNameSnapshot, rttSec)
  359. t.streamProber.Remove(id.String())
  360. addPeerToProber(t.Logger, t.streamProber, id.String(), us, RoundTripperNameRaftMessage, rttSec)
  361. if t.Logger != nil {
  362. t.Logger.Info(
  363. "updated remote peer",
  364. zap.String("local-member-id", t.ID.String()),
  365. zap.String("updated-remote-peer-id", id.String()),
  366. zap.Strings("updated-remote-peer-urls", us),
  367. )
  368. } else {
  369. plog.Infof("updated peer %s", id)
  370. }
  371. }
  372. func (t *Transport) ActiveSince(id types.ID) time.Time {
  373. t.mu.RLock()
  374. defer t.mu.RUnlock()
  375. if p, ok := t.peers[id]; ok {
  376. return p.activeSince()
  377. }
  378. return time.Time{}
  379. }
  380. func (t *Transport) SendSnapshot(m snap.Message) {
  381. t.mu.Lock()
  382. defer t.mu.Unlock()
  383. p := t.peers[types.ID(m.To)]
  384. if p == nil {
  385. m.CloseWithError(errMemberNotFound)
  386. return
  387. }
  388. p.sendSnap(m)
  389. }
  // Pausable is a testing interface for pausing transport traffic.
  type Pausable interface {
  	// Pause pauses the traffic.
  	Pause()
  	// Resume resumes traffic after a Pause.
  	Resume()
  }
  395. func (t *Transport) Pause() {
  396. t.mu.RLock()
  397. defer t.mu.RUnlock()
  398. for _, p := range t.peers {
  399. p.(Pausable).Pause()
  400. }
  401. }
  402. func (t *Transport) Resume() {
  403. t.mu.RLock()
  404. defer t.mu.RUnlock()
  405. for _, p := range t.peers {
  406. p.(Pausable).Resume()
  407. }
  408. }
  409. // ActivePeers returns a channel that closes when an initial
  410. // peer connection has been established. Use this to wait until the
  411. // first peer connection becomes active.
  412. func (t *Transport) ActivePeers() (cnt int) {
  413. t.mu.RLock()
  414. defer t.mu.RUnlock()
  415. for _, p := range t.peers {
  416. if !p.activeSince().IsZero() {
  417. cnt++
  418. }
  419. }
  420. return cnt
  421. }