transport.go 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. // Copyright 2015 CoreOS, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package rafthttp
  15. import (
  16. "io"
  17. "net/http"
  18. "sync"
  19. "time"
  20. "github.com/coreos/etcd/Godeps/_workspace/src/github.com/coreos/pkg/capnslog"
  21. "github.com/coreos/etcd/Godeps/_workspace/src/github.com/xiang90/probing"
  22. "github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
  23. "github.com/coreos/etcd/etcdserver/stats"
  24. "github.com/coreos/etcd/pkg/transport"
  25. "github.com/coreos/etcd/pkg/types"
  26. "github.com/coreos/etcd/raft"
  27. "github.com/coreos/etcd/raft/raftpb"
  28. )
// plog is the package-level logger, created through capnslog for the
// "github.com/coreos/etcd" repository under the "rafthttp" package name.
var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "rafthttp")
// Raft is the view of the raft state machine that the transport depends on:
// received messages are forwarded to it and peer/snapshot status is reported
// back through it.
//
// NOTE(review): the per-method descriptions below are derived from the method
// names and signatures only; confirm them against the implementation.
type Raft interface {
	// Process takes a raft message together with a context and returns an
	// error when the message cannot be processed.
	Process(ctx context.Context, m raftpb.Message) error
	// IsIDRemoved reports whether the member with the given id is removed.
	IsIDRemoved(id uint64) bool
	// ReportUnreachable reports that the member with the given id is
	// unreachable.
	ReportUnreachable(id uint64)
	// ReportSnapshot reports the status of a snapshot for the member with
	// the given id.
	ReportSnapshot(id uint64, status raft.SnapshotStatus)
}
// SnapshotSaver is the interface that wraps the SaveFrom method.
type SnapshotSaver interface {
	// SaveFrom saves the snapshot data at the given index from the given
	// reader. It returns a non-nil error if the snapshot cannot be saved.
	SaveFrom(r io.Reader, index uint64) error
}
// Transporter is the interface of a raft message transport: it sends raft
// messages to remote peers, exposes an HTTP handler that receives messages
// from them, and manages the set of known peers and remotes.
type Transporter interface {
	// Start starts the given Transporter.
	// Start MUST be called before calling other functions in the interface.
	Start() error
	// Handler returns the HTTP handler of the transporter.
	// A transporter HTTP handler handles the HTTP requests
	// from remote peers.
	// The handler MUST be used to handle RaftPrefix(/raft)
	// endpoint.
	Handler() http.Handler
	// Send sends out the given messages to the remote peers.
	// Each message has a To field, which is an id that maps
	// to an existing peer in the transport.
	// If the id cannot be found in the transport, the message
	// will be ignored.
	Send(m []raftpb.Message)
	// AddRemote adds a remote with the given peer urls into the transport.
	// A remote helps a newly joined member to catch up with the progress of
	// the cluster, and will not be used after that.
	// It is the caller's responsibility to ensure the urls are all valid,
	// or it panics.
	AddRemote(id types.ID, urls []string)
	// AddPeer adds a peer with the given peer urls into the transport.
	// It is the caller's responsibility to ensure the urls are all valid,
	// or it panics.
	// Peer urls are used to connect to the remote peer.
	AddPeer(id types.ID, urls []string)
	// RemovePeer removes the peer with the given id.
	RemovePeer(id types.ID)
	// RemoveAllPeers removes all the existing peers in the transport.
	RemoveAllPeers()
	// UpdatePeer updates the peer urls of the peer with the given id.
	// It is the caller's responsibility to ensure the urls are all valid,
	// or it panics.
	UpdatePeer(id types.ID, urls []string)
	// ActiveSince returns the time at which the connection with the peer
	// of the given id became active.
	// If the connection has been active since the peer was added, it returns
	// the adding time.
	// If the connection is currently inactive, it returns the zero time.
	ActiveSince(id types.ID) time.Time
	// SnapshotReady accepts a snapshot at the given index that is ready to
	// be sent out.
	// SnapshotReady MUST NOT be called while the send result of the
	// previously accepted snapshot has not been reported yet.
	SnapshotReady(rc io.ReadCloser, index uint64)
	// Stop closes the connections and stops the transporter.
	Stop()
}
// Transport implements the Transporter interface. It provides the
// functionality to send raft messages to peers, and to receive raft messages
// from peers.
// The user should call the Handler method to get a handler to serve requests
// received from peerURLs.
// The user needs to call Start before calling other functions, and call
// Stop when the Transport is no longer used.
type Transport struct {
	DialTimeout time.Duration     // maximum duration before timing out dial of the request
	TLSInfo     transport.TLSInfo // TLS information used when creating connection

	ID          types.ID           // local member ID
	ClusterID   types.ID           // raft cluster ID for request validation
	Raft        Raft               // raft state machine, to which the Transport forwards received messages and reports status
	SnapSaver   SnapshotSaver      // used to save snapshot in v3 snapshot messages
	ServerStats *stats.ServerStats // used to record general transportation statistics
	// used to record transportation statistics with followers when
	// performing as leader in raft protocol
	LeaderStats *stats.LeaderStats
	// error channel used to report detected critical errors, e.g.,
	// the member has been permanently removed from the cluster.
	// When an error is received from ErrorC, the user should stop the raft
	// state machine and thus stop the Transport.
	ErrorC chan error
	// V3demo is handed unchanged to every peer started by AddPeer.
	// NOTE(review): presumably toggles the v3 snapshot path; confirm in startPeer.
	V3demo bool

	streamRt   http.RoundTripper // roundTripper used by streams
	pipelineRt http.RoundTripper // roundTripper used by pipelines

	mu      sync.RWMutex         // protect the term, remote and peer map
	term    uint64               // the latest term that has been observed
	remotes map[types.ID]*remote // remotes map that helps newly joined member to catch up
	peers   map[types.ID]Peer    // peers map

	snapst *snapshotStore // store that SnapshotReady puts snapshots into; handed to every peer started by AddPeer
	prober probing.Prober // prober built on pipelineRt in Start; peers are added to and removed from it alongside the peers map
}
  120. func (t *Transport) Start() error {
  121. var err error
  122. // Read/write timeout is set for stream roundTripper to promptly
  123. // find out broken status, which minimizes the number of messages
  124. // sent on broken connection.
  125. t.streamRt, err = transport.NewTimeoutTransport(t.TLSInfo, t.DialTimeout, ConnReadTimeout, ConnWriteTimeout)
  126. if err != nil {
  127. return err
  128. }
  129. t.pipelineRt, err = transport.NewTransport(t.TLSInfo, t.DialTimeout)
  130. if err != nil {
  131. return err
  132. }
  133. t.remotes = make(map[types.ID]*remote)
  134. t.peers = make(map[types.ID]Peer)
  135. t.snapst = &snapshotStore{}
  136. t.prober = probing.NewProber(t.pipelineRt)
  137. return nil
  138. }
  139. func (t *Transport) Handler() http.Handler {
  140. pipelineHandler := newPipelineHandler(t.Raft, t.ClusterID)
  141. streamHandler := newStreamHandler(t, t.Raft, t.ID, t.ClusterID)
  142. snapHandler := newSnapshotHandler(t.Raft, t.SnapSaver, t.ClusterID)
  143. mux := http.NewServeMux()
  144. mux.Handle(RaftPrefix, pipelineHandler)
  145. mux.Handle(RaftStreamPrefix+"/", streamHandler)
  146. mux.Handle(RaftSnapshotPrefix, snapHandler)
  147. mux.Handle(ProbingPrefix, probing.NewHandler())
  148. return mux
  149. }
  150. func (t *Transport) Get(id types.ID) Peer {
  151. t.mu.RLock()
  152. defer t.mu.RUnlock()
  153. return t.peers[id]
  154. }
  155. func (t *Transport) maybeUpdatePeersTerm(term uint64) {
  156. t.mu.Lock()
  157. defer t.mu.Unlock()
  158. if t.term >= term {
  159. return
  160. }
  161. t.term = term
  162. for _, p := range t.peers {
  163. p.setTerm(term)
  164. }
  165. }
  166. func (t *Transport) Send(msgs []raftpb.Message) {
  167. for _, m := range msgs {
  168. if m.To == 0 {
  169. // ignore intentionally dropped message
  170. continue
  171. }
  172. to := types.ID(m.To)
  173. // update terms for all the peers
  174. // ignore MsgProp since it does not have a valid term
  175. if m.Type != raftpb.MsgProp {
  176. t.maybeUpdatePeersTerm(m.Term)
  177. }
  178. p, ok := t.peers[to]
  179. if ok {
  180. if m.Type == raftpb.MsgApp {
  181. t.ServerStats.SendAppendReq(m.Size())
  182. }
  183. p.send(m)
  184. continue
  185. }
  186. g, ok := t.remotes[to]
  187. if ok {
  188. g.send(m)
  189. continue
  190. }
  191. plog.Debugf("ignored message %s (sent to unknown peer %s)", m.Type, to)
  192. }
  193. }
  194. func (t *Transport) Stop() {
  195. for _, r := range t.remotes {
  196. r.stop()
  197. }
  198. for _, p := range t.peers {
  199. p.stop()
  200. }
  201. t.prober.RemoveAll()
  202. if tr, ok := t.streamRt.(*http.Transport); ok {
  203. tr.CloseIdleConnections()
  204. }
  205. if tr, ok := t.pipelineRt.(*http.Transport); ok {
  206. tr.CloseIdleConnections()
  207. }
  208. }
  209. func (t *Transport) AddRemote(id types.ID, us []string) {
  210. t.mu.Lock()
  211. defer t.mu.Unlock()
  212. if _, ok := t.remotes[id]; ok {
  213. return
  214. }
  215. urls, err := types.NewURLs(us)
  216. if err != nil {
  217. plog.Panicf("newURLs %+v should never fail: %+v", us, err)
  218. }
  219. t.remotes[id] = startRemote(t.pipelineRt, urls, t.ID, id, t.ClusterID, t.Raft, t.ErrorC)
  220. }
  221. func (t *Transport) AddPeer(id types.ID, us []string) {
  222. t.mu.Lock()
  223. defer t.mu.Unlock()
  224. if _, ok := t.peers[id]; ok {
  225. return
  226. }
  227. urls, err := types.NewURLs(us)
  228. if err != nil {
  229. plog.Panicf("newURLs %+v should never fail: %+v", us, err)
  230. }
  231. fs := t.LeaderStats.Follower(id.String())
  232. t.peers[id] = startPeer(t.streamRt, t.pipelineRt, urls, t.ID, id, t.ClusterID, t.snapst, t.Raft, fs, t.ErrorC, t.term, t.V3demo)
  233. addPeerToProber(t.prober, id.String(), us)
  234. }
// RemovePeer removes the peer with the given id while holding the transport
// lock. The actual work is done by removePeer, which panics (through
// plog.Panicf) when no peer with that id is registered.
func (t *Transport) RemovePeer(id types.ID) {
	t.mu.Lock()
	// Deferred so that the lock is released even when removePeer panics.
	defer t.mu.Unlock()
	t.removePeer(id)
}
// RemoveAllPeers removes every peer currently registered in the transport
// while holding the transport lock. Remotes are not touched.
func (t *Transport) RemoveAllPeers() {
	t.mu.Lock()
	defer t.mu.Unlock()
	// removePeer deletes from t.peers; deleting entries while ranging over
	// a map is permitted in Go.
	for id := range t.peers {
		t.removePeer(id)
	}
}
  247. // the caller of this function must have the peers mutex.
  248. func (t *Transport) removePeer(id types.ID) {
  249. if peer, ok := t.peers[id]; ok {
  250. peer.stop()
  251. } else {
  252. plog.Panicf("unexpected removal of unknown peer '%d'", id)
  253. }
  254. delete(t.peers, id)
  255. delete(t.LeaderStats.Followers, id.String())
  256. t.prober.Remove(id.String())
  257. }
  258. func (t *Transport) UpdatePeer(id types.ID, us []string) {
  259. t.mu.Lock()
  260. defer t.mu.Unlock()
  261. // TODO: return error or just panic?
  262. if _, ok := t.peers[id]; !ok {
  263. return
  264. }
  265. urls, err := types.NewURLs(us)
  266. if err != nil {
  267. plog.Panicf("newURLs %+v should never fail: %+v", us, err)
  268. }
  269. t.peers[id].update(urls)
  270. t.prober.Remove(id.String())
  271. addPeerToProber(t.prober, id.String(), us)
  272. }
  273. func (t *Transport) ActiveSince(id types.ID) time.Time {
  274. t.mu.Lock()
  275. defer t.mu.Unlock()
  276. if p, ok := t.peers[id]; ok {
  277. return p.activeSince()
  278. }
  279. return time.Time{}
  280. }
// SnapshotReady hands the snapshot reader and its index to the transport's
// snapshot store. The transport lock is not taken here.
func (t *Transport) SnapshotReady(rc io.ReadCloser, index uint64) {
	t.snapst.put(rc, index)
}
// Pausable is implemented by types that can be paused and resumed.
// Transport.Pause and Transport.Resume assert every peer to this interface.
type Pausable interface {
	// Pause pauses the implementation.
	Pause()
	// Resume resumes the implementation.
	Resume()
}
  288. // for testing
  289. func (t *Transport) Pause() {
  290. for _, p := range t.peers {
  291. p.(Pausable).Pause()
  292. }
  293. }
  294. func (t *Transport) Resume() {
  295. for _, p := range t.peers {
  296. p.(Pausable).Resume()
  297. }
  298. }