transport.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. // Copyright 2015 CoreOS, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package rafthttp
  15. import (
  16. "net/http"
  17. "sync"
  18. "time"
  19. "github.com/coreos/etcd/Godeps/_workspace/src/github.com/coreos/pkg/capnslog"
  20. "github.com/coreos/etcd/Godeps/_workspace/src/github.com/xiang90/probing"
  21. "github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
  22. "github.com/coreos/etcd/etcdserver/stats"
  23. "github.com/coreos/etcd/pkg/types"
  24. "github.com/coreos/etcd/raft"
  25. "github.com/coreos/etcd/raft/raftpb"
  26. )
  27. var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "rafthttp")
  28. type Raft interface {
  29. Process(ctx context.Context, m raftpb.Message) error
  30. IsIDRemoved(id uint64) bool
  31. ReportUnreachable(id uint64)
  32. ReportSnapshot(id uint64, status raft.SnapshotStatus)
  33. }
  34. type Transporter interface {
  35. // Handler returns the HTTP handler of the transporter.
  36. // A transporter HTTP handler handles the HTTP requests
  37. // from remote peers.
  38. // The handler MUST be used to handle RaftPrefix(/raft)
  39. // endpoint.
  40. Handler() http.Handler
  41. // Send sends out the given messages to the remote peers.
  42. // Each message has a To field, which is an id that maps
  43. // to an existing peer in the transport.
  44. // If the id cannot be found in the transport, the message
  45. // will be ignored.
  46. Send(m []raftpb.Message)
  47. // AddRemote adds a remote with given peer urls into the transport.
  48. // A remote helps newly joined member to catch up the progress of cluster,
  49. // and will not be used after that.
  50. // It is the caller's responsibility to ensure the urls are all valid,
  51. // or it panics.
  52. AddRemote(id types.ID, urls []string)
  53. // AddPeer adds a peer with given peer urls into the transport.
  54. // It is the caller's responsibility to ensure the urls are all valid,
  55. // or it panics.
  56. // Peer urls are used to connect to the remote peer.
  57. AddPeer(id types.ID, urls []string)
  58. // RemovePeer removes the peer with given id.
  59. RemovePeer(id types.ID)
  60. // RemoveAllPeers removes all the existing peers in the transport.
  61. RemoveAllPeers()
  62. // UpdatePeer updates the peer urls of the peer with the given id.
  63. // It is the caller's responsibility to ensure the urls are all valid,
  64. // or it panics.
  65. UpdatePeer(id types.ID, urls []string)
  66. // ActiveSince returns the time that the connection with the peer
  67. // of the given id becomes active.
  68. // If the connection is active since peer was added, it returns the adding time.
  69. // If the connection is currently inactive, it returns zero time.
  70. ActiveSince(id types.ID) time.Time
  71. // Stop closes the connections and stops the transporter.
  72. Stop()
  73. }
  74. type transport struct {
  75. roundTripper http.RoundTripper
  76. id types.ID
  77. clusterID types.ID
  78. raft Raft
  79. serverStats *stats.ServerStats
  80. leaderStats *stats.LeaderStats
  81. mu sync.RWMutex // protect the term, remote and peer map
  82. term uint64 // the latest term that has been observed
  83. remotes map[types.ID]*remote // remotes map that helps newly joined member to catch up
  84. peers map[types.ID]Peer // peers map
  85. prober probing.Prober
  86. errorc chan error
  87. }
  88. func NewTransporter(rt http.RoundTripper, id, cid types.ID, r Raft, errorc chan error, ss *stats.ServerStats, ls *stats.LeaderStats) Transporter {
  89. return &transport{
  90. roundTripper: rt,
  91. id: id,
  92. clusterID: cid,
  93. raft: r,
  94. serverStats: ss,
  95. leaderStats: ls,
  96. remotes: make(map[types.ID]*remote),
  97. peers: make(map[types.ID]Peer),
  98. prober: probing.NewProber(rt),
  99. errorc: errorc,
  100. }
  101. }
  102. func (t *transport) Handler() http.Handler {
  103. pipelineHandler := NewHandler(t.raft, t.clusterID)
  104. streamHandler := newStreamHandler(t, t.raft, t.id, t.clusterID)
  105. mux := http.NewServeMux()
  106. mux.Handle(RaftPrefix, pipelineHandler)
  107. mux.Handle(RaftStreamPrefix+"/", streamHandler)
  108. mux.Handle(ProbingPrefix, probing.NewHandler())
  109. return mux
  110. }
  111. func (t *transport) Get(id types.ID) Peer {
  112. t.mu.RLock()
  113. defer t.mu.RUnlock()
  114. return t.peers[id]
  115. }
  116. func (t *transport) maybeUpdatePeersTerm(term uint64) {
  117. t.mu.Lock()
  118. defer t.mu.Unlock()
  119. if t.term >= term {
  120. return
  121. }
  122. t.term = term
  123. for _, p := range t.peers {
  124. p.setTerm(term)
  125. }
  126. }
  127. func (t *transport) Send(msgs []raftpb.Message) {
  128. for _, m := range msgs {
  129. // intentionally dropped message
  130. if m.To == 0 {
  131. continue
  132. }
  133. to := types.ID(m.To)
  134. if m.Type != raftpb.MsgProp { // proposal message does not have a valid term
  135. t.maybeUpdatePeersTerm(m.Term)
  136. }
  137. p, ok := t.peers[to]
  138. if ok {
  139. if m.Type == raftpb.MsgApp {
  140. t.serverStats.SendAppendReq(m.Size())
  141. }
  142. p.Send(m)
  143. continue
  144. }
  145. g, ok := t.remotes[to]
  146. if ok {
  147. g.Send(m)
  148. continue
  149. }
  150. plog.Debugf("ignored message %s (sent to unknown peer %s)", m.Type, to)
  151. }
  152. }
  153. func (t *transport) Stop() {
  154. for _, r := range t.remotes {
  155. r.Stop()
  156. }
  157. for _, p := range t.peers {
  158. p.Stop()
  159. }
  160. t.prober.RemoveAll()
  161. if tr, ok := t.roundTripper.(*http.Transport); ok {
  162. tr.CloseIdleConnections()
  163. }
  164. }
  165. func (t *transport) AddRemote(id types.ID, us []string) {
  166. t.mu.Lock()
  167. defer t.mu.Unlock()
  168. if _, ok := t.remotes[id]; ok {
  169. return
  170. }
  171. urls, err := types.NewURLs(us)
  172. if err != nil {
  173. plog.Panicf("newURLs %+v should never fail: %+v", us, err)
  174. }
  175. t.remotes[id] = startRemote(t.roundTripper, urls, t.id, id, t.clusterID, t.raft, t.errorc)
  176. }
  177. func (t *transport) AddPeer(id types.ID, us []string) {
  178. t.mu.Lock()
  179. defer t.mu.Unlock()
  180. if _, ok := t.peers[id]; ok {
  181. return
  182. }
  183. urls, err := types.NewURLs(us)
  184. if err != nil {
  185. plog.Panicf("newURLs %+v should never fail: %+v", us, err)
  186. }
  187. fs := t.leaderStats.Follower(id.String())
  188. t.peers[id] = startPeer(t.roundTripper, urls, t.id, id, t.clusterID, t.raft, fs, t.errorc, t.term)
  189. addPeerToProber(t.prober, id.String(), us)
  190. }
  191. func (t *transport) RemovePeer(id types.ID) {
  192. t.mu.Lock()
  193. defer t.mu.Unlock()
  194. t.removePeer(id)
  195. }
  196. func (t *transport) RemoveAllPeers() {
  197. t.mu.Lock()
  198. defer t.mu.Unlock()
  199. for id := range t.peers {
  200. t.removePeer(id)
  201. }
  202. }
  203. // the caller of this function must have the peers mutex.
  204. func (t *transport) removePeer(id types.ID) {
  205. if peer, ok := t.peers[id]; ok {
  206. peer.Stop()
  207. } else {
  208. plog.Panicf("unexpected removal of unknown peer '%d'", id)
  209. }
  210. delete(t.peers, id)
  211. delete(t.leaderStats.Followers, id.String())
  212. t.prober.Remove(id.String())
  213. }
  214. func (t *transport) UpdatePeer(id types.ID, us []string) {
  215. t.mu.Lock()
  216. defer t.mu.Unlock()
  217. // TODO: return error or just panic?
  218. if _, ok := t.peers[id]; !ok {
  219. return
  220. }
  221. urls, err := types.NewURLs(us)
  222. if err != nil {
  223. plog.Panicf("newURLs %+v should never fail: %+v", us, err)
  224. }
  225. t.peers[id].Update(urls)
  226. t.prober.Remove(id.String())
  227. addPeerToProber(t.prober, id.String(), us)
  228. }
  229. func (t *transport) ActiveSince(id types.ID) time.Time {
  230. t.mu.Lock()
  231. defer t.mu.Unlock()
  232. if p, ok := t.peers[id]; ok {
  233. return p.activeSince()
  234. }
  235. return time.Time{}
  236. }
  237. type Pausable interface {
  238. Pause()
  239. Resume()
  240. }
  241. // for testing
  242. func (t *transport) Pause() {
  243. for _, p := range t.peers {
  244. p.(Pausable).Pause()
  245. }
  246. }
  247. func (t *transport) Resume() {
  248. for _, p := range t.peers {
  249. p.(Pausable).Resume()
  250. }
  251. }