http.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package rafthttp
  15. import (
  16. "errors"
  17. "fmt"
  18. "io/ioutil"
  19. "net/http"
  20. "path"
  21. "strings"
  22. "time"
  23. pioutil "github.com/coreos/etcd/pkg/ioutil"
  24. "github.com/coreos/etcd/pkg/types"
  25. "github.com/coreos/etcd/raft/raftpb"
  26. "github.com/coreos/etcd/snap"
  27. "github.com/coreos/etcd/version"
  28. "golang.org/x/net/context"
  29. )
  30. const (
  31. // connReadLimitByte limits the number of bytes
  32. // a single read can read out.
  33. //
  34. // 64KB should be large enough for not causing
  35. // throughput bottleneck as well as small enough
  36. // for not causing a read timeout.
  37. connReadLimitByte = 64 * 1024
  38. )
  39. var (
  40. RaftPrefix = "/raft"
  41. ProbingPrefix = path.Join(RaftPrefix, "probing")
  42. RaftStreamPrefix = path.Join(RaftPrefix, "stream")
  43. RaftSnapshotPrefix = path.Join(RaftPrefix, "snapshot")
  44. errIncompatibleVersion = errors.New("incompatible version")
  45. errClusterIDMismatch = errors.New("cluster ID mismatch")
  46. )
  47. type peerGetter interface {
  48. Get(id types.ID) Peer
  49. }
  50. type writerToResponse interface {
  51. WriteTo(w http.ResponseWriter)
  52. }
  53. type pipelineHandler struct {
  54. tr Transporter
  55. r Raft
  56. cid types.ID
  57. }
  58. // newPipelineHandler returns a handler for handling raft messages
  59. // from pipeline for RaftPrefix.
  60. //
  61. // The handler reads out the raft message from request body,
  62. // and forwards it to the given raft state machine for processing.
  63. func newPipelineHandler(tr Transporter, r Raft, cid types.ID) http.Handler {
  64. return &pipelineHandler{
  65. tr: tr,
  66. r: r,
  67. cid: cid,
  68. }
  69. }
  70. func (h *pipelineHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
  71. if r.Method != "POST" {
  72. w.Header().Set("Allow", "POST")
  73. http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
  74. return
  75. }
  76. w.Header().Set("X-Etcd-Cluster-ID", h.cid.String())
  77. if err := checkClusterCompatibilityFromHeader(r.Header, h.cid); err != nil {
  78. http.Error(w, err.Error(), http.StatusPreconditionFailed)
  79. return
  80. }
  81. if from, err := types.IDFromString(r.Header.Get("X-Server-From")); err != nil {
  82. if urls := r.Header.Get("X-PeerURLs"); urls != "" {
  83. h.tr.AddRemote(from, strings.Split(urls, ","))
  84. }
  85. }
  86. // Limit the data size that could be read from the request body, which ensures that read from
  87. // connection will not time out accidentally due to possible blocking in underlying implementation.
  88. limitedr := pioutil.NewLimitedBufferReader(r.Body, connReadLimitByte)
  89. b, err := ioutil.ReadAll(limitedr)
  90. if err != nil {
  91. plog.Errorf("failed to read raft message (%v)", err)
  92. http.Error(w, "error reading raft message", http.StatusBadRequest)
  93. recvFailures.WithLabelValues(r.RemoteAddr).Inc()
  94. return
  95. }
  96. var m raftpb.Message
  97. if err := m.Unmarshal(b); err != nil {
  98. plog.Errorf("failed to unmarshal raft message (%v)", err)
  99. http.Error(w, "error unmarshaling raft message", http.StatusBadRequest)
  100. recvFailures.WithLabelValues(r.RemoteAddr).Inc()
  101. return
  102. }
  103. receivedBytes.WithLabelValues(types.ID(m.From).String()).Add(float64(len(b)))
  104. if err := h.r.Process(context.TODO(), m); err != nil {
  105. switch v := err.(type) {
  106. case writerToResponse:
  107. v.WriteTo(w)
  108. default:
  109. plog.Warningf("failed to process raft message (%v)", err)
  110. http.Error(w, "error processing raft message", http.StatusInternalServerError)
  111. w.(http.Flusher).Flush()
  112. // disconnect the http stream
  113. panic(err)
  114. }
  115. return
  116. }
  117. // Write StatusNoContent header after the message has been processed by
  118. // raft, which facilitates the client to report MsgSnap status.
  119. w.WriteHeader(http.StatusNoContent)
  120. }
  121. type snapshotHandler struct {
  122. tr Transporter
  123. r Raft
  124. snapshotter *snap.Snapshotter
  125. cid types.ID
  126. }
  127. func newSnapshotHandler(tr Transporter, r Raft, snapshotter *snap.Snapshotter, cid types.ID) http.Handler {
  128. return &snapshotHandler{
  129. tr: tr,
  130. r: r,
  131. snapshotter: snapshotter,
  132. cid: cid,
  133. }
  134. }
  135. const unknownSnapshotSender = "UNKNOWN_SNAPSHOT_SENDER"
  136. // ServeHTTP serves HTTP request to receive and process snapshot message.
  137. //
  138. // If request sender dies without closing underlying TCP connection,
  139. // the handler will keep waiting for the request body until TCP keepalive
  140. // finds out that the connection is broken after several minutes.
  141. // This is acceptable because
  142. // 1. snapshot messages sent through other TCP connections could still be
  143. // received and processed.
  144. // 2. this case should happen rarely, so no further optimization is done.
  145. func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
  146. start := time.Now()
  147. if r.Method != "POST" {
  148. w.Header().Set("Allow", "POST")
  149. http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
  150. snapshotReceiveFailures.WithLabelValues(unknownSnapshotSender).Inc()
  151. return
  152. }
  153. w.Header().Set("X-Etcd-Cluster-ID", h.cid.String())
  154. if err := checkClusterCompatibilityFromHeader(r.Header, h.cid); err != nil {
  155. http.Error(w, err.Error(), http.StatusPreconditionFailed)
  156. snapshotReceiveFailures.WithLabelValues(unknownSnapshotSender).Inc()
  157. return
  158. }
  159. if from, err := types.IDFromString(r.Header.Get("X-Server-From")); err != nil {
  160. if urls := r.Header.Get("X-PeerURLs"); urls != "" {
  161. h.tr.AddRemote(from, strings.Split(urls, ","))
  162. }
  163. }
  164. dec := &messageDecoder{r: r.Body}
  165. // let snapshots be very large since they can exceed 512MB for large installations
  166. m, err := dec.decodeLimit(uint64(1 << 63))
  167. from := types.ID(m.From).String()
  168. if err != nil {
  169. msg := fmt.Sprintf("failed to decode raft message (%v)", err)
  170. plog.Errorf(msg)
  171. http.Error(w, msg, http.StatusBadRequest)
  172. recvFailures.WithLabelValues(r.RemoteAddr).Inc()
  173. snapshotReceiveFailures.WithLabelValues(from).Inc()
  174. return
  175. }
  176. receivedBytes.WithLabelValues(from).Add(float64(m.Size()))
  177. if m.Type != raftpb.MsgSnap {
  178. plog.Errorf("unexpected raft message type %s on snapshot path", m.Type)
  179. http.Error(w, "wrong raft message type", http.StatusBadRequest)
  180. snapshotReceiveFailures.WithLabelValues(from).Inc()
  181. return
  182. }
  183. plog.Infof("receiving database snapshot [index:%d, from %s] ...", m.Snapshot.Metadata.Index, types.ID(m.From))
  184. // save incoming database snapshot.
  185. n, err := h.snapshotter.SaveDBFrom(r.Body, m.Snapshot.Metadata.Index)
  186. if err != nil {
  187. msg := fmt.Sprintf("failed to save KV snapshot (%v)", err)
  188. plog.Error(msg)
  189. http.Error(w, msg, http.StatusInternalServerError)
  190. snapshotReceiveFailures.WithLabelValues(from).Inc()
  191. return
  192. }
  193. receivedBytes.WithLabelValues(from).Add(float64(n))
  194. plog.Infof("received and saved database snapshot [index: %d, from: %s] successfully", m.Snapshot.Metadata.Index, types.ID(m.From))
  195. if err := h.r.Process(context.TODO(), m); err != nil {
  196. switch v := err.(type) {
  197. // Process may return writerToResponse error when doing some
  198. // additional checks before calling raft.Node.Step.
  199. case writerToResponse:
  200. v.WriteTo(w)
  201. default:
  202. msg := fmt.Sprintf("failed to process raft message (%v)", err)
  203. plog.Warningf(msg)
  204. http.Error(w, msg, http.StatusInternalServerError)
  205. snapshotReceiveFailures.WithLabelValues(from).Inc()
  206. }
  207. return
  208. }
  209. // Write StatusNoContent header after the message has been processed by
  210. // raft, which facilitates the client to report MsgSnap status.
  211. w.WriteHeader(http.StatusNoContent)
  212. snapshotReceive.WithLabelValues(from).Inc()
  213. snapshotReceiveSeconds.WithLabelValues(from).Observe(time.Since(start).Seconds())
  214. }
  215. type streamHandler struct {
  216. tr *Transport
  217. peerGetter peerGetter
  218. r Raft
  219. id types.ID
  220. cid types.ID
  221. }
  222. func newStreamHandler(tr *Transport, pg peerGetter, r Raft, id, cid types.ID) http.Handler {
  223. return &streamHandler{
  224. tr: tr,
  225. peerGetter: pg,
  226. r: r,
  227. id: id,
  228. cid: cid,
  229. }
  230. }
  231. func (h *streamHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
  232. if r.Method != "GET" {
  233. w.Header().Set("Allow", "GET")
  234. http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
  235. return
  236. }
  237. w.Header().Set("X-Server-Version", version.Version)
  238. w.Header().Set("X-Etcd-Cluster-ID", h.cid.String())
  239. if err := checkClusterCompatibilityFromHeader(r.Header, h.cid); err != nil {
  240. http.Error(w, err.Error(), http.StatusPreconditionFailed)
  241. return
  242. }
  243. var t streamType
  244. switch path.Dir(r.URL.Path) {
  245. case streamTypeMsgAppV2.endpoint():
  246. t = streamTypeMsgAppV2
  247. case streamTypeMessage.endpoint():
  248. t = streamTypeMessage
  249. default:
  250. plog.Debugf("ignored unexpected streaming request path %s", r.URL.Path)
  251. http.Error(w, "invalid path", http.StatusNotFound)
  252. return
  253. }
  254. fromStr := path.Base(r.URL.Path)
  255. from, err := types.IDFromString(fromStr)
  256. if err != nil {
  257. plog.Errorf("failed to parse from %s into ID (%v)", fromStr, err)
  258. http.Error(w, "invalid from", http.StatusNotFound)
  259. return
  260. }
  261. if h.r.IsIDRemoved(uint64(from)) {
  262. plog.Warningf("rejected the stream from peer %s since it was removed", from)
  263. http.Error(w, "removed member", http.StatusGone)
  264. return
  265. }
  266. p := h.peerGetter.Get(from)
  267. if p == nil {
  268. // This may happen in following cases:
  269. // 1. user starts a remote peer that belongs to a different cluster
  270. // with the same cluster ID.
  271. // 2. local etcd falls behind of the cluster, and cannot recognize
  272. // the members that joined after its current progress.
  273. if urls := r.Header.Get("X-PeerURLs"); urls != "" {
  274. h.tr.AddRemote(from, strings.Split(urls, ","))
  275. }
  276. plog.Errorf("failed to find member %s in cluster %s", from, h.cid)
  277. http.Error(w, "error sender not found", http.StatusNotFound)
  278. return
  279. }
  280. wto := h.id.String()
  281. if gto := r.Header.Get("X-Raft-To"); gto != wto {
  282. plog.Errorf("streaming request ignored (ID mismatch got %s want %s)", gto, wto)
  283. http.Error(w, "to field mismatch", http.StatusPreconditionFailed)
  284. return
  285. }
  286. w.WriteHeader(http.StatusOK)
  287. w.(http.Flusher).Flush()
  288. c := newCloseNotifier()
  289. conn := &outgoingConn{
  290. t: t,
  291. Writer: w,
  292. Flusher: w.(http.Flusher),
  293. Closer: c,
  294. }
  295. p.attachOutgoingConn(conn)
  296. <-c.closeNotify()
  297. }
  298. // checkClusterCompatibilityFromHeader checks the cluster compatibility of
  299. // the local member from the given header.
  300. // It checks whether the version of local member is compatible with
  301. // the versions in the header, and whether the cluster ID of local member
  302. // matches the one in the header.
  303. func checkClusterCompatibilityFromHeader(header http.Header, cid types.ID) error {
  304. if err := checkVersionCompability(header.Get("X-Server-From"), serverVersion(header), minClusterVersion(header)); err != nil {
  305. plog.Errorf("request version incompatibility (%v)", err)
  306. return errIncompatibleVersion
  307. }
  308. if gcid := header.Get("X-Etcd-Cluster-ID"); gcid != cid.String() {
  309. plog.Errorf("request cluster ID mismatch (got %s want %s)", gcid, cid)
  310. return errClusterIDMismatch
  311. }
  312. return nil
  313. }
  314. type closeNotifier struct {
  315. done chan struct{}
  316. }
  317. func newCloseNotifier() *closeNotifier {
  318. return &closeNotifier{
  319. done: make(chan struct{}),
  320. }
  321. }
  322. func (n *closeNotifier) Close() error {
  323. close(n.done)
  324. return nil
  325. }
  326. func (n *closeNotifier) closeNotify() <-chan struct{} { return n.done }