server.go 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. package etcdserver
  2. import (
  3. "errors"
  4. "log"
  5. "math/rand"
  6. "time"
  7. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  8. "github.com/coreos/etcd/raft"
  9. "github.com/coreos/etcd/raft/raftpb"
  10. "github.com/coreos/etcd/store"
  11. "github.com/coreos/etcd/third_party/code.google.com/p/go.net/context"
  12. "github.com/coreos/etcd/wait"
  13. )
  const (
  	// defaultSyncTimeout is the timeout handed to sync each time the sync
  	// ticker fires in the run loop.
  	defaultSyncTimeout = time.Second

  	// DefaultSnapCount is the snapshot count Start falls back to when
  	// EtcdServer.SnapCount is left at zero.
  	DefaultSnapCount = 10000
  )
  var (
  	// ErrUnknownMethod is reported when a request carries a method that
  	// the server does not know how to handle.
  	ErrUnknownMethod = errors.New("etcdserver: unknown method")

  	// ErrStopped is reported to callers that were still waiting for a
  	// result when the server was stopped.
  	ErrStopped = errors.New("etcdserver: server stopped")
  )
  // init seeds the global math/rand source from the current wall-clock time
  // so that the random ids produced by GenID vary between process runs.
  func init() {
  	seed := time.Now().UnixNano()
  	rand.Seed(seed)
  }
  25. type SendFunc func(m []raftpb.Message)
  26. type SaveFunc func(st raftpb.HardState, ents []raftpb.Entry)
  27. type Response struct {
  28. Event *store.Event
  29. Watcher store.Watcher
  30. err error
  31. }
  32. type Storage interface {
  33. // Save function saves ents and state to the underlying stable storage.
  34. // Save MUST block until st and ents are on stable storage.
  35. Save(st raftpb.HardState, ents []raftpb.Entry)
  36. // SaveSnap function saves snapshot to the underlying stable storage.
  37. SaveSnap(snap raftpb.Snapshot)
  38. // TODO: WAL should be able to control cut itself. After implement self-controled cut,
  39. // remove it in this interface.
  40. // Cut cuts out a new wal file for saving new state and entries.
  41. Cut() error
  42. }
  43. type Server interface {
  44. // Start performs any initialization of the Server necessary for it to
  45. // begin serving requests. It must be called before Do or Process.
  46. // Start must be non-blocking; any long-running server functionality
  47. // should be implemented in goroutines.
  48. Start()
  49. // Stop terminates the Server and performs any necessary finalization.
  50. // Do and Process cannot be called after Stop has been invoked.
  51. Stop()
  52. // Do takes a request and attempts to fulfil it, returning a Response.
  53. Do(ctx context.Context, r pb.Request) (Response, error)
  54. // Process takes a raft message and applies it to the server's raft state
  55. // machine, respecting any timeout of the given context.
  56. Process(ctx context.Context, m raftpb.Message) error
  57. }
  58. // EtcdServer is the production implementation of the Server interface
  59. type EtcdServer struct {
  60. w wait.Wait
  61. done chan struct{}
  62. Node raft.Node
  63. Store store.Store
  64. // Send specifies the send function for sending msgs to peers. Send
  65. // MUST NOT block. It is okay to drop messages, since clients should
  66. // timeout and reissue their messages. If Send is nil, server will
  67. // panic.
  68. Send SendFunc
  69. Storage Storage
  70. Ticker <-chan time.Time
  71. SyncTicker <-chan time.Time
  72. SnapCount int64 // number of entries to trigger a snapshot
  73. }
  74. // Start prepares and starts server in a new goroutine. It is no longer safe to
  75. // modify a server's fields after it has been sent to Start.
  76. func (s *EtcdServer) Start() {
  77. if s.SnapCount == 0 {
  78. log.Printf("etcdserver: set snapshot count to default %d", DefaultSnapCount)
  79. s.SnapCount = DefaultSnapCount
  80. }
  81. s.w = wait.New()
  82. s.done = make(chan struct{})
  83. go s.run()
  84. }
  85. func (s *EtcdServer) Process(ctx context.Context, m raftpb.Message) error {
  86. return s.Node.Step(ctx, m)
  87. }
  88. func (s *EtcdServer) run() {
  89. var syncC <-chan time.Time
  90. // snapi indicates the index of the last submitted snapshot request
  91. var snapi, appliedi int64
  92. for {
  93. select {
  94. case <-s.Ticker:
  95. s.Node.Tick()
  96. case rd := <-s.Node.Ready():
  97. s.Storage.Save(rd.HardState, rd.Entries)
  98. s.Storage.SaveSnap(rd.Snapshot)
  99. s.Send(rd.Messages)
  100. // TODO(bmizerany): do this in the background, but take
  101. // care to apply entries in a single goroutine, and not
  102. // race them.
  103. for _, e := range rd.CommittedEntries {
  104. switch e.Type {
  105. case raftpb.EntryNormal:
  106. var r pb.Request
  107. if err := r.Unmarshal(e.Data); err != nil {
  108. panic("TODO: this is bad, what do we do about it?")
  109. }
  110. s.w.Trigger(r.Id, s.applyRequest(r))
  111. case raftpb.EntryConfig:
  112. var c raftpb.Config
  113. if err := c.Unmarshal(e.Data); err != nil {
  114. panic("TODO: this is bad, what do we do about it?")
  115. }
  116. s.applyConfig(c)
  117. s.w.Trigger(c.ID, nil)
  118. default:
  119. panic("unsupported entry type")
  120. }
  121. appliedi = e.Index
  122. }
  123. if rd.Snapshot.Index > snapi {
  124. snapi = rd.Snapshot.Index
  125. }
  126. // recover from snapshot if it is more updated than current applied
  127. if rd.Snapshot.Index > appliedi {
  128. if err := s.Store.Recovery(rd.Snapshot.Data); err != nil {
  129. panic("TODO: this is bad, what do we do about it?")
  130. }
  131. appliedi = rd.Snapshot.Index
  132. }
  133. if appliedi-snapi > s.SnapCount {
  134. s.snapshot()
  135. snapi = appliedi
  136. }
  137. if rd.SoftState != nil {
  138. if rd.RaftState == raft.StateLeader {
  139. syncC = s.SyncTicker
  140. } else {
  141. syncC = nil
  142. }
  143. }
  144. case <-syncC:
  145. s.sync(defaultSyncTimeout)
  146. case <-s.done:
  147. return
  148. }
  149. }
  150. }
  151. // Stop stops the server, and shuts down the running goroutine. Stop should be
  152. // called after a Start(s), otherwise it will block forever.
  153. func (s *EtcdServer) Stop() {
  154. s.Node.Stop()
  155. close(s.done)
  156. }
  157. // Do interprets r and performs an operation on s.Store according to r.Method
  158. // and other fields. If r.Method is "POST", "PUT", "DELETE", or a "GET" with
  159. // Quorum == true, r will be sent through consensus before performing its
  160. // respective operation. Do will block until an action is performed or there is
  161. // an error.
  162. func (s *EtcdServer) Do(ctx context.Context, r pb.Request) (Response, error) {
  163. if r.Id == 0 {
  164. panic("r.Id cannot be 0")
  165. }
  166. if r.Method == "GET" && r.Quorum {
  167. r.Method = "QGET"
  168. }
  169. switch r.Method {
  170. case "POST", "PUT", "DELETE", "QGET":
  171. data, err := r.Marshal()
  172. if err != nil {
  173. return Response{}, err
  174. }
  175. ch := s.w.Register(r.Id)
  176. s.Node.Propose(ctx, data)
  177. select {
  178. case x := <-ch:
  179. resp := x.(Response)
  180. return resp, resp.err
  181. case <-ctx.Done():
  182. s.w.Trigger(r.Id, nil) // GC wait
  183. return Response{}, ctx.Err()
  184. case <-s.done:
  185. return Response{}, ErrStopped
  186. }
  187. case "GET":
  188. switch {
  189. case r.Wait:
  190. wc, err := s.Store.Watch(r.Path, r.Recursive, false, r.Since)
  191. if err != nil {
  192. return Response{}, err
  193. }
  194. return Response{Watcher: wc}, nil
  195. default:
  196. ev, err := s.Store.Get(r.Path, r.Recursive, r.Sorted)
  197. if err != nil {
  198. return Response{}, err
  199. }
  200. return Response{Event: ev}, nil
  201. }
  202. default:
  203. return Response{}, ErrUnknownMethod
  204. }
  205. }
  206. func (s *EtcdServer) AddNode(ctx context.Context, id int64, context []byte) error {
  207. req := raftpb.Config{
  208. ID: GenID(),
  209. Type: raftpb.ConfigAddNode,
  210. NodeID: id,
  211. Context: context,
  212. }
  213. return s.configure(ctx, req)
  214. }
  215. func (s *EtcdServer) RemoveNode(ctx context.Context, id int64) error {
  216. req := raftpb.Config{
  217. ID: GenID(),
  218. Type: raftpb.ConfigRemoveNode,
  219. NodeID: id,
  220. }
  221. return s.configure(ctx, req)
  222. }
  223. // configure sends configuration change through consensus then performs it.
  224. // It will block until the change is performed or there is an error.
  225. func (s *EtcdServer) configure(ctx context.Context, r raftpb.Config) error {
  226. ch := s.w.Register(r.ID)
  227. if err := s.Node.Configure(ctx, r); err != nil {
  228. log.Printf("configure error: %v", err)
  229. s.w.Trigger(r.ID, nil)
  230. return err
  231. }
  232. select {
  233. case <-ch:
  234. return nil
  235. case <-ctx.Done():
  236. s.w.Trigger(r.ID, nil) // GC wait
  237. return ctx.Err()
  238. case <-s.done:
  239. return ErrStopped
  240. }
  241. }
  242. // sync proposes a SYNC request and is non-blocking.
  243. // This makes no guarantee that the request will be proposed or performed.
  244. // The request will be cancelled after the given timeout.
  245. func (s *EtcdServer) sync(timeout time.Duration) {
  246. ctx, cancel := context.WithTimeout(context.Background(), timeout)
  247. req := pb.Request{
  248. Method: "SYNC",
  249. Id: GenID(),
  250. Time: time.Now().UnixNano(),
  251. }
  252. data, err := req.Marshal()
  253. if err != nil {
  254. log.Printf("marshal request %#v error: %v", req, err)
  255. return
  256. }
  257. // There is no promise that node has leader when do SYNC request,
  258. // so it uses goroutine to propose.
  259. go func() {
  260. s.Node.Propose(ctx, data)
  261. cancel()
  262. }()
  263. }
  264. func getExpirationTime(r *pb.Request) time.Time {
  265. var t time.Time
  266. if r.Expiration != 0 {
  267. t = time.Unix(0, r.Expiration)
  268. }
  269. return t
  270. }
  271. // applyRequest interprets r as a call to store.X and returns an Response interpreted from store.Event
  272. func (s *EtcdServer) applyRequest(r pb.Request) Response {
  273. f := func(ev *store.Event, err error) Response {
  274. return Response{Event: ev, err: err}
  275. }
  276. expr := getExpirationTime(&r)
  277. switch r.Method {
  278. case "POST":
  279. return f(s.Store.Create(r.Path, r.Dir, r.Val, true, expr))
  280. case "PUT":
  281. exists, existsSet := getBool(r.PrevExist)
  282. switch {
  283. case existsSet:
  284. if exists {
  285. return f(s.Store.Update(r.Path, r.Val, expr))
  286. } else {
  287. return f(s.Store.Create(r.Path, r.Dir, r.Val, false, expr))
  288. }
  289. case r.PrevIndex > 0 || r.PrevValue != "":
  290. return f(s.Store.CompareAndSwap(r.Path, r.PrevValue, r.PrevIndex, r.Val, expr))
  291. default:
  292. return f(s.Store.Set(r.Path, r.Dir, r.Val, expr))
  293. }
  294. case "DELETE":
  295. switch {
  296. case r.PrevIndex > 0 || r.PrevValue != "":
  297. return f(s.Store.CompareAndDelete(r.Path, r.PrevValue, r.PrevIndex))
  298. default:
  299. return f(s.Store.Delete(r.Path, r.Dir, r.Recursive))
  300. }
  301. case "QGET":
  302. return f(s.Store.Get(r.Path, r.Recursive, r.Sorted))
  303. case "SYNC":
  304. s.Store.DeleteExpiredKeys(time.Unix(0, r.Time))
  305. return Response{}
  306. default:
  307. // This should never be reached, but just in case:
  308. return Response{err: ErrUnknownMethod}
  309. }
  310. }
  311. func (s *EtcdServer) applyConfig(r raftpb.Config) {
  312. switch r.Type {
  313. case raftpb.ConfigAddNode:
  314. s.Node.AddNode(r.NodeID)
  315. case raftpb.ConfigRemoveNode:
  316. s.Node.RemoveNode(r.NodeID)
  317. default:
  318. // This should never be reached
  319. panic("unexpected config type")
  320. }
  321. }
  322. // TODO: non-blocking snapshot
  323. func (s *EtcdServer) snapshot() {
  324. d, err := s.Store.Save()
  325. // TODO: current store will never fail to do a snapshot
  326. // what should we do if the store might fail?
  327. if err != nil {
  328. panic("TODO: this is bad, what do we do about it?")
  329. }
  330. s.Node.Compact(d)
  331. s.Storage.Cut()
  332. }
  // TODO: move the function to /id pkg maybe?

  // GenID generates a random id that is not equal to 0. It draws from the
  // global math/rand source until a non-zero value comes up.
  func GenID() (n int64) {
  	for {
  		if n = rand.Int63(); n != 0 {
  			return n
  		}
  	}
  }
  // getBool dereferences the optional boolean v. It returns the pointed-to
  // value and set == true when v is non-nil, and (false, false) when v is
  // nil.
  func getBool(v *bool) (vv bool, set bool) {
  	if v != nil {
  		vv, set = *v, true
  	}
  	return vv, set
  }