etcd.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. package etcd
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "log"
  6. "net/http"
  7. "path"
  8. "time"
  9. "github.com/coreos/etcd/config"
  10. "github.com/coreos/etcd/raft"
  11. "github.com/coreos/etcd/store"
  12. )
  13. const (
  14. defaultHeartbeat = 1
  15. defaultElection = 5
  16. defaultTickDuration = time.Millisecond * 100
  17. v2machineKVPrefix = "/_etcd/machines"
  18. v2Prefix = "/v2/keys"
  19. v2machinePrefix = "/v2/machines"
  20. v2peersPrefix = "/v2/peers"
  21. v2LeaderPrefix = "/v2/leader"
  22. v2StoreStatsPrefix = "/v2/stats/store"
  23. raftPrefix = "/raft"
  24. )
// Server is one etcd node. It ties together a raft node, the transporter
// used to exchange raft messages with peers, the key-value store, and the
// HTTP handler that serves the v2 endpoints.
type Server struct {
	// config is the configuration handed to New.
	config *config.Config
	// id is this node's identifier; it is also passed to raft.New.
	id int
	// pubAddr is copied from config.Addr and announced to the cluster when
	// bootstrapping or joining.
	pubAddr string
	// nodes is the set of known peer addresses. It is seeded from
	// config.Peers in New and extended by apply when a node is added.
	nodes map[string]bool
	// tickDuration is the interval of the raft tick ticker created in run.
	tickDuration time.Duration

	// proposal carries proposals into the run loop, which only receives
	// from it while the raft node reports a leader.
	proposal chan v2Proposal
	// node is the raft node driven by the run loop.
	node *v2Raft
	// t sends raft messages to peers and is mounted as the raft HTTP handler.
	t *transporter

	// Store is the embedded key-value store created with store.New.
	store.Store

	// stop is closed by Stop to make the run loop return.
	stop chan struct{}

	// Handler is the embedded HTTP mux assembled in New.
	http.Handler
}
  38. func New(c *config.Config, id int) *Server {
  39. if err := c.Sanitize(); err != nil {
  40. log.Fatalf("failed sanitizing configuration: %v", err)
  41. }
  42. s := &Server{
  43. config: c,
  44. id: id,
  45. pubAddr: c.Addr,
  46. nodes: make(map[string]bool),
  47. tickDuration: defaultTickDuration,
  48. proposal: make(chan v2Proposal),
  49. node: &v2Raft{
  50. Node: raft.New(id, defaultHeartbeat, defaultElection),
  51. result: make(map[wait]chan interface{}),
  52. },
  53. t: newTransporter(),
  54. Store: store.New(),
  55. stop: make(chan struct{}),
  56. }
  57. for _, seed := range c.Peers {
  58. s.nodes[seed] = true
  59. }
  60. m := http.NewServeMux()
  61. //m.Handle("/HEAD", handlerErr(s.serveHead))
  62. m.Handle(v2Prefix+"/", handlerErr(s.serveValue))
  63. m.Handle("/raft", s.t)
  64. m.Handle(v2machinePrefix, handlerErr(s.serveMachines))
  65. m.Handle(v2peersPrefix, handlerErr(s.serveMachines))
  66. m.Handle(v2LeaderPrefix, handlerErr(s.serveLeader))
  67. m.Handle(v2StoreStatsPrefix, handlerErr(s.serveStoreStats))
  68. s.Handler = m
  69. return s
  70. }
// SetTick overrides the interval at which the run loop ticks the raft node.
// The value is read when run creates its ticker, so it only takes effect if
// set before the server is started.
func (s *Server) SetTick(d time.Duration) {
	s.tickDuration = d
}
  74. func (s *Server) Run() {
  75. if len(s.config.Peers) == 0 {
  76. s.Bootstrap()
  77. } else {
  78. s.Join()
  79. }
  80. }
// Stop makes the run loop return by closing the stop channel and then stops
// the transporter. Closing an already closed channel panics, so Stop must
// not be called more than once.
func (s *Server) Stop() {
	close(s.stop)
	s.t.stop()
}
  85. func (s *Server) Bootstrap() {
  86. log.Println("starting a bootstrap node")
  87. s.node.Campaign()
  88. s.node.Add(s.id, s.pubAddr)
  89. s.apply(s.node.Next())
  90. s.run()
  91. }
  92. func (s *Server) Join() {
  93. log.Println("joining cluster via peers", s.config.Peers)
  94. d, err := json.Marshal(&raft.Config{s.id, s.pubAddr})
  95. if err != nil {
  96. panic(err)
  97. }
  98. b, err := json.Marshal(&raft.Message{From: s.id, Type: 2, Entries: []raft.Entry{{Type: 1, Data: d}}})
  99. if err != nil {
  100. panic(err)
  101. }
  102. for seed := range s.nodes {
  103. if err := s.t.send(seed+raftPrefix, b); err != nil {
  104. log.Println(err)
  105. continue
  106. }
  107. // todo(xiangli) WAIT for join to be committed or retry...
  108. break
  109. }
  110. s.run()
  111. }
  112. func (s *Server) run() {
  113. node := s.node
  114. recv := s.t.recv
  115. ticker := time.NewTicker(s.tickDuration)
  116. v2SyncTicker := time.NewTicker(time.Millisecond * 500)
  117. var proposal chan v2Proposal
  118. for {
  119. if node.HasLeader() {
  120. proposal = s.proposal
  121. } else {
  122. proposal = nil
  123. }
  124. select {
  125. case p := <-proposal:
  126. node.Propose(p)
  127. case msg := <-recv:
  128. node.Step(*msg)
  129. case <-ticker.C:
  130. node.Tick()
  131. case <-v2SyncTicker.C:
  132. node.Sync()
  133. case <-s.stop:
  134. log.Printf("Node: %d stopped\n", s.id)
  135. return
  136. }
  137. s.apply(node.Next())
  138. s.send(node.Msgs())
  139. }
  140. }
  141. func (s *Server) apply(ents []raft.Entry) {
  142. offset := s.node.Applied() - len(ents) + 1
  143. for i, ent := range ents {
  144. switch ent.Type {
  145. // expose raft entry type
  146. case raft.Normal:
  147. if len(ent.Data) == 0 {
  148. continue
  149. }
  150. s.v2apply(offset+i, ent)
  151. case raft.AddNode:
  152. cfg := new(raft.Config)
  153. if err := json.Unmarshal(ent.Data, cfg); err != nil {
  154. log.Println(err)
  155. break
  156. }
  157. if err := s.t.set(cfg.NodeId, cfg.Addr); err != nil {
  158. log.Println(err)
  159. break
  160. }
  161. log.Printf("Add Node %x %v\n", cfg.NodeId, cfg.Addr)
  162. s.nodes[cfg.Addr] = true
  163. p := path.Join(v2machineKVPrefix, fmt.Sprint(cfg.NodeId))
  164. s.Store.Set(p, false, cfg.Addr, store.Permanent)
  165. default:
  166. panic("unimplemented")
  167. }
  168. }
  169. }
  170. func (s *Server) send(msgs []raft.Message) {
  171. for i := range msgs {
  172. data, err := json.Marshal(msgs[i])
  173. if err != nil {
  174. // todo(xiangli): error handling
  175. log.Fatal(err)
  176. }
  177. // todo(xiangli): reuse routines and limit the number of sending routines
  178. // sync.Pool?
  179. go func(i int) {
  180. var err error
  181. if err = s.t.sendTo(msgs[i].To, data); err == nil {
  182. return
  183. }
  184. if err == errUnknownNode {
  185. err = s.fetchAddr(msgs[i].To)
  186. }
  187. if err == nil {
  188. err = s.t.sendTo(msgs[i].To, data)
  189. }
  190. if err != nil {
  191. log.Println(err)
  192. }
  193. }(i)
  194. }
  195. }
  196. func (s *Server) fetchAddr(nodeId int) error {
  197. for seed := range s.nodes {
  198. if err := s.t.fetchAddr(seed, nodeId); err == nil {
  199. return nil
  200. }
  201. }
  202. return fmt.Errorf("cannot fetch the address of node %d", nodeId)
  203. }