etcd.go 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. package etcd
  2. import (
  3. "crypto/tls"
  4. "encoding/json"
  5. "fmt"
  6. "log"
  7. "net/http"
  8. "path"
  9. "time"
  10. "github.com/coreos/etcd/config"
  11. "github.com/coreos/etcd/raft"
  12. "github.com/coreos/etcd/store"
  13. )
  14. const (
  15. defaultHeartbeat = 1
  16. defaultElection = 5
  17. defaultTickDuration = time.Millisecond * 100
  18. v2machineKVPrefix = "/_etcd/machines"
  19. v2Prefix = "/v2/keys"
  20. v2machinePrefix = "/v2/machines"
  21. v2peersPrefix = "/v2/peers"
  22. v2LeaderPrefix = "/v2/leader"
  23. v2StoreStatsPrefix = "/v2/stats/store"
  24. raftPrefix = "/raft"
  25. )
  26. type Server struct {
  27. config *config.Config
  28. id int64
  29. pubAddr string
  30. raftPubAddr string
  31. nodes map[string]bool
  32. tickDuration time.Duration
  33. proposal chan v2Proposal
  34. node *v2Raft
  35. t *transporter
  36. store.Store
  37. stop chan struct{}
  38. http.Handler
  39. }
  40. func New(c *config.Config, id int64) *Server {
  41. if err := c.Sanitize(); err != nil {
  42. log.Fatalf("failed sanitizing configuration: %v", err)
  43. }
  44. tc := &tls.Config{
  45. InsecureSkipVerify: true,
  46. }
  47. var err error
  48. if c.PeerTLSInfo().Scheme() == "https" {
  49. tc, err = c.PeerTLSInfo().ClientConfig()
  50. if err != nil {
  51. log.Fatal("failed to create raft transporter tls:", err)
  52. }
  53. }
  54. s := &Server{
  55. config: c,
  56. id: id,
  57. pubAddr: c.Addr,
  58. raftPubAddr: c.Peer.Addr,
  59. nodes: make(map[string]bool),
  60. tickDuration: defaultTickDuration,
  61. proposal: make(chan v2Proposal),
  62. node: &v2Raft{
  63. Node: raft.New(id, defaultHeartbeat, defaultElection),
  64. result: make(map[wait]chan interface{}),
  65. },
  66. t: newTransporter(tc),
  67. Store: store.New(),
  68. stop: make(chan struct{}),
  69. }
  70. for _, seed := range c.Peers {
  71. s.nodes[seed] = true
  72. }
  73. m := http.NewServeMux()
  74. //m.Handle("/HEAD", handlerErr(s.serveHead))
  75. m.Handle(v2Prefix+"/", handlerErr(s.serveValue))
  76. m.Handle(v2machinePrefix, handlerErr(s.serveMachines))
  77. m.Handle(v2peersPrefix, handlerErr(s.serveMachines))
  78. m.Handle(v2LeaderPrefix, handlerErr(s.serveLeader))
  79. m.Handle(v2StoreStatsPrefix, handlerErr(s.serveStoreStats))
  80. s.Handler = m
  81. return s
  82. }
  83. func (s *Server) SetTick(d time.Duration) {
  84. s.tickDuration = d
  85. }
  86. func (s *Server) RaftHandler() http.Handler {
  87. return s.t
  88. }
  89. func (s *Server) Run() {
  90. if len(s.config.Peers) == 0 {
  91. s.Bootstrap()
  92. } else {
  93. s.Join()
  94. }
  95. }
  96. func (s *Server) Stop() {
  97. close(s.stop)
  98. s.t.stop()
  99. }
  100. func (s *Server) Bootstrap() {
  101. log.Println("starting a bootstrap node")
  102. s.node.Campaign()
  103. s.node.Add(s.id, s.raftPubAddr, []byte(s.pubAddr))
  104. s.apply(s.node.Next())
  105. s.run()
  106. }
  107. func (s *Server) Join() {
  108. log.Println("joining cluster via peers", s.config.Peers)
  109. d, err := json.Marshal(&raft.Config{s.id, s.raftPubAddr, []byte(s.pubAddr)})
  110. if err != nil {
  111. panic(err)
  112. }
  113. b, err := json.Marshal(&raft.Message{From: s.id, Type: 2, Entries: []raft.Entry{{Type: 1, Data: d}}})
  114. if err != nil {
  115. panic(err)
  116. }
  117. for seed := range s.nodes {
  118. if err := s.t.send(seed+raftPrefix, b); err != nil {
  119. log.Println(err)
  120. continue
  121. }
  122. // todo(xiangli) WAIT for join to be committed or retry...
  123. break
  124. }
  125. s.run()
  126. }
  127. func (s *Server) run() {
  128. node := s.node
  129. recv := s.t.recv
  130. ticker := time.NewTicker(s.tickDuration)
  131. v2SyncTicker := time.NewTicker(time.Millisecond * 500)
  132. var proposal chan v2Proposal
  133. for {
  134. if node.HasLeader() {
  135. proposal = s.proposal
  136. } else {
  137. proposal = nil
  138. }
  139. select {
  140. case p := <-proposal:
  141. node.Propose(p)
  142. case msg := <-recv:
  143. node.Step(*msg)
  144. case <-ticker.C:
  145. node.Tick()
  146. case <-v2SyncTicker.C:
  147. node.Sync()
  148. case <-s.stop:
  149. log.Printf("Node: %d stopped\n", s.id)
  150. return
  151. }
  152. s.apply(node.Next())
  153. s.send(node.Msgs())
  154. }
  155. }
  156. func (s *Server) apply(ents []raft.Entry) {
  157. offset := s.node.Applied() - int64(len(ents)) + 1
  158. for i, ent := range ents {
  159. switch ent.Type {
  160. // expose raft entry type
  161. case raft.Normal:
  162. if len(ent.Data) == 0 {
  163. continue
  164. }
  165. s.v2apply(offset+int64(i), ent)
  166. case raft.AddNode:
  167. cfg := new(raft.Config)
  168. if err := json.Unmarshal(ent.Data, cfg); err != nil {
  169. log.Println(err)
  170. break
  171. }
  172. if err := s.t.set(cfg.NodeId, cfg.Addr); err != nil {
  173. log.Println(err)
  174. break
  175. }
  176. log.Printf("Add Node %x %v %v\n", cfg.NodeId, cfg.Addr, string(cfg.Context))
  177. s.nodes[cfg.Addr] = true
  178. p := path.Join(v2machineKVPrefix, fmt.Sprint(cfg.NodeId))
  179. s.Store.Set(p, false, fmt.Sprintf("raft=%v&etcd=%v", cfg.Addr, string(cfg.Context)), store.Permanent)
  180. default:
  181. panic("unimplemented")
  182. }
  183. }
  184. }
  185. func (s *Server) send(msgs []raft.Message) {
  186. for i := range msgs {
  187. data, err := json.Marshal(msgs[i])
  188. if err != nil {
  189. // todo(xiangli): error handling
  190. log.Fatal(err)
  191. }
  192. // todo(xiangli): reuse routines and limit the number of sending routines
  193. // sync.Pool?
  194. go func(i int) {
  195. var err error
  196. if err = s.t.sendTo(msgs[i].To, data); err == nil {
  197. return
  198. }
  199. if err == errUnknownNode {
  200. err = s.fetchAddr(msgs[i].To)
  201. }
  202. if err == nil {
  203. err = s.t.sendTo(msgs[i].To, data)
  204. }
  205. if err != nil {
  206. log.Println(err)
  207. }
  208. }(i)
  209. }
  210. }
  211. func (s *Server) fetchAddr(nodeId int64) error {
  212. for seed := range s.nodes {
  213. if err := s.t.fetchAddr(seed, nodeId); err == nil {
  214. return nil
  215. }
  216. }
  217. return fmt.Errorf("cannot fetch the address of node %d", nodeId)
  218. }