etcd.go

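// Package etcd implements the etcd server: a raft-backed cluster member that
// serves the v2 key-value HTTP API.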
package etcd

import (
    "crypto/tls"
    "encoding/json"
    "fmt"
    "log"
    "net/http"
    "path"
    "time"

    "github.com/coreos/etcd/config"
    "github.com/coreos/etcd/raft"
    "github.com/coreos/etcd/store"
)

const (
    // raft timing: heartbeat and election timeouts are measured in ticks,
    // one tick per defaultTickDuration.
    defaultHeartbeat    = 1
    defaultElection     = 5
    defaultTickDuration = time.Millisecond * 100

    v2machineKVPrefix     = "/_etcd/machines"
    v2Prefix              = "/v2/keys"
    v2machinePrefix       = "/v2/machines"
    v2peersPrefix         = "/v2/peers"
    v2LeaderPrefix        = "/v2/leader"
    v2StoreStatsPrefix    = "/v2/stats/store"
    v2configKVPrefix      = "/_etcd/config"
    v2adminConfigPrefix   = "/v2/admin/config"
    v2adminMachinesPrefix = "/v2/admin/machines/"

    raftPrefix = "/raft"
)
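
// Server run modes: a participant takes part in raft consensus and serves
// client requests; standby is reserved for members outside the consensus set
// (not implemented yet, see runStandby); stop terminates the run loop.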
const (
    participant = iota
    standby
    stop
)

type Server struct {
    config       *config.Config
    mode         int
    id           int64
    pubAddr      string
    raftPubAddr  string
    nodes        map[string]bool
    tickDuration time.Duration
    proposal     chan v2Proposal
    node         *v2Raft
    t            *transporter
    store.Store
    stop chan struct{}
    http.Handler
}
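
// New creates a Server with the given node id: it sanitizes the config, sets
// up TLS for the raft transporter when peers use https, builds the raft node
// and the in-memory v2 store, and wires up the client-facing HTTP handlers.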
func New(c *config.Config, id int64) *Server {
    if err := c.Sanitize(); err != nil {
        log.Fatalf("failed sanitizing configuration: %v", err)
    }

    tc := &tls.Config{
        InsecureSkipVerify: true,
    }
    var err error
    if c.PeerTLSInfo().Scheme() == "https" {
        tc, err = c.PeerTLSInfo().ClientConfig()
        if err != nil {
            log.Fatal("failed to create raft transporter tls:", err)
        }
    }

    s := &Server{
        config:       c,
        id:           id,
        pubAddr:      c.Addr,
        raftPubAddr:  c.Peer.Addr,
        nodes:        make(map[string]bool),
        tickDuration: defaultTickDuration,
        proposal:     make(chan v2Proposal),
        node: &v2Raft{
            Node:   raft.New(id, defaultHeartbeat, defaultElection),
            result: make(map[wait]chan interface{}),
        },
        t:     newTransporter(tc),
        Store: store.New(),
        stop:  make(chan struct{}),
    }

    for _, seed := range c.Peers {
        s.nodes[seed] = true
    }

    m := http.NewServeMux()
    m.Handle(v2Prefix+"/", handlerErr(s.serveValue))
    m.Handle(v2machinePrefix, handlerErr(s.serveMachines))
    m.Handle(v2peersPrefix, handlerErr(s.serveMachines))
    m.Handle(v2LeaderPrefix, handlerErr(s.serveLeader))
    m.Handle(v2StoreStatsPrefix, handlerErr(s.serveStoreStats))
    m.Handle(v2adminConfigPrefix, handlerErr(s.serveAdminConfig))
    m.Handle(v2adminMachinesPrefix, handlerErr(s.serveAdminMachines))
    s.Handler = m
    return s
}
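
// SetTick overrides the default raft tick interval.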
func (s *Server) SetTick(d time.Duration) {
    s.tickDuration = d
}
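
// RaftHandler returns the HTTP handler that serves peer (raft) traffic.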
func (s *Server) RaftHandler() http.Handler {
    return s.t
}
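
// ClusterConfig returns the cluster-wide configuration stored under
// v2configKVPrefix, falling back to the defaults when the key is absent.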
func (s *Server) ClusterConfig() *config.ClusterConfig {
    c := config.NewClusterConfig()
    // Fall back to the defaults for backward compatibility: older versions
    // do not write a cluster config into the store.
    if e, err := s.Get(v2configKVPrefix, false, false); err == nil {
        json.Unmarshal([]byte(*e.Node.Value), c)
    }
    return c
}
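
// Run starts the server: it bootstraps a new cluster when no peers are
// configured, and joins an existing cluster otherwise.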
func (s *Server) Run() {
    if len(s.config.Peers) == 0 {
        s.Bootstrap()
    } else {
        s.Join()
    }
}
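
// Stop shuts down the run loop and the raft transporter.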
func (s *Server) Stop() {
    close(s.stop)
    s.t.stop()
}
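
// Bootstrap starts a single-node cluster: the node campaigns for leadership,
// adds itself to the configuration, and enters the run loop.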
func (s *Server) Bootstrap() {
    log.Println("starting a bootstrap node")
    s.node.Campaign()
    s.node.Add(s.id, s.raftPubAddr, []byte(s.pubAddr))
    s.apply(s.node.Next())
    s.run()
}
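
// Join proposes adding this node to an existing cluster through one of the
// configured peers, then enters the run loop.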
func (s *Server) Join() {
    log.Println("joining cluster via peers", s.config.Peers)
    d, err := json.Marshal(&raft.Config{NodeId: s.id, Addr: s.raftPubAddr, Context: []byte(s.pubAddr)})
    if err != nil {
        panic(err)
    }
    b, err := json.Marshal(&raft.Message{From: s.id, Type: 2, Entries: []raft.Entry{{Type: raft.AddNode, Data: d}}})
    if err != nil {
        panic(err)
    }
    for seed := range s.nodes {
        if err := s.t.send(seed+raftPrefix, b); err != nil {
            log.Println(err)
            continue
        }
        // todo(xiangli) WAIT for join to be committed or retry...
        break
    }
    s.run()
}
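
// Remove proposes removal of the node with the given id by sending a
// remove-node entry to the local raft endpoint.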
func (s *Server) Remove(id int) {
    d, err := json.Marshal(&raft.Config{NodeId: int64(id)})
    if err != nil {
        panic(err)
    }
    b, err := json.Marshal(&raft.Message{From: s.id, Type: 2, Entries: []raft.Entry{{Type: raft.RemoveNode, Data: d}}})
    if err != nil {
        panic(err)
    }
    if err := s.t.send(s.raftPubAddr+raftPrefix, b); err != nil {
        log.Println(err)
    }
    // todo(xiangli) WAIT for remove to be committed or retry...
}
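
// run drives the server state machine, switching between participant and
// standby modes until the server is stopped.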
func (s *Server) run() {
    for {
        switch s.mode {
        case participant:
            s.runParticipant()
        case standby:
            s.runStandby()
        case stop:
            return
        default:
            panic("unsupported mode")
        }
    }
}
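
// runParticipant is the main loop of a consensus member: it feeds proposals,
// peer messages, and ticks into the raft node, applies committed entries, and
// sends outgoing messages until the node is stopped or removed.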
func (s *Server) runParticipant() {
    node := s.node
    recv := s.t.recv
    ticker := time.NewTicker(s.tickDuration)
    defer ticker.Stop()
    v2SyncTicker := time.NewTicker(time.Millisecond * 500)
    defer v2SyncTicker.Stop()

    var proposal chan v2Proposal
    for {
        if node.HasLeader() {
            proposal = s.proposal
        } else {
            proposal = nil
        }
        select {
        case p := <-proposal:
            node.Propose(p)
        case msg := <-recv:
            node.Step(*msg)
        case <-ticker.C:
            node.Tick()
        case <-v2SyncTicker.C:
            node.Sync()
        case <-s.stop:
            log.Printf("Node: %d stopped\n", s.id)
            s.mode = stop
            return
        }
        s.apply(node.Next())
        s.send(node.Msgs())
        if node.IsRemoved() {
            // TODO: delete it after standby is implemented
            s.mode = stop
            log.Printf("Node: %d removed from participants\n", s.id)
            return
        }
    }
}
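
// runStandby is a placeholder: standby mode is not implemented yet.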
func (s *Server) runStandby() {
    panic("unimplemented")
}
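
// apply takes the entries committed by raft and applies them to the server:
// normal entries go to the v2 store, and configuration entries add or remove
// cluster members.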
func (s *Server) apply(ents []raft.Entry) {
    offset := s.node.Applied() - int64(len(ents)) + 1
    for i, ent := range ents {
        switch ent.Type {
        // expose raft entry type
        case raft.Normal:
            if len(ent.Data) == 0 {
                continue
            }
            s.v2apply(offset+int64(i), ent)
        case raft.AddNode:
            cfg := new(raft.Config)
            if err := json.Unmarshal(ent.Data, cfg); err != nil {
                log.Println(err)
                break
            }
            if err := s.t.set(cfg.NodeId, cfg.Addr); err != nil {
                log.Println(err)
                break
            }
            log.Printf("Add Node %x %v %v\n", cfg.NodeId, cfg.Addr, string(cfg.Context))
            s.nodes[cfg.Addr] = true
            p := path.Join(v2machineKVPrefix, fmt.Sprint(cfg.NodeId))
            s.Store.Set(p, false, fmt.Sprintf("raft=%v&etcd=%v", cfg.Addr, string(cfg.Context)), store.Permanent)
        case raft.RemoveNode:
            cfg := new(raft.Config)
            if err := json.Unmarshal(ent.Data, cfg); err != nil {
                log.Println(err)
                break
            }
            log.Printf("Remove Node %x\n", cfg.NodeId)
            p := path.Join(v2machineKVPrefix, fmt.Sprint(cfg.NodeId))
            s.Store.Delete(p, false, false)
        default:
            panic("unimplemented")
        }
    }
}
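
// send marshals each outgoing raft message and delivers it on its own
// goroutine, resolving unknown peer addresses from the seed nodes on demand.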
func (s *Server) send(msgs []raft.Message) {
    for i := range msgs {
        data, err := json.Marshal(msgs[i])
        if err != nil {
            // todo(xiangli): error handling
            log.Fatal(err)
        }
        // todo(xiangli): reuse routines and limit the number of sending routines
        // sync.Pool?
        go func(i int) {
            var err error
            if err = s.t.sendTo(msgs[i].To, data); err == nil {
                return
            }
            if err == errUnknownNode {
                err = s.fetchAddr(msgs[i].To)
            }
            if err == nil {
                err = s.t.sendTo(msgs[i].To, data)
            }
            if err != nil {
                log.Println(err)
            }
        }(i)
    }
}
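
// fetchAddr asks the known seed nodes for the address of nodeId, returning an
// error only if every seed fails.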
func (s *Server) fetchAddr(nodeId int64) error {
    for seed := range s.nodes {
        if err := s.t.fetchAddr(seed, nodeId); err == nil {
            return nil
        }
    }
    return fmt.Errorf("cannot fetch the address of node %d", nodeId)
}