etcd.go 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. package etcd
  2. import (
  3. "crypto/tls"
  4. "encoding/json"
  5. "fmt"
  6. "log"
  7. "net/http"
  8. "path"
  9. "time"
  10. "github.com/coreos/etcd/config"
  11. etcdErr "github.com/coreos/etcd/error"
  12. "github.com/coreos/etcd/raft"
  13. "github.com/coreos/etcd/store"
  14. )
  15. const (
  16. defaultHeartbeat = 1
  17. defaultElection = 5
  18. defaultTickDuration = time.Millisecond * 100
  19. v2machineKVPrefix = "/_etcd/machines"
  20. v2configKVPrefix = "/_etcd/config"
  21. v2Prefix = "/v2/keys"
  22. v2machinePrefix = "/v2/machines"
  23. v2peersPrefix = "/v2/peers"
  24. v2LeaderPrefix = "/v2/leader"
  25. v2StoreStatsPrefix = "/v2/stats/store"
  26. v2adminConfigPrefix = "/v2/admin/config"
  27. v2adminMachinesPrefix = "/v2/admin/machines/"
  28. raftPrefix = "/raft"
  29. )
  30. const (
  31. participant = iota
  32. standby
  33. stop
  34. )
  35. type Server struct {
  36. config *config.Config
  37. mode int
  38. id int64
  39. pubAddr string
  40. raftPubAddr string
  41. nodes map[string]bool
  42. tickDuration time.Duration
  43. proposal chan v2Proposal
  44. node *v2Raft
  45. addNodeC chan raft.Config
  46. removeNodeC chan raft.Config
  47. t *transporter
  48. client *v2client
  49. store.Store
  50. stop chan struct{}
  51. http.Handler
  52. }
  53. func New(c *config.Config, id int64) *Server {
  54. if err := c.Sanitize(); err != nil {
  55. log.Fatalf("failed sanitizing configuration: %v", err)
  56. }
  57. tc := &tls.Config{
  58. InsecureSkipVerify: true,
  59. }
  60. var err error
  61. if c.PeerTLSInfo().Scheme() == "https" {
  62. tc, err = c.PeerTLSInfo().ClientConfig()
  63. if err != nil {
  64. log.Fatal("failed to create raft transporter tls:", err)
  65. }
  66. }
  67. s := &Server{
  68. config: c,
  69. id: id,
  70. pubAddr: c.Addr,
  71. raftPubAddr: c.Peer.Addr,
  72. nodes: make(map[string]bool),
  73. tickDuration: defaultTickDuration,
  74. proposal: make(chan v2Proposal),
  75. node: &v2Raft{
  76. Node: raft.New(id, defaultHeartbeat, defaultElection),
  77. result: make(map[wait]chan interface{}),
  78. },
  79. addNodeC: make(chan raft.Config),
  80. removeNodeC: make(chan raft.Config),
  81. t: newTransporter(tc),
  82. client: newClient(tc),
  83. Store: store.New(),
  84. stop: make(chan struct{}),
  85. }
  86. for _, seed := range c.Peers {
  87. s.nodes[seed] = true
  88. }
  89. m := http.NewServeMux()
  90. m.Handle(v2Prefix+"/", handlerErr(s.serveValue))
  91. m.Handle(v2machinePrefix, handlerErr(s.serveMachines))
  92. m.Handle(v2peersPrefix, handlerErr(s.serveMachines))
  93. m.Handle(v2LeaderPrefix, handlerErr(s.serveLeader))
  94. m.Handle(v2StoreStatsPrefix, handlerErr(s.serveStoreStats))
  95. m.Handle(v2adminConfigPrefix, handlerErr(s.serveAdminConfig))
  96. m.Handle(v2adminMachinesPrefix, handlerErr(s.serveAdminMachines))
  97. s.Handler = m
  98. return s
  99. }
  100. func (s *Server) SetTick(d time.Duration) {
  101. s.tickDuration = d
  102. }
  103. func (s *Server) RaftHandler() http.Handler {
  104. return s.t
  105. }
  106. func (s *Server) ClusterConfig() *config.ClusterConfig {
  107. c := config.NewClusterConfig()
  108. // This is used for backward compatibility because it doesn't
  109. // set cluster config in older version.
  110. if e, err := s.Get(v2configKVPrefix, false, false); err == nil {
  111. json.Unmarshal([]byte(*e.Node.Value), c)
  112. }
  113. return c
  114. }
  115. func (s *Server) Run() {
  116. if len(s.config.Peers) == 0 {
  117. s.Bootstrap()
  118. } else {
  119. s.Join()
  120. }
  121. }
  122. func (s *Server) Stop() {
  123. close(s.stop)
  124. s.t.stop()
  125. }
  126. func (s *Server) Bootstrap() {
  127. log.Println("starting a bootstrap node")
  128. s.node.Campaign()
  129. s.node.Add(s.id, s.raftPubAddr, []byte(s.pubAddr))
  130. s.apply(s.node.Next())
  131. s.run()
  132. }
  133. func (s *Server) Join() {
  134. log.Println("joining cluster via peers", s.config.Peers)
  135. info := &context{
  136. MinVersion: store.MinVersion(),
  137. MaxVersion: store.MaxVersion(),
  138. ClientURL: s.pubAddr,
  139. PeerURL: s.raftPubAddr,
  140. }
  141. url := ""
  142. for i := 0; i < 5; i++ {
  143. for seed := range s.nodes {
  144. if err := s.client.AddMachine(seed, fmt.Sprint(s.id), info); err == nil {
  145. url = seed
  146. break
  147. } else {
  148. log.Println(err)
  149. }
  150. }
  151. if url != "" {
  152. break
  153. }
  154. time.Sleep(100 * time.Millisecond)
  155. }
  156. s.nodes = map[string]bool{url: true}
  157. s.run()
  158. }
  159. func (s *Server) Add(id int64, raftPubAddr string, pubAddr string) error {
  160. p := path.Join(v2machineKVPrefix, fmt.Sprint(id))
  161. index := s.Index()
  162. _, err := s.Get(p, false, false)
  163. if err == nil {
  164. return fmt.Errorf("existed node")
  165. }
  166. if v, ok := err.(*etcdErr.Error); !ok || v.ErrorCode != etcdErr.EcodeKeyNotFound {
  167. return err
  168. }
  169. for {
  170. if s.mode == stop {
  171. return fmt.Errorf("server is stopped")
  172. }
  173. s.addNodeC <- raft.Config{NodeId: id, Addr: raftPubAddr, Context: []byte(pubAddr)}
  174. w, err := s.Watch(p, true, false, index+1)
  175. if err != nil {
  176. return err
  177. }
  178. select {
  179. case v := <-w.EventChan:
  180. if v.Action == store.Set {
  181. return nil
  182. }
  183. index = v.Index()
  184. case <-time.After(4 * defaultHeartbeat * s.tickDuration):
  185. }
  186. }
  187. }
  188. func (s *Server) Remove(id int64) error {
  189. p := path.Join(v2machineKVPrefix, fmt.Sprint(id))
  190. index := s.Index()
  191. if _, err := s.Get(p, false, false); err != nil {
  192. return err
  193. }
  194. for {
  195. if s.mode == stop {
  196. return fmt.Errorf("server is stopped")
  197. }
  198. s.removeNodeC <- raft.Config{NodeId: id}
  199. w, err := s.Watch(p, true, false, index+1)
  200. if err != nil {
  201. return err
  202. }
  203. select {
  204. case v := <-w.EventChan:
  205. if v.Action == store.Delete {
  206. return nil
  207. }
  208. index = v.Index()
  209. case <-time.After(4 * defaultHeartbeat * s.tickDuration):
  210. }
  211. }
  212. }
  213. func (s *Server) run() {
  214. for {
  215. switch s.mode {
  216. case participant:
  217. s.runParticipant()
  218. case standby:
  219. s.runStandby()
  220. case stop:
  221. return
  222. default:
  223. panic("unsupport mode")
  224. }
  225. }
  226. }
  227. func (s *Server) runParticipant() {
  228. node := s.node
  229. addNodeC := s.addNodeC
  230. removeNodeC := s.removeNodeC
  231. recv := s.t.recv
  232. ticker := time.NewTicker(s.tickDuration)
  233. v2SyncTicker := time.NewTicker(time.Millisecond * 500)
  234. var proposal chan v2Proposal
  235. for {
  236. if node.HasLeader() {
  237. proposal = s.proposal
  238. } else {
  239. proposal = nil
  240. }
  241. select {
  242. case p := <-proposal:
  243. node.Propose(p)
  244. case c := <-addNodeC:
  245. node.UpdateConf(raft.AddNode, &c)
  246. case c := <-removeNodeC:
  247. node.UpdateConf(raft.RemoveNode, &c)
  248. case msg := <-recv:
  249. node.Step(*msg)
  250. case <-ticker.C:
  251. node.Tick()
  252. case <-v2SyncTicker.C:
  253. node.Sync()
  254. case <-s.stop:
  255. log.Printf("Node: %d stopped\n", s.id)
  256. s.mode = stop
  257. return
  258. }
  259. s.apply(node.Next())
  260. s.send(node.Msgs())
  261. if node.IsRemoved() {
  262. // TODO: delete it after standby is implemented
  263. s.mode = stop
  264. log.Printf("Node: %d removed from participants\n", s.id)
  265. return
  266. }
  267. }
  268. }
  269. func (s *Server) runStandby() {
  270. panic("unimplemented")
  271. }
  272. func (s *Server) apply(ents []raft.Entry) {
  273. offset := s.node.Applied() - int64(len(ents)) + 1
  274. for i, ent := range ents {
  275. switch ent.Type {
  276. // expose raft entry type
  277. case raft.Normal:
  278. if len(ent.Data) == 0 {
  279. continue
  280. }
  281. s.v2apply(offset+int64(i), ent)
  282. case raft.AddNode:
  283. cfg := new(raft.Config)
  284. if err := json.Unmarshal(ent.Data, cfg); err != nil {
  285. log.Println(err)
  286. break
  287. }
  288. if err := s.t.set(cfg.NodeId, cfg.Addr); err != nil {
  289. log.Println(err)
  290. break
  291. }
  292. log.Printf("Add Node %x %v %v\n", cfg.NodeId, cfg.Addr, string(cfg.Context))
  293. p := path.Join(v2machineKVPrefix, fmt.Sprint(cfg.NodeId))
  294. if _, err := s.Store.Set(p, false, fmt.Sprintf("raft=%v&etcd=%v", cfg.Addr, string(cfg.Context)), store.Permanent); err == nil {
  295. s.nodes[cfg.Addr] = true
  296. }
  297. case raft.RemoveNode:
  298. cfg := new(raft.Config)
  299. if err := json.Unmarshal(ent.Data, cfg); err != nil {
  300. log.Println(err)
  301. break
  302. }
  303. log.Printf("Remove Node %x\n", cfg.NodeId)
  304. p := path.Join(v2machineKVPrefix, fmt.Sprint(cfg.NodeId))
  305. if _, err := s.Store.Delete(p, false, false); err == nil {
  306. delete(s.nodes, cfg.Addr)
  307. }
  308. default:
  309. panic("unimplemented")
  310. }
  311. }
  312. }
  313. func (s *Server) send(msgs []raft.Message) {
  314. for i := range msgs {
  315. data, err := json.Marshal(msgs[i])
  316. if err != nil {
  317. // todo(xiangli): error handling
  318. log.Fatal(err)
  319. }
  320. // todo(xiangli): reuse routines and limit the number of sending routines
  321. // sync.Pool?
  322. go func(i int) {
  323. var err error
  324. if err = s.t.sendTo(msgs[i].To, data); err == nil {
  325. return
  326. }
  327. if err == errUnknownNode {
  328. err = s.fetchAddr(msgs[i].To)
  329. }
  330. if err == nil {
  331. err = s.t.sendTo(msgs[i].To, data)
  332. }
  333. if err != nil {
  334. log.Println(err)
  335. }
  336. }(i)
  337. }
  338. }
  339. func (s *Server) fetchAddr(nodeId int64) error {
  340. for seed := range s.nodes {
  341. if err := s.t.fetchAddr(seed, nodeId); err == nil {
  342. return nil
  343. }
  344. }
  345. return fmt.Errorf("cannot fetch the address of node %d", nodeId)
  346. }