etcd.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. package etcd
import (
	"crypto/tls"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"net/url"
	"path"
	"time"

	"github.com/coreos/etcd/config"
	etcdErr "github.com/coreos/etcd/error"
	"github.com/coreos/etcd/raft"
	"github.com/coreos/etcd/store"
)
// Raft tunables, store key prefixes and HTTP route prefixes used by Server.
const (
	// defaultHeartbeat and defaultElection are handed to raft.New when the
	// node is created; presumably both are expressed in ticks (confirm
	// against the raft package). defaultHeartbeat also scales the wait
	// timeout used by Add and Remove.
	defaultHeartbeat = 1
	defaultElection  = 5

	// maxBufferedProposal is the buffer size of the Server.proposal channel.
	maxBufferedProposal = 128
	// defaultTickDuration is the initial interval between raft ticks; it can
	// be overridden with SetTick.
	defaultTickDuration = time.Millisecond * 100

	// Store keys: machine entries are written under v2machineKVPrefix/<id>,
	// and ClusterConfig reads the cluster configuration from
	// v2configKVPrefix.
	v2machineKVPrefix = "/_etcd/machines"
	v2configKVPrefix  = "/_etcd/config"

	// HTTP route prefixes registered on the mux built by New.
	v2Prefix              = "/v2/keys"
	v2machinePrefix       = "/v2/machines"
	v2peersPrefix         = "/v2/peers"
	v2LeaderPrefix        = "/v2/leader"
	v2StoreStatsPrefix    = "/v2/stats/store"
	v2adminConfigPrefix   = "/v2/admin/config"
	v2adminMachinesPrefix = "/v2/admin/machines/"

	// raftPrefix is not referenced in the code visible here; presumably it is
	// where RaftHandler is mounted — confirm against the caller.
	raftPrefix = "/raft"
)
// Values of Server.mode; Server.run dispatches on them.
const (
	// participant makes run call runParticipant. It is the zero value, so a
	// freshly built Server starts in this mode.
	participant = iota
	// standby makes run call runStandby, which is not implemented yet.
	standby
	// stop makes run return; it is set by Server.Stop.
	stop
)
// Errors returned by Server.Add and Server.Remove.
var (
	// tmpErr is returned when a membership change could not be confirmed
	// (watch failure, unexpected event, or timeout).
	tmpErr = fmt.Errorf("try again")
	// serverStopErr is returned when the server's stop channel fires while a
	// membership change is in flight.
	serverStopErr = fmt.Errorf("server is stopped")
)
// Server ties together the raft node, the key-value store, the peer
// transport and the v2 HTTP API of a single etcd node.
type Server struct {
	// config is the configuration passed to New after Sanitize.
	config *config.Config

	// mode is one of participant, standby or stop and drives run.
	mode int

	// id is this node's identifier, as passed to raft.New.
	id int64
	// pubAddr is taken from config.Addr and raftPubAddr from
	// config.Peer.Addr.
	pubAddr     string
	raftPubAddr string

	// nodes is a set of known peer addresses: seeded from config.Peers,
	// updated as membership entries are applied, and used as lookup seeds
	// by Join and fetchAddr.
	nodes map[string]bool
	// tickDuration is the interval of the ticker that drives node.Tick.
	tickDuration time.Duration

	// proposal carries client proposals into the participant loop.
	proposal chan v2Proposal
	// node is the raft state machine wrapper.
	node *v2Raft
	// addNodeC and removeNodeC carry membership changes requested by Add
	// and Remove into the participant loop.
	addNodeC    chan raft.Config
	removeNodeC chan raft.Config
	// t sends and receives raft messages; it is also the handler returned
	// by RaftHandler.
	t *transporter
	// client is used by Join to register this node with a seed peer.
	client *v2client

	// Store is the embedded key-value store.
	store.Store

	// stop is closed by Stop to unblock the run loop and pending waits.
	stop chan struct{}

	// Handler is the embedded v2 HTTP mux assembled in New.
	http.Handler
}
  58. func New(c *config.Config, id int64) *Server {
  59. if err := c.Sanitize(); err != nil {
  60. log.Fatalf("failed sanitizing configuration: %v", err)
  61. }
  62. tc := &tls.Config{
  63. InsecureSkipVerify: true,
  64. }
  65. var err error
  66. if c.PeerTLSInfo().Scheme() == "https" {
  67. tc, err = c.PeerTLSInfo().ClientConfig()
  68. if err != nil {
  69. log.Fatal("failed to create raft transporter tls:", err)
  70. }
  71. }
  72. s := &Server{
  73. config: c,
  74. id: id,
  75. pubAddr: c.Addr,
  76. raftPubAddr: c.Peer.Addr,
  77. nodes: make(map[string]bool),
  78. tickDuration: defaultTickDuration,
  79. proposal: make(chan v2Proposal, maxBufferedProposal),
  80. node: &v2Raft{
  81. Node: raft.New(id, defaultHeartbeat, defaultElection),
  82. result: make(map[wait]chan interface{}),
  83. },
  84. addNodeC: make(chan raft.Config),
  85. removeNodeC: make(chan raft.Config),
  86. t: newTransporter(tc),
  87. client: newClient(tc),
  88. Store: store.New(),
  89. stop: make(chan struct{}),
  90. }
  91. for _, seed := range c.Peers {
  92. s.nodes[seed] = true
  93. }
  94. m := http.NewServeMux()
  95. m.Handle(v2Prefix+"/", handlerErr(s.serveValue))
  96. m.Handle(v2machinePrefix, handlerErr(s.serveMachines))
  97. m.Handle(v2peersPrefix, handlerErr(s.serveMachines))
  98. m.Handle(v2LeaderPrefix, handlerErr(s.serveLeader))
  99. m.Handle(v2StoreStatsPrefix, handlerErr(s.serveStoreStats))
  100. m.Handle(v2adminConfigPrefix, handlerErr(s.serveAdminConfig))
  101. m.Handle(v2adminMachinesPrefix, handlerErr(s.serveAdminMachines))
  102. s.Handler = m
  103. return s
  104. }
  105. func (s *Server) SetTick(d time.Duration) {
  106. s.tickDuration = d
  107. }
  108. func (s *Server) RaftHandler() http.Handler {
  109. return s.t
  110. }
  111. func (s *Server) ClusterConfig() *config.ClusterConfig {
  112. c := config.NewClusterConfig()
  113. // This is used for backward compatibility because it doesn't
  114. // set cluster config in older version.
  115. if e, err := s.Get(v2configKVPrefix, false, false); err == nil {
  116. json.Unmarshal([]byte(*e.Node.Value), c)
  117. }
  118. return c
  119. }
  120. func (s *Server) Run() {
  121. if len(s.config.Peers) == 0 {
  122. s.Bootstrap()
  123. } else {
  124. s.Join()
  125. }
  126. }
  127. func (s *Server) Stop() {
  128. if s.mode == stop {
  129. return
  130. }
  131. s.mode = stop
  132. s.t.stop()
  133. close(s.stop)
  134. }
  135. func (s *Server) Bootstrap() {
  136. log.Println("starting a bootstrap node")
  137. s.node.Campaign()
  138. s.node.Add(s.id, s.raftPubAddr, []byte(s.pubAddr))
  139. s.apply(s.node.Next())
  140. s.run()
  141. }
  142. func (s *Server) Join() {
  143. log.Println("joining cluster via peers", s.config.Peers)
  144. info := &context{
  145. MinVersion: store.MinVersion(),
  146. MaxVersion: store.MaxVersion(),
  147. ClientURL: s.pubAddr,
  148. PeerURL: s.raftPubAddr,
  149. }
  150. url := ""
  151. for i := 0; i < 5; i++ {
  152. for seed := range s.nodes {
  153. if err := s.client.AddMachine(seed, fmt.Sprint(s.id), info); err == nil {
  154. url = seed
  155. break
  156. } else {
  157. log.Println(err)
  158. }
  159. }
  160. if url != "" {
  161. break
  162. }
  163. time.Sleep(100 * time.Millisecond)
  164. }
  165. s.nodes = map[string]bool{url: true}
  166. s.run()
  167. }
  168. func (s *Server) Add(id int64, raftPubAddr string, pubAddr string) error {
  169. p := path.Join(v2machineKVPrefix, fmt.Sprint(id))
  170. _, err := s.Get(p, false, false)
  171. if err == nil {
  172. return nil
  173. }
  174. if v, ok := err.(*etcdErr.Error); !ok || v.ErrorCode != etcdErr.EcodeKeyNotFound {
  175. return err
  176. }
  177. w, err := s.Watch(p, true, false, 0)
  178. if err != nil {
  179. log.Println("add error:", err)
  180. return tmpErr
  181. }
  182. select {
  183. case s.addNodeC <- raft.Config{NodeId: id, Addr: raftPubAddr, Context: []byte(pubAddr)}:
  184. case <-s.stop:
  185. return serverStopErr
  186. }
  187. select {
  188. case v := <-w.EventChan:
  189. if v.Action == store.Set {
  190. return nil
  191. }
  192. log.Println("add error: action =", v.Action)
  193. return tmpErr
  194. case <-time.After(4 * defaultHeartbeat * s.tickDuration):
  195. w.Remove()
  196. log.Println("add error: wait timeout")
  197. return tmpErr
  198. case <-s.stop:
  199. w.Remove()
  200. return serverStopErr
  201. }
  202. }
  203. func (s *Server) Remove(id int64) error {
  204. p := path.Join(v2machineKVPrefix, fmt.Sprint(id))
  205. v, err := s.Get(p, false, false)
  206. if err != nil {
  207. return nil
  208. }
  209. select {
  210. case s.removeNodeC <- raft.Config{NodeId: id}:
  211. case <-s.stop:
  212. return serverStopErr
  213. }
  214. // TODO(xiangli): do not need to watch if the
  215. // removal target is self
  216. w, err := s.Watch(p, true, false, v.Index()+1)
  217. if err != nil {
  218. log.Println("remove error:", err)
  219. return tmpErr
  220. }
  221. select {
  222. case v := <-w.EventChan:
  223. if v.Action == store.Delete {
  224. return nil
  225. }
  226. log.Println("remove error: action =", v.Action)
  227. return tmpErr
  228. case <-time.After(4 * defaultHeartbeat * s.tickDuration):
  229. w.Remove()
  230. log.Println("remove error: wait timeout")
  231. return tmpErr
  232. case <-s.stop:
  233. w.Remove()
  234. return serverStopErr
  235. }
  236. }
  237. func (s *Server) run() {
  238. for {
  239. switch s.mode {
  240. case participant:
  241. s.runParticipant()
  242. case standby:
  243. s.runStandby()
  244. case stop:
  245. return
  246. default:
  247. panic("unsupport mode")
  248. }
  249. }
  250. }
  251. func (s *Server) runParticipant() {
  252. node := s.node
  253. recv := s.t.recv
  254. ticker := time.NewTicker(s.tickDuration)
  255. v2SyncTicker := time.NewTicker(time.Millisecond * 500)
  256. defer node.StopProposalWaiters()
  257. var proposal chan v2Proposal
  258. var addNodeC, removeNodeC chan raft.Config
  259. for {
  260. if node.HasLeader() {
  261. proposal = s.proposal
  262. addNodeC = s.addNodeC
  263. removeNodeC = s.removeNodeC
  264. } else {
  265. proposal = nil
  266. addNodeC = nil
  267. removeNodeC = nil
  268. }
  269. select {
  270. case p := <-proposal:
  271. node.Propose(p)
  272. case c := <-addNodeC:
  273. node.UpdateConf(raft.AddNode, &c)
  274. case c := <-removeNodeC:
  275. node.UpdateConf(raft.RemoveNode, &c)
  276. case msg := <-recv:
  277. node.Step(*msg)
  278. case <-ticker.C:
  279. node.Tick()
  280. case <-v2SyncTicker.C:
  281. node.Sync()
  282. case <-s.stop:
  283. log.Printf("Node: %d stopped\n", s.id)
  284. return
  285. }
  286. s.apply(node.Next())
  287. s.send(node.Msgs())
  288. if node.IsRemoved() {
  289. // TODO: delete it after standby is implemented
  290. log.Printf("Node: %d removed from participants\n", s.id)
  291. s.Stop()
  292. return
  293. }
  294. }
  295. }
  296. func (s *Server) runStandby() {
  297. panic("unimplemented")
  298. }
  299. func (s *Server) apply(ents []raft.Entry) {
  300. offset := s.node.Applied() - int64(len(ents)) + 1
  301. for i, ent := range ents {
  302. switch ent.Type {
  303. // expose raft entry type
  304. case raft.Normal:
  305. if len(ent.Data) == 0 {
  306. continue
  307. }
  308. s.v2apply(offset+int64(i), ent)
  309. case raft.AddNode:
  310. cfg := new(raft.Config)
  311. if err := json.Unmarshal(ent.Data, cfg); err != nil {
  312. log.Println(err)
  313. break
  314. }
  315. if err := s.t.set(cfg.NodeId, cfg.Addr); err != nil {
  316. log.Println(err)
  317. break
  318. }
  319. log.Printf("Add Node %x %v %v\n", cfg.NodeId, cfg.Addr, string(cfg.Context))
  320. p := path.Join(v2machineKVPrefix, fmt.Sprint(cfg.NodeId))
  321. if _, err := s.Store.Set(p, false, fmt.Sprintf("raft=%v&etcd=%v", cfg.Addr, string(cfg.Context)), store.Permanent); err == nil {
  322. s.nodes[cfg.Addr] = true
  323. }
  324. case raft.RemoveNode:
  325. cfg := new(raft.Config)
  326. if err := json.Unmarshal(ent.Data, cfg); err != nil {
  327. log.Println(err)
  328. break
  329. }
  330. log.Printf("Remove Node %x\n", cfg.NodeId)
  331. p := path.Join(v2machineKVPrefix, fmt.Sprint(cfg.NodeId))
  332. if _, err := s.Store.Delete(p, false, false); err == nil {
  333. delete(s.nodes, cfg.Addr)
  334. }
  335. default:
  336. panic("unimplemented")
  337. }
  338. }
  339. }
  340. func (s *Server) send(msgs []raft.Message) {
  341. for i := range msgs {
  342. data, err := json.Marshal(msgs[i])
  343. if err != nil {
  344. // todo(xiangli): error handling
  345. log.Fatal(err)
  346. }
  347. // todo(xiangli): reuse routines and limit the number of sending routines
  348. // sync.Pool?
  349. go func(i int) {
  350. var err error
  351. if err = s.t.sendTo(msgs[i].To, data); err == nil {
  352. return
  353. }
  354. if err == errUnknownNode {
  355. err = s.fetchAddr(msgs[i].To)
  356. }
  357. if err == nil {
  358. err = s.t.sendTo(msgs[i].To, data)
  359. }
  360. if err != nil {
  361. log.Println(err)
  362. }
  363. }(i)
  364. }
  365. }
  366. func (s *Server) fetchAddr(nodeId int64) error {
  367. for seed := range s.nodes {
  368. if err := s.t.fetchAddr(seed, nodeId); err == nil {
  369. return nil
  370. }
  371. }
  372. return fmt.Errorf("cannot fetch the address of node %d", nodeId)
  373. }