control.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. package gocql
  2. import (
  3. "errors"
  4. "fmt"
  5. "log"
  6. "math/rand"
  7. "net"
  8. "sync"
  9. "sync/atomic"
  10. "time"
  11. )
  12. // Ensure that the atomic variable is aligned to a 64bit boundary
  13. // so that atomic operations can be applied on 32bit architectures.
  14. type controlConn struct {
  15. connecting uint64
  16. session *Session
  17. conn atomic.Value
  18. retry RetryPolicy
  19. closeWg sync.WaitGroup
  20. quit chan struct{}
  21. }
  22. func createControlConn(session *Session) *controlConn {
  23. control := &controlConn{
  24. session: session,
  25. quit: make(chan struct{}),
  26. retry: &SimpleRetryPolicy{NumRetries: 3},
  27. }
  28. control.conn.Store((*Conn)(nil))
  29. return control
  30. }
  31. func (c *controlConn) heartBeat() {
  32. defer c.closeWg.Done()
  33. for {
  34. select {
  35. case <-c.quit:
  36. return
  37. case <-time.After(5 * time.Second):
  38. }
  39. resp, err := c.writeFrame(&writeOptionsFrame{})
  40. if err != nil {
  41. goto reconn
  42. }
  43. switch resp.(type) {
  44. case *supportedFrame:
  45. continue
  46. case error:
  47. goto reconn
  48. default:
  49. panic(fmt.Sprintf("gocql: unknown frame in response to options: %T", resp))
  50. }
  51. reconn:
  52. c.reconnect(true)
  53. // time.Sleep(5 * time.Second)
  54. continue
  55. }
  56. }
  57. func (c *controlConn) connect(endpoints []string) error {
  58. // intial connection attmept, try to connect to each endpoint to get an initial
  59. // list of nodes.
  60. // shuffle endpoints so not all drivers will connect to the same initial
  61. // node.
  62. r := rand.New(rand.NewSource(time.Now().UnixNano()))
  63. perm := r.Perm(len(endpoints))
  64. shuffled := make([]string, len(endpoints))
  65. for i, endpoint := range endpoints {
  66. shuffled[perm[i]] = endpoint
  67. }
  68. connCfg, err := connConfig(c.session)
  69. if err != nil {
  70. return err
  71. }
  72. // store that we are not connected so that reconnect wont happen if we error
  73. atomic.StoreInt64(&c.connecting, -1)
  74. var (
  75. conn *Conn
  76. )
  77. for _, addr := range shuffled {
  78. conn, err = Connect(JoinHostPort(addr, c.session.cfg.Port), connCfg, c, c.session)
  79. if err != nil {
  80. log.Printf("gocql: unable to dial %v: %v\n", addr, err)
  81. continue
  82. }
  83. // we should fetch the initial ring here and update initial host data. So that
  84. // when we return from here we have a ring topology ready to go.
  85. break
  86. }
  87. if conn == nil {
  88. // this is fatal, not going to connect a session
  89. return err
  90. }
  91. c.conn.Store(conn)
  92. atomic.StoreInt64(&c.connecting, 0)
  93. c.closeWg.Add(1)
  94. go c.heartBeat()
  95. return nil
  96. }
  97. func (c *controlConn) reconnect(refreshring bool) {
  98. if !atomic.CompareAndSwapInt64(&c.connecting, 0, 1) {
  99. return
  100. }
  101. success := false
  102. defer func() {
  103. // debounce reconnect a little
  104. if success {
  105. go func() {
  106. time.Sleep(500 * time.Millisecond)
  107. atomic.StoreInt64(&c.connecting, 0)
  108. }()
  109. } else {
  110. atomic.StoreInt64(&c.connecting, 0)
  111. }
  112. }()
  113. oldConn := c.conn.Load().(*Conn)
  114. // TODO: should have our own roundrobbin for hosts so that we can try each
  115. // in succession and guantee that we get a different host each time.
  116. host, conn := c.session.pool.Pick(nil)
  117. if conn == nil {
  118. return
  119. }
  120. newConn, err := Connect(conn.addr, conn.cfg, c, c.session)
  121. if err != nil {
  122. host.Mark(err)
  123. // TODO: add log handler for things like this
  124. return
  125. }
  126. frame, err := c.writeFrame(&writeRegisterFrame{
  127. events: []string{"TOPOLOGY_CHANGE", "STATUS_CHANGE", "STATUS_CHANGE"},
  128. })
  129. if err != nil {
  130. host.Mark(err)
  131. return
  132. } else if _, ok := frame.(*readyFrame); !ok {
  133. log.Printf("gocql: unexpected frame in response to register: got %T: %v\n", frame, frame)
  134. return
  135. }
  136. host.Mark(nil)
  137. c.conn.Store(newConn)
  138. success = true
  139. if oldConn != nil {
  140. oldConn.Close()
  141. }
  142. if refreshring {
  143. c.session.hostSource.refreshRing()
  144. }
  145. }
  146. func (c *controlConn) HandleError(conn *Conn, err error, closed bool) {
  147. if !closed {
  148. return
  149. }
  150. oldConn := c.conn.Load().(*Conn)
  151. if oldConn != conn {
  152. return
  153. }
  154. c.reconnect(true)
  155. }
  156. func (c *controlConn) writeFrame(w frameWriter) (frame, error) {
  157. conn := c.conn.Load().(*Conn)
  158. if conn == nil {
  159. return nil, errNoControl
  160. }
  161. framer, err := conn.exec(w, nil)
  162. if err != nil {
  163. return nil, err
  164. }
  165. return framer.parseFrame()
  166. }
  167. func (c *controlConn) withConn(fn func(*Conn) *Iter) *Iter {
  168. const maxConnectAttempts = 5
  169. connectAttempts := 0
  170. for i := 0; i < maxConnectAttempts; i++ {
  171. conn := c.conn.Load().(*Conn)
  172. if conn == nil {
  173. if connectAttempts > maxConnectAttempts {
  174. break
  175. }
  176. connectAttempts++
  177. c.reconnect(false)
  178. continue
  179. }
  180. return fn(conn)
  181. }
  182. return &Iter{err: errNoControl}
  183. }
  184. // query will return nil if the connection is closed or nil
  185. func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter) {
  186. q := c.session.Query(statement, values...).Consistency(One)
  187. for {
  188. iter = c.withConn(func(conn *Conn) *Iter {
  189. return conn.executeQuery(q)
  190. })
  191. q.attempts++
  192. if iter.err == nil || !c.retry.Attempt(q) {
  193. break
  194. }
  195. }
  196. return
  197. }
  198. func (c *controlConn) fetchHostInfo(addr net.IP, port int) (*HostInfo, error) {
  199. // TODO(zariel): we should probably move this into host_source or atleast
  200. // share code with it.
  201. isLocal := c.addr() == addr.String()
  202. var fn func(*HostInfo) error
  203. if isLocal {
  204. fn = func(host *HostInfo) error {
  205. // TODO(zariel): should we fetch rpc_address from here?
  206. iter := c.query("SELECT data_center, rack, host_id, tokens FROM system.local WHERE key='local'")
  207. iter.Scan(&host.dataCenter, &host.rack, &host.hostId, &host.tokens)
  208. return iter.Close()
  209. }
  210. } else {
  211. fn = func(host *HostInfo) error {
  212. // TODO(zariel): should we fetch rpc_address from here?
  213. iter := c.query("SELECT data_center, rack, host_id, tokens FROM system.peers WHERE peer=?", addr)
  214. iter.Scan(&host.dataCenter, &host.rack, &host.hostId, &host.tokens)
  215. return iter.Close()
  216. }
  217. }
  218. host := &HostInfo{}
  219. if err := fn(host); err != nil {
  220. return nil, err
  221. }
  222. host.peer = addr.String()
  223. return host, nil
  224. }
  225. func (c *controlConn) awaitSchemaAgreement() error {
  226. return c.withConn(func(conn *Conn) *Iter {
  227. return &Iter{err: conn.awaitSchemaAgreement()}
  228. }).err
  229. }
  230. func (c *controlConn) addr() string {
  231. conn := c.conn.Load().(*Conn)
  232. if conn == nil {
  233. return ""
  234. }
  235. return conn.addr
  236. }
  237. func (c *controlConn) close() {
  238. // TODO: handle more gracefully
  239. close(c.quit)
  240. c.closeWg.Wait()
  241. conn := c.conn.Load().(*Conn)
  242. if conn != nil {
  243. conn.Close()
  244. }
  245. }
  var (
  	// errNoControl is reported when an operation needs the control
  	// connection but none is currently stored.
  	errNoControl = errors.New("gocql: no control connection available")
  )