control.go 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. package gocql
  2. import (
  3. "errors"
  4. "fmt"
  5. "log"
  6. "math/rand"
  7. "net"
  8. "sync"
  9. "sync/atomic"
  10. "time"
  11. )
  12. // Ensure that the atomic variable is aligned to a 64bit boundary
  13. // so that atomic operations can be applied on 32bit architectures.
  14. type controlConn struct {
  15. connecting uint64
  16. session *Session
  17. conn atomic.Value
  18. session *Session
  19. conn atomic.Value
  20. retry RetryPolicy
  21. closeWg sync.WaitGroup
  22. quit chan struct{}
  23. }
  24. func createControlConn(session *Session) *controlConn {
  25. control := &controlConn{
  26. session: session,
  27. quit: make(chan struct{}),
  28. retry: &SimpleRetryPolicy{NumRetries: 3},
  29. }
  30. control.conn.Store((*Conn)(nil))
  31. return control
  32. }
  33. func (c *controlConn) heartBeat() {
  34. defer c.closeWg.Done()
  35. for {
  36. select {
  37. case <-c.quit:
  38. return
  39. case <-time.After(5 * time.Second):
  40. }
  41. resp, err := c.writeFrame(&writeOptionsFrame{})
  42. if err != nil {
  43. goto reconn
  44. }
  45. switch resp.(type) {
  46. case *supportedFrame:
  47. continue
  48. case error:
  49. goto reconn
  50. default:
  51. panic(fmt.Sprintf("gocql: unknown frame in response to options: %T", resp))
  52. }
  53. reconn:
  54. c.reconnect(true)
  55. // time.Sleep(5 * time.Second)
  56. continue
  57. }
  58. }
  59. func (c *controlConn) connect(endpoints []string) error {
  60. // intial connection attmept, try to connect to each endpoint to get an initial
  61. // list of nodes.
  62. // shuffle endpoints so not all drivers will connect to the same initial
  63. // node.
  64. r := rand.New(rand.NewSource(time.Now().UnixNano()))
  65. perm := r.Perm(len(endpoints))
  66. shuffled := make([]string, len(endpoints))
  67. for i, endpoint := range endpoints {
  68. shuffled[perm[i]] = endpoint
  69. }
  70. // store that we are not connected so that reconnect wont happen if we error
  71. atomic.StoreInt64(&c.connecting, -1)
  72. var (
  73. conn *Conn
  74. err error
  75. )
  76. for _, addr := range shuffled {
  77. conn, err = c.session.connect(JoinHostPort(addr, c.session.cfg.Port), c)
  78. if err != nil {
  79. log.Printf("gocql: unable to control conn dial %v: %v\n", addr, err)
  80. continue
  81. }
  82. if err = c.registerEvents(conn); err != nil {
  83. conn.Close()
  84. continue
  85. }
  86. // we should fetch the initial ring here and update initial host data. So that
  87. // when we return from here we have a ring topology ready to go.
  88. break
  89. }
  90. if conn == nil {
  91. // this is fatal, not going to connect a session
  92. return err
  93. }
  94. c.conn.Store(conn)
  95. atomic.StoreInt64(&c.connecting, 0)
  96. c.closeWg.Add(1)
  97. go c.heartBeat()
  98. return nil
  99. }
  100. func (c *controlConn) registerEvents(conn *Conn) error {
  101. framer, err := conn.exec(&writeRegisterFrame{
  102. events: []string{"TOPOLOGY_CHANGE", "STATUS_CHANGE", "STATUS_CHANGE"},
  103. }, nil)
  104. if err != nil {
  105. return err
  106. }
  107. frame, err := framer.parseFrame()
  108. if err != nil {
  109. return err
  110. } else if _, ok := frame.(*readyFrame); !ok {
  111. return fmt.Errorf("unexpected frame in response to register: got %T: %v\n", frame, frame)
  112. }
  113. return nil
  114. }
  115. func (c *controlConn) reconnect(refreshring bool) {
  116. if !atomic.CompareAndSwapInt64(&c.connecting, 0, 1) {
  117. return
  118. }
  119. success := false
  120. defer func() {
  121. // debounce reconnect a little
  122. if success {
  123. go func() {
  124. time.Sleep(500 * time.Millisecond)
  125. atomic.StoreInt64(&c.connecting, 0)
  126. }()
  127. } else {
  128. atomic.StoreInt64(&c.connecting, 0)
  129. }
  130. }()
  131. oldConn := c.conn.Load().(*Conn)
  132. // TODO: should have our own roundrobbin for hosts so that we can try each
  133. // in succession and guantee that we get a different host each time.
  134. host, conn := c.session.pool.Pick(nil)
  135. if conn == nil {
  136. return
  137. }
  138. newConn, err := Connect(conn.addr, c.connCfg, c, c.session)
  139. if err != nil {
  140. host.Mark(err)
  141. // TODO: add log handler for things like this
  142. return
  143. }
  144. if err := c.registerEvents(newConn); err != nil {
  145. host.Mark(err)
  146. // TODO: handle this case better
  147. newConn.Close()
  148. log.Printf("gocql: unable to register events: %v\n", err)
  149. return
  150. }
  151. c.conn.Store(newConn)
  152. host.Mark(nil)
  153. success = true
  154. if oldConn != nil {
  155. oldConn.Close()
  156. }
  157. if refreshring {
  158. c.session.hostSource.refreshRing()
  159. }
  160. }
  161. func (c *controlConn) HandleError(conn *Conn, err error, closed bool) {
  162. if !closed {
  163. return
  164. }
  165. oldConn := c.conn.Load().(*Conn)
  166. if oldConn != conn {
  167. return
  168. }
  169. c.reconnect(true)
  170. }
  171. func (c *controlConn) writeFrame(w frameWriter) (frame, error) {
  172. conn := c.conn.Load().(*Conn)
  173. if conn == nil {
  174. return nil, errNoControl
  175. }
  176. framer, err := conn.exec(w, nil)
  177. if err != nil {
  178. return nil, err
  179. }
  180. return framer.parseFrame()
  181. }
  182. func (c *controlConn) withConn(fn func(*Conn) *Iter) *Iter {
  183. const maxConnectAttempts = 5
  184. connectAttempts := 0
  185. for i := 0; i < maxConnectAttempts; i++ {
  186. conn := c.conn.Load().(*Conn)
  187. if conn == nil {
  188. if connectAttempts > maxConnectAttempts {
  189. break
  190. }
  191. connectAttempts++
  192. c.reconnect(false)
  193. continue
  194. }
  195. return fn(conn)
  196. }
  197. return &Iter{err: errNoControl}
  198. }
  199. // query will return nil if the connection is closed or nil
  200. func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter) {
  201. q := c.session.Query(statement, values...).Consistency(One)
  202. for {
  203. iter = c.withConn(func(conn *Conn) *Iter {
  204. return conn.executeQuery(q)
  205. })
  206. q.attempts++
  207. if iter.err == nil || !c.retry.Attempt(q) {
  208. break
  209. }
  210. }
  211. return
  212. }
  213. func (c *controlConn) fetchHostInfo(addr net.IP, port int) (*HostInfo, error) {
  214. // TODO(zariel): we should probably move this into host_source or atleast
  215. // share code with it.
  216. isLocal := c.addr() == addr.String()
  217. var fn func(*HostInfo) error
  218. if isLocal {
  219. fn = func(host *HostInfo) error {
  220. // TODO(zariel): should we fetch rpc_address from here?
  221. iter := c.query("SELECT data_center, rack, host_id, tokens, release_version FROM system.local WHERE key='local'")
  222. iter.Scan(&host.dataCenter, &host.rack, &host.hostId, &host.tokens, &host.version)
  223. return iter.Close()
  224. }
  225. } else {
  226. fn = func(host *HostInfo) error {
  227. // TODO(zariel): should we fetch rpc_address from here?
  228. iter := c.query("SELECT data_center, rack, host_id, tokens, release_version FROM system.peers WHERE peer=?", addr)
  229. iter.Scan(&host.dataCenter, &host.rack, &host.hostId, &host.tokens, &host.version)
  230. return iter.Close()
  231. }
  232. }
  233. host := &HostInfo{}
  234. if err := fn(host); err != nil {
  235. return nil, err
  236. }
  237. host.peer = addr.String()
  238. return host, nil
  239. }
  240. func (c *controlConn) awaitSchemaAgreement() error {
  241. return c.withConn(func(conn *Conn) *Iter {
  242. return &Iter{err: conn.awaitSchemaAgreement()}
  243. }).err
  244. }
  245. func (c *controlConn) addr() string {
  246. conn := c.conn.Load().(*Conn)
  247. if conn == nil {
  248. return ""
  249. }
  250. return conn.addr
  251. }
  252. func (c *controlConn) close() {
  253. // TODO: handle more gracefully
  254. close(c.quit)
  255. c.closeWg.Wait()
  256. conn := c.conn.Load().(*Conn)
  257. if conn != nil {
  258. conn.Close()
  259. }
  260. }
  // errNoControl is the error reported when no control connection is currently
  // stored: writeFrame returns it directly and withConn wraps it in an Iter.
  261. var errNoControl = errors.New("gocql: no control connection available")