control.go 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. package gocql
  2. import (
  3. "errors"
  4. "fmt"
  5. "sync/atomic"
  6. "time"
  7. )
  8. type controlConn struct {
  9. session *Session
  10. conn atomic.Value
  11. connecting uint64
  12. retry RetryPolicy
  13. quit chan struct{}
  14. }
  15. func createControlConn(session *Session) *controlConn {
  16. control := &controlConn{
  17. session: session,
  18. quit: make(chan struct{}),
  19. retry: &SimpleRetryPolicy{NumRetries: 3},
  20. }
  21. control.conn.Store((*Conn)(nil))
  22. go control.heartBeat()
  23. return control
  24. }
  25. func (c *controlConn) heartBeat() {
  26. for {
  27. select {
  28. case <-c.quit:
  29. return
  30. case <-time.After(5 * time.Second):
  31. }
  32. resp, err := c.writeFrame(&writeOptionsFrame{})
  33. if err != nil {
  34. goto reconn
  35. }
  36. switch resp.(type) {
  37. case *supportedFrame:
  38. continue
  39. case error:
  40. goto reconn
  41. default:
  42. panic(fmt.Sprintf("gocql: unknown frame in response to options: %T", resp))
  43. }
  44. reconn:
  45. c.reconnect(true)
  46. // time.Sleep(5 * time.Second)
  47. continue
  48. }
  49. }
  50. func (c *controlConn) reconnect(refreshring bool) {
  51. if !atomic.CompareAndSwapUint64(&c.connecting, 0, 1) {
  52. return
  53. }
  54. success := false
  55. defer func() {
  56. // debounce reconnect a little
  57. if success {
  58. go func() {
  59. time.Sleep(500 * time.Millisecond)
  60. atomic.StoreUint64(&c.connecting, 0)
  61. }()
  62. } else {
  63. atomic.StoreUint64(&c.connecting, 0)
  64. }
  65. }()
  66. oldConn := c.conn.Load().(*Conn)
  67. // TODO: should have our own roundrobbin for hosts so that we can try each
  68. // in succession and guantee that we get a different host each time.
  69. host, conn := c.session.pool.Pick(nil)
  70. if conn == nil {
  71. return
  72. }
  73. newConn, err := Connect(conn.addr, conn.cfg, c)
  74. if err != nil {
  75. host.Mark(err)
  76. // TODO: add log handler for things like this
  77. return
  78. }
  79. host.Mark(nil)
  80. c.conn.Store(newConn)
  81. success = true
  82. if oldConn != nil {
  83. oldConn.Close()
  84. }
  85. if refreshring {
  86. c.session.hostSource.refreshRing()
  87. }
  88. }
  89. func (c *controlConn) HandleError(conn *Conn, err error, closed bool) {
  90. if !closed {
  91. return
  92. }
  93. oldConn := c.conn.Load().(*Conn)
  94. if oldConn != conn {
  95. return
  96. }
  97. c.reconnect(true)
  98. }
  99. func (c *controlConn) writeFrame(w frameWriter) (frame, error) {
  100. conn := c.conn.Load().(*Conn)
  101. if conn == nil {
  102. return nil, errNoControl
  103. }
  104. framer, err := conn.exec(w, nil)
  105. if err != nil {
  106. return nil, err
  107. }
  108. return framer.parseFrame()
  109. }
  110. // query will return nil if the connection is closed or nil
  111. func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter) {
  112. q := c.session.Query(statement, values...).Consistency(One)
  113. const maxConnectAttempts = 5
  114. connectAttempts := 0
  115. for {
  116. conn := c.conn.Load().(*Conn)
  117. if conn == nil {
  118. if connectAttempts > maxConnectAttempts {
  119. return &Iter{err: errNoControl}
  120. }
  121. connectAttempts++
  122. c.reconnect(false)
  123. continue
  124. }
  125. iter = conn.executeQuery(q)
  126. q.attempts++
  127. if iter.err == nil || !c.retry.Attempt(q) {
  128. break
  129. }
  130. }
  131. return
  132. }
  133. func (c *controlConn) awaitSchemaAgreement() (err error) {
  134. const (
  135. // TODO(zariel): if we export this make this configurable
  136. maxWaitTime = 60 * time.Second
  137. peerSchemas = "SELECT schema_version FROM system.peers"
  138. localSchemas = "SELECT schema_version FROM system.local WHERE key='local'"
  139. )
  140. endDeadline := time.Now().Add(maxWaitTime)
  141. for time.Now().Before(endDeadline) {
  142. iter := c.query(peerSchemas)
  143. versions := make(map[string]struct{})
  144. var schemaVersion string
  145. for iter.Scan(&schemaVersion) {
  146. versions[schemaVersion] = struct{}{}
  147. schemaVersion = ""
  148. }
  149. if err = iter.Close(); err != nil {
  150. goto cont
  151. }
  152. iter = c.query(localSchemas)
  153. for iter.Scan(&schemaVersion) {
  154. versions[schemaVersion] = struct{}{}
  155. schemaVersion = ""
  156. }
  157. if err = iter.Close(); err != nil {
  158. goto cont
  159. }
  160. if len(versions) <= 1 {
  161. return nil
  162. }
  163. cont:
  164. time.Sleep(200 * time.Millisecond)
  165. }
  166. if err != nil {
  167. return
  168. }
  169. // not exported
  170. return errors.New("gocql: cluster schema versions not consistent")
  171. }
  172. func (c *controlConn) addr() string {
  173. conn := c.conn.Load().(*Conn)
  174. if conn == nil {
  175. return ""
  176. }
  177. return conn.addr
  178. }
  179. func (c *controlConn) close() {
  180. // TODO: handle more gracefully
  181. close(c.quit)
  182. }
  // errNoControl is returned when an operation needs the control connection but
  // none is currently stored.
  var errNoControl = errors.New("gocql: no control connection available")