control.go 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. package gocql
  2. import (
  3. "errors"
  4. "fmt"
  5. "sync/atomic"
  6. "time"
  7. )
  8. type controlConn struct {
  9. session *Session
  10. conn atomic.Value
  11. connecting uint64
  12. retry RetryPolicy
  13. quit chan struct{}
  14. }
  15. func createControlConn(session *Session) *controlConn {
  16. control := &controlConn{
  17. session: session,
  18. quit: make(chan struct{}),
  19. retry: &SimpleRetryPolicy{NumRetries: 3},
  20. }
  21. control.conn.Store((*Conn)(nil))
  22. go control.heartBeat()
  23. return control
  24. }
  25. func (c *controlConn) heartBeat() {
  26. for {
  27. select {
  28. case <-c.quit:
  29. return
  30. case <-time.After(5 * time.Second):
  31. }
  32. resp, err := c.writeFrame(&writeOptionsFrame{})
  33. if err != nil {
  34. goto reconn
  35. }
  36. switch resp.(type) {
  37. case *supportedFrame:
  38. continue
  39. case error:
  40. goto reconn
  41. default:
  42. panic(fmt.Sprintf("gocql: unknown frame in response to options: %T", resp))
  43. }
  44. reconn:
  45. c.reconnect(true)
  46. // time.Sleep(5 * time.Second)
  47. continue
  48. }
  49. }
  50. func (c *controlConn) reconnect(refreshring bool) {
  51. if !atomic.CompareAndSwapUint64(&c.connecting, 0, 1) {
  52. return
  53. }
  54. success := false
  55. defer func() {
  56. // debounce reconnect a little
  57. if success {
  58. go func() {
  59. time.Sleep(500 * time.Millisecond)
  60. atomic.StoreUint64(&c.connecting, 0)
  61. }()
  62. } else {
  63. atomic.StoreUint64(&c.connecting, 0)
  64. }
  65. }()
  66. oldConn := c.conn.Load().(*Conn)
  67. // TODO: should have our own roundrobbin for hosts so that we can try each
  68. // in succession and guantee that we get a different host each time.
  69. conn := c.session.pool.Pick(nil)
  70. if conn == nil {
  71. return
  72. }
  73. newConn, err := Connect(conn.addr, conn.cfg, c)
  74. if err != nil {
  75. // TODO: add log handler for things like this
  76. return
  77. }
  78. c.conn.Store(newConn)
  79. success = true
  80. if oldConn != nil {
  81. oldConn.Close()
  82. }
  83. if refreshring {
  84. c.session.hostSource.refreshRing()
  85. }
  86. }
  87. func (c *controlConn) HandleError(conn *Conn, err error, closed bool) {
  88. if !closed {
  89. return
  90. }
  91. oldConn := c.conn.Load().(*Conn)
  92. if oldConn != conn {
  93. return
  94. }
  95. c.reconnect(true)
  96. }
  97. func (c *controlConn) writeFrame(w frameWriter) (frame, error) {
  98. conn := c.conn.Load().(*Conn)
  99. if conn == nil {
  100. return nil, errNoControl
  101. }
  102. framer, err := conn.exec(w, nil)
  103. if err != nil {
  104. return nil, err
  105. }
  106. return framer.parseFrame()
  107. }
  108. // query will return nil if the connection is closed or nil
  109. func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter) {
  110. q := c.session.Query(statement, values...).Consistency(One)
  111. const maxConnectAttempts = 5
  112. connectAttempts := 0
  113. for {
  114. conn := c.conn.Load().(*Conn)
  115. if conn == nil {
  116. if connectAttempts > maxConnectAttempts {
  117. return &Iter{err: errNoControl}
  118. }
  119. connectAttempts++
  120. c.reconnect(false)
  121. continue
  122. }
  123. iter = conn.executeQuery(q)
  124. q.attempts++
  125. if iter.err == nil || !c.retry.Attempt(q) {
  126. break
  127. }
  128. }
  129. return
  130. }
  131. func (c *controlConn) awaitSchemaAgreement() (err error) {
  132. const (
  133. // TODO(zariel): if we export this make this configurable
  134. maxWaitTime = 60 * time.Second
  135. peerSchemas = "SELECT schema_version FROM system.peers"
  136. localSchemas = "SELECT schema_version FROM system.local WHERE key='local'"
  137. )
  138. endDeadline := time.Now().Add(maxWaitTime)
  139. for time.Now().Before(endDeadline) {
  140. iter := c.query(peerSchemas)
  141. versions := make(map[string]struct{})
  142. var schemaVersion string
  143. for iter.Scan(&schemaVersion) {
  144. versions[schemaVersion] = struct{}{}
  145. schemaVersion = ""
  146. }
  147. if err = iter.Close(); err != nil {
  148. goto cont
  149. }
  150. iter = c.query(localSchemas)
  151. for iter.Scan(&schemaVersion) {
  152. versions[schemaVersion] = struct{}{}
  153. schemaVersion = ""
  154. }
  155. if err = iter.Close(); err != nil {
  156. goto cont
  157. }
  158. if len(versions) <= 1 {
  159. return nil
  160. }
  161. cont:
  162. time.Sleep(200 * time.Millisecond)
  163. }
  164. if err != nil {
  165. return
  166. }
  167. // not exported
  168. return errors.New("gocql: cluster schema versions not consistent")
  169. }
  170. func (c *controlConn) addr() string {
  171. conn := c.conn.Load().(*Conn)
  172. if conn == nil {
  173. return ""
  174. }
  175. return conn.addr
  176. }
  177. func (c *controlConn) close() {
  178. // TODO: handle more gracefully
  179. close(c.quit)
  180. }
  181. var errNoControl = errors.New("gocql: no controll connection available")