control.go 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. package gocql
  2. import (
  3. "errors"
  4. "fmt"
  5. "sync/atomic"
  6. "time"
  7. )
  8. type controlConn struct {
  9. session *Session
  10. conn atomic.Value
  11. connecting uint64
  12. retry RetryPolicy
  13. quit chan struct{}
  14. }
  15. func createControlConn(session *Session) *controlConn {
  16. control := &controlConn{
  17. session: session,
  18. quit: make(chan struct{}),
  19. retry: &SimpleRetryPolicy{NumRetries: 3},
  20. }
  21. control.conn.Store((*Conn)(nil))
  22. control.reconnect()
  23. go control.heartBeat()
  24. return control
  25. }
  26. func (c *controlConn) heartBeat() {
  27. for {
  28. select {
  29. case <-c.quit:
  30. return
  31. case <-time.After(5 * time.Second):
  32. }
  33. resp, err := c.writeFrame(&writeOptionsFrame{})
  34. if err != nil {
  35. goto reconn
  36. }
  37. switch resp.(type) {
  38. case *supportedFrame:
  39. continue
  40. case error:
  41. goto reconn
  42. default:
  43. panic(fmt.Sprintf("gocql: unknown frame in response to options: %T", resp))
  44. }
  45. reconn:
  46. c.reconnect()
  47. time.Sleep(5 * time.Second)
  48. continue
  49. }
  50. }
  51. func (c *controlConn) reconnect() {
  52. if !atomic.CompareAndSwapUint64(&c.connecting, 0, 1) {
  53. return
  54. }
  55. success := false
  56. defer func() {
  57. // debounce reconnect a little
  58. if success {
  59. go func() {
  60. time.Sleep(500 * time.Millisecond)
  61. atomic.StoreUint64(&c.connecting, 0)
  62. }()
  63. } else {
  64. atomic.StoreUint64(&c.connecting, 0)
  65. }
  66. }()
  67. oldConn := c.conn.Load().(*Conn)
  68. // TODO: should have our own roundrobbin for hosts so that we can try each
  69. // in succession and guantee that we get a different host each time.
  70. conn := c.session.pool.Pick(nil)
  71. if conn == nil {
  72. return
  73. }
  74. newConn, err := Connect(conn.addr, conn.cfg, c)
  75. if err != nil {
  76. // TODO: add log handler for things like this
  77. return
  78. }
  79. c.conn.Store(newConn)
  80. success = true
  81. if oldConn != nil {
  82. oldConn.Close()
  83. }
  84. }
  85. func (c *controlConn) HandleError(conn *Conn, err error, closed bool) {
  86. if !closed {
  87. return
  88. }
  89. oldConn := c.conn.Load().(*Conn)
  90. if oldConn != conn {
  91. return
  92. }
  93. c.reconnect()
  94. }
  95. func (c *controlConn) writeFrame(w frameWriter) (frame, error) {
  96. conn := c.conn.Load().(*Conn)
  97. if conn == nil {
  98. return nil, errNoControl
  99. }
  100. framer, err := conn.exec(w, nil)
  101. if err != nil {
  102. return nil, err
  103. }
  104. return framer.parseFrame()
  105. }
  106. // query will return nil if the connection is closed or nil
  107. func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter) {
  108. q := c.session.Query(statement, values...).Consistency(One)
  109. const maxConnectAttempts = 5
  110. connectAttempts := 0
  111. for {
  112. conn := c.conn.Load().(*Conn)
  113. if conn == nil {
  114. if connectAttempts > maxConnectAttempts {
  115. return &Iter{err: errNoControl}
  116. }
  117. connectAttempts++
  118. c.reconnect()
  119. continue
  120. }
  121. iter = conn.executeQuery(q)
  122. q.attempts++
  123. if iter.err == nil || !c.retry.Attempt(q) {
  124. break
  125. }
  126. }
  127. return
  128. }
  129. func (c *controlConn) awaitSchemaAgreement() (err error) {
  130. const (
  131. // TODO(zariel): if we export this make this configurable
  132. maxWaitTime = 60 * time.Second
  133. peerSchemas = "SELECT schema_version FROM system.peers"
  134. localSchemas = "SELECT schema_version FROM system.local WHERE key='local'"
  135. )
  136. endDeadline := time.Now().Add(maxWaitTime)
  137. for time.Now().Before(endDeadline) {
  138. iter := c.query(peerSchemas)
  139. versions := make(map[string]struct{})
  140. var schemaVersion string
  141. for iter.Scan(&schemaVersion) {
  142. versions[schemaVersion] = struct{}{}
  143. schemaVersion = ""
  144. }
  145. if err = iter.Close(); err != nil {
  146. goto cont
  147. }
  148. iter = c.query(localSchemas)
  149. for iter.Scan(&schemaVersion) {
  150. versions[schemaVersion] = struct{}{}
  151. schemaVersion = ""
  152. }
  153. if err = iter.Close(); err != nil {
  154. goto cont
  155. }
  156. if len(versions) <= 1 {
  157. return nil
  158. }
  159. cont:
  160. time.Sleep(200 * time.Millisecond)
  161. }
  162. if err != nil {
  163. return
  164. }
  165. // not exported
  166. return errors.New("gocql: cluster schema versions not consistent")
  167. }
  168. func (c *controlConn) close() {
  169. // TODO: handle more gracefully
  170. close(c.quit)
  171. }
  // errNoControl is returned when an operation needs the control connection
  // but none is currently established.
  var errNoControl = errors.New("gocql: no control connection available")