control.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. package gocql
  2. import (
  3. crand "crypto/rand"
  4. "errors"
  5. "fmt"
  6. "log"
  7. "math/rand"
  8. "net"
  9. "strconv"
  10. "sync/atomic"
  11. "time"
  12. )
  13. var (
  14. randr *rand.Rand
  15. )
  16. func init() {
  17. b := make([]byte, 4)
  18. if _, err := crand.Read(b); err != nil {
  19. panic(fmt.Sprintf("unable to seed random number generator: %v", err))
  20. }
  21. randr = rand.New(rand.NewSource(int64(readInt(b))))
  22. }
// controlConn holds the state of a session's control connection: the
// connection itself, the retry policy used by query, and the flag/channel
// pair that coordinates the heartBeat goroutine.
//
// NOTE(review): this type previously carried a comment about keeping an
// atomic variable aligned to a 64bit boundary for 32bit architectures. The
// only field passed to sync/atomic functions in this file is the int32
// started, so that note looks stale; confirm before relying on field order.
type controlConn struct {
	// session is the owning Session; it is used to dial hosts, pick pooled
	// connections and report node up/down.
	session *Session

	// conn holds the current *Conn. createControlConn stores a typed nil
	// *Conn so that Load().(*Conn) is valid before the first connection.
	conn atomic.Value

	// retry decides whether query re-runs a failed statement.
	retry RetryPolicy

	// started is accessed atomically: 0 until heartBeat claims it, 1 while
	// the heartbeat loop owns it, -1 once close has signalled shutdown.
	started int32
	// quit is an unbuffered channel on which close tells heartBeat to exit.
	quit chan struct{}
}
  32. func createControlConn(session *Session) *controlConn {
  33. control := &controlConn{
  34. session: session,
  35. quit: make(chan struct{}),
  36. retry: &SimpleRetryPolicy{NumRetries: 3},
  37. }
  38. control.conn.Store((*Conn)(nil))
  39. return control
  40. }
  41. func (c *controlConn) heartBeat() {
  42. if !atomic.CompareAndSwapInt32(&c.started, 0, 1) {
  43. return
  44. }
  45. sleepTime := 1 * time.Second
  46. for {
  47. select {
  48. case <-c.quit:
  49. return
  50. case <-time.After(sleepTime):
  51. }
  52. resp, err := c.writeFrame(&writeOptionsFrame{})
  53. if err != nil {
  54. goto reconn
  55. }
  56. switch resp.(type) {
  57. case *supportedFrame:
  58. // Everything ok
  59. sleepTime = 5 * time.Second
  60. continue
  61. case error:
  62. goto reconn
  63. default:
  64. panic(fmt.Sprintf("gocql: unknown frame in response to options: %T", resp))
  65. }
  66. reconn:
  67. // try to connect a bit faster
  68. sleepTime = 1 * time.Second
  69. c.reconnect(true)
  70. // time.Sleep(5 * time.Second)
  71. continue
  72. }
  73. }
  74. func (c *controlConn) shuffleDial(endpoints []string) (conn *Conn, err error) {
  75. perm := randr.Perm(len(endpoints))
  76. shuffled := make([]string, len(endpoints))
  77. for i, endpoint := range endpoints {
  78. shuffled[perm[i]] = endpoint
  79. }
  80. // shuffle endpoints so not all drivers will connect to the same initial
  81. // node.
  82. for _, addr := range shuffled {
  83. conn, err = c.session.connect(JoinHostPort(addr, c.session.cfg.Port), c)
  84. if err == nil {
  85. return
  86. }
  87. log.Printf("gocql: unable to dial control conn %v: %v\n", addr, err)
  88. }
  89. return
  90. }
  91. func (c *controlConn) connect(endpoints []string) error {
  92. conn, err := c.shuffleDial(endpoints)
  93. if err != nil {
  94. return fmt.Errorf("control: unable to connect: %v", err)
  95. } else if conn == nil {
  96. return errors.New("control: unable to connect to initial endpoints")
  97. }
  98. if err := c.setupConn(conn); err != nil {
  99. conn.Close()
  100. return fmt.Errorf("control: unable to setup connection: %v", err)
  101. }
  102. // we could fetch the initial ring here and update initial host data. So that
  103. // when we return from here we have a ring topology ready to go.
  104. go c.heartBeat()
  105. return nil
  106. }
  107. func (c *controlConn) setupConn(conn *Conn) error {
  108. if err := c.registerEvents(conn); err != nil {
  109. conn.Close()
  110. return err
  111. }
  112. c.conn.Store(conn)
  113. host, portstr, err := net.SplitHostPort(conn.conn.RemoteAddr().String())
  114. if err != nil {
  115. return err
  116. }
  117. port, err := strconv.Atoi(portstr)
  118. if err != nil {
  119. return err
  120. }
  121. c.session.handleNodeUp(net.ParseIP(host), port, false)
  122. return nil
  123. }
  124. func (c *controlConn) registerEvents(conn *Conn) error {
  125. var events []string
  126. if !c.session.cfg.Events.DisableTopologyEvents {
  127. events = append(events, "TOPOLOGY_CHANGE")
  128. }
  129. if !c.session.cfg.Events.DisableNodeStatusEvents {
  130. events = append(events, "STATUS_CHANGE")
  131. }
  132. if !c.session.cfg.Events.DisableSchemaEvents {
  133. events = append(events, "SCHEMA_CHANGE")
  134. }
  135. if len(events) == 0 {
  136. return nil
  137. }
  138. framer, err := conn.exec(&writeRegisterFrame{
  139. events: events,
  140. }, nil)
  141. if err != nil {
  142. return err
  143. }
  144. frame, err := framer.parseFrame()
  145. if err != nil {
  146. return err
  147. } else if _, ok := frame.(*readyFrame); !ok {
  148. return fmt.Errorf("unexpected frame in response to register: got %T: %v\n", frame, frame)
  149. }
  150. return nil
  151. }
  152. func (c *controlConn) reconnect(refreshring bool) {
  153. // TODO: simplify this function, use session.ring to get hosts instead of the
  154. // connection pool
  155. addr := c.addr()
  156. oldConn := c.conn.Load().(*Conn)
  157. if oldConn != nil {
  158. oldConn.Close()
  159. }
  160. var newConn *Conn
  161. if addr != "" {
  162. // try to connect to the old host
  163. conn, err := c.session.connect(addr, c)
  164. if err != nil {
  165. // host is dead
  166. // TODO: this is replicated in a few places
  167. ip, portStr, _ := net.SplitHostPort(addr)
  168. port, _ := strconv.Atoi(portStr)
  169. c.session.handleNodeDown(net.ParseIP(ip), port)
  170. } else {
  171. newConn = conn
  172. }
  173. }
  174. // TODO: should have our own roundrobbin for hosts so that we can try each
  175. // in succession and guantee that we get a different host each time.
  176. if newConn == nil {
  177. _, conn := c.session.pool.Pick(nil)
  178. if conn == nil {
  179. c.connect(c.session.ring.endpoints)
  180. return
  181. }
  182. var err error
  183. newConn, err = c.session.connect(conn.addr, c)
  184. if err != nil {
  185. // TODO: add log handler for things like this
  186. return
  187. }
  188. }
  189. if err := c.setupConn(newConn); err != nil {
  190. newConn.Close()
  191. log.Printf("gocql: control unable to register events: %v\n", err)
  192. return
  193. }
  194. if refreshring {
  195. c.session.hostSource.refreshRing()
  196. }
  197. }
  198. func (c *controlConn) HandleError(conn *Conn, err error, closed bool) {
  199. if !closed {
  200. return
  201. }
  202. oldConn := c.conn.Load().(*Conn)
  203. if oldConn != conn {
  204. return
  205. }
  206. c.reconnect(true)
  207. }
  208. func (c *controlConn) writeFrame(w frameWriter) (frame, error) {
  209. conn := c.conn.Load().(*Conn)
  210. if conn == nil {
  211. return nil, errNoControl
  212. }
  213. framer, err := conn.exec(w, nil)
  214. if err != nil {
  215. return nil, err
  216. }
  217. return framer.parseFrame()
  218. }
  219. func (c *controlConn) withConn(fn func(*Conn) *Iter) *Iter {
  220. const maxConnectAttempts = 5
  221. connectAttempts := 0
  222. for i := 0; i < maxConnectAttempts; i++ {
  223. conn := c.conn.Load().(*Conn)
  224. if conn == nil {
  225. if connectAttempts > maxConnectAttempts {
  226. break
  227. }
  228. connectAttempts++
  229. c.reconnect(false)
  230. continue
  231. }
  232. return fn(conn)
  233. }
  234. return &Iter{err: errNoControl}
  235. }
  236. // query will return nil if the connection is closed or nil
  237. func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter) {
  238. q := c.session.Query(statement, values...).Consistency(One)
  239. for {
  240. iter = c.withConn(func(conn *Conn) *Iter {
  241. return conn.executeQuery(q)
  242. })
  243. if gocqlDebug && iter.err != nil {
  244. log.Printf("control: error executing %q: %v\n", statement, iter.err)
  245. }
  246. q.attempts++
  247. if iter.err == nil || !c.retry.Attempt(q) {
  248. break
  249. }
  250. }
  251. return
  252. }
  253. func (c *controlConn) fetchHostInfo(addr net.IP, port int) (*HostInfo, error) {
  254. // TODO(zariel): we should probably move this into host_source or atleast
  255. // share code with it.
  256. hostname, _, err := net.SplitHostPort(c.addr())
  257. if err != nil {
  258. return nil, fmt.Errorf("unable to fetch host info, invalid conn addr: %q: %v", c.addr(), err)
  259. }
  260. isLocal := hostname == addr.String()
  261. var fn func(*HostInfo) error
  262. if isLocal {
  263. fn = func(host *HostInfo) error {
  264. // TODO(zariel): should we fetch rpc_address from here?
  265. iter := c.query("SELECT data_center, rack, host_id, tokens, release_version FROM system.local WHERE key='local'")
  266. iter.Scan(&host.dataCenter, &host.rack, &host.hostId, &host.tokens, &host.version)
  267. return iter.Close()
  268. }
  269. } else {
  270. fn = func(host *HostInfo) error {
  271. // TODO(zariel): should we fetch rpc_address from here?
  272. iter := c.query("SELECT data_center, rack, host_id, tokens, release_version FROM system.peers WHERE peer=?", addr)
  273. iter.Scan(&host.dataCenter, &host.rack, &host.hostId, &host.tokens, &host.version)
  274. return iter.Close()
  275. }
  276. }
  277. host := &HostInfo{
  278. port: port,
  279. }
  280. if err := fn(host); err != nil {
  281. return nil, err
  282. }
  283. host.peer = addr.String()
  284. return host, nil
  285. }
  286. func (c *controlConn) awaitSchemaAgreement() error {
  287. return c.withConn(func(conn *Conn) *Iter {
  288. return &Iter{err: conn.awaitSchemaAgreement()}
  289. }).err
  290. }
  291. func (c *controlConn) addr() string {
  292. conn := c.conn.Load().(*Conn)
  293. if conn == nil {
  294. return ""
  295. }
  296. return conn.addr
  297. }
  298. func (c *controlConn) close() {
  299. if atomic.CompareAndSwapInt32(&c.started, 1, -1) {
  300. c.quit <- struct{}{}
  301. }
  302. conn := c.conn.Load().(*Conn)
  303. if conn != nil {
  304. conn.Close()
  305. }
  306. }
// errNoControl is returned by writeFrame and withConn when no control
// connection is stored.
var errNoControl = errors.New("gocql: no control connection available")