control.go 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. package gocql
  2. import (
  3. crand "crypto/rand"
  4. "errors"
  5. "fmt"
  6. "log"
  7. "math/rand"
  8. "net"
  9. "strconv"
  10. "sync/atomic"
  11. "time"
  12. )
var (
	// randr is the package-level pseudo random source. It is created in
	// init with a seed read from crypto/rand and is used by shuffleDial to
	// permute the order in which endpoints are dialed.
	randr *rand.Rand
)
  16. func init() {
  17. b := make([]byte, 4)
  18. if _, err := crand.Read(b); err != nil {
  19. panic(fmt.Sprintf("unable to seed random number generator: %v", err))
  20. }
  21. randr = rand.New(rand.NewSource(int64(readInt(b))))
  22. }
// controlConn manages the connection that the session uses to register for
// server events, to send periodic OPTIONS heartbeats, and to run queries
// against the system tables.
//
// Ensure that the atomic variable is aligned to a 64bit boundary
// so that atomic operations can be applied on 32bit architectures.
// NOTE(review): started is an int32, which sync/atomic does not require to be
// 64-bit aligned, so the alignment note above looks stale - confirm before
// relying on it or reordering fields.
type controlConn struct {
	// session is the owning session; it supplies configuration, the ring,
	// the connection pool and the connect function.
	session *Session

	// conn always holds a *Conn. createControlConn stores a typed nil
	// pointer so that Load().(*Conn) never panics.
	conn atomic.Value

	// retry is consulted by query to decide whether a failed statement
	// should be attempted again.
	retry RetryPolicy

	// started tracks the heartbeat goroutine: 0 means not started, 1 means
	// heartBeat is running, and -1 is set by close.
	started int32
	// quit is the channel close uses to tell heartBeat to return.
	quit chan struct{}
}
  32. func createControlConn(session *Session) *controlConn {
  33. control := &controlConn{
  34. session: session,
  35. quit: make(chan struct{}),
  36. retry: &SimpleRetryPolicy{NumRetries: 3},
  37. }
  38. control.conn.Store((*Conn)(nil))
  39. return control
  40. }
  41. func (c *controlConn) heartBeat() {
  42. if !atomic.CompareAndSwapInt32(&c.started, 0, 1) {
  43. return
  44. }
  45. sleepTime := 1 * time.Second
  46. for {
  47. select {
  48. case <-c.quit:
  49. return
  50. case <-time.After(sleepTime):
  51. }
  52. resp, err := c.writeFrame(&writeOptionsFrame{})
  53. if err != nil {
  54. goto reconn
  55. }
  56. switch resp.(type) {
  57. case *supportedFrame:
  58. // Everything ok
  59. sleepTime = 5 * time.Second
  60. continue
  61. case error:
  62. goto reconn
  63. default:
  64. panic(fmt.Sprintf("gocql: unknown frame in response to options: %T", resp))
  65. }
  66. reconn:
  67. // try to connect a bit faster
  68. sleepTime = 1 * time.Second
  69. c.reconnect(true)
  70. // time.Sleep(5 * time.Second)
  71. continue
  72. }
  73. }
  74. func (c *controlConn) shuffleDial(endpoints []string) (conn *Conn, err error) {
  75. perm := randr.Perm(len(endpoints))
  76. shuffled := make([]string, len(endpoints))
  77. for i, endpoint := range endpoints {
  78. shuffled[perm[i]] = endpoint
  79. }
  80. // shuffle endpoints so not all drivers will connect to the same initial
  81. // node.
  82. for _, addr := range shuffled {
  83. if addr == "" {
  84. return nil, fmt.Errorf("control: invalid address: %q", addr)
  85. }
  86. port := c.session.cfg.Port
  87. addr = JoinHostPort(addr, port)
  88. host, portStr, err := net.SplitHostPort(addr)
  89. if err != nil {
  90. host = addr
  91. port = c.session.cfg.Port
  92. err = nil
  93. } else {
  94. port, err = strconv.Atoi(portStr)
  95. if err != nil {
  96. return nil, err
  97. }
  98. }
  99. hostInfo, _ := c.session.ring.addHostIfMissing(&HostInfo{peer: host, port: port})
  100. conn, err = c.session.connect(addr, c, hostInfo)
  101. if err == nil {
  102. return conn, err
  103. }
  104. log.Printf("gocql: unable to dial control conn %v: %v\n", addr, err)
  105. }
  106. return
  107. }
  108. func (c *controlConn) connect(endpoints []string) error {
  109. if len(endpoints) == 0 {
  110. return errors.New("control: no endpoints specified")
  111. }
  112. conn, err := c.shuffleDial(endpoints)
  113. if err != nil {
  114. return fmt.Errorf("control: unable to connect: %v", err)
  115. } else if conn == nil {
  116. return errors.New("control: unable to connect to initial endpoints")
  117. }
  118. if err := c.setupConn(conn); err != nil {
  119. conn.Close()
  120. return fmt.Errorf("control: unable to setup connection: %v", err)
  121. }
  122. // we could fetch the initial ring here and update initial host data. So that
  123. // when we return from here we have a ring topology ready to go.
  124. go c.heartBeat()
  125. return nil
  126. }
  127. func (c *controlConn) setupConn(conn *Conn) error {
  128. if err := c.registerEvents(conn); err != nil {
  129. conn.Close()
  130. return err
  131. }
  132. c.conn.Store(conn)
  133. host, portstr, err := net.SplitHostPort(conn.conn.RemoteAddr().String())
  134. if err != nil {
  135. return err
  136. }
  137. port, err := strconv.Atoi(portstr)
  138. if err != nil {
  139. return err
  140. }
  141. c.session.handleNodeUp(net.ParseIP(host), port, false)
  142. return nil
  143. }
  144. func (c *controlConn) registerEvents(conn *Conn) error {
  145. var events []string
  146. if !c.session.cfg.Events.DisableTopologyEvents {
  147. events = append(events, "TOPOLOGY_CHANGE")
  148. }
  149. if !c.session.cfg.Events.DisableNodeStatusEvents {
  150. events = append(events, "STATUS_CHANGE")
  151. }
  152. if !c.session.cfg.Events.DisableSchemaEvents {
  153. events = append(events, "SCHEMA_CHANGE")
  154. }
  155. if len(events) == 0 {
  156. return nil
  157. }
  158. framer, err := conn.exec(&writeRegisterFrame{
  159. events: events,
  160. }, nil)
  161. if err != nil {
  162. return err
  163. }
  164. frame, err := framer.parseFrame()
  165. if err != nil {
  166. return err
  167. } else if _, ok := frame.(*readyFrame); !ok {
  168. return fmt.Errorf("unexpected frame in response to register: got %T: %v\n", frame, frame)
  169. }
  170. return nil
  171. }
  172. func (c *controlConn) reconnect(refreshring bool) {
  173. // TODO: simplify this function, use session.ring to get hosts instead of the
  174. // connection pool
  175. addr := c.addr()
  176. oldConn := c.conn.Load().(*Conn)
  177. if oldConn != nil {
  178. oldConn.Close()
  179. }
  180. var newConn *Conn
  181. if addr != "" {
  182. // try to connect to the old host
  183. conn, err := c.session.connect(addr, c, oldConn.host)
  184. if err != nil {
  185. // host is dead
  186. // TODO: this is replicated in a few places
  187. ip, portStr, _ := net.SplitHostPort(addr)
  188. port, _ := strconv.Atoi(portStr)
  189. c.session.handleNodeDown(net.ParseIP(ip), port)
  190. } else {
  191. newConn = conn
  192. }
  193. }
  194. // TODO: should have our own roundrobbin for hosts so that we can try each
  195. // in succession and guantee that we get a different host each time.
  196. if newConn == nil {
  197. _, conn := c.session.pool.Pick(nil)
  198. if conn == nil {
  199. c.connect(c.session.ring.endpoints)
  200. return
  201. }
  202. var err error
  203. newConn, err = c.session.connect(conn.addr, c, conn.host)
  204. if err != nil {
  205. // TODO: add log handler for things like this
  206. return
  207. }
  208. }
  209. if err := c.setupConn(newConn); err != nil {
  210. newConn.Close()
  211. log.Printf("gocql: control unable to register events: %v\n", err)
  212. return
  213. }
  214. if refreshring {
  215. c.session.hostSource.refreshRing()
  216. }
  217. }
  218. func (c *controlConn) HandleError(conn *Conn, err error, closed bool) {
  219. if !closed {
  220. return
  221. }
  222. oldConn := c.conn.Load().(*Conn)
  223. if oldConn != conn {
  224. return
  225. }
  226. c.reconnect(true)
  227. }
  228. func (c *controlConn) writeFrame(w frameWriter) (frame, error) {
  229. conn := c.conn.Load().(*Conn)
  230. if conn == nil {
  231. return nil, errNoControl
  232. }
  233. framer, err := conn.exec(w, nil)
  234. if err != nil {
  235. return nil, err
  236. }
  237. return framer.parseFrame()
  238. }
  239. func (c *controlConn) withConn(fn func(*Conn) *Iter) *Iter {
  240. const maxConnectAttempts = 5
  241. connectAttempts := 0
  242. for i := 0; i < maxConnectAttempts; i++ {
  243. conn := c.conn.Load().(*Conn)
  244. if conn == nil {
  245. if connectAttempts > maxConnectAttempts {
  246. break
  247. }
  248. connectAttempts++
  249. c.reconnect(false)
  250. continue
  251. }
  252. return fn(conn)
  253. }
  254. return &Iter{err: errNoControl}
  255. }
  256. // query will return nil if the connection is closed or nil
  257. func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter) {
  258. q := c.session.Query(statement, values...).Consistency(One)
  259. for {
  260. iter = c.withConn(func(conn *Conn) *Iter {
  261. return conn.executeQuery(q)
  262. })
  263. if gocqlDebug && iter.err != nil {
  264. log.Printf("control: error executing %q: %v\n", statement, iter.err)
  265. }
  266. q.attempts++
  267. if iter.err == nil || !c.retry.Attempt(q) {
  268. break
  269. }
  270. }
  271. return
  272. }
  273. func (c *controlConn) fetchHostInfo(addr net.IP, port int) (*HostInfo, error) {
  274. // TODO(zariel): we should probably move this into host_source or atleast
  275. // share code with it.
  276. hostname, _, err := net.SplitHostPort(c.addr())
  277. if err != nil {
  278. return nil, fmt.Errorf("unable to fetch host info, invalid conn addr: %q: %v", c.addr(), err)
  279. }
  280. isLocal := hostname == addr.String()
  281. var fn func(*HostInfo) error
  282. if isLocal {
  283. fn = func(host *HostInfo) error {
  284. // TODO(zariel): should we fetch rpc_address from here?
  285. iter := c.query("SELECT data_center, rack, host_id, tokens, release_version FROM system.local WHERE key='local'")
  286. iter.Scan(&host.dataCenter, &host.rack, &host.hostId, &host.tokens, &host.version)
  287. return iter.Close()
  288. }
  289. } else {
  290. fn = func(host *HostInfo) error {
  291. // TODO(zariel): should we fetch rpc_address from here?
  292. iter := c.query("SELECT data_center, rack, host_id, tokens, release_version FROM system.peers WHERE peer=?", addr)
  293. iter.Scan(&host.dataCenter, &host.rack, &host.hostId, &host.tokens, &host.version)
  294. return iter.Close()
  295. }
  296. }
  297. host := &HostInfo{
  298. port: port,
  299. }
  300. if err := fn(host); err != nil {
  301. return nil, err
  302. }
  303. host.peer = addr.String()
  304. return host, nil
  305. }
  306. func (c *controlConn) awaitSchemaAgreement() error {
  307. return c.withConn(func(conn *Conn) *Iter {
  308. return &Iter{err: conn.awaitSchemaAgreement()}
  309. }).err
  310. }
  311. func (c *controlConn) addr() string {
  312. conn := c.conn.Load().(*Conn)
  313. if conn == nil {
  314. return ""
  315. }
  316. return conn.addr
  317. }
  318. func (c *controlConn) close() {
  319. if atomic.CompareAndSwapInt32(&c.started, 1, -1) {
  320. c.quit <- struct{}{}
  321. }
  322. conn := c.conn.Load().(*Conn)
  323. if conn != nil {
  324. conn.Close()
  325. }
  326. }
// errNoControl is returned by writeFrame and withConn when no control
// connection is available.
var errNoControl = errors.New("gocql: no control connection available")