control.go 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. package gocql
  2. import (
  3. crand "crypto/rand"
  4. "errors"
  5. "fmt"
  6. "golang.org/x/net/context"
  7. "log"
  8. "math/rand"
  9. "net"
  10. "strconv"
  11. "sync/atomic"
  12. "time"
  13. )
  14. var (
  15. randr *rand.Rand
  16. )
  17. func init() {
  18. b := make([]byte, 4)
  19. if _, err := crand.Read(b); err != nil {
  20. panic(fmt.Sprintf("unable to seed random number generator: %v", err))
  21. }
  22. randr = rand.New(rand.NewSource(int64(readInt(b))))
  23. }
  24. // Ensure that the atomic variable is aligned to a 64bit boundary
  25. // so that atomic operations can be applied on 32bit architectures.
  26. type controlConn struct {
  27. session *Session
  28. conn atomic.Value
  29. retry RetryPolicy
  30. started int32
  31. quit chan struct{}
  32. }
  33. func createControlConn(session *Session) *controlConn {
  34. control := &controlConn{
  35. session: session,
  36. quit: make(chan struct{}),
  37. retry: &SimpleRetryPolicy{NumRetries: 3},
  38. }
  39. control.conn.Store((*Conn)(nil))
  40. return control
  41. }
  42. func (c *controlConn) heartBeat() {
  43. if !atomic.CompareAndSwapInt32(&c.started, 0, 1) {
  44. return
  45. }
  46. sleepTime := 1 * time.Second
  47. for {
  48. select {
  49. case <-c.quit:
  50. return
  51. case <-time.After(sleepTime):
  52. }
  53. resp, err := c.writeFrame(&writeOptionsFrame{})
  54. if err != nil {
  55. goto reconn
  56. }
  57. switch resp.(type) {
  58. case *supportedFrame:
  59. // Everything ok
  60. sleepTime = 5 * time.Second
  61. continue
  62. case error:
  63. goto reconn
  64. default:
  65. panic(fmt.Sprintf("gocql: unknown frame in response to options: %T", resp))
  66. }
  67. reconn:
  68. // try to connect a bit faster
  69. sleepTime = 1 * time.Second
  70. c.reconnect(true)
  71. // time.Sleep(5 * time.Second)
  72. continue
  73. }
  74. }
  75. func hostInfo(addr string, defaultPort int) (*HostInfo, error) {
  76. var port int
  77. host, portStr, err := net.SplitHostPort(addr)
  78. if err != nil {
  79. host = addr
  80. port = defaultPort
  81. } else {
  82. port, err = strconv.Atoi(portStr)
  83. if err != nil {
  84. return nil, err
  85. }
  86. }
  87. return &HostInfo{peer: host, port: port}, nil
  88. }
  89. func (c *controlConn) shuffleDial(endpoints []string) (conn *Conn, err error) {
  90. perm := randr.Perm(len(endpoints))
  91. shuffled := make([]string, len(endpoints))
  92. for i, endpoint := range endpoints {
  93. shuffled[perm[i]] = endpoint
  94. }
  95. // shuffle endpoints so not all drivers will connect to the same initial
  96. // node.
  97. for _, addr := range shuffled {
  98. if addr == "" {
  99. return nil, fmt.Errorf("invalid address: %q", addr)
  100. }
  101. port := c.session.cfg.Port
  102. addr = JoinHostPort(addr, port)
  103. var host *HostInfo
  104. host, err = hostInfo(addr, port)
  105. if err != nil {
  106. return nil, fmt.Errorf("invalid address: %q: %v", addr, err)
  107. }
  108. hostInfo, _ := c.session.ring.addHostIfMissing(host)
  109. conn, err = c.session.connect(addr, c, hostInfo)
  110. if err == nil {
  111. return conn, err
  112. }
  113. log.Printf("gocql: unable to dial control conn %v: %v\n", addr, err)
  114. }
  115. if err != nil {
  116. return nil, err
  117. }
  118. return conn, nil
  119. }
  120. func (c *controlConn) connect(endpoints []string) error {
  121. if len(endpoints) == 0 {
  122. return errors.New("control: no endpoints specified")
  123. }
  124. conn, err := c.shuffleDial(endpoints)
  125. if err != nil {
  126. return fmt.Errorf("control: unable to connect to initial hosts: %v", err)
  127. }
  128. if err := c.setupConn(conn); err != nil {
  129. conn.Close()
  130. return fmt.Errorf("control: unable to setup connection: %v", err)
  131. }
  132. // we could fetch the initial ring here and update initial host data. So that
  133. // when we return from here we have a ring topology ready to go.
  134. go c.heartBeat()
  135. return nil
  136. }
  137. func (c *controlConn) setupConn(conn *Conn) error {
  138. if err := c.registerEvents(conn); err != nil {
  139. conn.Close()
  140. return err
  141. }
  142. c.conn.Store(conn)
  143. host, portstr, err := net.SplitHostPort(conn.conn.RemoteAddr().String())
  144. if err != nil {
  145. return err
  146. }
  147. port, err := strconv.Atoi(portstr)
  148. if err != nil {
  149. return err
  150. }
  151. c.session.handleNodeUp(net.ParseIP(host), port, false)
  152. return nil
  153. }
  154. func (c *controlConn) registerEvents(conn *Conn) error {
  155. var events []string
  156. if !c.session.cfg.Events.DisableTopologyEvents {
  157. events = append(events, "TOPOLOGY_CHANGE")
  158. }
  159. if !c.session.cfg.Events.DisableNodeStatusEvents {
  160. events = append(events, "STATUS_CHANGE")
  161. }
  162. if !c.session.cfg.Events.DisableSchemaEvents {
  163. events = append(events, "SCHEMA_CHANGE")
  164. }
  165. if len(events) == 0 {
  166. return nil
  167. }
  168. framer, err := conn.exec(context.Background(),
  169. &writeRegisterFrame{
  170. events: events,
  171. }, nil)
  172. if err != nil {
  173. return err
  174. }
  175. frame, err := framer.parseFrame()
  176. if err != nil {
  177. return err
  178. } else if _, ok := frame.(*readyFrame); !ok {
  179. return fmt.Errorf("unexpected frame in response to register: got %T: %v\n", frame, frame)
  180. }
  181. return nil
  182. }
  183. func (c *controlConn) reconnect(refreshring bool) {
  184. // TODO: simplify this function, use session.ring to get hosts instead of the
  185. // connection pool
  186. addr := c.addr()
  187. oldConn := c.conn.Load().(*Conn)
  188. if oldConn != nil {
  189. oldConn.Close()
  190. }
  191. var newConn *Conn
  192. if addr != "" {
  193. // try to connect to the old host
  194. conn, err := c.session.connect(addr, c, oldConn.host)
  195. if err != nil {
  196. // host is dead
  197. // TODO: this is replicated in a few places
  198. ip, portStr, _ := net.SplitHostPort(addr)
  199. port, _ := strconv.Atoi(portStr)
  200. c.session.handleNodeDown(net.ParseIP(ip), port)
  201. } else {
  202. newConn = conn
  203. }
  204. }
  205. // TODO: should have our own roundrobbin for hosts so that we can try each
  206. // in succession and guantee that we get a different host each time.
  207. if newConn == nil {
  208. host := c.session.ring.rrHost()
  209. if host == nil {
  210. c.connect(c.session.ring.endpoints)
  211. return
  212. }
  213. var err error
  214. newConn, err = c.session.connect(host.Peer(), c, host)
  215. if err != nil {
  216. // TODO: add log handler for things like this
  217. return
  218. }
  219. }
  220. if err := c.setupConn(newConn); err != nil {
  221. newConn.Close()
  222. log.Printf("gocql: control unable to register events: %v\n", err)
  223. return
  224. }
  225. if refreshring {
  226. c.session.hostSource.refreshRing()
  227. }
  228. }
  229. func (c *controlConn) HandleError(conn *Conn, err error, closed bool) {
  230. if !closed {
  231. return
  232. }
  233. oldConn := c.conn.Load().(*Conn)
  234. if oldConn != conn {
  235. return
  236. }
  237. c.reconnect(true)
  238. }
  239. func (c *controlConn) writeFrame(w frameWriter) (frame, error) {
  240. conn := c.conn.Load().(*Conn)
  241. if conn == nil {
  242. return nil, errNoControl
  243. }
  244. framer, err := conn.exec(context.Background(), w, nil)
  245. if err != nil {
  246. return nil, err
  247. }
  248. return framer.parseFrame()
  249. }
  250. func (c *controlConn) withConn(fn func(*Conn) *Iter) *Iter {
  251. const maxConnectAttempts = 5
  252. connectAttempts := 0
  253. for i := 0; i < maxConnectAttempts; i++ {
  254. conn := c.conn.Load().(*Conn)
  255. if conn == nil {
  256. if connectAttempts > maxConnectAttempts {
  257. break
  258. }
  259. connectAttempts++
  260. c.reconnect(false)
  261. continue
  262. }
  263. return fn(conn)
  264. }
  265. return &Iter{err: errNoControl}
  266. }
  267. // query will return nil if the connection is closed or nil
  268. func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter) {
  269. q := c.session.Query(statement, values...).Consistency(One).RoutingKey([]byte{})
  270. for {
  271. iter = c.withConn(func(conn *Conn) *Iter {
  272. return conn.executeQuery(q)
  273. })
  274. if gocqlDebug && iter.err != nil {
  275. log.Printf("control: error executing %q: %v\n", statement, iter.err)
  276. }
  277. q.attempts++
  278. if iter.err == nil || !c.retry.Attempt(q) {
  279. break
  280. }
  281. }
  282. return
  283. }
  284. func (c *controlConn) fetchHostInfo(addr net.IP, port int) (*HostInfo, error) {
  285. // TODO(zariel): we should probably move this into host_source or atleast
  286. // share code with it.
  287. hostname, _, err := net.SplitHostPort(c.addr())
  288. if err != nil {
  289. return nil, fmt.Errorf("unable to fetch host info, invalid conn addr: %q: %v", c.addr(), err)
  290. }
  291. isLocal := hostname == addr.String()
  292. var fn func(*HostInfo) error
  293. if isLocal {
  294. fn = func(host *HostInfo) error {
  295. // TODO(zariel): should we fetch rpc_address from here?
  296. iter := c.query("SELECT data_center, rack, host_id, tokens, release_version FROM system.local WHERE key='local'")
  297. iter.Scan(&host.dataCenter, &host.rack, &host.hostId, &host.tokens, &host.version)
  298. return iter.Close()
  299. }
  300. } else {
  301. fn = func(host *HostInfo) error {
  302. // TODO(zariel): should we fetch rpc_address from here?
  303. iter := c.query("SELECT data_center, rack, host_id, tokens, release_version FROM system.peers WHERE peer=?", addr)
  304. iter.Scan(&host.dataCenter, &host.rack, &host.hostId, &host.tokens, &host.version)
  305. return iter.Close()
  306. }
  307. }
  308. host := &HostInfo{
  309. port: port,
  310. }
  311. if err := fn(host); err != nil {
  312. return nil, err
  313. }
  314. host.peer = addr.String()
  315. return host, nil
  316. }
  317. func (c *controlConn) awaitSchemaAgreement() error {
  318. return c.withConn(func(conn *Conn) *Iter {
  319. return &Iter{err: conn.awaitSchemaAgreement()}
  320. }).err
  321. }
  322. func (c *controlConn) addr() string {
  323. conn := c.conn.Load().(*Conn)
  324. if conn == nil {
  325. return ""
  326. }
  327. return conn.addr
  328. }
  329. func (c *controlConn) close() {
  330. if atomic.CompareAndSwapInt32(&c.started, 1, -1) {
  331. c.quit <- struct{}{}
  332. }
  333. conn := c.conn.Load().(*Conn)
  334. if conn != nil {
  335. conn.Close()
  336. }
  337. }
  338. var errNoControl = errors.New("gocql: no control connection available")