control.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. package gocql
  2. import (
  3. "context"
  4. crand "crypto/rand"
  5. "errors"
  6. "fmt"
  7. "math/rand"
  8. "net"
  9. "os"
  10. "regexp"
  11. "strconv"
  12. "sync"
  13. "sync/atomic"
  14. "time"
  15. )
  // randr is the package-level pseudo-random generator used when shuffling
  // hosts. It is assigned once, in init, from a seed read from crypto/rand.
  var randr *rand.Rand

  // mutRandr guards randr; callers lock it around every use of the generator.
  var mutRandr sync.Mutex
  20. func init() {
  21. b := make([]byte, 4)
  22. if _, err := crand.Read(b); err != nil {
  23. panic(fmt.Sprintf("unable to seed random number generator: %v", err))
  24. }
  25. randr = rand.New(rand.NewSource(int64(readInt(b))))
  26. }
  27. // Ensure that the atomic variable is aligned to a 64bit boundary
  28. // so that atomic operations can be applied on 32bit architectures.
  29. type controlConn struct {
  30. started int32
  31. reconnecting int32
  32. session *Session
  33. conn atomic.Value
  34. retry RetryPolicy
  35. quit chan struct{}
  36. }
  37. func createControlConn(session *Session) *controlConn {
  38. control := &controlConn{
  39. session: session,
  40. quit: make(chan struct{}),
  41. retry: &SimpleRetryPolicy{NumRetries: 3},
  42. }
  43. control.conn.Store((*connHost)(nil))
  44. return control
  45. }
  46. func (c *controlConn) heartBeat() {
  47. if !atomic.CompareAndSwapInt32(&c.started, 0, 1) {
  48. return
  49. }
  50. sleepTime := 1 * time.Second
  51. timer := time.NewTimer(sleepTime)
  52. defer timer.Stop()
  53. for {
  54. timer.Reset(sleepTime)
  55. select {
  56. case <-c.quit:
  57. return
  58. case <-timer.C:
  59. }
  60. resp, err := c.writeFrame(&writeOptionsFrame{})
  61. if err != nil {
  62. goto reconn
  63. }
  64. switch resp.(type) {
  65. case *supportedFrame:
  66. // Everything ok
  67. sleepTime = 5 * time.Second
  68. continue
  69. case error:
  70. goto reconn
  71. default:
  72. panic(fmt.Sprintf("gocql: unknown frame in response to options: %T", resp))
  73. }
  74. reconn:
  75. // try to connect a bit faster
  76. sleepTime = 1 * time.Second
  77. c.reconnect(true)
  78. continue
  79. }
  80. }
  81. var hostLookupPreferV4 = os.Getenv("GOCQL_HOST_LOOKUP_PREFER_V4") == "true"
  82. func hostInfo(addr string, defaultPort int) ([]*HostInfo, error) {
  83. var port int
  84. host, portStr, err := net.SplitHostPort(addr)
  85. if err != nil {
  86. host = addr
  87. port = defaultPort
  88. } else {
  89. port, err = strconv.Atoi(portStr)
  90. if err != nil {
  91. return nil, err
  92. }
  93. }
  94. var hosts []*HostInfo
  95. // Check if host is a literal IP address
  96. if ip := net.ParseIP(host); ip != nil {
  97. hosts = append(hosts, &HostInfo{hostname: host, connectAddress: ip, port: port})
  98. return hosts, nil
  99. }
  100. // Look up host in DNS
  101. ips, err := LookupIP(host)
  102. if err != nil {
  103. return nil, err
  104. } else if len(ips) == 0 {
  105. return nil, fmt.Errorf("No IP's returned from DNS lookup for %q", addr)
  106. }
  107. // Filter to v4 addresses if any present
  108. if hostLookupPreferV4 {
  109. var preferredIPs []net.IP
  110. for _, v := range ips {
  111. if v4 := v.To4(); v4 != nil {
  112. preferredIPs = append(preferredIPs, v4)
  113. }
  114. }
  115. if len(preferredIPs) != 0 {
  116. ips = preferredIPs
  117. }
  118. }
  119. for _, ip := range ips {
  120. hosts = append(hosts, &HostInfo{hostname: host, connectAddress: ip, port: port})
  121. }
  122. return hosts, nil
  123. }
  124. func shuffleHosts(hosts []*HostInfo) []*HostInfo {
  125. shuffled := make([]*HostInfo, len(hosts))
  126. copy(shuffled, hosts)
  127. mutRandr.Lock()
  128. randr.Shuffle(len(hosts), func(i, j int) {
  129. shuffled[i], shuffled[j] = shuffled[j], shuffled[i]
  130. })
  131. mutRandr.Unlock()
  132. return shuffled
  133. }
  134. func (c *controlConn) shuffleDial(endpoints []*HostInfo) (*Conn, error) {
  135. // shuffle endpoints so not all drivers will connect to the same initial
  136. // node.
  137. shuffled := shuffleHosts(endpoints)
  138. cfg := *c.session.connCfg
  139. cfg.disableCoalesce = true
  140. var err error
  141. for _, host := range shuffled {
  142. var conn *Conn
  143. conn, err = c.session.dial(c.session.ctx, host, &cfg, c)
  144. if err == nil {
  145. return conn, nil
  146. }
  147. Logger.Printf("gocql: unable to dial control conn %v: %v\n", host.ConnectAddress(), err)
  148. }
  149. return nil, err
  150. }
  151. // this is going to be version dependant and a nightmare to maintain :(
  152. var protocolSupportRe = regexp.MustCompile(`the lowest supported version is \d+ and the greatest is (\d+)$`)
  153. func parseProtocolFromError(err error) int {
  154. // I really wish this had the actual info in the error frame...
  155. matches := protocolSupportRe.FindAllStringSubmatch(err.Error(), -1)
  156. if len(matches) != 1 || len(matches[0]) != 2 {
  157. if verr, ok := err.(*protocolError); ok {
  158. return int(verr.frame.Header().version.version())
  159. }
  160. return 0
  161. }
  162. max, err := strconv.Atoi(matches[0][1])
  163. if err != nil {
  164. return 0
  165. }
  166. return max
  167. }
  168. func (c *controlConn) discoverProtocol(hosts []*HostInfo) (int, error) {
  169. hosts = shuffleHosts(hosts)
  170. connCfg := *c.session.connCfg
  171. connCfg.ProtoVersion = 4 // TODO: define maxProtocol
  172. handler := connErrorHandlerFn(func(c *Conn, err error, closed bool) {
  173. // we should never get here, but if we do it means we connected to a
  174. // host successfully which means our attempted protocol version worked
  175. if !closed {
  176. c.Close()
  177. }
  178. })
  179. var err error
  180. for _, host := range hosts {
  181. var conn *Conn
  182. conn, err = c.session.dial(c.session.ctx, host, &connCfg, handler)
  183. if conn != nil {
  184. conn.Close()
  185. }
  186. if err == nil {
  187. return connCfg.ProtoVersion, nil
  188. }
  189. if proto := parseProtocolFromError(err); proto > 0 {
  190. return proto, nil
  191. }
  192. }
  193. return 0, err
  194. }
  195. func (c *controlConn) connect(hosts []*HostInfo) error {
  196. if len(hosts) == 0 {
  197. return errors.New("control: no endpoints specified")
  198. }
  199. conn, err := c.shuffleDial(hosts)
  200. if err != nil {
  201. return fmt.Errorf("control: unable to connect to initial hosts: %v", err)
  202. }
  203. if err := c.setupConn(conn); err != nil {
  204. conn.Close()
  205. return fmt.Errorf("control: unable to setup connection: %v", err)
  206. }
  207. // we could fetch the initial ring here and update initial host data. So that
  208. // when we return from here we have a ring topology ready to go.
  209. go c.heartBeat()
  210. return nil
  211. }
  212. type connHost struct {
  213. conn *Conn
  214. host *HostInfo
  215. }
  216. func (c *controlConn) setupConn(conn *Conn) error {
  217. if err := c.registerEvents(conn); err != nil {
  218. conn.Close()
  219. return err
  220. }
  221. // TODO(zariel): do we need to fetch host info everytime
  222. // the control conn connects? Surely we have it cached?
  223. host, err := conn.localHostInfo(context.TODO())
  224. if err != nil {
  225. return err
  226. }
  227. ch := &connHost{
  228. conn: conn,
  229. host: host,
  230. }
  231. c.conn.Store(ch)
  232. c.session.handleNodeUp(host.ConnectAddress(), host.Port(), false)
  233. return nil
  234. }
  235. func (c *controlConn) registerEvents(conn *Conn) error {
  236. var events []string
  237. if !c.session.cfg.Events.DisableTopologyEvents {
  238. events = append(events, "TOPOLOGY_CHANGE")
  239. }
  240. if !c.session.cfg.Events.DisableNodeStatusEvents {
  241. events = append(events, "STATUS_CHANGE")
  242. }
  243. if !c.session.cfg.Events.DisableSchemaEvents {
  244. events = append(events, "SCHEMA_CHANGE")
  245. }
  246. if len(events) == 0 {
  247. return nil
  248. }
  249. framer, err := conn.exec(context.Background(),
  250. &writeRegisterFrame{
  251. events: events,
  252. }, nil)
  253. if err != nil {
  254. return err
  255. }
  256. frame, err := framer.parseFrame()
  257. if err != nil {
  258. return err
  259. } else if _, ok := frame.(*readyFrame); !ok {
  260. return fmt.Errorf("unexpected frame in response to register: got %T: %v\n", frame, frame)
  261. }
  262. return nil
  263. }
  264. func (c *controlConn) reconnect(refreshring bool) {
  265. if !atomic.CompareAndSwapInt32(&c.reconnecting, 0, 1) {
  266. return
  267. }
  268. defer atomic.StoreInt32(&c.reconnecting, 0)
  269. // TODO: simplify this function, use session.ring to get hosts instead of the
  270. // connection pool
  271. var host *HostInfo
  272. ch := c.getConn()
  273. if ch != nil {
  274. host = ch.host
  275. ch.conn.Close()
  276. }
  277. var newConn *Conn
  278. if host != nil {
  279. // try to connect to the old host
  280. conn, err := c.session.connect(c.session.ctx, host, c)
  281. if err != nil {
  282. // host is dead
  283. // TODO: this is replicated in a few places
  284. if c.session.cfg.ConvictionPolicy.AddFailure(err, host) {
  285. c.session.handleNodeDown(host.ConnectAddress(), host.Port())
  286. }
  287. } else {
  288. newConn = conn
  289. }
  290. }
  291. // TODO: should have our own round-robin for hosts so that we can try each
  292. // in succession and guarantee that we get a different host each time.
  293. if newConn == nil {
  294. host := c.session.ring.rrHost()
  295. if host == nil {
  296. c.connect(c.session.ring.endpoints)
  297. return
  298. }
  299. var err error
  300. newConn, err = c.session.connect(c.session.ctx, host, c)
  301. if err != nil {
  302. // TODO: add log handler for things like this
  303. return
  304. }
  305. }
  306. if err := c.setupConn(newConn); err != nil {
  307. newConn.Close()
  308. Logger.Printf("gocql: control unable to register events: %v\n", err)
  309. return
  310. }
  311. if refreshring {
  312. c.session.hostSource.refreshRing()
  313. }
  314. }
  315. func (c *controlConn) HandleError(conn *Conn, err error, closed bool) {
  316. if !closed {
  317. return
  318. }
  319. oldConn := c.getConn()
  320. // If connection has long gone, and not been attempted for awhile,
  321. // it's possible to have oldConn as nil here (#1297).
  322. if oldConn != nil && oldConn.conn != conn {
  323. return
  324. }
  325. c.reconnect(false)
  326. }
  327. func (c *controlConn) getConn() *connHost {
  328. return c.conn.Load().(*connHost)
  329. }
  330. func (c *controlConn) writeFrame(w frameWriter) (frame, error) {
  331. ch := c.getConn()
  332. if ch == nil {
  333. return nil, errNoControl
  334. }
  335. framer, err := ch.conn.exec(context.Background(), w, nil)
  336. if err != nil {
  337. return nil, err
  338. }
  339. return framer.parseFrame()
  340. }
  341. func (c *controlConn) withConnHost(fn func(*connHost) *Iter) *Iter {
  342. const maxConnectAttempts = 5
  343. connectAttempts := 0
  344. for i := 0; i < maxConnectAttempts; i++ {
  345. ch := c.getConn()
  346. if ch == nil {
  347. if connectAttempts > maxConnectAttempts {
  348. break
  349. }
  350. connectAttempts++
  351. c.reconnect(false)
  352. continue
  353. }
  354. return fn(ch)
  355. }
  356. return &Iter{err: errNoControl}
  357. }
  358. func (c *controlConn) withConn(fn func(*Conn) *Iter) *Iter {
  359. return c.withConnHost(func(ch *connHost) *Iter {
  360. return fn(ch.conn)
  361. })
  362. }
  363. // query will return nil if the connection is closed or nil
  364. func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter) {
  365. q := c.session.Query(statement, values...).Consistency(One).RoutingKey([]byte{}).Trace(nil)
  366. for {
  367. iter = c.withConn(func(conn *Conn) *Iter {
  368. return conn.executeQuery(context.TODO(), q)
  369. })
  370. if gocqlDebug && iter.err != nil {
  371. Logger.Printf("control: error executing %q: %v\n", statement, iter.err)
  372. }
  373. q.AddAttempts(1, c.getConn().host)
  374. if iter.err == nil || !c.retry.Attempt(q) {
  375. break
  376. }
  377. }
  378. return
  379. }
  380. func (c *controlConn) awaitSchemaAgreement() error {
  381. return c.withConn(func(conn *Conn) *Iter {
  382. return &Iter{err: conn.awaitSchemaAgreement(context.TODO())}
  383. }).err
  384. }
  385. func (c *controlConn) close() {
  386. if atomic.CompareAndSwapInt32(&c.started, 1, -1) {
  387. c.quit <- struct{}{}
  388. }
  389. ch := c.getConn()
  390. if ch != nil {
  391. ch.conn.Close()
  392. }
  393. }
  // errNoControl is returned by control connection operations when there is no
  // control connection to run them on.
  var (
  	errNoControl = errors.New("gocql: no control connection available")
  )