control.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. package gocql
  2. import (
  3. "context"
  4. crand "crypto/rand"
  5. "errors"
  6. "fmt"
  7. "math/rand"
  8. "net"
  9. "os"
  10. "regexp"
  11. "strconv"
  12. "sync"
  13. "sync/atomic"
  14. "time"
  15. )
var (
	// randr is the package-level pseudo-random source. It is assigned in
	// init and used by shuffleHosts to build permutations.
	randr *rand.Rand
	// mutRandr is held by shuffleHosts around its call into randr.
	mutRandr sync.Mutex
)
  20. func init() {
  21. b := make([]byte, 4)
  22. if _, err := crand.Read(b); err != nil {
  23. panic(fmt.Sprintf("unable to seed random number generator: %v", err))
  24. }
  25. randr = rand.New(rand.NewSource(int64(readInt(b))))
  26. }
// controlConn holds the state for the connection used for control traffic:
// event registration, heartbeats and internally issued queries.
//
// The atomically accessed flags are kept at the top of the struct.
// NOTE(review): the previous comment here spoke of 64-bit alignment for
// 32-bit architectures, but both flags are int32 — confirm whether the
// field ordering still matters before relying on it.
type controlConn struct {
	// started is 0 until heartBeat claims it (1); close moves it from 1 to -1.
	started int32
	// reconnecting is 1 while a reconnect call is in progress, 0 otherwise.
	reconnecting int32

	session *Session

	// conn always stores a *connHost (possibly a nil one); read it via getConn.
	conn atomic.Value

	// retry is consulted by query to decide whether to re-run a failed statement.
	retry RetryPolicy

	// quit receives a value from close to stop the heartBeat loop.
	quit chan struct{}
}
  37. func createControlConn(session *Session) *controlConn {
  38. control := &controlConn{
  39. session: session,
  40. quit: make(chan struct{}),
  41. retry: &SimpleRetryPolicy{NumRetries: 3},
  42. }
  43. control.conn.Store((*connHost)(nil))
  44. return control
  45. }
  46. func (c *controlConn) heartBeat() {
  47. if !atomic.CompareAndSwapInt32(&c.started, 0, 1) {
  48. return
  49. }
  50. sleepTime := 1 * time.Second
  51. for {
  52. select {
  53. case <-c.quit:
  54. return
  55. case <-time.After(sleepTime):
  56. }
  57. resp, err := c.writeFrame(&writeOptionsFrame{})
  58. if err != nil {
  59. goto reconn
  60. }
  61. switch resp.(type) {
  62. case *supportedFrame:
  63. // Everything ok
  64. sleepTime = 5 * time.Second
  65. continue
  66. case error:
  67. goto reconn
  68. default:
  69. panic(fmt.Sprintf("gocql: unknown frame in response to options: %T", resp))
  70. }
  71. reconn:
  72. // try to connect a bit faster
  73. sleepTime = 1 * time.Second
  74. c.reconnect(true)
  75. // time.Sleep(5 * time.Second)
  76. continue
  77. }
  78. }
// hostLookupPreferV4 is true when the GOCQL_HOST_LOOKUP_PREFER_V4 environment
// variable is exactly "true". It is evaluated once, when the package is
// initialized, and makes hostInfo prefer an IPv4 address among DNS results.
var hostLookupPreferV4 = os.Getenv("GOCQL_HOST_LOOKUP_PREFER_V4") == "true"
  80. func hostInfo(addr string, defaultPort int) (*HostInfo, error) {
  81. var port int
  82. host, portStr, err := net.SplitHostPort(addr)
  83. if err != nil {
  84. host = addr
  85. port = defaultPort
  86. } else {
  87. port, err = strconv.Atoi(portStr)
  88. if err != nil {
  89. return nil, err
  90. }
  91. }
  92. ip := net.ParseIP(host)
  93. if ip == nil {
  94. ips, err := net.LookupIP(host)
  95. if err != nil {
  96. return nil, err
  97. } else if len(ips) == 0 {
  98. return nil, fmt.Errorf("No IP's returned from DNS lookup for %q", addr)
  99. }
  100. if hostLookupPreferV4 {
  101. for _, v := range ips {
  102. if v4 := v.To4(); v4 != nil {
  103. ip = v4
  104. break
  105. }
  106. }
  107. if ip == nil {
  108. ip = ips[0]
  109. }
  110. } else {
  111. // TODO(zariel): should we check that we can connect to any of the ips?
  112. ip = ips[0]
  113. }
  114. }
  115. return &HostInfo{connectAddress: ip, port: port}, nil
  116. }
  117. func shuffleHosts(hosts []*HostInfo) []*HostInfo {
  118. mutRandr.Lock()
  119. perm := randr.Perm(len(hosts))
  120. mutRandr.Unlock()
  121. shuffled := make([]*HostInfo, len(hosts))
  122. for i, host := range hosts {
  123. shuffled[perm[i]] = host
  124. }
  125. return shuffled
  126. }
  127. func (c *controlConn) shuffleDial(endpoints []*HostInfo) (*Conn, error) {
  128. // shuffle endpoints so not all drivers will connect to the same initial
  129. // node.
  130. shuffled := shuffleHosts(endpoints)
  131. var err error
  132. for _, host := range shuffled {
  133. var conn *Conn
  134. conn, err = c.session.connect(host, c)
  135. if err == nil {
  136. return conn, nil
  137. }
  138. Logger.Printf("gocql: unable to dial control conn %v: %v\n", host.ConnectAddress(), err)
  139. }
  140. return nil, err
  141. }
  142. // this is going to be version dependant and a nightmare to maintain :(
  143. var protocolSupportRe = regexp.MustCompile(`the lowest supported version is \d+ and the greatest is (\d+)$`)
  144. func parseProtocolFromError(err error) int {
  145. // I really wish this had the actual info in the error frame...
  146. matches := protocolSupportRe.FindAllStringSubmatch(err.Error(), -1)
  147. if len(matches) != 1 || len(matches[0]) != 2 {
  148. if verr, ok := err.(*protocolError); ok {
  149. return int(verr.frame.Header().version.version())
  150. }
  151. return 0
  152. }
  153. max, err := strconv.Atoi(matches[0][1])
  154. if err != nil {
  155. return 0
  156. }
  157. return max
  158. }
  159. func (c *controlConn) discoverProtocol(hosts []*HostInfo) (int, error) {
  160. hosts = shuffleHosts(hosts)
  161. connCfg := *c.session.connCfg
  162. connCfg.ProtoVersion = 4 // TODO: define maxProtocol
  163. handler := connErrorHandlerFn(func(c *Conn, err error, closed bool) {
  164. // we should never get here, but if we do it means we connected to a
  165. // host successfully which means our attempted protocol version worked
  166. if !closed {
  167. c.Close()
  168. }
  169. })
  170. var err error
  171. for _, host := range hosts {
  172. var conn *Conn
  173. conn, err = c.session.dial(host.ConnectAddress(), host.Port(), &connCfg, handler)
  174. if conn != nil {
  175. conn.Close()
  176. }
  177. if err == nil {
  178. return connCfg.ProtoVersion, nil
  179. }
  180. if proto := parseProtocolFromError(err); proto > 0 {
  181. return proto, nil
  182. }
  183. }
  184. return 0, err
  185. }
  186. func (c *controlConn) connect(hosts []*HostInfo) error {
  187. if len(hosts) == 0 {
  188. return errors.New("control: no endpoints specified")
  189. }
  190. conn, err := c.shuffleDial(hosts)
  191. if err != nil {
  192. return fmt.Errorf("control: unable to connect to initial hosts: %v", err)
  193. }
  194. if err := c.setupConn(conn); err != nil {
  195. conn.Close()
  196. return fmt.Errorf("control: unable to setup connection: %v", err)
  197. }
  198. // we could fetch the initial ring here and update initial host data. So that
  199. // when we return from here we have a ring topology ready to go.
  200. go c.heartBeat()
  201. return nil
  202. }
// connHost pairs a control connection with the host information obtained from
// that connection. It is the value stored in controlConn.conn.
type connHost struct {
	// conn is the connection itself.
	conn *Conn
	// host is the result of conn.localHostInfo() taken in setupConn.
	host *HostInfo
}
  207. func (c *controlConn) setupConn(conn *Conn) error {
  208. if err := c.registerEvents(conn); err != nil {
  209. conn.Close()
  210. return err
  211. }
  212. // TODO(zariel): do we need to fetch host info everytime
  213. // the control conn connects? Surely we have it cached?
  214. host, err := conn.localHostInfo()
  215. if err != nil {
  216. return err
  217. }
  218. ch := &connHost{
  219. conn: conn,
  220. host: host,
  221. }
  222. c.conn.Store(ch)
  223. // c.session.handleNodeUp(host.ConnectAddress(), host.Port(), false)
  224. return nil
  225. }
  226. func (c *controlConn) registerEvents(conn *Conn) error {
  227. var events []string
  228. if !c.session.cfg.Events.DisableTopologyEvents {
  229. events = append(events, "TOPOLOGY_CHANGE")
  230. }
  231. if !c.session.cfg.Events.DisableNodeStatusEvents {
  232. events = append(events, "STATUS_CHANGE")
  233. }
  234. if !c.session.cfg.Events.DisableSchemaEvents {
  235. events = append(events, "SCHEMA_CHANGE")
  236. }
  237. if len(events) == 0 {
  238. return nil
  239. }
  240. framer, err := conn.exec(context.Background(),
  241. &writeRegisterFrame{
  242. events: events,
  243. }, nil)
  244. if err != nil {
  245. return err
  246. }
  247. frame, err := framer.parseFrame()
  248. if err != nil {
  249. return err
  250. } else if _, ok := frame.(*readyFrame); !ok {
  251. return fmt.Errorf("unexpected frame in response to register: got %T: %v\n", frame, frame)
  252. }
  253. return nil
  254. }
  255. func (c *controlConn) reconnect(refreshring bool) {
  256. if !atomic.CompareAndSwapInt32(&c.reconnecting, 0, 1) {
  257. return
  258. }
  259. defer atomic.StoreInt32(&c.reconnecting, 0)
  260. // TODO: simplify this function, use session.ring to get hosts instead of the
  261. // connection pool
  262. var host *HostInfo
  263. ch := c.getConn()
  264. if ch != nil {
  265. host = ch.host
  266. ch.conn.Close()
  267. }
  268. var newConn *Conn
  269. if host != nil {
  270. // try to connect to the old host
  271. conn, err := c.session.connect(host, c)
  272. if err != nil {
  273. // host is dead
  274. // TODO: this is replicated in a few places
  275. c.session.handleNodeDown(host.ConnectAddress(), host.Port())
  276. } else {
  277. newConn = conn
  278. }
  279. }
  280. // TODO: should have our own round-robin for hosts so that we can try each
  281. // in succession and guarantee that we get a different host each time.
  282. if newConn == nil {
  283. host := c.session.ring.rrHost()
  284. if host == nil {
  285. c.connect(c.session.ring.endpoints)
  286. return
  287. }
  288. var err error
  289. newConn, err = c.session.connect(host, c)
  290. if err != nil {
  291. // TODO: add log handler for things like this
  292. return
  293. }
  294. }
  295. if err := c.setupConn(newConn); err != nil {
  296. newConn.Close()
  297. Logger.Printf("gocql: control unable to register events: %v\n", err)
  298. return
  299. }
  300. if refreshring {
  301. c.session.hostSource.refreshRing()
  302. }
  303. }
  304. func (c *controlConn) HandleError(conn *Conn, err error, closed bool) {
  305. if !closed {
  306. return
  307. }
  308. oldConn := c.getConn()
  309. if oldConn.conn != conn {
  310. return
  311. }
  312. c.reconnect(true)
  313. }
  314. func (c *controlConn) getConn() *connHost {
  315. return c.conn.Load().(*connHost)
  316. }
  317. func (c *controlConn) writeFrame(w frameWriter) (frame, error) {
  318. ch := c.getConn()
  319. if ch == nil {
  320. return nil, errNoControl
  321. }
  322. framer, err := ch.conn.exec(context.Background(), w, nil)
  323. if err != nil {
  324. return nil, err
  325. }
  326. return framer.parseFrame()
  327. }
  328. func (c *controlConn) withConnHost(fn func(*connHost) *Iter) *Iter {
  329. const maxConnectAttempts = 5
  330. connectAttempts := 0
  331. for i := 0; i < maxConnectAttempts; i++ {
  332. ch := c.getConn()
  333. if ch == nil {
  334. if connectAttempts > maxConnectAttempts {
  335. break
  336. }
  337. connectAttempts++
  338. c.reconnect(false)
  339. continue
  340. }
  341. return fn(ch)
  342. }
  343. return &Iter{err: errNoControl}
  344. }
  345. func (c *controlConn) withConn(fn func(*Conn) *Iter) *Iter {
  346. return c.withConnHost(func(ch *connHost) *Iter {
  347. return fn(ch.conn)
  348. })
  349. }
  350. // query will return nil if the connection is closed or nil
  351. func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter) {
  352. q := c.session.Query(statement, values...).Consistency(One).RoutingKey([]byte{}).Trace(nil)
  353. for {
  354. iter = c.withConn(func(conn *Conn) *Iter {
  355. return conn.executeQuery(q)
  356. })
  357. if gocqlDebug && iter.err != nil {
  358. Logger.Printf("control: error executing %q: %v\n", statement, iter.err)
  359. }
  360. q.attempts++
  361. if iter.err == nil || !c.retry.Attempt(q) {
  362. break
  363. }
  364. }
  365. return
  366. }
  367. func (c *controlConn) awaitSchemaAgreement() error {
  368. return c.withConn(func(conn *Conn) *Iter {
  369. return &Iter{err: conn.awaitSchemaAgreement()}
  370. }).err
  371. }
  372. func (c *controlConn) close() {
  373. if atomic.CompareAndSwapInt32(&c.started, 1, -1) {
  374. c.quit <- struct{}{}
  375. }
  376. ch := c.getConn()
  377. if ch != nil {
  378. ch.conn.Close()
  379. }
  380. }
// errNoControl is returned by writeFrame, and carried in the Iter returned by
// withConnHost, when there is no control connection to use.
var errNoControl = errors.New("gocql: no control connection available")