control.go 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. package gocql
  2. import (
  3. "context"
  4. crand "crypto/rand"
  5. "errors"
  6. "fmt"
  7. "math/rand"
  8. "net"
  9. "regexp"
  10. "strconv"
  11. "sync"
  12. "sync/atomic"
  13. "time"
  14. )
  // randr is the package-level pseudo random generator. A *rand.Rand built
  // with rand.New is not safe for concurrent use, so every access must hold
  // mutRandr.
  var randr *rand.Rand

  // mutRandr serialises access to randr.
  var mutRandr sync.Mutex
  19. func init() {
  20. b := make([]byte, 4)
  21. if _, err := crand.Read(b); err != nil {
  22. panic(fmt.Sprintf("unable to seed random number generator: %v", err))
  23. }
  24. randr = rand.New(rand.NewSource(int64(readInt(b))))
  25. }
  26. // Ensure that the atomic variable is aligned to a 64bit boundary
  27. // so that atomic operations can be applied on 32bit architectures.
  28. type controlConn struct {
  29. session *Session
  30. conn atomic.Value
  31. retry RetryPolicy
  32. started int32
  33. quit chan struct{}
  34. }
  35. func createControlConn(session *Session) *controlConn {
  36. control := &controlConn{
  37. session: session,
  38. quit: make(chan struct{}),
  39. retry: &SimpleRetryPolicy{NumRetries: 3},
  40. }
  41. control.conn.Store((*Conn)(nil))
  42. return control
  43. }
  44. func (c *controlConn) heartBeat() {
  45. if !atomic.CompareAndSwapInt32(&c.started, 0, 1) {
  46. return
  47. }
  48. sleepTime := 1 * time.Second
  49. for {
  50. select {
  51. case <-c.quit:
  52. return
  53. case <-time.After(sleepTime):
  54. }
  55. resp, err := c.writeFrame(&writeOptionsFrame{})
  56. if err != nil {
  57. goto reconn
  58. }
  59. switch resp.(type) {
  60. case *supportedFrame:
  61. // Everything ok
  62. sleepTime = 5 * time.Second
  63. continue
  64. case error:
  65. goto reconn
  66. default:
  67. panic(fmt.Sprintf("gocql: unknown frame in response to options: %T", resp))
  68. }
  69. reconn:
  70. // try to connect a bit faster
  71. sleepTime = 1 * time.Second
  72. c.reconnect(true)
  73. // time.Sleep(5 * time.Second)
  74. continue
  75. }
  76. }
  // hostLookupPreferV4 makes hostInfo pick the first IPv4 address when a host
  // name resolves to several addresses. It is off by default.
  var hostLookupPreferV4 bool
  78. func hostInfo(addr string, defaultPort int) (*HostInfo, error) {
  79. var port int
  80. host, portStr, err := net.SplitHostPort(addr)
  81. if err != nil {
  82. host = addr
  83. port = defaultPort
  84. } else {
  85. port, err = strconv.Atoi(portStr)
  86. if err != nil {
  87. return nil, err
  88. }
  89. }
  90. ip := net.ParseIP(host)
  91. if ip == nil {
  92. ips, err := net.LookupIP(host)
  93. if err != nil {
  94. return nil, err
  95. } else if len(ips) == 0 {
  96. return nil, fmt.Errorf("No IP's returned from DNS lookup for %q", addr)
  97. }
  98. if hostLookupPreferV4 {
  99. for _, v := range ips {
  100. if v4 := v.To4(); v4 != nil {
  101. ip = v4
  102. break
  103. }
  104. }
  105. if ip == nil {
  106. ip = ips[0]
  107. }
  108. } else {
  109. // TODO(zariel): should we check that we can connect to any of the ips?
  110. ip = ips[0]
  111. }
  112. }
  113. return &HostInfo{connectAddress: ip, port: port}, nil
  114. }
  115. func shuffleHosts(hosts []*HostInfo) []*HostInfo {
  116. mutRandr.Lock()
  117. perm := randr.Perm(len(hosts))
  118. mutRandr.Unlock()
  119. shuffled := make([]*HostInfo, len(hosts))
  120. for i, host := range hosts {
  121. shuffled[perm[i]] = host
  122. }
  123. return shuffled
  124. }
  125. func (c *controlConn) shuffleDial(endpoints []*HostInfo) (*Conn, error) {
  126. // shuffle endpoints so not all drivers will connect to the same initial
  127. // node.
  128. shuffled := shuffleHosts(endpoints)
  129. var err error
  130. for _, host := range shuffled {
  131. var conn *Conn
  132. conn, err = c.session.connect(host, c)
  133. if err == nil {
  134. return conn, nil
  135. }
  136. Logger.Printf("gocql: unable to dial control conn %v: %v\n", host.ConnectAddress(), err)
  137. }
  138. return nil, err
  139. }
  // protocolSupportRe captures the greatest supported protocol version from
  // the tail of a server error message. The wording it matches is version
  // dependent, which makes this a nightmare to maintain :(
  var protocolSupportRe = regexp.MustCompile(`the lowest supported version is \d+ and the greatest is (\d+)$`)
  142. func parseProtocolFromError(err error) int {
  143. // I really wish this had the actual info in the error frame...
  144. matches := protocolSupportRe.FindAllStringSubmatch(err.Error(), -1)
  145. if len(matches) != 1 || len(matches[0]) != 2 {
  146. if verr, ok := err.(*protocolError); ok {
  147. return int(verr.frame.Header().version.version())
  148. }
  149. return 0
  150. }
  151. max, err := strconv.Atoi(matches[0][1])
  152. if err != nil {
  153. return 0
  154. }
  155. return max
  156. }
  157. func (c *controlConn) discoverProtocol(hosts []*HostInfo) (int, error) {
  158. hosts = shuffleHosts(hosts)
  159. connCfg := *c.session.connCfg
  160. connCfg.ProtoVersion = 4 // TODO: define maxProtocol
  161. handler := connErrorHandlerFn(func(c *Conn, err error, closed bool) {
  162. // we should never get here, but if we do it means we connected to a
  163. // host successfully which means our attempted protocol version worked
  164. })
  165. var err error
  166. for _, host := range hosts {
  167. var conn *Conn
  168. conn, err = Connect(host, &connCfg, handler, c.session)
  169. if err == nil {
  170. conn.Close()
  171. return connCfg.ProtoVersion, nil
  172. }
  173. if proto := parseProtocolFromError(err); proto > 0 {
  174. return proto, nil
  175. }
  176. }
  177. return 0, err
  178. }
  179. func (c *controlConn) connect(hosts []*HostInfo) error {
  180. if len(hosts) == 0 {
  181. return errors.New("control: no endpoints specified")
  182. }
  183. conn, err := c.shuffleDial(hosts)
  184. if err != nil {
  185. return fmt.Errorf("control: unable to connect to initial hosts: %v", err)
  186. }
  187. if err := c.setupConn(conn); err != nil {
  188. conn.Close()
  189. return fmt.Errorf("control: unable to setup connection: %v", err)
  190. }
  191. // we could fetch the initial ring here and update initial host data. So that
  192. // when we return from here we have a ring topology ready to go.
  193. go c.heartBeat()
  194. return nil
  195. }
  196. func (c *controlConn) setupConn(conn *Conn) error {
  197. if err := c.registerEvents(conn); err != nil {
  198. conn.Close()
  199. return err
  200. }
  201. c.conn.Store(conn)
  202. if v, ok := conn.conn.RemoteAddr().(*net.TCPAddr); ok {
  203. c.session.handleNodeUp(copyBytes(v.IP), v.Port, false)
  204. return nil
  205. }
  206. host, portstr, err := net.SplitHostPort(conn.conn.RemoteAddr().String())
  207. if err != nil {
  208. return err
  209. }
  210. port, err := strconv.Atoi(portstr)
  211. if err != nil {
  212. return err
  213. }
  214. ip := net.ParseIP(host)
  215. if ip == nil {
  216. return fmt.Errorf("invalid remote addr: addr=%v host=%q", conn.conn.RemoteAddr(), host)
  217. }
  218. c.session.handleNodeUp(ip, port, false)
  219. return nil
  220. }
  221. func (c *controlConn) registerEvents(conn *Conn) error {
  222. var events []string
  223. if !c.session.cfg.Events.DisableTopologyEvents {
  224. events = append(events, "TOPOLOGY_CHANGE")
  225. }
  226. if !c.session.cfg.Events.DisableNodeStatusEvents {
  227. events = append(events, "STATUS_CHANGE")
  228. }
  229. if !c.session.cfg.Events.DisableSchemaEvents {
  230. events = append(events, "SCHEMA_CHANGE")
  231. }
  232. if len(events) == 0 {
  233. return nil
  234. }
  235. framer, err := conn.exec(context.Background(),
  236. &writeRegisterFrame{
  237. events: events,
  238. }, nil)
  239. if err != nil {
  240. return err
  241. }
  242. frame, err := framer.parseFrame()
  243. if err != nil {
  244. return err
  245. } else if _, ok := frame.(*readyFrame); !ok {
  246. return fmt.Errorf("unexpected frame in response to register: got %T: %v\n", frame, frame)
  247. }
  248. return nil
  249. }
  250. func (c *controlConn) reconnect(refreshring bool) {
  251. // TODO: simplify this function, use session.ring to get hosts instead of the
  252. // connection pool
  253. var host *HostInfo
  254. oldConn := c.conn.Load().(*Conn)
  255. if oldConn != nil {
  256. host = oldConn.host
  257. oldConn.Close()
  258. }
  259. var newConn *Conn
  260. if host != nil {
  261. // try to connect to the old host
  262. conn, err := c.session.connect(host, c)
  263. if err != nil {
  264. // host is dead
  265. // TODO: this is replicated in a few places
  266. c.session.handleNodeDown(host.ConnectAddress(), host.Port())
  267. } else {
  268. newConn = conn
  269. }
  270. }
  271. // TODO: should have our own round-robin for hosts so that we can try each
  272. // in succession and guarantee that we get a different host each time.
  273. if newConn == nil {
  274. host := c.session.ring.rrHost()
  275. if host == nil {
  276. c.connect(c.session.ring.endpoints)
  277. return
  278. }
  279. var err error
  280. newConn, err = c.session.connect(host, c)
  281. if err != nil {
  282. // TODO: add log handler for things like this
  283. return
  284. }
  285. }
  286. if err := c.setupConn(newConn); err != nil {
  287. newConn.Close()
  288. Logger.Printf("gocql: control unable to register events: %v\n", err)
  289. return
  290. }
  291. if refreshring {
  292. c.session.hostSource.refreshRing()
  293. }
  294. }
  295. func (c *controlConn) HandleError(conn *Conn, err error, closed bool) {
  296. if !closed {
  297. return
  298. }
  299. oldConn := c.conn.Load().(*Conn)
  300. if oldConn != conn {
  301. return
  302. }
  303. c.reconnect(true)
  304. }
  305. func (c *controlConn) writeFrame(w frameWriter) (frame, error) {
  306. conn := c.conn.Load().(*Conn)
  307. if conn == nil {
  308. return nil, errNoControl
  309. }
  310. framer, err := conn.exec(context.Background(), w, nil)
  311. if err != nil {
  312. return nil, err
  313. }
  314. return framer.parseFrame()
  315. }
  316. func (c *controlConn) withConn(fn func(*Conn) *Iter) *Iter {
  317. const maxConnectAttempts = 5
  318. connectAttempts := 0
  319. for i := 0; i < maxConnectAttempts; i++ {
  320. conn := c.conn.Load().(*Conn)
  321. if conn == nil {
  322. if connectAttempts > maxConnectAttempts {
  323. break
  324. }
  325. connectAttempts++
  326. c.reconnect(false)
  327. continue
  328. }
  329. return fn(conn)
  330. }
  331. return &Iter{err: errNoControl}
  332. }
  333. // query will return nil if the connection is closed or nil
  334. func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter) {
  335. q := c.session.Query(statement, values...).Consistency(One).RoutingKey([]byte{}).Trace(nil)
  336. for {
  337. iter = c.withConn(func(conn *Conn) *Iter {
  338. return conn.executeQuery(q)
  339. })
  340. if gocqlDebug && iter.err != nil {
  341. Logger.Printf("control: error executing %q: %v\n", statement, iter.err)
  342. }
  343. q.attempts++
  344. if iter.err == nil || !c.retry.Attempt(q) {
  345. break
  346. }
  347. }
  348. return
  349. }
  350. func (c *controlConn) awaitSchemaAgreement() error {
  351. return c.withConn(func(conn *Conn) *Iter {
  352. return &Iter{err: conn.awaitSchemaAgreement()}
  353. }).err
  354. }
  355. func (c *controlConn) GetHostInfo() *HostInfo {
  356. conn := c.conn.Load().(*Conn)
  357. if conn == nil {
  358. return nil
  359. }
  360. return conn.host
  361. }
  362. func (c *controlConn) close() {
  363. if atomic.CompareAndSwapInt32(&c.started, 1, -1) {
  364. c.quit <- struct{}{}
  365. }
  366. conn := c.conn.Load().(*Conn)
  367. if conn != nil {
  368. conn.Close()
  369. }
  370. }
  // errNoControl is returned when an operation needs the control connection
  // but none is currently established.
  var errNoControl = errors.New("gocql: no control connection available")