control.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. package gocql
  2. import (
  3. crand "crypto/rand"
  4. "errors"
  5. "fmt"
  6. "math/rand"
  7. "net"
  8. "regexp"
  9. "strconv"
  10. "sync/atomic"
  11. "time"
  12. "golang.org/x/net/context"
  13. )
var (
	// randr is the package-level pseudo-random source. It is assigned once in
	// init (seeded from crypto/rand) and used by shuffleHosts to permute hosts.
	randr *rand.Rand
)
  17. func init() {
  18. b := make([]byte, 4)
  19. if _, err := crand.Read(b); err != nil {
  20. panic(fmt.Sprintf("unable to seed random number generator: %v", err))
  21. }
  22. randr = rand.New(rand.NewSource(int64(readInt(b))))
  23. }
// controlConn tracks the connection a Session uses for control traffic: the
// heartbeat loop in heartBeat, event registration in registerEvents and the
// statements issued through query.
//
// NOTE(review): the previous comment here asked for an atomic variable to be
// aligned to a 64bit boundary for 32bit architectures, but the only integer
// accessed atomically in this file is the 32bit started field. Confirm
// whether that requirement still applies.
type controlConn struct {
	session *Session

	// conn always holds a *Conn, possibly nil: createControlConn stores a
	// typed nil so that Load().(*Conn) never panics on an empty Value.
	conn atomic.Value

	// retry decides whether query re-runs a failed statement.
	retry RetryPolicy

	// started is 0 until heartBeat claims it (0 -> 1); close moves it from
	// 1 to -1.
	started int32
	// quit is sent on by close to stop the heartBeat loop.
	quit chan struct{}
}
  33. func createControlConn(session *Session) *controlConn {
  34. control := &controlConn{
  35. session: session,
  36. quit: make(chan struct{}),
  37. retry: &SimpleRetryPolicy{NumRetries: 3},
  38. }
  39. control.conn.Store((*Conn)(nil))
  40. return control
  41. }
  42. func (c *controlConn) heartBeat() {
  43. if !atomic.CompareAndSwapInt32(&c.started, 0, 1) {
  44. return
  45. }
  46. sleepTime := 1 * time.Second
  47. for {
  48. select {
  49. case <-c.quit:
  50. return
  51. case <-time.After(sleepTime):
  52. }
  53. resp, err := c.writeFrame(&writeOptionsFrame{})
  54. if err != nil {
  55. goto reconn
  56. }
  57. switch resp.(type) {
  58. case *supportedFrame:
  59. // Everything ok
  60. sleepTime = 5 * time.Second
  61. continue
  62. case error:
  63. goto reconn
  64. default:
  65. panic(fmt.Sprintf("gocql: unknown frame in response to options: %T", resp))
  66. }
  67. reconn:
  68. // try to connect a bit faster
  69. sleepTime = 1 * time.Second
  70. c.reconnect(true)
  71. // time.Sleep(5 * time.Second)
  72. continue
  73. }
  74. }
// hostLookupPreferV4 controls which address hostInfo picks after a DNS
// lookup: when true the first IPv4 address is used (falling back to the first
// address returned if none is IPv4); when false the first address returned is
// used as is.
var hostLookupPreferV4 = false
  76. func hostInfo(addr string, defaultPort int) (*HostInfo, error) {
  77. var port int
  78. host, portStr, err := net.SplitHostPort(addr)
  79. if err != nil {
  80. host = addr
  81. port = defaultPort
  82. } else {
  83. port, err = strconv.Atoi(portStr)
  84. if err != nil {
  85. return nil, err
  86. }
  87. }
  88. ip := net.ParseIP(host)
  89. if ip == nil {
  90. ips, err := net.LookupIP(host)
  91. if err != nil {
  92. return nil, err
  93. } else if len(ips) == 0 {
  94. return nil, fmt.Errorf("No IP's returned from DNS lookup for %q", addr)
  95. }
  96. if hostLookupPreferV4 {
  97. for _, v := range ips {
  98. if v4 := v.To4(); v4 != nil {
  99. ip = v4
  100. break
  101. }
  102. }
  103. if ip == nil {
  104. ip = ips[0]
  105. }
  106. } else {
  107. // TODO(zariel): should we check that we can connect to any of the ips?
  108. ip = ips[0]
  109. }
  110. }
  111. return &HostInfo{peer: ip, port: port}, nil
  112. }
  113. func shuffleHosts(hosts []*HostInfo) []*HostInfo {
  114. perm := randr.Perm(len(hosts))
  115. shuffled := make([]*HostInfo, len(hosts))
  116. for i, host := range hosts {
  117. shuffled[perm[i]] = host
  118. }
  119. return shuffled
  120. }
  121. func (c *controlConn) shuffleDial(endpoints []*HostInfo) (*Conn, error) {
  122. // shuffle endpoints so not all drivers will connect to the same initial
  123. // node.
  124. shuffled := shuffleHosts(endpoints)
  125. var err error
  126. for _, host := range shuffled {
  127. var conn *Conn
  128. conn, err = c.session.connect(host, c)
  129. if err == nil {
  130. return conn, nil
  131. }
  132. Logger.Printf("gocql: unable to dial control conn %v: %v\n", host.Peer(), err)
  133. }
  134. return nil, err
  135. }
  136. // this is going to be version dependant and a nightmare to maintain :(
  137. var protocolSupportRe = regexp.MustCompile(`the lowest supported version is \d+ and the greatest is (\d+)$`)
  138. func parseProtocolFromError(err error) int {
  139. // I really wish this had the actual info in the error frame...
  140. matches := protocolSupportRe.FindAllStringSubmatch(err.Error(), -1)
  141. if len(matches) != 1 || len(matches[0]) != 2 {
  142. if verr, ok := err.(*protocolError); ok {
  143. return int(verr.frame.Header().version.version())
  144. }
  145. return 0
  146. }
  147. max, err := strconv.Atoi(matches[0][1])
  148. if err != nil {
  149. return 0
  150. }
  151. return max
  152. }
  153. func (c *controlConn) discoverProtocol(hosts []*HostInfo) (int, error) {
  154. hosts = shuffleHosts(hosts)
  155. connCfg := *c.session.connCfg
  156. connCfg.ProtoVersion = 4 // TODO: define maxProtocol
  157. handler := connErrorHandlerFn(func(c *Conn, err error, closed bool) {
  158. // we should never get here, but if we do it means we connected to a
  159. // host successfully which means our attempted protocol version worked
  160. })
  161. var err error
  162. for _, host := range hosts {
  163. var conn *Conn
  164. conn, err = Connect(host, &connCfg, handler, c.session)
  165. if err == nil {
  166. conn.Close()
  167. return connCfg.ProtoVersion, nil
  168. }
  169. if proto := parseProtocolFromError(err); proto > 0 {
  170. return proto, nil
  171. }
  172. }
  173. return 0, err
  174. }
  175. func (c *controlConn) connect(hosts []*HostInfo) error {
  176. if len(hosts) == 0 {
  177. return errors.New("control: no endpoints specified")
  178. }
  179. conn, err := c.shuffleDial(hosts)
  180. if err != nil {
  181. return fmt.Errorf("control: unable to connect to initial hosts: %v", err)
  182. }
  183. if err := c.setupConn(conn); err != nil {
  184. conn.Close()
  185. return fmt.Errorf("control: unable to setup connection: %v", err)
  186. }
  187. // we could fetch the initial ring here and update initial host data. So that
  188. // when we return from here we have a ring topology ready to go.
  189. go c.heartBeat()
  190. return nil
  191. }
  192. func (c *controlConn) setupConn(conn *Conn) error {
  193. if err := c.registerEvents(conn); err != nil {
  194. conn.Close()
  195. return err
  196. }
  197. c.conn.Store(conn)
  198. host, portstr, err := net.SplitHostPort(conn.conn.RemoteAddr().String())
  199. if err != nil {
  200. return err
  201. }
  202. port, err := strconv.Atoi(portstr)
  203. if err != nil {
  204. return err
  205. }
  206. c.session.handleNodeUp(net.ParseIP(host), port, false)
  207. return nil
  208. }
  209. func (c *controlConn) registerEvents(conn *Conn) error {
  210. var events []string
  211. if !c.session.cfg.Events.DisableTopologyEvents {
  212. events = append(events, "TOPOLOGY_CHANGE")
  213. }
  214. if !c.session.cfg.Events.DisableNodeStatusEvents {
  215. events = append(events, "STATUS_CHANGE")
  216. }
  217. if !c.session.cfg.Events.DisableSchemaEvents {
  218. events = append(events, "SCHEMA_CHANGE")
  219. }
  220. if len(events) == 0 {
  221. return nil
  222. }
  223. framer, err := conn.exec(context.Background(),
  224. &writeRegisterFrame{
  225. events: events,
  226. }, nil)
  227. if err != nil {
  228. return err
  229. }
  230. frame, err := framer.parseFrame()
  231. if err != nil {
  232. return err
  233. } else if _, ok := frame.(*readyFrame); !ok {
  234. return fmt.Errorf("unexpected frame in response to register: got %T: %v\n", frame, frame)
  235. }
  236. return nil
  237. }
  238. func (c *controlConn) reconnect(refreshring bool) {
  239. // TODO: simplify this function, use session.ring to get hosts instead of the
  240. // connection pool
  241. var host *HostInfo
  242. oldConn := c.conn.Load().(*Conn)
  243. if oldConn != nil {
  244. host = oldConn.host
  245. oldConn.Close()
  246. }
  247. var newConn *Conn
  248. if host != nil {
  249. // try to connect to the old host
  250. conn, err := c.session.connect(host, c)
  251. if err != nil {
  252. // host is dead
  253. // TODO: this is replicated in a few places
  254. c.session.handleNodeDown(host.Peer(), host.Port())
  255. } else {
  256. newConn = conn
  257. }
  258. }
  259. // TODO: should have our own round-robin for hosts so that we can try each
  260. // in succession and guarantee that we get a different host each time.
  261. if newConn == nil {
  262. host := c.session.ring.rrHost()
  263. if host == nil {
  264. c.connect(c.session.ring.endpoints)
  265. return
  266. }
  267. var err error
  268. newConn, err = c.session.connect(host, c)
  269. if err != nil {
  270. // TODO: add log handler for things like this
  271. return
  272. }
  273. }
  274. if err := c.setupConn(newConn); err != nil {
  275. newConn.Close()
  276. Logger.Printf("gocql: control unable to register events: %v\n", err)
  277. return
  278. }
  279. if refreshring {
  280. c.session.hostSource.refreshRing()
  281. }
  282. }
  283. func (c *controlConn) HandleError(conn *Conn, err error, closed bool) {
  284. if !closed {
  285. return
  286. }
  287. oldConn := c.conn.Load().(*Conn)
  288. if oldConn != conn {
  289. return
  290. }
  291. c.reconnect(true)
  292. }
  293. func (c *controlConn) writeFrame(w frameWriter) (frame, error) {
  294. conn := c.conn.Load().(*Conn)
  295. if conn == nil {
  296. return nil, errNoControl
  297. }
  298. framer, err := conn.exec(context.Background(), w, nil)
  299. if err != nil {
  300. return nil, err
  301. }
  302. return framer.parseFrame()
  303. }
  304. func (c *controlConn) withConn(fn func(*Conn) *Iter) *Iter {
  305. const maxConnectAttempts = 5
  306. connectAttempts := 0
  307. for i := 0; i < maxConnectAttempts; i++ {
  308. conn := c.conn.Load().(*Conn)
  309. if conn == nil {
  310. if connectAttempts > maxConnectAttempts {
  311. break
  312. }
  313. connectAttempts++
  314. c.reconnect(false)
  315. continue
  316. }
  317. return fn(conn)
  318. }
  319. return &Iter{err: errNoControl}
  320. }
  321. // query will return nil if the connection is closed or nil
  322. func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter) {
  323. q := c.session.Query(statement, values...).Consistency(One).RoutingKey([]byte{})
  324. for {
  325. iter = c.withConn(func(conn *Conn) *Iter {
  326. return conn.executeQuery(q)
  327. })
  328. if gocqlDebug && iter.err != nil {
  329. Logger.Printf("control: error executing %q: %v\n", statement, iter.err)
  330. }
  331. q.attempts++
  332. if iter.err == nil || !c.retry.Attempt(q) {
  333. break
  334. }
  335. }
  336. return
  337. }
  338. func (c *controlConn) fetchHostInfo(ip net.IP, port int) (*HostInfo, error) {
  339. // TODO(zariel): we should probably move this into host_source or atleast
  340. // share code with it.
  341. localHost := c.host()
  342. if localHost == nil {
  343. return nil, errors.New("unable to fetch host info, invalid conn host")
  344. }
  345. isLocal := localHost.Peer().Equal(ip)
  346. var fn func(*HostInfo) error
  347. // TODO(zariel): fetch preferred_ip address (is it >3.x only?)
  348. if isLocal {
  349. fn = func(host *HostInfo) error {
  350. iter := c.query("SELECT data_center, rack, host_id, tokens, release_version FROM system.local WHERE key='local'")
  351. iter.Scan(&host.dataCenter, &host.rack, &host.hostId, &host.tokens, &host.version)
  352. return iter.Close()
  353. }
  354. } else {
  355. fn = func(host *HostInfo) error {
  356. iter := c.query("SELECT data_center, rack, host_id, tokens, release_version FROM system.peers WHERE peer=?", ip)
  357. iter.Scan(&host.dataCenter, &host.rack, &host.hostId, &host.tokens, &host.version)
  358. return iter.Close()
  359. }
  360. }
  361. host := &HostInfo{
  362. port: port,
  363. peer: ip,
  364. }
  365. if err := fn(host); err != nil {
  366. return nil, err
  367. }
  368. return host, nil
  369. }
  370. func (c *controlConn) awaitSchemaAgreement() error {
  371. return c.withConn(func(conn *Conn) *Iter {
  372. return &Iter{err: conn.awaitSchemaAgreement()}
  373. }).err
  374. }
  375. func (c *controlConn) host() *HostInfo {
  376. conn := c.conn.Load().(*Conn)
  377. if conn == nil {
  378. return nil
  379. }
  380. return conn.host
  381. }
  382. func (c *controlConn) close() {
  383. if atomic.CompareAndSwapInt32(&c.started, 1, -1) {
  384. c.quit <- struct{}{}
  385. }
  386. conn := c.conn.Load().(*Conn)
  387. if conn != nil {
  388. conn.Close()
  389. }
  390. }
// errNoControl is returned by writeFrame, and carried in the Iter returned by
// withConn, when there is no control connection to use.
var errNoControl = errors.New("gocql: no control connection available")