control.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. package gocql
  2. import (
  3. crand "crypto/rand"
  4. "errors"
  5. "fmt"
  6. "log"
  7. "math/rand"
  8. "net"
  9. "regexp"
  10. "strconv"
  11. "sync/atomic"
  12. "time"
  13. "golang.org/x/net/context"
  14. )
  15. var (
  16. randr *rand.Rand
  17. )
  18. func init() {
  19. b := make([]byte, 4)
  20. if _, err := crand.Read(b); err != nil {
  21. panic(fmt.Sprintf("unable to seed random number generator: %v", err))
  22. }
  23. randr = rand.New(rand.NewSource(int64(readInt(b))))
  24. }
// controlConn holds the state of the session's control connection: the
// connection currently in use, the retry policy applied to control queries,
// and the lifecycle flags of the heartbeat loop.
//
// NOTE(review): an earlier comment here said the atomic variable must be
// aligned to a 64bit boundary for 32bit architectures. `started` is an int32
// and is not the first field, so that remark looks stale; confirm before
// relying on any particular field order.
type controlConn struct {
	// session is the Session this control connection belongs to.
	session *Session
	// conn always stores a *Conn; createControlConn seeds it with a typed nil
	// so Load().(*Conn) never fails the type assertion.
	conn atomic.Value
	// retry decides whether a failed control query is attempted again.
	retry RetryPolicy
	// started is 0 until heartBeat flips it to 1; close moves it from 1 to -1.
	started int32
	// quit is received by the heartbeat loop to stop; close sends on it.
	quit chan struct{}
}
  34. func createControlConn(session *Session) *controlConn {
  35. control := &controlConn{
  36. session: session,
  37. quit: make(chan struct{}),
  38. retry: &SimpleRetryPolicy{NumRetries: 3},
  39. }
  40. control.conn.Store((*Conn)(nil))
  41. return control
  42. }
  43. func (c *controlConn) heartBeat() {
  44. if !atomic.CompareAndSwapInt32(&c.started, 0, 1) {
  45. return
  46. }
  47. sleepTime := 1 * time.Second
  48. for {
  49. select {
  50. case <-c.quit:
  51. return
  52. case <-time.After(sleepTime):
  53. }
  54. resp, err := c.writeFrame(&writeOptionsFrame{})
  55. if err != nil {
  56. goto reconn
  57. }
  58. switch resp.(type) {
  59. case *supportedFrame:
  60. // Everything ok
  61. sleepTime = 5 * time.Second
  62. continue
  63. case error:
  64. goto reconn
  65. default:
  66. panic(fmt.Sprintf("gocql: unknown frame in response to options: %T", resp))
  67. }
  68. reconn:
  69. // try to connect a bit faster
  70. sleepTime = 1 * time.Second
  71. c.reconnect(true)
  72. // time.Sleep(5 * time.Second)
  73. continue
  74. }
  75. }
  76. var hostLookupPreferV4 = false
  77. func hostInfo(addr string, defaultPort int) (*HostInfo, error) {
  78. var port int
  79. host, portStr, err := net.SplitHostPort(addr)
  80. if err != nil {
  81. host = addr
  82. port = defaultPort
  83. } else {
  84. port, err = strconv.Atoi(portStr)
  85. if err != nil {
  86. return nil, err
  87. }
  88. }
  89. ip := net.ParseIP(host)
  90. if ip == nil {
  91. ips, err := net.LookupIP(host)
  92. if err != nil {
  93. return nil, err
  94. } else if len(ips) == 0 {
  95. return nil, fmt.Errorf("No IP's returned from DNS lookup for %q", addr)
  96. }
  97. if hostLookupPreferV4 {
  98. for _, v := range ips {
  99. if v4 := v.To4(); v4 != nil {
  100. ip = v4
  101. break
  102. }
  103. }
  104. if ip == nil {
  105. ip = ips[0]
  106. }
  107. } else {
  108. // TODO(zariel): should we check that we can connect to any of the ips?
  109. ip = ips[0]
  110. }
  111. }
  112. return &HostInfo{peer: ip, port: port}, nil
  113. }
  114. func shuffleHosts(hosts []*HostInfo) []*HostInfo {
  115. perm := randr.Perm(len(hosts))
  116. shuffled := make([]*HostInfo, len(hosts))
  117. for i, host := range hosts {
  118. shuffled[perm[i]] = host
  119. }
  120. return shuffled
  121. }
  122. func (c *controlConn) shuffleDial(endpoints []*HostInfo) (*Conn, error) {
  123. // shuffle endpoints so not all drivers will connect to the same initial
  124. // node.
  125. shuffled := shuffleHosts(endpoints)
  126. var err error
  127. for _, host := range shuffled {
  128. var conn *Conn
  129. conn, err = c.session.connect(host, c)
  130. if err == nil {
  131. return conn, nil
  132. }
  133. log.Printf("gocql: unable to dial control conn %v: %v\n", host.Peer(), err)
  134. }
  135. return nil, err
  136. }
  137. // this is going to be version dependant and a nightmare to maintain :(
  138. var protocolSupportRe = regexp.MustCompile(`the lowest supported version is \d+ and the greatest is (\d+)$`)
  139. func parseProtocolFromError(err error) int {
  140. // I really wish this had the actual info in the error frame...
  141. matches := protocolSupportRe.FindAllStringSubmatch(err.Error(), -1)
  142. if len(matches) != 1 || len(matches[0]) != 2 {
  143. if verr, ok := err.(*protocolError); ok {
  144. return int(verr.frame.Header().version.version())
  145. }
  146. return 0
  147. }
  148. max, err := strconv.Atoi(matches[0][1])
  149. if err != nil {
  150. return 0
  151. }
  152. return max
  153. }
  154. func (c *controlConn) discoverProtocol(hosts []*HostInfo) (int, error) {
  155. hosts = shuffleHosts(hosts)
  156. connCfg := *c.session.connCfg
  157. connCfg.ProtoVersion = 4 // TODO: define maxProtocol
  158. handler := connErrorHandlerFn(func(c *Conn, err error, closed bool) {
  159. // we should never get here, but if we do it means we connected to a
  160. // host successfully which means our attempted protocol version worked
  161. })
  162. var err error
  163. for _, host := range hosts {
  164. var conn *Conn
  165. conn, err = Connect(host, &connCfg, handler, c.session)
  166. if err == nil {
  167. conn.Close()
  168. return connCfg.ProtoVersion, nil
  169. }
  170. if proto := parseProtocolFromError(err); proto > 0 {
  171. return proto, nil
  172. }
  173. }
  174. return 0, err
  175. }
  176. func (c *controlConn) connect(hosts []*HostInfo) error {
  177. if len(hosts) == 0 {
  178. return errors.New("control: no endpoints specified")
  179. }
  180. conn, err := c.shuffleDial(hosts)
  181. if err != nil {
  182. return fmt.Errorf("control: unable to connect to initial hosts: %v", err)
  183. }
  184. if err := c.setupConn(conn); err != nil {
  185. conn.Close()
  186. return fmt.Errorf("control: unable to setup connection: %v", err)
  187. }
  188. // we could fetch the initial ring here and update initial host data. So that
  189. // when we return from here we have a ring topology ready to go.
  190. go c.heartBeat()
  191. return nil
  192. }
  193. func (c *controlConn) setupConn(conn *Conn) error {
  194. if err := c.registerEvents(conn); err != nil {
  195. conn.Close()
  196. return err
  197. }
  198. c.conn.Store(conn)
  199. host, portstr, err := net.SplitHostPort(conn.conn.RemoteAddr().String())
  200. if err != nil {
  201. return err
  202. }
  203. port, err := strconv.Atoi(portstr)
  204. if err != nil {
  205. return err
  206. }
  207. c.session.handleNodeUp(net.ParseIP(host), port, false)
  208. return nil
  209. }
  210. func (c *controlConn) registerEvents(conn *Conn) error {
  211. var events []string
  212. if !c.session.cfg.Events.DisableTopologyEvents {
  213. events = append(events, "TOPOLOGY_CHANGE")
  214. }
  215. if !c.session.cfg.Events.DisableNodeStatusEvents {
  216. events = append(events, "STATUS_CHANGE")
  217. }
  218. if !c.session.cfg.Events.DisableSchemaEvents {
  219. events = append(events, "SCHEMA_CHANGE")
  220. }
  221. if len(events) == 0 {
  222. return nil
  223. }
  224. framer, err := conn.exec(context.Background(),
  225. &writeRegisterFrame{
  226. events: events,
  227. }, nil)
  228. if err != nil {
  229. return err
  230. }
  231. frame, err := framer.parseFrame()
  232. if err != nil {
  233. return err
  234. } else if _, ok := frame.(*readyFrame); !ok {
  235. return fmt.Errorf("unexpected frame in response to register: got %T: %v\n", frame, frame)
  236. }
  237. return nil
  238. }
  239. func (c *controlConn) reconnect(refreshring bool) {
  240. // TODO: simplify this function, use session.ring to get hosts instead of the
  241. // connection pool
  242. var host *HostInfo
  243. oldConn := c.conn.Load().(*Conn)
  244. if oldConn != nil {
  245. host = oldConn.host
  246. oldConn.Close()
  247. }
  248. var newConn *Conn
  249. if host != nil {
  250. // try to connect to the old host
  251. conn, err := c.session.connect(host, c)
  252. if err != nil {
  253. // host is dead
  254. // TODO: this is replicated in a few places
  255. c.session.handleNodeDown(host.Peer(), host.Port())
  256. } else {
  257. newConn = conn
  258. }
  259. }
  260. // TODO: should have our own round-robin for hosts so that we can try each
  261. // in succession and guarantee that we get a different host each time.
  262. if newConn == nil {
  263. host := c.session.ring.rrHost()
  264. if host == nil {
  265. c.connect(c.session.ring.endpoints)
  266. return
  267. }
  268. var err error
  269. newConn, err = c.session.connect(host, c)
  270. if err != nil {
  271. // TODO: add log handler for things like this
  272. return
  273. }
  274. }
  275. if err := c.setupConn(newConn); err != nil {
  276. newConn.Close()
  277. log.Printf("gocql: control unable to register events: %v\n", err)
  278. return
  279. }
  280. if refreshring {
  281. c.session.hostSource.refreshRing()
  282. }
  283. }
  284. func (c *controlConn) HandleError(conn *Conn, err error, closed bool) {
  285. if !closed {
  286. return
  287. }
  288. oldConn := c.conn.Load().(*Conn)
  289. if oldConn != conn {
  290. return
  291. }
  292. c.reconnect(true)
  293. }
  294. func (c *controlConn) writeFrame(w frameWriter) (frame, error) {
  295. conn := c.conn.Load().(*Conn)
  296. if conn == nil {
  297. return nil, errNoControl
  298. }
  299. framer, err := conn.exec(context.Background(), w, nil)
  300. if err != nil {
  301. return nil, err
  302. }
  303. return framer.parseFrame()
  304. }
  305. func (c *controlConn) withConn(fn func(*Conn) *Iter) *Iter {
  306. const maxConnectAttempts = 5
  307. connectAttempts := 0
  308. for i := 0; i < maxConnectAttempts; i++ {
  309. conn := c.conn.Load().(*Conn)
  310. if conn == nil {
  311. if connectAttempts > maxConnectAttempts {
  312. break
  313. }
  314. connectAttempts++
  315. c.reconnect(false)
  316. continue
  317. }
  318. return fn(conn)
  319. }
  320. return &Iter{err: errNoControl}
  321. }
  322. // query will return nil if the connection is closed or nil
  323. func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter) {
  324. q := c.session.Query(statement, values...).Consistency(One).RoutingKey([]byte{})
  325. for {
  326. iter = c.withConn(func(conn *Conn) *Iter {
  327. return conn.executeQuery(q)
  328. })
  329. if gocqlDebug && iter.err != nil {
  330. log.Printf("control: error executing %q: %v\n", statement, iter.err)
  331. }
  332. q.attempts++
  333. if iter.err == nil || !c.retry.Attempt(q) {
  334. break
  335. }
  336. }
  337. return
  338. }
  339. func (c *controlConn) fetchHostInfo(ip net.IP, port int) (*HostInfo, error) {
  340. // TODO(zariel): we should probably move this into host_source or atleast
  341. // share code with it.
  342. localHost := c.host()
  343. if localHost == nil {
  344. return nil, errors.New("unable to fetch host info, invalid conn host")
  345. }
  346. isLocal := localHost.Peer().Equal(ip)
  347. var fn func(*HostInfo) error
  348. // TODO(zariel): fetch preferred_ip address (is it >3.x only?)
  349. if isLocal {
  350. fn = func(host *HostInfo) error {
  351. iter := c.query("SELECT data_center, rack, host_id, tokens, release_version FROM system.local WHERE key='local'")
  352. iter.Scan(&host.dataCenter, &host.rack, &host.hostId, &host.tokens, &host.version)
  353. return iter.Close()
  354. }
  355. } else {
  356. fn = func(host *HostInfo) error {
  357. iter := c.query("SELECT data_center, rack, host_id, tokens, release_version FROM system.peers WHERE peer=?", ip)
  358. iter.Scan(&host.dataCenter, &host.rack, &host.hostId, &host.tokens, &host.version)
  359. return iter.Close()
  360. }
  361. }
  362. host := &HostInfo{
  363. port: port,
  364. peer: ip,
  365. }
  366. if err := fn(host); err != nil {
  367. return nil, err
  368. }
  369. return host, nil
  370. }
  371. func (c *controlConn) awaitSchemaAgreement() error {
  372. return c.withConn(func(conn *Conn) *Iter {
  373. return &Iter{err: conn.awaitSchemaAgreement()}
  374. }).err
  375. }
  376. func (c *controlConn) host() *HostInfo {
  377. conn := c.conn.Load().(*Conn)
  378. if conn == nil {
  379. return nil
  380. }
  381. return conn.host
  382. }
  383. func (c *controlConn) close() {
  384. if atomic.CompareAndSwapInt32(&c.started, 1, -1) {
  385. c.quit <- struct{}{}
  386. }
  387. conn := c.conn.Load().(*Conn)
  388. if conn != nil {
  389. conn.Close()
  390. }
  391. }
// errNoControl is returned by writeFrame, and carried in the Iter returned by
// withConn, when no control connection is available.
var errNoControl = errors.New("gocql: no control connection available")