control.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. package gocql
  2. import (
  3. "context"
  4. crand "crypto/rand"
  5. "errors"
  6. "fmt"
  7. "math/rand"
  8. "net"
  9. "regexp"
  10. "strconv"
  11. "sync"
  12. "sync/atomic"
  13. "time"
  14. )
  15. var (
  16. randr *rand.Rand
  17. mutRandr sync.Mutex
  18. )
  19. func init() {
  20. b := make([]byte, 4)
  21. if _, err := crand.Read(b); err != nil {
  22. panic(fmt.Sprintf("unable to seed random number generator: %v", err))
  23. }
  24. randr = rand.New(rand.NewSource(int64(readInt(b))))
  25. }
  26. // Ensure that the atomic variable is aligned to a 64bit boundary
  27. // so that atomic operations can be applied on 32bit architectures.
  28. type controlConn struct {
  29. started int32
  30. reconnecting int32
  31. session *Session
  32. conn atomic.Value
  33. retry RetryPolicy
  34. quit chan struct{}
  35. }
  36. func createControlConn(session *Session) *controlConn {
  37. control := &controlConn{
  38. session: session,
  39. quit: make(chan struct{}),
  40. retry: &SimpleRetryPolicy{NumRetries: 3},
  41. }
  42. control.conn.Store((*connHost)(nil))
  43. return control
  44. }
  45. func (c *controlConn) heartBeat() {
  46. if !atomic.CompareAndSwapInt32(&c.started, 0, 1) {
  47. return
  48. }
  49. sleepTime := 1 * time.Second
  50. for {
  51. select {
  52. case <-c.quit:
  53. return
  54. case <-time.After(sleepTime):
  55. }
  56. resp, err := c.writeFrame(&writeOptionsFrame{})
  57. if err != nil {
  58. goto reconn
  59. }
  60. switch resp.(type) {
  61. case *supportedFrame:
  62. // Everything ok
  63. sleepTime = 5 * time.Second
  64. continue
  65. case error:
  66. goto reconn
  67. default:
  68. panic(fmt.Sprintf("gocql: unknown frame in response to options: %T", resp))
  69. }
  70. reconn:
  71. // try to connect a bit faster
  72. sleepTime = 1 * time.Second
  73. c.reconnect(true)
  74. // time.Sleep(5 * time.Second)
  75. continue
  76. }
  77. }
  78. var hostLookupPreferV4 = false
  79. func hostInfo(addr string, defaultPort int) (*HostInfo, error) {
  80. var port int
  81. host, portStr, err := net.SplitHostPort(addr)
  82. if err != nil {
  83. host = addr
  84. port = defaultPort
  85. } else {
  86. port, err = strconv.Atoi(portStr)
  87. if err != nil {
  88. return nil, err
  89. }
  90. }
  91. ip := net.ParseIP(host)
  92. if ip == nil {
  93. ips, err := net.LookupIP(host)
  94. if err != nil {
  95. return nil, err
  96. } else if len(ips) == 0 {
  97. return nil, fmt.Errorf("No IP's returned from DNS lookup for %q", addr)
  98. }
  99. if hostLookupPreferV4 {
  100. for _, v := range ips {
  101. if v4 := v.To4(); v4 != nil {
  102. ip = v4
  103. break
  104. }
  105. }
  106. if ip == nil {
  107. ip = ips[0]
  108. }
  109. } else {
  110. // TODO(zariel): should we check that we can connect to any of the ips?
  111. ip = ips[0]
  112. }
  113. }
  114. return &HostInfo{connectAddress: ip, port: port}, nil
  115. }
  116. func shuffleHosts(hosts []*HostInfo) []*HostInfo {
  117. mutRandr.Lock()
  118. perm := randr.Perm(len(hosts))
  119. mutRandr.Unlock()
  120. shuffled := make([]*HostInfo, len(hosts))
  121. for i, host := range hosts {
  122. shuffled[perm[i]] = host
  123. }
  124. return shuffled
  125. }
  126. func (c *controlConn) shuffleDial(endpoints []*HostInfo) (*Conn, error) {
  127. // shuffle endpoints so not all drivers will connect to the same initial
  128. // node.
  129. shuffled := shuffleHosts(endpoints)
  130. var err error
  131. for _, host := range shuffled {
  132. var conn *Conn
  133. conn, err = c.session.connect(host, c)
  134. if err == nil {
  135. return conn, nil
  136. }
  137. Logger.Printf("gocql: unable to dial control conn %v: %v\n", host.ConnectAddress(), err)
  138. }
  139. return nil, err
  140. }
  141. // this is going to be version dependant and a nightmare to maintain :(
  142. var protocolSupportRe = regexp.MustCompile(`the lowest supported version is \d+ and the greatest is (\d+)$`)
  143. func parseProtocolFromError(err error) int {
  144. // I really wish this had the actual info in the error frame...
  145. matches := protocolSupportRe.FindAllStringSubmatch(err.Error(), -1)
  146. if len(matches) != 1 || len(matches[0]) != 2 {
  147. if verr, ok := err.(*protocolError); ok {
  148. return int(verr.frame.Header().version.version())
  149. }
  150. return 0
  151. }
  152. max, err := strconv.Atoi(matches[0][1])
  153. if err != nil {
  154. return 0
  155. }
  156. return max
  157. }
  158. func (c *controlConn) discoverProtocol(hosts []*HostInfo) (int, error) {
  159. hosts = shuffleHosts(hosts)
  160. connCfg := *c.session.connCfg
  161. connCfg.ProtoVersion = 4 // TODO: define maxProtocol
  162. handler := connErrorHandlerFn(func(c *Conn, err error, closed bool) {
  163. // we should never get here, but if we do it means we connected to a
  164. // host successfully which means our attempted protocol version worked
  165. if !closed {
  166. c.Close()
  167. }
  168. })
  169. var err error
  170. for _, host := range hosts {
  171. var conn *Conn
  172. conn, err = c.session.dial(host.ConnectAddress(), host.Port(), &connCfg, handler)
  173. if conn != nil {
  174. conn.Close()
  175. }
  176. if err == nil {
  177. return connCfg.ProtoVersion, nil
  178. }
  179. if proto := parseProtocolFromError(err); proto > 0 {
  180. return proto, nil
  181. }
  182. }
  183. return 0, err
  184. }
  185. func (c *controlConn) connect(hosts []*HostInfo) error {
  186. if len(hosts) == 0 {
  187. return errors.New("control: no endpoints specified")
  188. }
  189. conn, err := c.shuffleDial(hosts)
  190. if err != nil {
  191. return fmt.Errorf("control: unable to connect to initial hosts: %v", err)
  192. }
  193. if err := c.setupConn(conn); err != nil {
  194. conn.Close()
  195. return fmt.Errorf("control: unable to setup connection: %v", err)
  196. }
  197. // we could fetch the initial ring here and update initial host data. So that
  198. // when we return from here we have a ring topology ready to go.
  199. go c.heartBeat()
  200. return nil
  201. }
  202. type connHost struct {
  203. conn *Conn
  204. host *HostInfo
  205. }
  206. func (c *controlConn) setupConn(conn *Conn) error {
  207. if err := c.registerEvents(conn); err != nil {
  208. conn.Close()
  209. return err
  210. }
  211. // TODO(zariel): do we need to fetch host info everytime
  212. // the control conn connects? Surely we have it cached?
  213. host, err := conn.localHostInfo()
  214. if err != nil {
  215. return err
  216. }
  217. ch := &connHost{
  218. conn: conn,
  219. host: host,
  220. }
  221. c.conn.Store(ch)
  222. // c.session.handleNodeUp(host.ConnectAddress(), host.Port(), false)
  223. return nil
  224. }
  225. func (c *controlConn) registerEvents(conn *Conn) error {
  226. var events []string
  227. if !c.session.cfg.Events.DisableTopologyEvents {
  228. events = append(events, "TOPOLOGY_CHANGE")
  229. }
  230. if !c.session.cfg.Events.DisableNodeStatusEvents {
  231. events = append(events, "STATUS_CHANGE")
  232. }
  233. if !c.session.cfg.Events.DisableSchemaEvents {
  234. events = append(events, "SCHEMA_CHANGE")
  235. }
  236. if len(events) == 0 {
  237. return nil
  238. }
  239. framer, err := conn.exec(context.Background(),
  240. &writeRegisterFrame{
  241. events: events,
  242. }, nil)
  243. if err != nil {
  244. return err
  245. }
  246. frame, err := framer.parseFrame()
  247. if err != nil {
  248. return err
  249. } else if _, ok := frame.(*readyFrame); !ok {
  250. return fmt.Errorf("unexpected frame in response to register: got %T: %v\n", frame, frame)
  251. }
  252. return nil
  253. }
  254. func (c *controlConn) reconnect(refreshring bool) {
  255. if !atomic.CompareAndSwapInt32(&c.reconnecting, 0, 1) {
  256. return
  257. }
  258. defer atomic.StoreInt32(&c.reconnecting, 0)
  259. // TODO: simplify this function, use session.ring to get hosts instead of the
  260. // connection pool
  261. var host *HostInfo
  262. ch := c.getConn()
  263. if ch != nil {
  264. host = ch.host
  265. ch.conn.Close()
  266. }
  267. var newConn *Conn
  268. if host != nil {
  269. // try to connect to the old host
  270. conn, err := c.session.connect(host, c)
  271. if err != nil {
  272. // host is dead
  273. // TODO: this is replicated in a few places
  274. c.session.handleNodeDown(host.ConnectAddress(), host.Port())
  275. } else {
  276. newConn = conn
  277. }
  278. }
  279. // TODO: should have our own round-robin for hosts so that we can try each
  280. // in succession and guarantee that we get a different host each time.
  281. if newConn == nil {
  282. host := c.session.ring.rrHost()
  283. if host == nil {
  284. c.connect(c.session.ring.endpoints)
  285. return
  286. }
  287. var err error
  288. newConn, err = c.session.connect(host, c)
  289. if err != nil {
  290. // TODO: add log handler for things like this
  291. return
  292. }
  293. }
  294. if err := c.setupConn(newConn); err != nil {
  295. newConn.Close()
  296. Logger.Printf("gocql: control unable to register events: %v\n", err)
  297. return
  298. }
  299. if refreshring {
  300. c.session.hostSource.refreshRing()
  301. }
  302. }
  303. func (c *controlConn) HandleError(conn *Conn, err error, closed bool) {
  304. if !closed {
  305. return
  306. }
  307. oldConn := c.getConn()
  308. if oldConn.conn != conn {
  309. return
  310. }
  311. c.reconnect(true)
  312. }
  313. func (c *controlConn) getConn() *connHost {
  314. return c.conn.Load().(*connHost)
  315. }
  316. func (c *controlConn) writeFrame(w frameWriter) (frame, error) {
  317. ch := c.getConn()
  318. if ch == nil {
  319. return nil, errNoControl
  320. }
  321. framer, err := ch.conn.exec(context.Background(), w, nil)
  322. if err != nil {
  323. return nil, err
  324. }
  325. return framer.parseFrame()
  326. }
  327. func (c *controlConn) withConnHost(fn func(*connHost) *Iter) *Iter {
  328. const maxConnectAttempts = 5
  329. connectAttempts := 0
  330. for i := 0; i < maxConnectAttempts; i++ {
  331. ch := c.getConn()
  332. if ch == nil {
  333. if connectAttempts > maxConnectAttempts {
  334. break
  335. }
  336. connectAttempts++
  337. c.reconnect(false)
  338. continue
  339. }
  340. return fn(ch)
  341. }
  342. return &Iter{err: errNoControl}
  343. }
  344. func (c *controlConn) withConn(fn func(*Conn) *Iter) *Iter {
  345. return c.withConnHost(func(ch *connHost) *Iter {
  346. return fn(ch.conn)
  347. })
  348. }
  349. // query will return nil if the connection is closed or nil
  350. func (c *controlConn) query(statement string, values ...interface{}) (iter *Iter) {
  351. q := c.session.Query(statement, values...).Consistency(One).RoutingKey([]byte{}).Trace(nil)
  352. for {
  353. iter = c.withConn(func(conn *Conn) *Iter {
  354. return conn.executeQuery(q)
  355. })
  356. if gocqlDebug && iter.err != nil {
  357. Logger.Printf("control: error executing %q: %v\n", statement, iter.err)
  358. }
  359. q.attempts++
  360. if iter.err == nil || !c.retry.Attempt(q) {
  361. break
  362. }
  363. }
  364. return
  365. }
  366. func (c *controlConn) awaitSchemaAgreement() error {
  367. return c.withConn(func(conn *Conn) *Iter {
  368. return &Iter{err: conn.awaitSchemaAgreement()}
  369. }).err
  370. }
  371. func (c *controlConn) close() {
  372. if atomic.CompareAndSwapInt32(&c.started, 1, -1) {
  373. c.quit <- struct{}{}
  374. }
  375. ch := c.getConn()
  376. if ch != nil {
  377. ch.conn.Close()
  378. }
  379. }
  380. var errNoControl = errors.New("gocql: no control connection available")