gocql.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617
  1. // Copyright (c) 2012 The gocql Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
// Package gocql provides a database/sql driver for CQL, the Cassandra
// query language.
  6. //
  7. // This package requires a recent version of Cassandra (≥ 1.2) that supports
  8. // CQL 3.0 and the new native protocol. The native protocol is still considered
  9. // beta and must be enabled manually in Cassandra 1.2 by setting
  10. // "start_native_transport" to true in conf/cassandra.yaml.
  11. //
  12. // Example Usage:
  13. //
  14. // db, err := sql.Open("gocql", "localhost:9042 keyspace=system")
  15. // // ...
  16. // rows, err := db.Query("SELECT keyspace_name FROM schema_keyspaces")
  17. // // ...
  18. // for rows.Next() {
  19. // var keyspace string
  20. // err = rows.Scan(&keyspace)
  21. // // ...
  22. // fmt.Println(keyspace)
  23. // }
  24. // if err := rows.Err(); err != nil {
  25. // // ...
  26. // }
  27. //
  28. package gocql
  29. import (
  30. "bytes"
  31. "code.google.com/p/snappy-go/snappy"
  32. "database/sql"
  33. "database/sql/driver"
  34. "encoding/binary"
  35. "fmt"
  36. "io"
  37. "net"
  38. "strings"
  39. "time"
  40. )
  41. const (
  42. protoRequest byte = 0x01
  43. protoResponse byte = 0x81
  44. opError byte = 0x00
  45. opStartup byte = 0x01
  46. opReady byte = 0x02
  47. opAuthenticate byte = 0x03
  48. opCredentials byte = 0x04
  49. opOptions byte = 0x05
  50. opSupported byte = 0x06
  51. opQuery byte = 0x07
  52. opResult byte = 0x08
  53. opPrepare byte = 0x09
  54. opExecute byte = 0x0A
  55. opLAST byte = 0x0A // not a real opcode -- used to check for valid opcodes
  56. flagCompressed byte = 0x01
  57. keyVersion string = "CQL_VERSION"
  58. keyCompression string = "COMPRESSION"
  59. keyspaceQuery string = "USE "
  60. )
  61. var consistencyLevels = map[string]byte{"any": 0x00, "one": 0x01, "two": 0x02,
  62. "three": 0x03, "quorum": 0x04, "all": 0x05, "local_quorum": 0x06, "each_quorum": 0x07}
  63. type drv struct{}
  64. func (d drv) Open(name string) (driver.Conn, error) {
  65. return Open(name)
  66. }
  67. type connection struct {
  68. c net.Conn
  69. address string
  70. alive bool
  71. pool *pool
  72. }
  73. type pool struct {
  74. connections []*connection
  75. i int
  76. keyspace string
  77. version string
  78. compression string
  79. consistency byte
  80. dead bool
  81. stop chan struct{}
  82. }
  83. func Open(name string) (*pool, error) {
  84. parts := strings.Split(name, " ")
  85. var addresses []string
  86. if len(parts) >= 1 {
  87. addresses = strings.Split(parts[0], ",")
  88. }
  89. version := "3.0.0"
  90. var (
  91. keyspace string
  92. compression string
  93. consistency byte = 0x01
  94. ok bool
  95. )
  96. for i := 1; i < len(parts); i++ {
  97. switch {
  98. case parts[i] == "":
  99. continue
  100. case strings.HasPrefix(parts[i], "keyspace="):
  101. keyspace = strings.TrimSpace(parts[i][9:])
  102. case strings.HasPrefix(parts[i], "compression="):
  103. compression = strings.TrimSpace(parts[i][12:])
  104. if compression != "snappy" {
  105. return nil, fmt.Errorf("unknown compression algorithm %q",
  106. compression)
  107. }
  108. case strings.HasPrefix(parts[i], "version="):
  109. version = strings.TrimSpace(parts[i][8:])
  110. case strings.HasPrefix(parts[i], "consistency="):
  111. cs := strings.TrimSpace(parts[i][12:])
  112. if consistency, ok = consistencyLevels[cs]; !ok {
  113. return nil, fmt.Errorf("unknown consistency level %q", cs)
  114. }
  115. default:
  116. return nil, fmt.Errorf("unsupported option %q", parts[i])
  117. }
  118. }
  119. pool := &pool{
  120. keyspace: keyspace,
  121. version: version,
  122. compression: compression,
  123. consistency: consistency,
  124. stop: make(chan struct{}),
  125. }
  126. for _, address := range addresses {
  127. pool.connections = append(pool.connections, &connection{address: address, pool: pool})
  128. }
  129. pool.join()
  130. return pool, nil
  131. }
  132. func (cn *connection) open() {
  133. cn.alive = false
  134. var err error
  135. cn.c, err = net.Dial("tcp", cn.address)
  136. if err != nil {
  137. return
  138. }
  139. var (
  140. version = cn.pool.version
  141. compression = cn.pool.compression
  142. keyspace = cn.pool.keyspace
  143. )
  144. b := &bytes.Buffer{}
  145. if compression != "" {
  146. binary.Write(b, binary.BigEndian, uint16(2))
  147. } else {
  148. binary.Write(b, binary.BigEndian, uint16(1))
  149. }
  150. binary.Write(b, binary.BigEndian, uint16(len(keyVersion)))
  151. b.WriteString(keyVersion)
  152. binary.Write(b, binary.BigEndian, uint16(len(version)))
  153. b.WriteString(version)
  154. if compression != "" {
  155. binary.Write(b, binary.BigEndian, uint16(len(keyCompression)))
  156. b.WriteString(keyCompression)
  157. binary.Write(b, binary.BigEndian, uint16(len(compression)))
  158. b.WriteString(compression)
  159. }
  160. if err := cn.sendUncompressed(opStartup, b.Bytes()); err != nil {
  161. return
  162. }
  163. opcode, _, err := cn.recv()
  164. if err != nil {
  165. return
  166. }
  167. if opcode != opReady {
  168. return
  169. }
  170. if keyspace != "" {
  171. cn.UseKeyspace(keyspace)
  172. }
  173. cn.alive = true
  174. }
  175. // close a connection actively, typically used when there's an error and we want to ensure
  176. // we don't repeatedly try to use the broken connection
  177. func (cn *connection) close() {
  178. cn.c.Close()
  179. cn.c = nil // ensure we generate ErrBadConn when cn gets reused
  180. cn.alive = false
  181. // Check if the entire pool is dead
  182. for _, cn := range cn.pool.connections {
  183. if cn.alive {
  184. return
  185. }
  186. }
  187. cn.pool.dead = false
  188. }
  189. // explicitly send a request as uncompressed
  190. // This is only really needed for the "startup" handshake
  191. func (cn *connection) sendUncompressed(opcode byte, body []byte) error {
  192. return cn._send(opcode, body, false)
  193. }
  194. func (cn *connection) send(opcode byte, body []byte) error {
  195. return cn._send(opcode, body, cn.pool.compression == "snappy" && len(body) > 0)
  196. }
  197. func (cn *connection) _send(opcode byte, body []byte, compression bool) error {
  198. if cn.c == nil {
  199. return driver.ErrBadConn
  200. }
  201. var flags byte = 0x00
  202. if compression {
  203. var err error
  204. body, err = snappy.Encode(nil, body)
  205. if err != nil {
  206. return err
  207. }
  208. flags = flagCompressed
  209. }
  210. frame := make([]byte, len(body)+8)
  211. frame[0] = protoRequest
  212. frame[1] = flags
  213. frame[2] = 0
  214. frame[3] = opcode
  215. binary.BigEndian.PutUint32(frame[4:8], uint32(len(body)))
  216. copy(frame[8:], body)
  217. if _, err := cn.c.Write(frame); err != nil {
  218. return err
  219. }
  220. return nil
  221. }
  222. func (cn *connection) recv() (byte, []byte, error) {
  223. if cn.c == nil {
  224. return 0, nil, driver.ErrBadConn
  225. }
  226. header := make([]byte, 8)
  227. if _, err := io.ReadFull(cn.c, header); err != nil {
  228. cn.close() // better assume that the connection is broken (may have read some bytes)
  229. return 0, nil, err
  230. }
  231. // verify that the frame starts with version==1 and req/resp flag==response
  232. // this may be overly conservative in that future versions may be backwards compatible
  233. // in that case simply amend the check...
  234. if header[0] != protoResponse {
  235. cn.close()
  236. return 0, nil, fmt.Errorf("unsupported frame version or not a response: 0x%x (header=%v)", header[0], header)
  237. }
  238. // verify that the flags field has only a single flag set, again, this may
  239. // be overly conservative if additional flags are backwards-compatible
  240. if header[1] > 1 {
  241. cn.close()
  242. return 0, nil, fmt.Errorf("unsupported frame flags: 0x%x (header=%v)", header[1], header)
  243. }
  244. opcode := header[3]
  245. if opcode > opLAST {
  246. cn.close()
  247. return 0, nil, fmt.Errorf("unknown opcode: 0x%x (header=%v)", opcode, header)
  248. }
  249. length := binary.BigEndian.Uint32(header[4:8])
  250. var body []byte
  251. if length > 0 {
  252. if length > 256*1024*1024 { // spec says 256MB is max
  253. cn.close()
  254. return 0, nil, fmt.Errorf("frame too large: %d (header=%v)", length, header)
  255. }
  256. body = make([]byte, length)
  257. if _, err := io.ReadFull(cn.c, body); err != nil {
  258. cn.close() // better assume that the connection is broken
  259. return 0, nil, err
  260. }
  261. }
  262. if header[1]&flagCompressed != 0 && cn.pool.compression == "snappy" {
  263. var err error
  264. body, err = snappy.Decode(nil, body)
  265. if err != nil {
  266. cn.close()
  267. return 0, nil, err
  268. }
  269. }
  270. if opcode == opError {
  271. code := binary.BigEndian.Uint32(body[0:4])
  272. msglen := binary.BigEndian.Uint16(body[4:6])
  273. msg := string(body[6 : 6+msglen])
  274. return opcode, body, Error{Code: int(code), Msg: msg}
  275. }
  276. return opcode, body, nil
  277. }
  278. func (p *pool) conn() (*connection, error) {
  279. if p.dead {
  280. return nil, driver.ErrBadConn
  281. }
  282. totalConnections := len(p.connections)
  283. start := p.i + 1 // make sure that we start from the next position in the ring
  284. for i := 0; i < totalConnections; i++ {
  285. idx := (i + start) % totalConnections
  286. cn := p.connections[idx]
  287. if cn.alive {
  288. p.i = idx // set the new 'i' so the ring will start again in the right place
  289. return cn, nil
  290. }
  291. }
  292. // we've exhausted the pool, gonna have a bad time
  293. p.dead = true
  294. return nil, driver.ErrBadConn
  295. }
  296. func (p *pool) join() {
  297. p.reconnect()
  298. // Every 1 second, we want to try reconnecting to disconnected nodes
  299. go func() {
  300. for {
  301. select {
  302. case <-p.stop:
  303. return
  304. default:
  305. p.reconnect()
  306. time.Sleep(time.Second)
  307. }
  308. }
  309. }()
  310. }
  311. func (p *pool) reconnect() {
  312. for _, cn := range p.connections {
  313. if !cn.alive {
  314. cn.open()
  315. }
  316. }
  317. }
  318. func (p *pool) Begin() (driver.Tx, error) {
  319. if p.dead {
  320. return nil, driver.ErrBadConn
  321. }
  322. return p, nil
  323. }
  324. func (p *pool) Commit() error {
  325. if p.dead {
  326. return driver.ErrBadConn
  327. }
  328. return nil
  329. }
  330. func (p *pool) Close() error {
  331. if p.dead {
  332. return driver.ErrBadConn
  333. }
  334. for _, cn := range p.connections {
  335. cn.close()
  336. }
  337. p.stop <- struct{}{}
  338. p.dead = true
  339. return nil
  340. }
  341. func (p *pool) Rollback() error {
  342. if p.dead {
  343. return driver.ErrBadConn
  344. }
  345. return nil
  346. }
  347. func (p *pool) Prepare(query string) (driver.Stmt, error) {
  348. // Explicitly check if the query is a "USE <keyspace>"
  349. // Since it needs to be special cased and run on each server
  350. if strings.HasPrefix(query, keyspaceQuery) {
  351. keyspace := query[len(keyspaceQuery):]
  352. p.UseKeyspace(keyspace)
  353. return &statement{}, nil
  354. }
  355. for {
  356. cn, err := p.conn()
  357. if err != nil {
  358. return nil, err
  359. }
  360. st, err := cn.Prepare(query)
  361. if err != nil {
  362. // the cn has gotten marked as dead already
  363. if p.dead {
  364. // The entire pool is dead, so we bubble up the ErrBadConn
  365. return nil, driver.ErrBadConn
  366. } else {
  367. continue // Retry request on another cn
  368. }
  369. }
  370. return st, nil
  371. }
  372. }
  373. func (p *pool) UseKeyspace(keyspace string) {
  374. p.keyspace = keyspace
  375. for _, cn := range p.connections {
  376. cn.UseKeyspace(keyspace)
  377. }
  378. }
  379. func (cn *connection) UseKeyspace(keyspace string) error {
  380. st, err := cn.Prepare(keyspaceQuery + keyspace)
  381. if err != nil {
  382. return err
  383. }
  384. if _, err = st.Exec([]driver.Value{}); err != nil {
  385. return err
  386. }
  387. return nil
  388. }
  389. func (cn *connection) Prepare(query string) (driver.Stmt, error) {
  390. body := make([]byte, len(query)+4)
  391. binary.BigEndian.PutUint32(body[0:4], uint32(len(query)))
  392. copy(body[4:], []byte(query))
  393. if err := cn.send(opPrepare, body); err != nil {
  394. return nil, err
  395. }
  396. opcode, body, err := cn.recv()
  397. if err != nil {
  398. return nil, err
  399. }
  400. if opcode != opResult || binary.BigEndian.Uint32(body) != 4 {
  401. return nil, fmt.Errorf("expected prepared result")
  402. }
  403. n := int(binary.BigEndian.Uint16(body[4:]))
  404. prepared := body[6 : 6+n]
  405. columns, meta, _ := parseMeta(body[6+n:])
  406. return &statement{cn: cn, query: query,
  407. prepared: prepared, columns: columns, meta: meta}, nil
  408. }
  409. type statement struct {
  410. cn *connection
  411. query string
  412. prepared []byte
  413. columns []string
  414. meta []uint16
  415. }
  416. func (s *statement) Close() error {
  417. return nil
  418. }
  419. func (st *statement) ColumnConverter(idx int) driver.ValueConverter {
  420. return (&columnEncoder{st.meta}).ColumnConverter(idx)
  421. }
  422. func (st *statement) NumInput() int {
  423. return len(st.columns)
  424. }
  425. func parseMeta(body []byte) ([]string, []uint16, int) {
  426. flags := binary.BigEndian.Uint32(body)
  427. globalTableSpec := flags&1 == 1
  428. columnCount := int(binary.BigEndian.Uint32(body[4:]))
  429. i := 8
  430. if globalTableSpec {
  431. l := int(binary.BigEndian.Uint16(body[i:]))
  432. keyspace := string(body[i+2 : i+2+l])
  433. i += 2 + l
  434. l = int(binary.BigEndian.Uint16(body[i:]))
  435. tablename := string(body[i+2 : i+2+l])
  436. i += 2 + l
  437. _, _ = keyspace, tablename
  438. }
  439. columns := make([]string, columnCount)
  440. meta := make([]uint16, columnCount)
  441. for c := 0; c < columnCount; c++ {
  442. l := int(binary.BigEndian.Uint16(body[i:]))
  443. columns[c] = string(body[i+2 : i+2+l])
  444. i += 2 + l
  445. meta[c] = binary.BigEndian.Uint16(body[i:])
  446. i += 2
  447. }
  448. return columns, meta, i
  449. }
  450. func (st *statement) exec(v []driver.Value) error {
  451. sz := 6 + len(st.prepared)
  452. for i := range v {
  453. if b, ok := v[i].([]byte); ok {
  454. sz += len(b) + 4
  455. }
  456. }
  457. body, p := make([]byte, sz), 4+len(st.prepared)
  458. binary.BigEndian.PutUint16(body, uint16(len(st.prepared)))
  459. copy(body[2:], st.prepared)
  460. binary.BigEndian.PutUint16(body[p-2:], uint16(len(v)))
  461. for i := range v {
  462. b, ok := v[i].([]byte)
  463. if !ok {
  464. return fmt.Errorf("unsupported type %T at column %d", v[i], i)
  465. }
  466. binary.BigEndian.PutUint32(body[p:], uint32(len(b)))
  467. copy(body[p+4:], b)
  468. p += 4 + len(b)
  469. }
  470. binary.BigEndian.PutUint16(body[p:], uint16(st.cn.pool.consistency))
  471. if err := st.cn.send(opExecute, body); err != nil {
  472. return err
  473. }
  474. return nil
  475. }
  476. func (st *statement) Exec(v []driver.Value) (driver.Result, error) {
  477. if st.cn == nil {
  478. return nil, nil
  479. }
  480. if err := st.exec(v); err != nil {
  481. return nil, err
  482. }
  483. opcode, body, err := st.cn.recv()
  484. if err != nil {
  485. return nil, err
  486. }
  487. _, _ = opcode, body
  488. return nil, nil
  489. }
  490. func (st *statement) Query(v []driver.Value) (driver.Rows, error) {
  491. if err := st.exec(v); err != nil {
  492. return nil, err
  493. }
  494. opcode, body, err := st.cn.recv()
  495. if err != nil {
  496. return nil, err
  497. }
  498. kind := binary.BigEndian.Uint32(body[0:4])
  499. if opcode != opResult || kind != 2 {
  500. return nil, fmt.Errorf("expected rows as result")
  501. }
  502. columns, meta, n := parseMeta(body[4:])
  503. i := n + 4
  504. rows := &rows{
  505. columns: columns,
  506. meta: meta,
  507. numRows: int(binary.BigEndian.Uint32(body[i:])),
  508. }
  509. i += 4
  510. rows.body = body[i:]
  511. return rows, nil
  512. }
  513. type rows struct {
  514. columns []string
  515. meta []uint16
  516. body []byte
  517. row int
  518. numRows int
  519. }
  520. func (r *rows) Close() error {
  521. return nil
  522. }
  523. func (r *rows) Columns() []string {
  524. return r.columns
  525. }
  526. func (r *rows) Next(values []driver.Value) error {
  527. if r.row >= r.numRows {
  528. return io.EOF
  529. }
  530. for column := 0; column < len(r.columns); column++ {
  531. n := int32(binary.BigEndian.Uint32(r.body))
  532. r.body = r.body[4:]
  533. if n >= 0 {
  534. values[column] = decode(r.body[:n], r.meta[column])
  535. r.body = r.body[n:]
  536. } else {
  537. values[column] = nil
  538. }
  539. }
  540. r.row++
  541. return nil
  542. }
  543. type Error struct {
  544. Code int
  545. Msg string
  546. }
  547. func (e Error) Error() string {
  548. return e.Msg
  549. }
  550. func init() {
  551. sql.Register("gocql", &drv{})
  552. }