broker.go 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. package sarama
  2. import (
  3. "fmt"
  4. "io"
  5. "net"
  6. "strconv"
  7. "sync"
  8. "sync/atomic"
  9. "time"
  10. )
  11. // BrokerConfig is used to pass multiple configuration options to Broker.Open.
  12. type BrokerConfig struct {
  13. MaxOpenRequests int // How many outstanding requests the broker is allowed to have before blocking attempts to send (default 5).
  14. // All three of the below configurations are similar to the `socket.timeout.ms` setting in JVM kafka.
  15. DialTimeout time.Duration // How long to wait for the initial connection to succeed before timing out and returning an error (default 30s).
  16. ReadTimeout time.Duration // How long to wait for a response before timing out and returning an error (default 30s).
  17. WriteTimeout time.Duration // How long to wait for a transmit to succeed before timing out and returning an error (default 30s).
  18. }
  19. // NewBrokerConfig returns a new broker configuration with sane defaults.
  20. func NewBrokerConfig() *BrokerConfig {
  21. return &BrokerConfig{
  22. MaxOpenRequests: 5,
  23. DialTimeout: 30 * time.Second,
  24. ReadTimeout: 30 * time.Second,
  25. WriteTimeout: 30 * time.Second,
  26. }
  27. }
  28. // Validate checks a BrokerConfig instance. This will return a
  29. // ConfigurationError if the specified values don't make sense.
  30. func (config *BrokerConfig) Validate() error {
  31. if config.MaxOpenRequests <= 0 {
  32. return ConfigurationError("Invalid MaxOpenRequests")
  33. }
  34. if config.DialTimeout <= 0 {
  35. return ConfigurationError("Invalid DialTimeout")
  36. }
  37. if config.ReadTimeout <= 0 {
  38. return ConfigurationError("Invalid ReadTimeout")
  39. }
  40. if config.WriteTimeout <= 0 {
  41. return ConfigurationError("Invalid WriteTimeout")
  42. }
  43. return nil
  44. }
  45. // Broker represents a single Kafka broker connection. All operations on this object are entirely concurrency-safe.
  46. type Broker struct {
  47. id int32
  48. addr string
  49. conf *BrokerConfig
  50. correlationID int32
  51. conn net.Conn
  52. connErr error
  53. lock sync.Mutex
  54. opened int32
  55. responses chan responsePromise
  56. done chan bool
  57. }
  58. type responsePromise struct {
  59. correlationID int32
  60. packets chan []byte
  61. errors chan error
  62. }
  63. // NewBroker creates and returns a Broker targetting the given host:port address.
  64. // This does not attempt to actually connect, you have to call Open() for that.
  65. func NewBroker(addr string) *Broker {
  66. return &Broker{id: -1, addr: addr}
  67. }
  68. // Open tries to connect to the Broker if it is not already connected or connecting, but does not block
  69. // waiting for the connection to complete. This means that any subsequent operations on the broker will
  70. // block waiting for the connection to succeed or fail. To get the effect of a fully synchronous Open call,
  71. // follow it by a call to Connected(). The only errors Open will return directly are ConfigurationError or
  72. // AlreadyConnected. If conf is nil, the result of NewBrokerConfig() is used.
  73. func (b *Broker) Open(conf *BrokerConfig) error {
  74. if conf == nil {
  75. conf = NewBrokerConfig()
  76. }
  77. err := conf.Validate()
  78. if err != nil {
  79. return err
  80. }
  81. if !atomic.CompareAndSwapInt32(&b.opened, 0, 1) {
  82. return ErrAlreadyConnected
  83. }
  84. b.lock.Lock()
  85. if b.conn != nil {
  86. b.lock.Unlock()
  87. Logger.Printf("Failed to connect to broker %s: %s\n", b.addr, ErrAlreadyConnected)
  88. return ErrAlreadyConnected
  89. }
  90. go withRecover(func() {
  91. defer b.lock.Unlock()
  92. b.conn, b.connErr = net.DialTimeout("tcp", b.addr, conf.DialTimeout)
  93. if b.connErr != nil {
  94. b.conn = nil
  95. atomic.StoreInt32(&b.opened, 0)
  96. Logger.Printf("Failed to connect to broker %s: %s\n", b.addr, b.connErr)
  97. return
  98. }
  99. b.conf = conf
  100. b.done = make(chan bool)
  101. b.responses = make(chan responsePromise, b.conf.MaxOpenRequests-1)
  102. Logger.Printf("Connected to broker %s\n", b.addr)
  103. go withRecover(b.responseReceiver)
  104. })
  105. return nil
  106. }
  107. // Connected returns true if the broker is connected and false otherwise. If the broker is not
  108. // connected but it had tried to connect, the error from that connection attempt is also returned.
  109. func (b *Broker) Connected() (bool, error) {
  110. b.lock.Lock()
  111. defer b.lock.Unlock()
  112. return b.conn != nil, b.connErr
  113. }
  114. func (b *Broker) Close() (err error) {
  115. b.lock.Lock()
  116. defer b.lock.Unlock()
  117. defer func() {
  118. if err == nil {
  119. Logger.Printf("Closed connection to broker %s\n", b.addr)
  120. } else {
  121. Logger.Printf("Failed to close connection to broker %s: %s\n", b.addr, err)
  122. }
  123. }()
  124. if b.conn == nil {
  125. return ErrNotConnected
  126. }
  127. close(b.responses)
  128. <-b.done
  129. err = b.conn.Close()
  130. b.conn = nil
  131. b.connErr = nil
  132. b.done = nil
  133. b.responses = nil
  134. atomic.StoreInt32(&b.opened, 0)
  135. return
  136. }
  137. // ID returns the broker ID retrieved from Kafka's metadata, or -1 if that is not known.
  138. func (b *Broker) ID() int32 {
  139. return b.id
  140. }
  141. // Addr returns the broker address as either retrieved from Kafka's metadata or passed to NewBroker.
  142. func (b *Broker) Addr() string {
  143. return b.addr
  144. }
  145. func (b *Broker) GetMetadata(clientID string, request *MetadataRequest) (*MetadataResponse, error) {
  146. response := new(MetadataResponse)
  147. err := b.sendAndReceive(clientID, request, response)
  148. if err != nil {
  149. return nil, err
  150. }
  151. return response, nil
  152. }
  153. func (b *Broker) GetConsumerMetadata(clientID string, request *ConsumerMetadataRequest) (*ConsumerMetadataResponse, error) {
  154. response := new(ConsumerMetadataResponse)
  155. err := b.sendAndReceive(clientID, request, response)
  156. if err != nil {
  157. return nil, err
  158. }
  159. return response, nil
  160. }
  161. func (b *Broker) GetAvailableOffsets(clientID string, request *OffsetRequest) (*OffsetResponse, error) {
  162. response := new(OffsetResponse)
  163. err := b.sendAndReceive(clientID, request, response)
  164. if err != nil {
  165. return nil, err
  166. }
  167. return response, nil
  168. }
  169. func (b *Broker) Produce(clientID string, request *ProduceRequest) (*ProduceResponse, error) {
  170. var response *ProduceResponse
  171. var err error
  172. if request.RequiredAcks == NoResponse {
  173. err = b.sendAndReceive(clientID, request, nil)
  174. } else {
  175. response = new(ProduceResponse)
  176. err = b.sendAndReceive(clientID, request, response)
  177. }
  178. if err != nil {
  179. return nil, err
  180. }
  181. return response, nil
  182. }
  183. func (b *Broker) Fetch(clientID string, request *FetchRequest) (*FetchResponse, error) {
  184. response := new(FetchResponse)
  185. err := b.sendAndReceive(clientID, request, response)
  186. if err != nil {
  187. return nil, err
  188. }
  189. return response, nil
  190. }
  191. func (b *Broker) CommitOffset(clientID string, request *OffsetCommitRequest) (*OffsetCommitResponse, error) {
  192. response := new(OffsetCommitResponse)
  193. err := b.sendAndReceive(clientID, request, response)
  194. if err != nil {
  195. return nil, err
  196. }
  197. return response, nil
  198. }
  199. func (b *Broker) FetchOffset(clientID string, request *OffsetFetchRequest) (*OffsetFetchResponse, error) {
  200. response := new(OffsetFetchResponse)
  201. err := b.sendAndReceive(clientID, request, response)
  202. if err != nil {
  203. return nil, err
  204. }
  205. return response, nil
  206. }
  207. func (b *Broker) send(clientID string, req requestEncoder, promiseResponse bool) (*responsePromise, error) {
  208. b.lock.Lock()
  209. defer b.lock.Unlock()
  210. if b.conn == nil {
  211. if b.connErr != nil {
  212. return nil, b.connErr
  213. }
  214. return nil, ErrNotConnected
  215. }
  216. fullRequest := request{b.correlationID, clientID, req}
  217. buf, err := encode(&fullRequest)
  218. if err != nil {
  219. return nil, err
  220. }
  221. err = b.conn.SetWriteDeadline(time.Now().Add(b.conf.WriteTimeout))
  222. if err != nil {
  223. return nil, err
  224. }
  225. _, err = b.conn.Write(buf)
  226. if err != nil {
  227. return nil, err
  228. }
  229. b.correlationID++
  230. if !promiseResponse {
  231. return nil, nil
  232. }
  233. promise := responsePromise{fullRequest.correlationID, make(chan []byte), make(chan error)}
  234. b.responses <- promise
  235. return &promise, nil
  236. }
  237. func (b *Broker) sendAndReceive(clientID string, req requestEncoder, res decoder) error {
  238. promise, err := b.send(clientID, req, res != nil)
  239. if err != nil {
  240. return err
  241. }
  242. if promise == nil {
  243. return nil
  244. }
  245. select {
  246. case buf := <-promise.packets:
  247. return decode(buf, res)
  248. case err = <-promise.errors:
  249. return err
  250. }
  251. }
  252. func (b *Broker) decode(pd packetDecoder) (err error) {
  253. b.id, err = pd.getInt32()
  254. if err != nil {
  255. return err
  256. }
  257. host, err := pd.getString()
  258. if err != nil {
  259. return err
  260. }
  261. port, err := pd.getInt32()
  262. if err != nil {
  263. return err
  264. }
  265. b.addr = fmt.Sprint(host, ":", port)
  266. return nil
  267. }
  268. func (b *Broker) encode(pe packetEncoder) (err error) {
  269. host, portstr, err := net.SplitHostPort(b.addr)
  270. if err != nil {
  271. return err
  272. }
  273. port, err := strconv.Atoi(portstr)
  274. if err != nil {
  275. return err
  276. }
  277. pe.putInt32(b.id)
  278. err = pe.putString(host)
  279. if err != nil {
  280. return err
  281. }
  282. pe.putInt32(int32(port))
  283. return nil
  284. }
  285. func (b *Broker) responseReceiver() {
  286. header := make([]byte, 8)
  287. for response := range b.responses {
  288. err := b.conn.SetReadDeadline(time.Now().Add(b.conf.ReadTimeout))
  289. if err != nil {
  290. response.errors <- err
  291. continue
  292. }
  293. _, err = io.ReadFull(b.conn, header)
  294. if err != nil {
  295. response.errors <- err
  296. continue
  297. }
  298. decodedHeader := responseHeader{}
  299. err = decode(header, &decodedHeader)
  300. if err != nil {
  301. response.errors <- err
  302. continue
  303. }
  304. if decodedHeader.correlationID != response.correlationID {
  305. // TODO if decoded ID < cur ID, discard until we catch up
  306. // TODO if decoded ID > cur ID, save it so when cur ID catches up we have a response
  307. response.errors <- PacketDecodingError{fmt.Sprintf("CorrelationID didn't match, wanted %d, got %d", response.correlationID, decodedHeader.correlationID)}
  308. continue
  309. }
  310. buf := make([]byte, decodedHeader.length-4)
  311. _, err = io.ReadFull(b.conn, buf)
  312. if err != nil {
  313. // XXX: the above ReadFull call inherits the same ReadDeadline set at the top of this loop, so it may
  314. // fail with a timeout error. If this happens, our connection is permanently toast since we will no longer
  315. // be aligned correctly on the stream (we'll be reading garbage Kafka headers from the middle of data).
  316. // Can we/should we fail harder in that case?
  317. response.errors <- err
  318. continue
  319. }
  320. response.packets <- buf
  321. }
  322. close(b.done)
  323. }