consumer.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602
  1. package sarama
  2. import (
  3. "fmt"
  4. "sync"
  5. "time"
  6. )
  7. // ConsumerMessage encapsulates a Kafka message returned by the consumer.
  8. type ConsumerMessage struct {
  9. Key, Value []byte
  10. Topic string
  11. Partition int32
  12. Offset int64
  13. }
  14. // ConsumerError is what is provided to the user when an error occurs.
  15. // It wraps an error and includes the topic and partition.
  16. type ConsumerError struct {
  17. Topic string
  18. Partition int32
  19. Err error
  20. }
  21. func (ce ConsumerError) Error() string {
  22. return fmt.Sprintf("kafka: error while consuming %s/%d: %s", ce.Topic, ce.Partition, ce.Err)
  23. }
  24. // ConsumerErrors is a type that wraps a batch of errors and implements the Error interface.
  25. // It can be returned from the PartitionConsumer's Close methods to avoid the need to manually drain errors
  26. // when stopping.
  27. type ConsumerErrors []*ConsumerError
  28. func (ce ConsumerErrors) Error() string {
  29. return fmt.Sprintf("kafka: %d errors while consuming", len(ce))
  30. }
// Consumer manages PartitionConsumers which process Kafka messages from brokers. You MUST call Close()
// on a consumer to avoid leaks, it will not be garbage-collected automatically when it passes out of
// scope.
type Consumer interface {
	// Topics returns the set of available topics as retrieved from the cluster metadata.
	// This method is the same as Client.Topics(), and is provided for convenience.
	Topics() ([]string, error)

	// Partitions returns the sorted list of all partition IDs for the given topic.
	// This method is the same as Client.Partitions(), and is provided for convenience.
	Partitions(topic string) ([]int32, error)

	// ConsumePartition creates a PartitionConsumer on the given topic/partition with the given offset. It will
	// return an error if this Consumer is already consuming on the given topic/partition. Offset can be a
	// literal offset, or OffsetNewest or OffsetOldest.
	ConsumePartition(topic string, partition int32, offset int64) (PartitionConsumer, error)

	// Close shuts down the consumer. It must be called after all child PartitionConsumers have already been closed.
	Close() error
}
// consumer is the Consumer implementation in this file. It tracks the
// partitionConsumers it has handed out and the per-broker workers that fetch
// on their behalf.
type consumer struct {
	// client is used for metadata, leader and offset lookups.
	client Client
	// conf is the configuration obtained from client.Config().
	conf *Config
	// ownClient is set by NewConsumer; when true, Close also closes client.
	ownClient bool

	// lock guards children and brokerConsumers, and the refs counter of each
	// brokerConsumer.
	lock sync.Mutex
	// children maps topic -> partition -> the partitionConsumer consuming it.
	children map[string]map[int32]*partitionConsumer
	// brokerConsumers holds the worker currently associated with each broker.
	brokerConsumers map[*Broker]*brokerConsumer
}
  56. // NewConsumer creates a new consumer using the given broker addresses and configuration.
  57. func NewConsumer(addrs []string, config *Config) (Consumer, error) {
  58. client, err := NewClient(addrs, config)
  59. if err != nil {
  60. return nil, err
  61. }
  62. c, err := NewConsumerFromClient(client)
  63. if err != nil {
  64. return nil, err
  65. }
  66. c.(*consumer).ownClient = true
  67. return c, nil
  68. }
  69. // NewConsumerFromClient creates a new consumer using the given client. It is still
  70. // necessary to call Close() on the underlying client when shutting down this consumer.
  71. func NewConsumerFromClient(client Client) (Consumer, error) {
  72. // Check that we are not dealing with a closed Client before processing any other arguments
  73. if client.Closed() {
  74. return nil, ErrClosedClient
  75. }
  76. c := &consumer{
  77. client: client,
  78. conf: client.Config(),
  79. children: make(map[string]map[int32]*partitionConsumer),
  80. brokerConsumers: make(map[*Broker]*brokerConsumer),
  81. }
  82. return c, nil
  83. }
  84. func (c *consumer) Close() error {
  85. if c.ownClient {
  86. return c.client.Close()
  87. }
  88. return nil
  89. }
// Topics returns whatever the underlying client's Topics method returns.
func (c *consumer) Topics() ([]string, error) {
	return c.client.Topics()
}
// Partitions returns whatever the underlying client's Partitions method
// returns for the given topic.
func (c *consumer) Partitions(topic string) ([]int32, error) {
	return c.client.Partitions(topic)
}
  96. func (c *consumer) ConsumePartition(topic string, partition int32, offset int64) (PartitionConsumer, error) {
  97. child := &partitionConsumer{
  98. consumer: c,
  99. conf: c.conf,
  100. topic: topic,
  101. partition: partition,
  102. messages: make(chan *ConsumerMessage, c.conf.ChannelBufferSize),
  103. errors: make(chan *ConsumerError, c.conf.ChannelBufferSize),
  104. trigger: make(chan none, 1),
  105. dying: make(chan none),
  106. fetchSize: c.conf.Consumer.Fetch.Default,
  107. }
  108. if err := child.chooseStartingOffset(offset); err != nil {
  109. return nil, err
  110. }
  111. var leader *Broker
  112. var err error
  113. if leader, err = c.client.Leader(child.topic, child.partition); err != nil {
  114. return nil, err
  115. }
  116. if err := c.addChild(child); err != nil {
  117. return nil, err
  118. }
  119. go withRecover(child.dispatcher)
  120. child.broker = c.refBrokerConsumer(leader)
  121. child.broker.input <- child
  122. return child, nil
  123. }
  124. func (c *consumer) addChild(child *partitionConsumer) error {
  125. c.lock.Lock()
  126. defer c.lock.Unlock()
  127. topicChildren := c.children[child.topic]
  128. if topicChildren == nil {
  129. topicChildren = make(map[int32]*partitionConsumer)
  130. c.children[child.topic] = topicChildren
  131. }
  132. if topicChildren[child.partition] != nil {
  133. return ConfigurationError("That topic/partition is already being consumed")
  134. }
  135. topicChildren[child.partition] = child
  136. return nil
  137. }
  138. func (c *consumer) removeChild(child *partitionConsumer) {
  139. c.lock.Lock()
  140. defer c.lock.Unlock()
  141. delete(c.children[child.topic], child.partition)
  142. }
  143. func (c *consumer) refBrokerConsumer(broker *Broker) *brokerConsumer {
  144. c.lock.Lock()
  145. defer c.lock.Unlock()
  146. brokerWorker := c.brokerConsumers[broker]
  147. if brokerWorker == nil {
  148. brokerWorker = &brokerConsumer{
  149. consumer: c,
  150. broker: broker,
  151. input: make(chan *partitionConsumer),
  152. newSubscriptions: make(chan []*partitionConsumer),
  153. wait: make(chan none),
  154. subscriptions: make(map[*partitionConsumer]none),
  155. refs: 0,
  156. }
  157. go withRecover(brokerWorker.subscriptionManager)
  158. go withRecover(brokerWorker.subscriptionConsumer)
  159. c.brokerConsumers[broker] = brokerWorker
  160. }
  161. brokerWorker.refs++
  162. return brokerWorker
  163. }
  164. func (c *consumer) unrefBrokerConsumer(brokerWorker *brokerConsumer) {
  165. c.lock.Lock()
  166. defer c.lock.Unlock()
  167. brokerWorker.refs--
  168. if brokerWorker.refs == 0 {
  169. close(brokerWorker.input)
  170. if c.brokerConsumers[brokerWorker.broker] == brokerWorker {
  171. delete(c.brokerConsumers, brokerWorker.broker)
  172. }
  173. }
  174. }
  175. func (c *consumer) abandonBrokerConsumer(brokerWorker *brokerConsumer) {
  176. c.lock.Lock()
  177. defer c.lock.Unlock()
  178. delete(c.brokerConsumers, brokerWorker.broker)
  179. }
// PartitionConsumer

// PartitionConsumer processes Kafka messages from a given topic and partition. You MUST call Close()
// or AsyncClose() on a PartitionConsumer to avoid leaks, it will not be garbage-collected automatically
// when it passes out of scope.
//
// The simplest way of using a PartitionConsumer is to loop over its Messages channel using a for/range
// loop. The PartitionConsumer will only stop itself in one case: when the offset being consumed is reported
// as out of range by the brokers. In this case you should decide what you want to do (try a different offset,
// notify a human, etc) and handle it appropriately. For all other error cases, it will just keep retrying.
// By default, it logs these errors to sarama.Logger; if you want to be notified directly of all errors, set
// your config's Consumer.Return.Errors to true and read from the Errors channel, using a select statement
// or a separate goroutine. Check out the Consumer examples to see implementations of these different approaches.
type PartitionConsumer interface {
	// AsyncClose initiates a shutdown of the PartitionConsumer. This method will return immediately,
	// after which you should wait until the 'messages' and 'errors' channels are drained.
	// It is required to call this function, or Close, before a consumer object passes out of scope,
	// as it will otherwise leak memory. You must call this before calling Close on the underlying
	// client.
	AsyncClose()

	// Close stops the PartitionConsumer from fetching messages. It is required to call this function
	// (or AsyncClose) before a consumer object passes out of scope, as it will otherwise leak memory. You must
	// call this before calling Close on the underlying client.
	Close() error

	// Messages returns the read channel for the messages that are returned by the broker.
	Messages() <-chan *ConsumerMessage

	// Errors returns a read channel of errors that occurred during consuming, if enabled. By default,
	// errors are logged and not returned over this channel. If you want to implement any custom error
	// handling, set your config's Consumer.Return.Errors setting to true, and read from this channel.
	Errors() <-chan *ConsumerError
}
// partitionConsumer is the PartitionConsumer implementation in this file. It
// holds the state for consuming a single topic/partition.
type partitionConsumer struct {
	// consumer is the parent that created and tracks this child.
	consumer *consumer
	// conf is the parent consumer's configuration.
	conf *Config
	// topic and partition identify what is being consumed.
	topic     string
	partition int32

	// broker is the worker currently fetching for this partition; the
	// dispatcher sets it to nil while it looks for a new leader.
	broker *brokerConsumer
	// messages and errors are the channels exposed through Messages() and
	// Errors(); the dispatcher closes both when it exits.
	messages chan *ConsumerMessage
	errors   chan *ConsumerError
	// trigger wakes the dispatcher to find a new broker and is closed to make
	// the dispatcher exit; dying is closed by AsyncClose to request shutdown.
	trigger, dying chan none

	// fetchSize is the size passed to FetchRequest.AddBlock for this
	// partition; handleResponse grows it when a message does not fit.
	fetchSize int32
	// offset is the next offset to request from the broker.
	offset int64
}
  222. func (child *partitionConsumer) sendError(err error) {
  223. cErr := &ConsumerError{
  224. Topic: child.topic,
  225. Partition: child.partition,
  226. Err: err,
  227. }
  228. if child.conf.Consumer.Return.Errors {
  229. child.errors <- cErr
  230. } else {
  231. Logger.Println(cErr)
  232. }
  233. }
// dispatcher runs in its own goroutine for the lifetime of the child. Each
// value received on trigger means the child has no usable broker and must be
// redispatched. When trigger is closed the loop ends and the child is torn
// down: its broker reference is released, it is removed from the parent
// consumer, and its messages and errors channels are closed.
func (child *partitionConsumer) dispatcher() {
	for _ = range child.trigger {
		select {
		case <-child.dying:
			// Shutdown was requested: closing trigger ends the range loop.
			close(child.trigger)
		case <-time.After(child.conf.Consumer.Retry.Backoff):
			// Back off, then release the old broker (if any) and look for a new one.
			if child.broker != nil {
				child.consumer.unrefBrokerConsumer(child.broker)
				child.broker = nil
			}

			Logger.Printf("consumer/%s/%d finding new broker\n", child.topic, child.partition)
			if err := child.dispatch(); err != nil {
				child.sendError(err)
				// Re-arm the trigger so the next loop iteration retries.
				child.trigger <- none{}
			}
		}
	}

	if child.broker != nil {
		child.consumer.unrefBrokerConsumer(child.broker)
	}
	child.consumer.removeChild(child)
	close(child.messages)
	close(child.errors)
}
  258. func (child *partitionConsumer) dispatch() error {
  259. if err := child.consumer.client.RefreshMetadata(child.topic); err != nil {
  260. return err
  261. }
  262. var leader *Broker
  263. var err error
  264. if leader, err = child.consumer.client.Leader(child.topic, child.partition); err != nil {
  265. return err
  266. }
  267. child.broker = child.consumer.refBrokerConsumer(leader)
  268. child.broker.input <- child
  269. return nil
  270. }
  271. func (child *partitionConsumer) chooseStartingOffset(offset int64) error {
  272. newestOffset, err := child.consumer.client.GetOffset(child.topic, child.partition, OffsetNewest)
  273. if err != nil {
  274. return err
  275. }
  276. oldestOffset, err := child.consumer.client.GetOffset(child.topic, child.partition, OffsetOldest)
  277. if err != nil {
  278. return err
  279. }
  280. switch {
  281. case offset == OffsetNewest:
  282. child.offset = newestOffset
  283. case offset == OffsetOldest:
  284. child.offset = oldestOffset
  285. case offset >= oldestOffset && offset <= newestOffset:
  286. child.offset = offset
  287. default:
  288. return ErrOffsetOutOfRange
  289. }
  290. return nil
  291. }
// Messages returns the receive-only view of the child's messages channel.
func (child *partitionConsumer) Messages() <-chan *ConsumerMessage {
	return child.messages
}
// Errors returns the receive-only view of the child's errors channel.
func (child *partitionConsumer) Errors() <-chan *ConsumerError {
	return child.errors
}
  298. func (child *partitionConsumer) AsyncClose() {
  299. // this triggers whatever worker owns this child to abandon it and close its trigger channel, which causes
  300. // the dispatcher to exit its loop, which removes it from the consumer then closes its 'messages' and
  301. // 'errors' channel (alternatively, if the child is already at the dispatcher for some reason, that will
  302. // also just close itself)
  303. close(child.dying)
  304. }
  305. func (child *partitionConsumer) Close() error {
  306. child.AsyncClose()
  307. go withRecover(func() {
  308. for _ = range child.messages {
  309. // drain
  310. }
  311. })
  312. var errors ConsumerErrors
  313. for err := range child.errors {
  314. errors = append(errors, err)
  315. }
  316. if len(errors) > 0 {
  317. return errors
  318. }
  319. return nil
  320. }
  321. func (child *partitionConsumer) handleResponse(response *FetchResponse) error {
  322. block := response.GetBlock(child.topic, child.partition)
  323. if block == nil {
  324. return ErrIncompleteResponse
  325. }
  326. if block.Err != ErrNoError {
  327. return block.Err
  328. }
  329. if len(block.MsgSet.Messages) == 0 {
  330. // We got no messages. If we got a trailing one then we need to ask for more data.
  331. // Otherwise we just poll again and wait for one to be produced...
  332. if block.MsgSet.PartialTrailingMessage {
  333. if child.conf.Consumer.Fetch.Max > 0 && child.fetchSize == child.conf.Consumer.Fetch.Max {
  334. // we can't ask for more data, we've hit the configured limit
  335. child.sendError(ErrMessageTooLarge)
  336. child.offset++ // skip this one so we can keep processing future messages
  337. } else {
  338. child.fetchSize *= 2
  339. if child.conf.Consumer.Fetch.Max > 0 && child.fetchSize > child.conf.Consumer.Fetch.Max {
  340. child.fetchSize = child.conf.Consumer.Fetch.Max
  341. }
  342. }
  343. }
  344. return nil
  345. }
  346. // we got messages, reset our fetch size in case it was increased for a previous request
  347. child.fetchSize = child.conf.Consumer.Fetch.Default
  348. incomplete := false
  349. atLeastOne := false
  350. prelude := true
  351. for _, msgBlock := range block.MsgSet.Messages {
  352. for _, msg := range msgBlock.Messages() {
  353. if prelude && msg.Offset < child.offset {
  354. continue
  355. }
  356. prelude = false
  357. if msg.Offset >= child.offset {
  358. atLeastOne = true
  359. child.messages <- &ConsumerMessage{
  360. Topic: child.topic,
  361. Partition: child.partition,
  362. Key: msg.Msg.Key,
  363. Value: msg.Msg.Value,
  364. Offset: msg.Offset,
  365. }
  366. child.offset = msg.Offset + 1
  367. } else {
  368. incomplete = true
  369. }
  370. }
  371. }
  372. if incomplete || !atLeastOne {
  373. return ErrIncompleteResponse
  374. }
  375. return nil
  376. }
// brokerConsumer

// brokerConsumer fetches messages from a single broker on behalf of every
// partitionConsumer currently subscribed to it.
type brokerConsumer struct {
	// consumer is the parent that created this worker.
	consumer *consumer
	// broker is the broker that fetch requests are sent to.
	broker *Broker
	// input receives partitionConsumers being handed to this broker; it is
	// closed by unrefBrokerConsumer when the last reference is released.
	input chan *partitionConsumer
	// newSubscriptions carries batches of new subscriptions from
	// subscriptionManager to subscriptionConsumer (nil when there are none).
	newSubscriptions chan []*partitionConsumer
	// wait is written to by subscriptionManager while it holds buffered
	// subscriptions, and closed when subscriptionManager shuts down.
	wait chan none
	// subscriptions is the set of partitionConsumers included in each fetch.
	subscriptions map[*partitionConsumer]none
	// refs counts the partitionConsumers referencing this worker; it is only
	// modified while holding consumer.lock.
	refs int
}
// subscriptionManager runs in its own goroutine and batches the
// partitionConsumers arriving on input for subscriptionConsumer. It exits when
// input is closed, after closing wait, flushing any buffered subscriptions and
// closing newSubscriptions.
func (w *brokerConsumer) subscriptionManager() {
	var buffer []*partitionConsumer

	// The subscriptionManager constantly accepts new subscriptions on `input` (even when the main subscriptionConsumer
	// goroutine is in the middle of a network request) and batches it up. The main worker goroutine picks
	// up a batch of new subscriptions between every network request by reading from `newSubscriptions`, so we give
	// it nil if no new subscriptions are available. We also write to `wait` only when new subscriptions is available,
	// so the main goroutine can block waiting for work if it has none.
	for {
		if len(buffer) > 0 {
			select {
			case event, ok := <-w.input:
				if !ok {
					goto done
				}
				buffer = append(buffer, event)
			case w.newSubscriptions <- buffer:
				// The batch was handed over; start a fresh one.
				buffer = nil
			case w.wait <- none{}:
			}
		} else {
			select {
			case event, ok := <-w.input:
				if !ok {
					goto done
				}
				buffer = append(buffer, event)
			case w.newSubscriptions <- nil:
			}
		}
	}

done:
	// input was closed: release anyone blocked on wait, deliver whatever is
	// still buffered, then signal the end of the stream.
	close(w.wait)
	if len(buffer) > 0 {
		w.newSubscriptions <- buffer
	}
	close(w.newSubscriptions)
}
// subscriptionConsumer runs in its own goroutine and is the fetch loop for
// this broker. Between fetches it picks up new subscriptions, then issues one
// FetchRequest covering every subscription and lets each child process its
// part of the response. It exits when newSubscriptions is closed, or after
// calling abort when a fetch fails.
func (w *brokerConsumer) subscriptionConsumer() {
	<-w.wait // wait for our first piece of work

	// the subscriptionConsumer ensures we will get nil right away if no new subscriptions is available
	for newSubscriptions := range w.newSubscriptions {
		w.updateSubscriptionCache(newSubscriptions)

		if len(w.subscriptions) == 0 {
			// We're about to be shut down or we're about to receive more subscriptions.
			// Either way, the signal just hasn't propagated to our goroutine yet.
			<-w.wait
			continue
		}

		response, err := w.fetchNewMessages()

		if err != nil {
			Logger.Printf("consumer/broker/%d disconnecting due to error processing FetchRequest: %s\n", w.broker.ID(), err)
			w.abort(err)
			return
		}

		for child := range w.subscriptions {
			if err := child.handleResponse(response); err != nil {
				// The cases fall through on purpose: an out-of-range offset closes
				// the child, reports the error and redispatches; any other
				// unlisted error reports and redispatches; the three listed
				// errors only redispatch.
				switch err {
				case ErrOffsetOutOfRange:
					// there's no point in retrying this it will just fail the same way again
					// so shut it down and force the user to choose what to do
					child.AsyncClose()
					fallthrough
				default:
					child.sendError(err)
					fallthrough
				case ErrUnknownTopicOrPartition, ErrNotLeaderForPartition, ErrLeaderNotAvailable:
					// these three are not fatal errors, but do require redispatching
					child.trigger <- none{}
					delete(w.subscriptions, child)
					Logger.Printf("consumer/broker/%d abandoned subscription to %s/%d because %s\n", w.broker.ID(), child.topic, child.partition, err)
				}
			}
		}
	}
}
  462. func (w *brokerConsumer) updateSubscriptionCache(newSubscriptions []*partitionConsumer) {
  463. // take new subscriptions, and abandon subscriptions that have been closed
  464. for _, child := range newSubscriptions {
  465. w.subscriptions[child] = none{}
  466. Logger.Printf("consumer/broker/%d added subscription to %s/%d\n", w.broker.ID(), child.topic, child.partition)
  467. }
  468. for child := range w.subscriptions {
  469. select {
  470. case <-child.dying:
  471. close(child.trigger)
  472. delete(w.subscriptions, child)
  473. Logger.Printf("consumer/broker/%d closed dead subscription to %s/%d\n", w.broker.ID(), child.topic, child.partition)
  474. default:
  475. }
  476. }
  477. }
  478. func (w *brokerConsumer) abort(err error) {
  479. w.consumer.abandonBrokerConsumer(w)
  480. _ = w.broker.Close() // we don't care about the error this might return, we already have one
  481. for child := range w.subscriptions {
  482. child.sendError(err)
  483. child.trigger <- none{}
  484. }
  485. for newSubscription := range w.newSubscriptions {
  486. for _, child := range newSubscription {
  487. child.sendError(err)
  488. child.trigger <- none{}
  489. }
  490. }
  491. }
  492. func (w *brokerConsumer) fetchNewMessages() (*FetchResponse, error) {
  493. request := &FetchRequest{
  494. MinBytes: w.consumer.conf.Consumer.Fetch.Min,
  495. MaxWaitTime: int32(w.consumer.conf.Consumer.MaxWaitTime / time.Millisecond),
  496. }
  497. for child := range w.subscriptions {
  498. request.AddBlock(child.topic, child.partition, child.offset, child.fetchSize)
  499. }
  500. return w.broker.Fetch(request)
  501. }