consumer.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571
  1. package sarama
  2. import (
  3. "fmt"
  4. "sync"
  5. "time"
  6. )
  7. // ConsumerMessage encapsulates a Kafka message returned by the consumer.
  8. type ConsumerMessage struct {
  9. Key, Value []byte
  10. Topic string
  11. Partition int32
  12. Offset int64
  13. }
  14. // ConsumerError is what is provided to the user when an error occurs.
  15. // It wraps an error and includes the topic and partition.
  16. type ConsumerError struct {
  17. Topic string
  18. Partition int32
  19. Err error
  20. }
  21. func (ce ConsumerError) Error() string {
  22. return fmt.Sprintf("kafka: error while consuming %s/%d: %s", ce.Topic, ce.Partition, ce.Err)
  23. }
  24. // ConsumerErrors is a type that wraps a batch of errors and implements the Error interface.
  25. // It can be returned from the PartitionConsumer's Close methods to avoid the need to manually drain errors
  26. // when stopping.
  27. type ConsumerErrors []*ConsumerError
  28. func (ce ConsumerErrors) Error() string {
  29. return fmt.Sprintf("kafka: %d errors while consuming", len(ce))
  30. }
  31. // Consumer manages PartitionConsumers which process Kafka messages from brokers. You MUST call Close()
  32. // on a consumer to avoid leaks, it will not be garbage-collected automatically when it passes out of
  33. // scope.
  34. type Consumer interface {
  35. // ConsumePartition creates a PartitionConsumer on the given topic/partition with the given offset. It will
  36. // return an error if this Consumer is already consuming on the given topic/partition. Offset can be a
  37. // literal offset, or OffsetNewest or OffsetOldest
  38. ConsumePartition(topic string, partition int32, offset int64) (PartitionConsumer, error)
  39. // Close shuts down the consumer. It must be called after all child PartitionConsumers have already been closed.
  40. Close() error
  41. }
  42. type consumer struct {
  43. client *Client
  44. conf *Config
  45. ownClient bool
  46. lock sync.Mutex
  47. children map[string]map[int32]*partitionConsumer
  48. brokerConsumers map[*Broker]*brokerConsumer
  49. }
  50. // NewConsumer creates a new consumer using the given broker addresses and configuration.
  51. func NewConsumer(addrs []string, config *Config) (Consumer, error) {
  52. client, err := NewClient(addrs, config)
  53. if err != nil {
  54. return nil, err
  55. }
  56. c, err := NewConsumerFromClient(client)
  57. if err != nil {
  58. return nil, err
  59. }
  60. c.(*consumer).ownClient = true
  61. return c, nil
  62. }
  63. // NewConsumerFromClient creates a new consumer using the given client.
  64. func NewConsumerFromClient(client *Client) (Consumer, error) {
  65. // Check that we are not dealing with a closed Client before processing any other arguments
  66. if client.Closed() {
  67. return nil, ErrClosedClient
  68. }
  69. c := &consumer{
  70. client: client,
  71. conf: client.conf,
  72. children: make(map[string]map[int32]*partitionConsumer),
  73. brokerConsumers: make(map[*Broker]*brokerConsumer),
  74. }
  75. return c, nil
  76. }
  77. func (c *consumer) Close() error {
  78. if c.ownClient {
  79. return c.client.Close()
  80. }
  81. return nil
  82. }
  83. const (
  84. // OffsetNewest causes the consumer to start at the most recent available offset, as
  85. // determined by querying the broker.
  86. OffsetNewest int64 = -1
  87. // OffsetOldest causes the consumer to start at the oldest available offset, as
  88. // determined by querying the broker.
  89. OffsetOldest int64 = -2
  90. )
  91. func (c *consumer) ConsumePartition(topic string, partition int32, offset int64) (PartitionConsumer, error) {
  92. child := &partitionConsumer{
  93. consumer: c,
  94. conf: c.conf,
  95. topic: topic,
  96. partition: partition,
  97. messages: make(chan *ConsumerMessage, c.conf.ChannelBufferSize),
  98. errors: make(chan *ConsumerError, c.conf.ChannelBufferSize),
  99. trigger: make(chan none, 1),
  100. dying: make(chan none),
  101. fetchSize: c.conf.Consumer.Fetch.Default,
  102. }
  103. if err := child.chooseStartingOffset(offset); err != nil {
  104. return nil, err
  105. }
  106. if leader, err := c.client.Leader(child.topic, child.partition); err != nil {
  107. return nil, err
  108. } else {
  109. child.broker = leader
  110. }
  111. if err := c.addChild(child); err != nil {
  112. return nil, err
  113. }
  114. go withRecover(child.dispatcher)
  115. brokerWorker := c.refBrokerConsumer(child.broker)
  116. brokerWorker.input <- child
  117. return child, nil
  118. }
  119. func (c *consumer) addChild(child *partitionConsumer) error {
  120. c.lock.Lock()
  121. defer c.lock.Unlock()
  122. topicChildren := c.children[child.topic]
  123. if topicChildren == nil {
  124. topicChildren = make(map[int32]*partitionConsumer)
  125. c.children[child.topic] = topicChildren
  126. }
  127. if topicChildren[child.partition] != nil {
  128. return ConfigurationError("That topic/partition is already being consumed")
  129. }
  130. topicChildren[child.partition] = child
  131. return nil
  132. }
  133. func (c *consumer) removeChild(child *partitionConsumer) {
  134. c.lock.Lock()
  135. defer c.lock.Unlock()
  136. delete(c.children[child.topic], child.partition)
  137. }
  138. func (c *consumer) refBrokerConsumer(broker *Broker) *brokerConsumer {
  139. c.lock.Lock()
  140. defer c.lock.Unlock()
  141. brokerWorker := c.brokerConsumers[broker]
  142. if brokerWorker == nil {
  143. brokerWorker = &brokerConsumer{
  144. consumer: c,
  145. broker: broker,
  146. input: make(chan *partitionConsumer),
  147. newSubscriptions: make(chan []*partitionConsumer),
  148. wait: make(chan none),
  149. subscriptions: make(map[*partitionConsumer]none),
  150. refs: 1,
  151. }
  152. go withRecover(brokerWorker.subscriptionManager)
  153. go withRecover(brokerWorker.subscriptionConsumer)
  154. c.brokerConsumers[broker] = brokerWorker
  155. } else {
  156. brokerWorker.refs++
  157. }
  158. return brokerWorker
  159. }
  160. func (c *consumer) unrefBrokerConsumer(broker *Broker) {
  161. c.lock.Lock()
  162. defer c.lock.Unlock()
  163. brokerWorker := c.brokerConsumers[broker]
  164. brokerWorker.refs--
  165. if brokerWorker.refs == 0 {
  166. close(brokerWorker.input)
  167. delete(c.brokerConsumers, broker)
  168. }
  169. }
  170. // PartitionConsumer
  171. // PartitionConsumer processes Kafka messages from a given topic and partition. You MUST call Close()
  172. // on a consumer to avoid leaks, it will not be garbage-collected automatically when it passes out of
  173. // scope (this is in addition to calling Close on the underlying consumer's client, which is still necessary).
  174. // You have to read from both the Messages and Errors channels to prevent the consumer from locking eventually.
  175. type PartitionConsumer interface {
  176. // AsyncClose initiates a shutdown of the PartitionConsumer. This method will return immediately,
  177. // after which you should wait until the 'messages' and 'errors' channel are drained.
  178. // It is required to call this function, or Close before a consumer object passes out of scope,
  179. // as it will otherwise leak memory. You must call this before calling Close on the underlying
  180. // client.
  181. AsyncClose()
  182. // Close stops the PartitionConsumer from fetching messages. It is required to call this function
  183. // (or AsyncClose) before a consumer object passes out of scope, as it will otherwise leak memory. You must
  184. // call this before calling Close on the underlying client.
  185. Close() error
  186. // Errors returns the read channel for any errors that occurred while consuming the partition.
  187. // You have to read this channel to prevent the consumer from deadlock. Under no circumstances,
  188. // the partition consumer will shut down by itself. It will just wait until it is able to continue
  189. // consuming messages. If you want to shut down your consumer, you will have trigger it yourself
  190. // by consuming this channel and calling Close or AsyncClose when appropriate.
  191. Errors() <-chan *ConsumerError
  192. // Messages returns the read channel for the messages that are returned by the broker
  193. Messages() <-chan *ConsumerMessage
  194. }
  195. type partitionConsumer struct {
  196. consumer *consumer
  197. conf *Config
  198. topic string
  199. partition int32
  200. broker *Broker
  201. messages chan *ConsumerMessage
  202. errors chan *ConsumerError
  203. trigger, dying chan none
  204. fetchSize int32
  205. offset int64
  206. }
  207. func (child *partitionConsumer) sendError(err error) {
  208. child.errors <- &ConsumerError{
  209. Topic: child.topic,
  210. Partition: child.partition,
  211. Err: err,
  212. }
  213. }
  214. func (child *partitionConsumer) dispatcher() {
  215. for _ = range child.trigger {
  216. select {
  217. case <-child.dying:
  218. close(child.trigger)
  219. default:
  220. if child.broker != nil {
  221. child.consumer.unrefBrokerConsumer(child.broker)
  222. child.broker = nil
  223. }
  224. if err := child.dispatch(); err != nil {
  225. child.sendError(err)
  226. child.trigger <- none{}
  227. // there's no point in trying again *right* away
  228. select {
  229. case <-child.dying:
  230. close(child.trigger)
  231. case <-time.After(10 * time.Second):
  232. }
  233. }
  234. }
  235. }
  236. if child.broker != nil {
  237. child.consumer.unrefBrokerConsumer(child.broker)
  238. }
  239. child.consumer.removeChild(child)
  240. close(child.messages)
  241. close(child.errors)
  242. }
  243. func (child *partitionConsumer) dispatch() error {
  244. if err := child.consumer.client.RefreshTopicMetadata(child.topic); err != nil {
  245. return err
  246. }
  247. if leader, err := child.consumer.client.Leader(child.topic, child.partition); err != nil {
  248. return err
  249. } else {
  250. child.broker = leader
  251. }
  252. brokerWorker := child.consumer.refBrokerConsumer(child.broker)
  253. brokerWorker.input <- child
  254. return nil
  255. }
  256. func (child *partitionConsumer) chooseStartingOffset(offset int64) (err error) {
  257. var where OffsetTime
  258. switch offset {
  259. case OffsetNewest:
  260. where = LatestOffsets
  261. case OffsetOldest:
  262. where = EarliestOffset
  263. default:
  264. if offset < 0 {
  265. return ConfigurationError("Invalid offset")
  266. }
  267. child.offset = offset
  268. return nil
  269. }
  270. child.offset, err = child.consumer.client.GetOffset(child.topic, child.partition, where)
  271. return err
  272. }
  273. func (child *partitionConsumer) Messages() <-chan *ConsumerMessage {
  274. return child.messages
  275. }
  276. func (child *partitionConsumer) Errors() <-chan *ConsumerError {
  277. return child.errors
  278. }
  279. func (child *partitionConsumer) AsyncClose() {
  280. // this triggers whatever worker owns this child to abandon it and close its trigger channel, which causes
  281. // the dispatcher to exit its loop, which removes it from the consumer then closes its 'messages' and
  282. // 'errors' channel (alternatively, if the child is already at the dispatcher for some reason, that will
  283. // also just close itself)
  284. close(child.dying)
  285. }
  286. func (child *partitionConsumer) Close() error {
  287. child.AsyncClose()
  288. go withRecover(func() {
  289. for _ = range child.messages {
  290. // drain
  291. }
  292. })
  293. var errors ConsumerErrors
  294. for err := range child.errors {
  295. errors = append(errors, err)
  296. }
  297. if len(errors) > 0 {
  298. return errors
  299. }
  300. return nil
  301. }
  302. // brokerConsumer
  303. type brokerConsumer struct {
  304. consumer *consumer
  305. broker *Broker
  306. input chan *partitionConsumer
  307. newSubscriptions chan []*partitionConsumer
  308. wait chan none
  309. subscriptions map[*partitionConsumer]none
  310. refs int
  311. }
  312. func (w *brokerConsumer) subscriptionManager() {
  313. var buffer []*partitionConsumer
  314. // The subscriptionManager constantly accepts new subscriptions on `input` (even when the main subscriptionConsumer
  315. // goroutine is in the middle of a network request) and batches it up. The main worker goroutine picks
  316. // up a batch of new subscriptions between every network request by reading from `newSubscriptions`, so we give
  317. // it nil if no new subscriptions are available. We also write to `wait` only when new subscriptions is available,
  318. // so the main goroutine can block waiting for work if it has none.
  319. for {
  320. if len(buffer) > 0 {
  321. select {
  322. case event, ok := <-w.input:
  323. if !ok {
  324. goto done
  325. }
  326. buffer = append(buffer, event)
  327. case w.newSubscriptions <- buffer:
  328. buffer = nil
  329. case w.wait <- none{}:
  330. }
  331. } else {
  332. select {
  333. case event, ok := <-w.input:
  334. if !ok {
  335. goto done
  336. }
  337. buffer = append(buffer, event)
  338. case w.newSubscriptions <- nil:
  339. }
  340. }
  341. }
  342. done:
  343. close(w.wait)
  344. if len(buffer) > 0 {
  345. w.newSubscriptions <- buffer
  346. }
  347. close(w.newSubscriptions)
  348. }
  349. func (w *brokerConsumer) subscriptionConsumer() {
  350. <-w.wait // wait for our first piece of work
  351. // the subscriptionConsumer ensures we will get nil right away if no new subscriptions is available
  352. for newSubscriptions := range w.newSubscriptions {
  353. w.updateSubscriptionCache(newSubscriptions)
  354. if len(w.subscriptions) == 0 {
  355. // We're about to be shut down or we're about to receive more subscriptions.
  356. // Either way, the signal just hasn't propagated to our goroutine yet.
  357. <-w.wait
  358. continue
  359. }
  360. response, err := w.fetchNewMessages()
  361. if err != nil {
  362. Logger.Printf("Unexpected error processing FetchRequest; disconnecting broker %s: %s\n", w.broker.addr, err)
  363. w.abort(err)
  364. return
  365. }
  366. for child := range w.subscriptions {
  367. block := response.GetBlock(child.topic, child.partition)
  368. if block == nil {
  369. child.sendError(ErrIncompleteResponse)
  370. child.trigger <- none{}
  371. delete(w.subscriptions, child)
  372. continue
  373. }
  374. w.handleResponse(child, block)
  375. }
  376. }
  377. }
  378. func (w *brokerConsumer) updateSubscriptionCache(newSubscriptions []*partitionConsumer) {
  379. // take new subscriptions, and abandon subscriptions that have been closed
  380. for _, child := range newSubscriptions {
  381. w.subscriptions[child] = none{}
  382. }
  383. for child := range w.subscriptions {
  384. select {
  385. case <-child.dying:
  386. close(child.trigger)
  387. delete(w.subscriptions, child)
  388. default:
  389. }
  390. }
  391. }
  392. func (w *brokerConsumer) abort(err error) {
  393. _ = w.broker.Close() // we don't care about the error this might return, we already have one
  394. w.consumer.client.disconnectBroker(w.broker)
  395. for child := range w.subscriptions {
  396. child.sendError(err)
  397. child.trigger <- none{}
  398. }
  399. for newSubscription := range w.newSubscriptions {
  400. for _, child := range newSubscription {
  401. child.sendError(err)
  402. child.trigger <- none{}
  403. }
  404. }
  405. }
  406. func (w *brokerConsumer) fetchNewMessages() (*FetchResponse, error) {
  407. request := &FetchRequest{
  408. MinBytes: w.consumer.conf.Consumer.Fetch.Min,
  409. MaxWaitTime: int32(w.consumer.conf.Consumer.MaxWaitTime / time.Millisecond),
  410. }
  411. for child := range w.subscriptions {
  412. request.AddBlock(child.topic, child.partition, child.offset, child.fetchSize)
  413. }
  414. return w.broker.Fetch(request)
  415. }
  416. func (w *brokerConsumer) handleResponse(child *partitionConsumer, block *FetchResponseBlock) {
  417. switch block.Err {
  418. case ErrNoError:
  419. break
  420. default:
  421. child.sendError(block.Err)
  422. fallthrough
  423. case ErrUnknownTopicOrPartition, ErrNotLeaderForPartition, ErrLeaderNotAvailable:
  424. // doesn't belong to us, redispatch it
  425. child.trigger <- none{}
  426. delete(w.subscriptions, child)
  427. return
  428. }
  429. if len(block.MsgSet.Messages) == 0 {
  430. // We got no messages. If we got a trailing one then we need to ask for more data.
  431. // Otherwise we just poll again and wait for one to be produced...
  432. if block.MsgSet.PartialTrailingMessage {
  433. if child.conf.Consumer.Fetch.Max > 0 && child.fetchSize == child.conf.Consumer.Fetch.Max {
  434. // we can't ask for more data, we've hit the configured limit
  435. child.sendError(ErrMessageTooLarge)
  436. child.offset++ // skip this one so we can keep processing future messages
  437. } else {
  438. child.fetchSize *= 2
  439. if child.conf.Consumer.Fetch.Max > 0 && child.fetchSize > child.conf.Consumer.Fetch.Max {
  440. child.fetchSize = child.conf.Consumer.Fetch.Max
  441. }
  442. }
  443. }
  444. return
  445. }
  446. // we got messages, reset our fetch size in case it was increased for a previous request
  447. child.fetchSize = child.conf.Consumer.Fetch.Default
  448. incomplete := false
  449. atLeastOne := false
  450. prelude := true
  451. for _, msgBlock := range block.MsgSet.Messages {
  452. for _, msg := range msgBlock.Messages() {
  453. if prelude && msg.Offset < child.offset {
  454. continue
  455. }
  456. prelude = false
  457. if msg.Offset >= child.offset {
  458. atLeastOne = true
  459. child.messages <- &ConsumerMessage{
  460. Topic: child.topic,
  461. Partition: child.partition,
  462. Key: msg.Msg.Key,
  463. Value: msg.Msg.Value,
  464. Offset: msg.Offset,
  465. }
  466. child.offset = msg.Offset + 1
  467. } else {
  468. incomplete = true
  469. }
  470. }
  471. }
  472. if incomplete || !atLeastOne {
  473. child.sendError(ErrIncompleteResponse)
  474. child.trigger <- none{}
  475. delete(w.subscriptions, child)
  476. }
  477. }