consumer.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. package sarama
  2. import (
  3. "fmt"
  4. "sync"
  5. "time"
  6. )
  7. // ConsumerMessage encapsulates a Kafka message returned by the consumer.
  8. type ConsumerMessage struct {
  9. Key, Value []byte
  10. Topic string
  11. Partition int32
  12. Offset int64
  13. }
  14. // ConsumerError is what is provided to the user when an error occurs.
  15. // It wraps an error and includes the topic and partition.
  16. type ConsumerError struct {
  17. Topic string
  18. Partition int32
  19. Err error
  20. }
  21. func (ce ConsumerError) Error() string {
  22. return fmt.Sprintf("kafka: error while consuming %s/%d: %s", ce.Topic, ce.Partition, ce.Err)
  23. }
  24. // ConsumerErrors is a type that wraps a batch of errors and implements the Error interface.
  25. // It can be returned from the PartitionConsumer's Close methods to avoid the need to manually drain errors
  26. // when stopping.
  27. type ConsumerErrors []*ConsumerError
  28. func (ce ConsumerErrors) Error() string {
  29. return fmt.Sprintf("kafka: %d errors while consuming", len(ce))
  30. }
  31. // Consumer manages PartitionConsumers which process Kafka messages from brokers. You MUST call Close()
  32. // on a consumer to avoid leaks, it will not be garbage-collected automatically when it passes out of
  33. // scope.
  34. type Consumer struct {
  35. client *Client
  36. conf *Config
  37. ownClient bool
  38. lock sync.Mutex
  39. children map[string]map[int32]*PartitionConsumer
  40. brokerConsumers map[*Broker]*brokerConsumer
  41. }
  42. // NewConsumer creates a new consumer using the given broker addresses and configuration.
  43. func NewConsumer(addrs []string, config *Config) (*Consumer, error) {
  44. client, err := NewClient(addrs, config)
  45. if err != nil {
  46. return nil, err
  47. }
  48. c, err := NewConsumerFromClient(client)
  49. if err != nil {
  50. return nil, err
  51. }
  52. c.ownClient = true
  53. return c, nil
  54. }
  55. // NewConsumerFromClient creates a new consumer using the given client.
  56. func NewConsumerFromClient(client *Client) (*Consumer, error) {
  57. // Check that we are not dealing with a closed Client before processing any other arguments
  58. if client.Closed() {
  59. return nil, ErrClosedClient
  60. }
  61. c := &Consumer{
  62. client: client,
  63. conf: client.conf,
  64. children: make(map[string]map[int32]*PartitionConsumer),
  65. brokerConsumers: make(map[*Broker]*brokerConsumer),
  66. }
  67. return c, nil
  68. }
  69. // Close shuts down the consumer. It must be called after all child PartitionConsumers have already been closed.
  70. func (c *Consumer) Close() error {
  71. if c.ownClient {
  72. return c.client.Close()
  73. }
  74. return nil
  75. }
  76. const (
  77. // OffsetNewest causes the consumer to start at the most recent available offset, as
  78. // determined by querying the broker.
  79. OffsetNewest int64 = -1
  80. // OffsetOldest causes the consumer to start at the oldest available offset, as
  81. // determined by querying the broker.
  82. OffsetOldest int64 = -2
  83. )
  84. // ConsumePartition creates a PartitionConsumer on the given topic/partition with the given offset. It will
  85. // return an error if this Consumer is already consuming on the given topic/partition. Offset can be a
  86. // literal offset, or OffsetNewest or OffsetOldest
  87. func (c *Consumer) ConsumePartition(topic string, partition int32, offset int64) (*PartitionConsumer, error) {
  88. child := &PartitionConsumer{
  89. consumer: c,
  90. conf: c.conf,
  91. topic: topic,
  92. partition: partition,
  93. messages: make(chan *ConsumerMessage, c.conf.ChannelBufferSize),
  94. errors: make(chan *ConsumerError, c.conf.ChannelBufferSize),
  95. trigger: make(chan none, 1),
  96. dying: make(chan none),
  97. fetchSize: c.conf.Consumer.Fetch.Default,
  98. }
  99. if err := child.chooseStartingOffset(offset); err != nil {
  100. return nil, err
  101. }
  102. if leader, err := c.client.Leader(child.topic, child.partition); err != nil {
  103. return nil, err
  104. } else {
  105. child.broker = leader
  106. }
  107. if err := c.addChild(child); err != nil {
  108. return nil, err
  109. }
  110. go withRecover(child.dispatcher)
  111. brokerWorker := c.refBrokerConsumer(child.broker)
  112. brokerWorker.input <- child
  113. return child, nil
  114. }
  115. func (c *Consumer) addChild(child *PartitionConsumer) error {
  116. c.lock.Lock()
  117. defer c.lock.Unlock()
  118. topicChildren := c.children[child.topic]
  119. if topicChildren == nil {
  120. topicChildren = make(map[int32]*PartitionConsumer)
  121. c.children[child.topic] = topicChildren
  122. }
  123. if topicChildren[child.partition] != nil {
  124. return ConfigurationError("That topic/partition is already being consumed")
  125. }
  126. topicChildren[child.partition] = child
  127. return nil
  128. }
  129. func (c *Consumer) removeChild(child *PartitionConsumer) {
  130. c.lock.Lock()
  131. defer c.lock.Unlock()
  132. delete(c.children[child.topic], child.partition)
  133. }
  134. func (c *Consumer) refBrokerConsumer(broker *Broker) *brokerConsumer {
  135. c.lock.Lock()
  136. defer c.lock.Unlock()
  137. brokerWorker := c.brokerConsumers[broker]
  138. if brokerWorker == nil {
  139. brokerWorker = &brokerConsumer{
  140. consumer: c,
  141. broker: broker,
  142. input: make(chan *PartitionConsumer),
  143. newSubscriptions: make(chan []*PartitionConsumer),
  144. wait: make(chan none),
  145. subscriptions: make(map[*PartitionConsumer]none),
  146. refs: 1,
  147. }
  148. go withRecover(brokerWorker.subscriptionManager)
  149. go withRecover(brokerWorker.subscriptionConsumer)
  150. c.brokerConsumers[broker] = brokerWorker
  151. } else {
  152. brokerWorker.refs++
  153. }
  154. return brokerWorker
  155. }
  156. func (c *Consumer) unrefBrokerConsumer(broker *Broker) {
  157. c.lock.Lock()
  158. defer c.lock.Unlock()
  159. brokerWorker := c.brokerConsumers[broker]
  160. brokerWorker.refs--
  161. if brokerWorker.refs == 0 {
  162. close(brokerWorker.input)
  163. delete(c.brokerConsumers, broker)
  164. }
  165. }
  166. // PartitionConsumer
  167. // PartitionConsumer processes Kafka messages from a given topic and partition. You MUST call Close()
  168. // on a consumer to avoid leaks, it will not be garbage-collected automatically when it passes out of
  169. // scope (this is in addition to calling Close on the underlying consumer's client, which is still necessary).
  170. // You have to read from both the Messages and Errors channels to prevent the consumer from locking eventually.
  171. type PartitionConsumer struct {
  172. consumer *Consumer
  173. conf *Config
  174. topic string
  175. partition int32
  176. broker *Broker
  177. messages chan *ConsumerMessage
  178. errors chan *ConsumerError
  179. trigger, dying chan none
  180. fetchSize int32
  181. offset int64
  182. }
  183. func (child *PartitionConsumer) sendError(err error) {
  184. child.errors <- &ConsumerError{
  185. Topic: child.topic,
  186. Partition: child.partition,
  187. Err: err,
  188. }
  189. }
  190. func (child *PartitionConsumer) dispatcher() {
  191. for _ = range child.trigger {
  192. select {
  193. case <-child.dying:
  194. close(child.trigger)
  195. default:
  196. if child.broker != nil {
  197. child.consumer.unrefBrokerConsumer(child.broker)
  198. child.broker = nil
  199. }
  200. if err := child.dispatch(); err != nil {
  201. child.sendError(err)
  202. child.trigger <- none{}
  203. // there's no point in trying again *right* away
  204. select {
  205. case <-child.dying:
  206. close(child.trigger)
  207. case <-time.After(10 * time.Second):
  208. }
  209. }
  210. }
  211. }
  212. if child.broker != nil {
  213. child.consumer.unrefBrokerConsumer(child.broker)
  214. }
  215. child.consumer.removeChild(child)
  216. close(child.messages)
  217. close(child.errors)
  218. }
  219. func (child *PartitionConsumer) dispatch() error {
  220. if err := child.consumer.client.RefreshTopicMetadata(child.topic); err != nil {
  221. return err
  222. }
  223. if leader, err := child.consumer.client.Leader(child.topic, child.partition); err != nil {
  224. return err
  225. } else {
  226. child.broker = leader
  227. }
  228. brokerWorker := child.consumer.refBrokerConsumer(child.broker)
  229. brokerWorker.input <- child
  230. return nil
  231. }
  232. func (child *PartitionConsumer) chooseStartingOffset(offset int64) (err error) {
  233. var where OffsetTime
  234. switch offset {
  235. case OffsetNewest:
  236. where = LatestOffsets
  237. case OffsetOldest:
  238. where = EarliestOffset
  239. default:
  240. if offset < 0 {
  241. return ConfigurationError("Invalid offset")
  242. }
  243. child.offset = offset
  244. return nil
  245. }
  246. child.offset, err = child.consumer.client.GetOffset(child.topic, child.partition, where)
  247. return err
  248. }
  249. // Messages returns the read channel for the messages that are returned by the broker
  250. func (child *PartitionConsumer) Messages() <-chan *ConsumerMessage {
  251. return child.messages
  252. }
  253. // Errors returns the read channel for any errors that occurred while consuming the partition.
  254. // You have to read this channel to prevent the consumer from deadlock. Under no circumstances,
  255. // the partition consumer will shut down by itself. It will just wait until it is able to continue
  256. // consuming messages. If you want to shut down your consumer, you will have trigger it yourself
  257. // by consuming this channel and calling Close or AsyncClose when appropriate.
  258. func (child *PartitionConsumer) Errors() <-chan *ConsumerError {
  259. return child.errors
  260. }
  261. // AsyncClose initiates a shutdown of the PartitionConsumer. This method will return immediately,
  262. // after which you should wait until the 'messages' and 'errors' channel are drained.
  263. // It is required to call this function, or Close before a consumer object passes out of scope,
  264. // as it will otherwise leak memory. You must call this before calling Close on the underlying
  265. // client.
  266. func (child *PartitionConsumer) AsyncClose() {
  267. // this triggers whatever worker owns this child to abandon it and close its trigger channel, which causes
  268. // the dispatcher to exit its loop, which removes it from the consumer then closes its 'messages' and
  269. // 'errors' channel (alternatively, if the child is already at the dispatcher for some reason, that will
  270. // also just close itself)
  271. close(child.dying)
  272. }
  273. // Close stops the PartitionConsumer from fetching messages. It is required to call this function
  274. // (or AsyncClose) before a consumer object passes out of scope, as it will otherwise leak memory. You must
  275. // call this before calling Close on the underlying client.
  276. func (child *PartitionConsumer) Close() error {
  277. child.AsyncClose()
  278. go withRecover(func() {
  279. for _ = range child.messages {
  280. // drain
  281. }
  282. })
  283. var errors ConsumerErrors
  284. for err := range child.errors {
  285. errors = append(errors, err)
  286. }
  287. if len(errors) > 0 {
  288. return errors
  289. }
  290. return nil
  291. }
  292. // brokerConsumer
  293. type brokerConsumer struct {
  294. consumer *Consumer
  295. broker *Broker
  296. input chan *PartitionConsumer
  297. newSubscriptions chan []*PartitionConsumer
  298. wait chan none
  299. subscriptions map[*PartitionConsumer]none
  300. refs int
  301. }
  302. func (w *brokerConsumer) subscriptionManager() {
  303. var buffer []*PartitionConsumer
  304. // The subscriptionManager constantly accepts new subscriptions on `input` (even when the main subscriptionConsumer
  305. // goroutine is in the middle of a network request) and batches it up. The main worker goroutine picks
  306. // up a batch of new subscriptions between every network request by reading from `newSubscriptions`, so we give
  307. // it nil if no new subscriptions are available. We also write to `wait` only when new subscriptions is available,
  308. // so the main goroutine can block waiting for work if it has none.
  309. for {
  310. if len(buffer) > 0 {
  311. select {
  312. case event, ok := <-w.input:
  313. if !ok {
  314. goto done
  315. }
  316. buffer = append(buffer, event)
  317. case w.newSubscriptions <- buffer:
  318. buffer = nil
  319. case w.wait <- none{}:
  320. }
  321. } else {
  322. select {
  323. case event, ok := <-w.input:
  324. if !ok {
  325. goto done
  326. }
  327. buffer = append(buffer, event)
  328. case w.newSubscriptions <- nil:
  329. }
  330. }
  331. }
  332. done:
  333. close(w.wait)
  334. if len(buffer) > 0 {
  335. w.newSubscriptions <- buffer
  336. }
  337. close(w.newSubscriptions)
  338. }
  339. func (w *brokerConsumer) subscriptionConsumer() {
  340. <-w.wait // wait for our first piece of work
  341. // the subscriptionConsumer ensures we will get nil right away if no new subscriptions is available
  342. for newSubscriptions := range w.newSubscriptions {
  343. w.updateSubscriptionCache(newSubscriptions)
  344. if len(w.subscriptions) == 0 {
  345. // We're about to be shut down or we're about to receive more subscriptions.
  346. // Either way, the signal just hasn't propagated to our goroutine yet.
  347. <-w.wait
  348. continue
  349. }
  350. response, err := w.fetchNewMessages()
  351. if err != nil {
  352. Logger.Printf("Unexpected error processing FetchRequest; disconnecting broker %s: %s\n", w.broker.addr, err)
  353. w.abort(err)
  354. return
  355. }
  356. for child := range w.subscriptions {
  357. block := response.GetBlock(child.topic, child.partition)
  358. if block == nil {
  359. child.sendError(ErrIncompleteResponse)
  360. child.trigger <- none{}
  361. delete(w.subscriptions, child)
  362. continue
  363. }
  364. w.handleResponse(child, block)
  365. }
  366. }
  367. }
  368. func (w *brokerConsumer) updateSubscriptionCache(newSubscriptions []*PartitionConsumer) {
  369. // take new subscriptions, and abandon subscriptions that have been closed
  370. for _, child := range newSubscriptions {
  371. w.subscriptions[child] = none{}
  372. }
  373. for child := range w.subscriptions {
  374. select {
  375. case <-child.dying:
  376. close(child.trigger)
  377. delete(w.subscriptions, child)
  378. default:
  379. }
  380. }
  381. }
  382. func (w *brokerConsumer) abort(err error) {
  383. _ = w.broker.Close() // we don't care about the error this might return, we already have one
  384. w.consumer.client.disconnectBroker(w.broker)
  385. for child := range w.subscriptions {
  386. child.sendError(err)
  387. child.trigger <- none{}
  388. }
  389. for newSubscription := range w.newSubscriptions {
  390. for _, child := range newSubscription {
  391. child.sendError(err)
  392. child.trigger <- none{}
  393. }
  394. }
  395. }
  396. func (w *brokerConsumer) fetchNewMessages() (*FetchResponse, error) {
  397. request := &FetchRequest{
  398. MinBytes: w.consumer.conf.Consumer.Fetch.Min,
  399. MaxWaitTime: int32(w.consumer.conf.Consumer.MaxWaitTime / time.Millisecond),
  400. }
  401. for child := range w.subscriptions {
  402. request.AddBlock(child.topic, child.partition, child.offset, child.fetchSize)
  403. }
  404. return w.broker.Fetch(request)
  405. }
  406. func (w *brokerConsumer) handleResponse(child *PartitionConsumer, block *FetchResponseBlock) {
  407. switch block.Err {
  408. case ErrNoError:
  409. break
  410. default:
  411. child.sendError(block.Err)
  412. fallthrough
  413. case ErrUnknownTopicOrPartition, ErrNotLeaderForPartition, ErrLeaderNotAvailable:
  414. // doesn't belong to us, redispatch it
  415. child.trigger <- none{}
  416. delete(w.subscriptions, child)
  417. return
  418. }
  419. if len(block.MsgSet.Messages) == 0 {
  420. // We got no messages. If we got a trailing one then we need to ask for more data.
  421. // Otherwise we just poll again and wait for one to be produced...
  422. if block.MsgSet.PartialTrailingMessage {
  423. if child.conf.Consumer.Fetch.Max > 0 && child.fetchSize == child.conf.Consumer.Fetch.Max {
  424. // we can't ask for more data, we've hit the configured limit
  425. child.sendError(ErrMessageTooLarge)
  426. child.offset++ // skip this one so we can keep processing future messages
  427. } else {
  428. child.fetchSize *= 2
  429. if child.conf.Consumer.Fetch.Max > 0 && child.fetchSize > child.conf.Consumer.Fetch.Max {
  430. child.fetchSize = child.conf.Consumer.Fetch.Max
  431. }
  432. }
  433. }
  434. return
  435. }
  436. // we got messages, reset our fetch size in case it was increased for a previous request
  437. child.fetchSize = child.conf.Consumer.Fetch.Default
  438. incomplete := false
  439. atLeastOne := false
  440. prelude := true
  441. for _, msgBlock := range block.MsgSet.Messages {
  442. for _, msg := range msgBlock.Messages() {
  443. if prelude && msg.Offset < child.offset {
  444. continue
  445. }
  446. prelude = false
  447. if msg.Offset >= child.offset {
  448. atLeastOne = true
  449. child.messages <- &ConsumerMessage{
  450. Topic: child.topic,
  451. Partition: child.partition,
  452. Key: msg.Msg.Key,
  453. Value: msg.Msg.Value,
  454. Offset: msg.Offset,
  455. }
  456. child.offset = msg.Offset + 1
  457. } else {
  458. incomplete = true
  459. }
  460. }
  461. }
  462. if incomplete || !atLeastOne {
  463. child.sendError(ErrIncompleteResponse)
  464. child.trigger <- none{}
  465. delete(w.subscriptions, child)
  466. }
  467. }