consumer.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631
  1. package sarama
  2. import (
  3. "fmt"
  4. "sync"
  5. "time"
  6. )
  // OffsetMethod selects how a partition consumer picks the offset at which it starts
  // consuming. It is set through the OffsetMethod field of PartitionConsumerConfig.
  type OffsetMethod int

  // The supported strategies for choosing a starting offset.
  const (
  	// OffsetMethodNewest makes the consumer begin at the most recent offset available,
  	// which is obtained by asking the broker. It is the zero value of OffsetMethod.
  	OffsetMethodNewest OffsetMethod = iota
  	// OffsetMethodOldest makes the consumer begin at the oldest offset available,
  	// which is obtained by asking the broker.
  	OffsetMethodOldest
  	// OffsetMethodManual makes the consumer begin at the offset given in the OffsetValue
  	// field of PartitionConsumerConfig, so the caller decides exactly where to start.
  	OffsetMethodManual
  )
  // ConsumerConfig bundles the options accepted by NewConsumer.
  type ConsumerConfig struct {
  	// MinFetchSize is the minimum number of bytes to fetch in one request; the broker
  	// waits until at least this much data is available. NewConsumerConfig sets it to 1,
  	// because 0 makes the consumer spin when no messages are available.
  	MinFetchSize int32

  	// MaxWaitTime is the longest the broker waits for MinFetchSize bytes before answering
  	// with less. NewConsumerConfig sets it to 250ms, because 0 makes the consumer spin when
  	// no events are available; 100-500ms is a reasonable range for most cases. Kafka only
  	// supports millisecond precision, so any sub-millisecond remainder is truncated.
  	MaxWaitTime time.Duration
  }
  30. // NewConsumerConfig creates a ConsumerConfig instance with sane defaults.
  31. func NewConsumerConfig() *ConsumerConfig {
  32. return &ConsumerConfig{
  33. MinFetchSize: 1,
  34. MaxWaitTime: 250 * time.Millisecond,
  35. }
  36. }
  37. // Validate checks a ConsumerConfig instance. It will return a
  38. // ConfigurationError if the specified value doesn't make sense.
  39. func (config *ConsumerConfig) Validate() error {
  40. if config.MinFetchSize <= 0 {
  41. return ConfigurationError("Invalid MinFetchSize")
  42. }
  43. if config.MaxWaitTime < 1*time.Millisecond {
  44. return ConfigurationError("Invalid MaxWaitTime, it needs to be at least 1ms")
  45. } else if config.MaxWaitTime < 100*time.Millisecond {
  46. Logger.Println("ConsumerConfig.MaxWaitTime is very low, which can cause high CPU and network usage. See documentation for details.")
  47. } else if config.MaxWaitTime%time.Millisecond != 0 {
  48. Logger.Println("ConsumerConfig.MaxWaitTime only supports millisecond precision; nanoseconds will be truncated.")
  49. }
  50. return nil
  51. }
  52. // PartitionConsumerConfig is used to pass multiple configuration options to AddPartition
  53. type PartitionConsumerConfig struct {
  54. // The default (maximum) amount of data to fetch from the broker in each request. The default is 32768 bytes.
  55. DefaultFetchSize int32
  56. // The maximum permittable message size - messages larger than this will return ErrMessageTooLarge. The default of 0 is
  57. // treated as no limit.
  58. MaxMessageSize int32
  59. // The method used to determine at which offset to begin consuming messages. The default is to start at the most recent message.
  60. OffsetMethod OffsetMethod
  61. // Interpreted differently according to the value of OffsetMethod.
  62. OffsetValue int64
  63. // The number of events to buffer in the Messages and Errors channel. Having this non-zero permits the
  64. // consumer to continue fetching messages in the background while client code consumes events,
  65. // greatly improving throughput. The default is 64.
  66. ChannelBufferSize int
  67. }
  68. // NewPartitionConsumerConfig creates a PartitionConsumerConfig with sane defaults.
  69. func NewPartitionConsumerConfig() *PartitionConsumerConfig {
  70. return &PartitionConsumerConfig{
  71. DefaultFetchSize: 32768,
  72. ChannelBufferSize: 64,
  73. }
  74. }
  75. // Validate checks a PartitionConsumerConfig instance. It will return a
  76. // ConfigurationError if the specified value doesn't make sense.
  77. func (config *PartitionConsumerConfig) Validate() error {
  78. if config.DefaultFetchSize <= 0 {
  79. return ConfigurationError("Invalid DefaultFetchSize")
  80. }
  81. if config.MaxMessageSize < 0 {
  82. return ConfigurationError("Invalid MaxMessageSize")
  83. }
  84. if config.ChannelBufferSize < 0 {
  85. return ConfigurationError("Invalid ChannelBufferSize")
  86. }
  87. return nil
  88. }
  // ConsumerMessage is a single Kafka message as handed to the user by the consumer,
  // together with the topic, partition and offset it was read from.
  type ConsumerMessage struct {
  	Key       []byte
  	Value     []byte
  	Topic     string
  	Partition int32
  	Offset    int64
  }
  // ConsumerError is what the user receives when something goes wrong while consuming.
  // It carries the underlying error along with the topic and partition it relates to.
  type ConsumerError struct {
  	Topic     string // topic being consumed when the error occurred
  	Partition int32  // partition being consumed when the error occurred
  	Err       error  // the underlying error
  }
  103. func (ce ConsumerError) Error() string {
  104. return fmt.Sprintf("kafka: error while consuming %s/%d: %s", ce.Topic, ce.Partition, ce.Err)
  105. }
  106. // ConsumerErrors is a type that wraps a batch of errors and implements the Error interface.
  107. // It can be returned from the PartitionConsumer's Close methods to avoid the need to manually drain errors
  108. // when stopping.
  109. type ConsumerErrors []*ConsumerError
  110. func (ce ConsumerErrors) Error() string {
  111. return fmt.Sprintf("kafka: %d errors while consuming", len(ce))
  112. }
  113. // Consumer manages PartitionConsumers which process Kafka messages from brokers.
  114. type Consumer struct {
  115. client *Client
  116. config ConsumerConfig
  117. lock sync.Mutex
  118. children map[string]map[int32]*PartitionConsumer
  119. brokerConsumers map[*Broker]*brokerConsumer
  120. }
  121. // NewConsumer creates a new consumer attached to the given client.
  122. func NewConsumer(client *Client, config *ConsumerConfig) (*Consumer, error) {
  123. // Check that we are not dealing with a closed Client before processing any other arguments
  124. if client.Closed() {
  125. return nil, ErrClosedClient
  126. }
  127. if config == nil {
  128. config = NewConsumerConfig()
  129. }
  130. if err := config.Validate(); err != nil {
  131. return nil, err
  132. }
  133. c := &Consumer{
  134. client: client,
  135. config: *config,
  136. children: make(map[string]map[int32]*PartitionConsumer),
  137. brokerConsumers: make(map[*Broker]*brokerConsumer),
  138. }
  139. return c, nil
  140. }
  141. // ConsumePartition creates a PartitionConsumer on the given topic/partition with the given configuration. It will
  142. // return an error if this Consumer is already consuming on the given topic/partition.
  143. func (c *Consumer) ConsumePartition(topic string, partition int32, config *PartitionConsumerConfig) (*PartitionConsumer, error) {
  144. if config == nil {
  145. config = NewPartitionConsumerConfig()
  146. }
  147. if err := config.Validate(); err != nil {
  148. return nil, err
  149. }
  150. child := &PartitionConsumer{
  151. consumer: c,
  152. config: *config,
  153. topic: topic,
  154. partition: partition,
  155. messages: make(chan *ConsumerMessage, config.ChannelBufferSize),
  156. errors: make(chan *ConsumerError, config.ChannelBufferSize),
  157. trigger: make(chan none, 1),
  158. dying: make(chan none),
  159. fetchSize: config.DefaultFetchSize,
  160. }
  161. if err := child.chooseStartingOffset(); err != nil {
  162. return nil, err
  163. }
  164. if leader, err := c.client.Leader(child.topic, child.partition); err != nil {
  165. return nil, err
  166. } else {
  167. child.broker = leader
  168. }
  169. if err := c.addChild(child); err != nil {
  170. return nil, err
  171. }
  172. go withRecover(child.dispatcher)
  173. brokerWorker := c.refBrokerConsumer(child.broker)
  174. brokerWorker.input <- child
  175. return child, nil
  176. }
  177. func (c *Consumer) addChild(child *PartitionConsumer) error {
  178. c.lock.Lock()
  179. defer c.lock.Unlock()
  180. topicChildren := c.children[child.topic]
  181. if topicChildren == nil {
  182. topicChildren = make(map[int32]*PartitionConsumer)
  183. c.children[child.topic] = topicChildren
  184. }
  185. if topicChildren[child.partition] != nil {
  186. return ConfigurationError("That topic/partition is already being consumed")
  187. }
  188. topicChildren[child.partition] = child
  189. return nil
  190. }
  191. func (c *Consumer) removeChild(child *PartitionConsumer) {
  192. c.lock.Lock()
  193. defer c.lock.Unlock()
  194. delete(c.children[child.topic], child.partition)
  195. }
  196. func (c *Consumer) refBrokerConsumer(broker *Broker) *brokerConsumer {
  197. c.lock.Lock()
  198. defer c.lock.Unlock()
  199. brokerWorker := c.brokerConsumers[broker]
  200. if brokerWorker == nil {
  201. brokerWorker = &brokerConsumer{
  202. consumer: c,
  203. broker: broker,
  204. input: make(chan *PartitionConsumer),
  205. newSubscriptions: make(chan []*PartitionConsumer),
  206. wait: make(chan none),
  207. subscriptions: make(map[*PartitionConsumer]none),
  208. refs: 1,
  209. }
  210. go withRecover(brokerWorker.subscriptionManager)
  211. go withRecover(brokerWorker.subscriptionConsumer)
  212. c.brokerConsumers[broker] = brokerWorker
  213. } else {
  214. brokerWorker.refs++
  215. }
  216. return brokerWorker
  217. }
  218. func (c *Consumer) unrefBrokerConsumer(broker *Broker) {
  219. c.lock.Lock()
  220. defer c.lock.Unlock()
  221. brokerWorker := c.brokerConsumers[broker]
  222. brokerWorker.refs--
  223. if brokerWorker.refs == 0 {
  224. close(brokerWorker.input)
  225. delete(c.brokerConsumers, broker)
  226. }
  227. }
  228. // PartitionConsumer
  229. // PartitionConsumer processes Kafka messages from a given topic and partition. You MUST call Close()
  230. // on a consumer to avoid leaks, it will not be garbage-collected automatically when it passes out of
  231. // scope (this is in addition to calling Close on the underlying consumer's client, which is still necessary).
  232. // You have to read from both the Messages and Errors channels to prevent the consumer from locking eventually.
  233. type PartitionConsumer struct {
  234. consumer *Consumer
  235. config PartitionConsumerConfig
  236. topic string
  237. partition int32
  238. broker *Broker
  239. messages chan *ConsumerMessage
  240. errors chan *ConsumerError
  241. trigger, dying chan none
  242. fetchSize int32
  243. offset int64
  244. }
  245. func (child *PartitionConsumer) sendError(err error) {
  246. child.errors <- &ConsumerError{
  247. Topic: child.topic,
  248. Partition: child.partition,
  249. Err: err,
  250. }
  251. }
  252. func (child *PartitionConsumer) dispatcher() {
  253. for _ = range child.trigger {
  254. select {
  255. case <-child.dying:
  256. close(child.trigger)
  257. default:
  258. if child.broker != nil {
  259. child.consumer.unrefBrokerConsumer(child.broker)
  260. child.broker = nil
  261. }
  262. if err := child.dispatch(); err != nil {
  263. child.sendError(err)
  264. child.trigger <- none{}
  265. // there's no point in trying again *right* away
  266. select {
  267. case <-child.dying:
  268. close(child.trigger)
  269. case <-time.After(10 * time.Second):
  270. }
  271. }
  272. }
  273. }
  274. if child.broker != nil {
  275. child.consumer.unrefBrokerConsumer(child.broker)
  276. }
  277. child.consumer.removeChild(child)
  278. close(child.messages)
  279. close(child.errors)
  280. }
  281. func (child *PartitionConsumer) dispatch() error {
  282. if err := child.consumer.client.RefreshTopicMetadata(child.topic); err != nil {
  283. return err
  284. }
  285. if leader, err := child.consumer.client.Leader(child.topic, child.partition); err != nil {
  286. return err
  287. } else {
  288. child.broker = leader
  289. }
  290. brokerWorker := child.consumer.refBrokerConsumer(child.broker)
  291. brokerWorker.input <- child
  292. return nil
  293. }
  294. func (child *PartitionConsumer) chooseStartingOffset() (err error) {
  295. var where OffsetTime
  296. switch child.config.OffsetMethod {
  297. case OffsetMethodManual:
  298. if child.config.OffsetValue < 0 {
  299. return ConfigurationError("OffsetValue cannot be < 0 when OffsetMethod is MANUAL")
  300. }
  301. child.offset = child.config.OffsetValue
  302. return nil
  303. case OffsetMethodNewest:
  304. where = LatestOffsets
  305. case OffsetMethodOldest:
  306. where = EarliestOffset
  307. default:
  308. return ConfigurationError("Invalid OffsetMethod")
  309. }
  310. child.offset, err = child.consumer.client.GetOffset(child.topic, child.partition, where)
  311. return err
  312. }
  313. // Messages returns the read channel for the messages that are returned by the broker
  314. func (child *PartitionConsumer) Messages() <-chan *ConsumerMessage {
  315. return child.messages
  316. }
  317. // Errors returns the read channel for any errors that occurred while consuming the partition.
  318. // You have to read this channel to prevent the consumer from deadlock. Under no circumstances,
  319. // the partition consumer will shut down by itself. It will just wait until it is able to continue
  320. // consuming messages. If you want to shut down your consumer, you will have trigger it yourself
  321. // by consuming this channel and calling Close or AsyncClose when appropriate.
  322. func (child *PartitionConsumer) Errors() <-chan *ConsumerError {
  323. return child.errors
  324. }
  325. // AsyncClose initiates a shutdown of the PartitionConsumer. This method will return immediately,
  326. // after which you should wait until the 'messages' and 'errors' channel are drained.
  327. // It is required to call this function, or Close before a consumer object passes out of scope,
  328. // as it will otherwise leak memory. You must call this before calling Close on the underlying
  329. // client.
  330. func (child *PartitionConsumer) AsyncClose() {
  331. // this triggers whatever worker owns this child to abandon it and close its trigger channel, which causes
  332. // the dispatcher to exit its loop, which removes it from the consumer then closes its 'messages' and
  333. // 'errors' channel (alternatively, if the child is already at the dispatcher for some reason, that will
  334. // also just close itself)
  335. close(child.dying)
  336. }
  337. // Close stops the PartitionConsumer from fetching messages. It is required to call this function,
  338. // or AsyncCose before a consumer object passes out of scope, as it will otherwise leak memory. You must
  339. // call this before calling Close on the underlying client.
  340. func (child *PartitionConsumer) Close() error {
  341. child.AsyncClose()
  342. go withRecover(func() {
  343. for _ = range child.messages {
  344. // drain
  345. }
  346. })
  347. var errors ConsumerErrors
  348. for err := range child.errors {
  349. errors = append(errors, err)
  350. }
  351. if len(errors) > 0 {
  352. return errors
  353. }
  354. return nil
  355. }
  356. // brokerConsumer
  357. type brokerConsumer struct {
  358. consumer *Consumer
  359. broker *Broker
  360. input chan *PartitionConsumer
  361. newSubscriptions chan []*PartitionConsumer
  362. wait chan none
  363. subscriptions map[*PartitionConsumer]none
  364. refs int
  365. }
  366. func (w *brokerConsumer) subscriptionManager() {
  367. var buffer []*PartitionConsumer
  368. // The subscriptionManager constantly accepts new subscriptions on `input` (even when the main subscriptionConsumer
  369. // goroutine is in the middle of a network request) and batches it up. The main worker goroutine picks
  370. // up a batch of new subscriptions between every network request by reading from `newSubscriptions`, so we give
  371. // it nil if no new subscriptions are available. We also write to `wait` only when new subscriptions is available,
  372. // so the main goroutine can block waiting for work if it has none.
  373. for {
  374. if len(buffer) > 0 {
  375. select {
  376. case event, ok := <-w.input:
  377. if !ok {
  378. goto done
  379. }
  380. buffer = append(buffer, event)
  381. case w.newSubscriptions <- buffer:
  382. buffer = nil
  383. case w.wait <- none{}:
  384. }
  385. } else {
  386. select {
  387. case event, ok := <-w.input:
  388. if !ok {
  389. goto done
  390. }
  391. buffer = append(buffer, event)
  392. case w.newSubscriptions <- nil:
  393. }
  394. }
  395. }
  396. done:
  397. close(w.wait)
  398. if len(buffer) > 0 {
  399. w.newSubscriptions <- buffer
  400. }
  401. close(w.newSubscriptions)
  402. }
  403. func (w *brokerConsumer) subscriptionConsumer() {
  404. <-w.wait // wait for our first piece of work
  405. // the subscriptionConsumer ensures we will get nil right away if no new subscriptions is available
  406. for newSubscriptions := range w.newSubscriptions {
  407. w.updateSubscriptionCache(newSubscriptions)
  408. if len(w.subscriptions) == 0 {
  409. // We're about to be shut down or we're about to receive more subscriptions.
  410. // Either way, the signal just hasn't propagated to our goroutine yet.
  411. <-w.wait
  412. continue
  413. }
  414. response, err := w.fetchNewMessages()
  415. if err != nil {
  416. Logger.Printf("Unexpected error processing FetchRequest; disconnecting broker %s: %s\n", w.broker.addr, err)
  417. w.abort(err)
  418. return
  419. }
  420. for child := range w.subscriptions {
  421. block := response.GetBlock(child.topic, child.partition)
  422. if block == nil {
  423. child.sendError(ErrIncompleteResponse)
  424. child.trigger <- none{}
  425. delete(w.subscriptions, child)
  426. continue
  427. }
  428. w.handleResponse(child, block)
  429. }
  430. }
  431. }
  432. func (w *brokerConsumer) updateSubscriptionCache(newSubscriptions []*PartitionConsumer) {
  433. // take new subscriptions, and abandon subscriptions that have been closed
  434. for _, child := range newSubscriptions {
  435. w.subscriptions[child] = none{}
  436. }
  437. for child := range w.subscriptions {
  438. select {
  439. case <-child.dying:
  440. close(child.trigger)
  441. delete(w.subscriptions, child)
  442. default:
  443. }
  444. }
  445. }
  446. func (w *brokerConsumer) abort(err error) {
  447. _ = w.broker.Close() // we don't care about the error this might return, we already have one
  448. w.consumer.client.disconnectBroker(w.broker)
  449. for child := range w.subscriptions {
  450. child.sendError(err)
  451. child.trigger <- none{}
  452. }
  453. for newSubscription := range w.newSubscriptions {
  454. for _, child := range newSubscription {
  455. child.sendError(err)
  456. child.trigger <- none{}
  457. }
  458. }
  459. }
  460. func (w *brokerConsumer) fetchNewMessages() (*FetchResponse, error) {
  461. request := &FetchRequest{
  462. MinBytes: w.consumer.config.MinFetchSize,
  463. MaxWaitTime: int32(w.consumer.config.MaxWaitTime / time.Millisecond),
  464. }
  465. for child := range w.subscriptions {
  466. request.AddBlock(child.topic, child.partition, child.offset, child.fetchSize)
  467. }
  468. return w.broker.Fetch(w.consumer.client.id, request)
  469. }
  470. func (w *brokerConsumer) handleResponse(child *PartitionConsumer, block *FetchResponseBlock) {
  471. switch block.Err {
  472. case ErrNoError:
  473. break
  474. default:
  475. child.sendError(block.Err)
  476. fallthrough
  477. case ErrUnknownTopicOrPartition, ErrNotLeaderForPartition, ErrLeaderNotAvailable:
  478. // doesn't belong to us, redispatch it
  479. child.trigger <- none{}
  480. delete(w.subscriptions, child)
  481. return
  482. }
  483. if len(block.MsgSet.Messages) == 0 {
  484. // We got no messages. If we got a trailing one then we need to ask for more data.
  485. // Otherwise we just poll again and wait for one to be produced...
  486. if block.MsgSet.PartialTrailingMessage {
  487. if child.config.MaxMessageSize > 0 && child.fetchSize == child.config.MaxMessageSize {
  488. // we can't ask for more data, we've hit the configured limit
  489. child.sendError(ErrMessageTooLarge)
  490. child.offset++ // skip this one so we can keep processing future messages
  491. } else {
  492. child.fetchSize *= 2
  493. if child.config.MaxMessageSize > 0 && child.fetchSize > child.config.MaxMessageSize {
  494. child.fetchSize = child.config.MaxMessageSize
  495. }
  496. }
  497. }
  498. return
  499. }
  500. // we got messages, reset our fetch size in case it was increased for a previous request
  501. child.fetchSize = child.config.DefaultFetchSize
  502. incomplete := false
  503. atLeastOne := false
  504. prelude := true
  505. for _, msgBlock := range block.MsgSet.Messages {
  506. for _, msg := range msgBlock.Messages() {
  507. if prelude && msg.Offset < child.offset {
  508. continue
  509. }
  510. prelude = false
  511. if msg.Offset >= child.offset {
  512. atLeastOne = true
  513. child.messages <- &ConsumerMessage{
  514. Topic: child.topic,
  515. Partition: child.partition,
  516. Key: msg.Msg.Key,
  517. Value: msg.Msg.Value,
  518. Offset: msg.Offset,
  519. }
  520. child.offset = msg.Offset + 1
  521. } else {
  522. incomplete = true
  523. }
  524. }
  525. }
  526. if incomplete || !atLeastOne {
  527. child.sendError(ErrIncompleteResponse)
  528. child.trigger <- none{}
  529. delete(w.subscriptions, child)
  530. }
  531. }