async_producer.go 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821
  1. package sarama
  2. import (
  3. "fmt"
  4. "sync"
  5. "time"
  6. "github.com/eapache/go-resiliency/breaker"
  7. "github.com/eapache/queue"
  8. )
  9. func forceFlushThreshold() int {
  10. return int(MaxRequestSize - (10 * 1024)) // 10KiB is safety room for misc. overhead, we might want to calculate this more precisely?
  11. }
  12. // AsyncProducer publishes Kafka messages using a non-blocking API. It routes messages
  13. // to the correct broker for the provided topic-partition, refreshing metadata as appropriate,
  14. // and parses responses for errors. You must read from the Errors() channel or the
  15. // producer will deadlock. You must call Close() or AsyncClose() on a producer to avoid
  16. // leaks: it will not be garbage-collected automatically when it passes out of
  17. // scope.
  18. type AsyncProducer interface {
  19. // AsyncClose triggers a shutdown of the producer, flushing any messages it may have
  20. // buffered. The shutdown has completed when both the Errors and Successes channels
  21. // have been closed. When calling AsyncClose, you *must* continue to read from those
  22. // channels in order to drain the results of any messages in flight.
  23. AsyncClose()
  24. // Close shuts down the producer and flushes any messages it may have buffered.
  25. // You must call this function before a producer object passes out of scope, as
  26. // it may otherwise leak memory. You must call this before calling Close on the
  27. // underlying client.
  28. Close() error
  29. // Input is the input channel for the user to write messages to that they wish to send.
  30. Input() chan<- *ProducerMessage
  31. // Successes is the success output channel back to the user when AckSuccesses is confured.
  32. // If Return.Successes is true, you MUST read from this channel or the Producer will deadlock.
  33. // It is suggested that you send and read messages together in a single select statement.
  34. Successes() <-chan *ProducerMessage
  35. // Errors is the error output channel back to the user. You MUST read from this channel
  36. // or the Producer will deadlock when the channel is full. Alternatively, you can set
  37. // Producer.Return.Errors in your config to false, which prevents errors to be returned.
  38. Errors() <-chan *ProducerError
  39. }
  40. type asyncProducer struct {
  41. client Client
  42. conf *Config
  43. ownClient bool
  44. errors chan *ProducerError
  45. input, successes, retries chan *ProducerMessage
  46. brokers map[*Broker]chan *ProducerMessage
  47. brokerRefs map[chan *ProducerMessage]int
  48. brokerLock sync.Mutex
  49. }
  50. // NewAsyncProducer creates a new AsyncProducer using the given broker addresses and configuration.
  51. func NewAsyncProducer(addrs []string, conf *Config) (AsyncProducer, error) {
  52. client, err := NewClient(addrs, conf)
  53. if err != nil {
  54. return nil, err
  55. }
  56. p, err := NewAsyncProducerFromClient(client)
  57. if err != nil {
  58. return nil, err
  59. }
  60. p.(*asyncProducer).ownClient = true
  61. return p, nil
  62. }
  63. // NewAsyncProducerFromClient creates a new Producer using the given client. It is still
  64. // necessary to call Close() on the underlying client when shutting down this producer.
  65. func NewAsyncProducerFromClient(client Client) (AsyncProducer, error) {
  66. // Check that we are not dealing with a closed Client before processing any other arguments
  67. if client.Closed() {
  68. return nil, ErrClosedClient
  69. }
  70. p := &asyncProducer{
  71. client: client,
  72. conf: client.Config(),
  73. errors: make(chan *ProducerError),
  74. input: make(chan *ProducerMessage),
  75. successes: make(chan *ProducerMessage),
  76. retries: make(chan *ProducerMessage),
  77. brokers: make(map[*Broker]chan *ProducerMessage),
  78. brokerRefs: make(map[chan *ProducerMessage]int),
  79. }
  80. // launch our singleton dispatchers
  81. go withRecover(p.topicDispatcher)
  82. go withRecover(p.retryHandler)
  83. return p, nil
  84. }
  85. type flagSet int8
  86. const (
  87. chaser flagSet = 1 << iota // message is last in a group that failed
  88. ref // add a reference to a singleton channel
  89. unref // remove a reference from a singleton channel
  90. shutdown // start the shutdown process
  91. )
  92. // ProducerMessage is the collection of elements passed to the Producer in order to send a message.
  93. type ProducerMessage struct {
  94. Topic string // The Kafka topic for this message.
  95. Key Encoder // The partitioning key for this message. It must implement the Encoder interface. Pre-existing Encoders include StringEncoder and ByteEncoder.
  96. Value Encoder // The actual message to store in Kafka. It must implement the Encoder interface. Pre-existing Encoders include StringEncoder and ByteEncoder.
  97. // These are filled in by the producer as the message is processed
  98. Offset int64 // Offset is the offset of the message stored on the broker. This is only guaranteed to be defined if the message was successfully delivered and RequiredAcks is not NoResponse.
  99. Partition int32 // Partition is the partition that the message was sent to. This is only guaranteed to be defined if the message was successfully delivered.
  100. Metadata interface{} // This field is used to hold arbitrary data you wish to include so it will be available when receiving on the Successes and Errors channels. Sarama completely ignores this field and is only to be used for pass-through data.
  101. retries int
  102. flags flagSet
  103. }
  104. func (m *ProducerMessage) byteSize() int {
  105. size := 26 // the metadata overhead of CRC, flags, etc.
  106. if m.Key != nil {
  107. size += m.Key.Length()
  108. }
  109. if m.Value != nil {
  110. size += m.Value.Length()
  111. }
  112. return size
  113. }
  114. // ProducerError is the type of error generated when the producer fails to deliver a message.
  115. // It contains the original ProducerMessage as well as the actual error value.
  116. type ProducerError struct {
  117. Msg *ProducerMessage
  118. Err error
  119. }
  120. func (pe ProducerError) Error() string {
  121. return fmt.Sprintf("kafka: Failed to produce message to topic %s: %s", pe.Msg.Topic, pe.Err)
  122. }
  123. // ProducerErrors is a type that wraps a batch of "ProducerError"s and implements the Error interface.
  124. // It can be returned from the Producer's Close method to avoid the need to manually drain the Errors channel
  125. // when closing a producer.
  126. type ProducerErrors []*ProducerError
  127. func (pe ProducerErrors) Error() string {
  128. return fmt.Sprintf("kafka: Failed to deliver %d messages.", len(pe))
  129. }
  130. func (p *asyncProducer) Errors() <-chan *ProducerError {
  131. return p.errors
  132. }
  133. func (p *asyncProducer) Successes() <-chan *ProducerMessage {
  134. return p.successes
  135. }
  136. func (p *asyncProducer) Input() chan<- *ProducerMessage {
  137. return p.input
  138. }
  139. func (p *asyncProducer) Close() error {
  140. p.AsyncClose()
  141. if p.conf.Producer.Return.Successes {
  142. go withRecover(func() {
  143. for _ = range p.successes {
  144. }
  145. })
  146. }
  147. var errors ProducerErrors
  148. if p.conf.Producer.Return.Errors {
  149. for event := range p.errors {
  150. errors = append(errors, event)
  151. }
  152. }
  153. if len(errors) > 0 {
  154. return errors
  155. }
  156. return nil
  157. }
  158. func (p *asyncProducer) AsyncClose() {
  159. go withRecover(func() {
  160. p.input <- &ProducerMessage{flags: shutdown}
  161. })
  162. }
  163. ///////////////////////////////////////////
  164. // In normal processing, a message flows through the following functions from top to bottom,
  165. // starting at topicDispatcher (which reads from Producer.input) and ending in flusher
  166. // (which sends the message to the broker). In cases where a message must be retried, it goes
  167. // through retryHandler before being returned to the top of the flow.
  168. ///////////////////////////////////////////
  169. // singleton
  170. // dispatches messages by topic
  171. func (p *asyncProducer) topicDispatcher() {
  172. handlers := make(map[string]chan *ProducerMessage)
  173. for msg := range p.input {
  174. if msg == nil {
  175. Logger.Println("Something tried to send a nil message, it was ignored.")
  176. continue
  177. }
  178. if msg.flags&shutdown != 0 {
  179. Logger.Println("Producer shutting down.")
  180. break
  181. }
  182. if (p.conf.Producer.Compression == CompressionNone && msg.Value != nil && msg.Value.Length() > p.conf.Producer.MaxMessageBytes) ||
  183. (msg.byteSize() > p.conf.Producer.MaxMessageBytes) {
  184. p.returnError(msg, ErrMessageSizeTooLarge)
  185. continue
  186. }
  187. handler := handlers[msg.Topic]
  188. if handler == nil {
  189. p.retries <- &ProducerMessage{flags: ref}
  190. newHandler := make(chan *ProducerMessage, p.conf.ChannelBufferSize)
  191. topic := msg.Topic // block local because go's closure semantics suck
  192. go withRecover(func() { p.partitionDispatcher(topic, newHandler) })
  193. handler = newHandler
  194. handlers[msg.Topic] = handler
  195. }
  196. handler <- msg
  197. }
  198. for _, handler := range handlers {
  199. close(handler)
  200. }
  201. p.retries <- &ProducerMessage{flags: shutdown}
  202. for msg := range p.input {
  203. p.returnError(msg, ErrShuttingDown)
  204. }
  205. if p.ownClient {
  206. err := p.client.Close()
  207. if err != nil {
  208. Logger.Println("producer/shutdown failed to close the embedded client:", err)
  209. }
  210. }
  211. close(p.errors)
  212. close(p.successes)
  213. }
  214. // one per topic
  215. // partitions messages, then dispatches them by partition
  216. func (p *asyncProducer) partitionDispatcher(topic string, input chan *ProducerMessage) {
  217. handlers := make(map[int32]chan *ProducerMessage)
  218. partitioner := p.conf.Producer.Partitioner(topic)
  219. breaker := breaker.New(3, 1, 10*time.Second)
  220. for msg := range input {
  221. if msg.retries == 0 {
  222. err := breaker.Run(func() error {
  223. return p.assignPartition(partitioner, msg)
  224. })
  225. if err != nil {
  226. p.returnError(msg, err)
  227. continue
  228. }
  229. }
  230. handler := handlers[msg.Partition]
  231. if handler == nil {
  232. p.retries <- &ProducerMessage{flags: ref}
  233. newHandler := make(chan *ProducerMessage, p.conf.ChannelBufferSize)
  234. topic := msg.Topic // block local because go's closure semantics suck
  235. partition := msg.Partition // block local because go's closure semantics suck
  236. go withRecover(func() { p.leaderDispatcher(topic, partition, newHandler) })
  237. handler = newHandler
  238. handlers[msg.Partition] = handler
  239. }
  240. handler <- msg
  241. }
  242. for _, handler := range handlers {
  243. close(handler)
  244. }
  245. p.retries <- &ProducerMessage{flags: unref}
  246. }
  247. // one per partition per topic
  248. // dispatches messages to the appropriate broker
  249. // also responsible for maintaining message order during retries
  250. func (p *asyncProducer) leaderDispatcher(topic string, partition int32, input chan *ProducerMessage) {
  251. var leader *Broker
  252. var output chan *ProducerMessage
  253. breaker := breaker.New(3, 1, 10*time.Second)
  254. doUpdate := func() (err error) {
  255. if err = p.client.RefreshMetadata(topic); err != nil {
  256. return err
  257. }
  258. if leader, err = p.client.Leader(topic, partition); err != nil {
  259. return err
  260. }
  261. output = p.getBrokerProducer(leader)
  262. return nil
  263. }
  264. // try to prefetch the leader; if this doesn't work, we'll do a proper breaker-protected refresh-and-fetch
  265. // on the first message
  266. leader, _ = p.client.Leader(topic, partition)
  267. if leader != nil {
  268. output = p.getBrokerProducer(leader)
  269. }
  270. // highWatermark tracks the "current" retry level, which is the only one where we actually let messages through,
  271. // all other messages get buffered in retryState[msg.retries].buf to preserve ordering
  272. // retryState[msg.retries].expectChaser simply tracks whether we've seen a chaser message for a given level (and
  273. // therefore whether our buffer is complete and safe to flush)
  274. highWatermark := 0
  275. retryState := make([]struct {
  276. buf []*ProducerMessage
  277. expectChaser bool
  278. }, p.conf.Producer.Retry.Max+1)
  279. for msg := range input {
  280. if msg.retries > highWatermark {
  281. // new, higher, retry level; send off a chaser so that we know when everything "in between" has made it
  282. // back to us and we can safely flush the backlog (otherwise we risk re-ordering messages)
  283. highWatermark = msg.retries
  284. Logger.Printf("producer/leader state change to [retrying-%d] on %s/%d\n", highWatermark, topic, partition)
  285. retryState[msg.retries].expectChaser = true
  286. output <- &ProducerMessage{Topic: topic, Partition: partition, flags: chaser, retries: msg.retries - 1}
  287. Logger.Printf("producer/leader abandoning broker %d on %s/%d\n", leader.ID(), topic, partition)
  288. p.unrefBrokerProducer(leader, output)
  289. output = nil
  290. time.Sleep(p.conf.Producer.Retry.Backoff)
  291. } else if highWatermark > 0 {
  292. // we are retrying something (else highWatermark would be 0) but this message is not a *new* retry level
  293. if msg.retries < highWatermark {
  294. // in fact this message is not even the current retry level, so buffer it for now (unless it's a just a chaser)
  295. if msg.flags&chaser == chaser {
  296. retryState[msg.retries].expectChaser = false
  297. } else {
  298. retryState[msg.retries].buf = append(retryState[msg.retries].buf, msg)
  299. }
  300. continue
  301. } else if msg.flags&chaser == chaser {
  302. // this message is of the current retry level (msg.retries == highWatermark) and the chaser flag is set,
  303. // meaning this retry level is done and we can go down (at least) one level and flush that
  304. retryState[highWatermark].expectChaser = false
  305. Logger.Printf("producer/leader state change to [normal-%d] on %s/%d\n", highWatermark, topic, partition)
  306. for {
  307. highWatermark--
  308. Logger.Printf("producer/leader state change to [flushing-%d] on %s/%d\n", highWatermark, topic, partition)
  309. if output == nil {
  310. if err := breaker.Run(doUpdate); err != nil {
  311. p.returnErrors(retryState[highWatermark].buf, err)
  312. goto flushDone
  313. }
  314. Logger.Printf("producer/leader selected broker %d on %s/%d\n", leader.ID(), topic, partition)
  315. }
  316. for _, msg := range retryState[highWatermark].buf {
  317. output <- msg
  318. }
  319. flushDone:
  320. retryState[highWatermark].buf = nil
  321. if retryState[highWatermark].expectChaser {
  322. Logger.Printf("producer/leader state change to [retrying-%d] on %s/%d\n", highWatermark, topic, partition)
  323. break
  324. } else {
  325. Logger.Printf("producer/leader state change to [normal-%d] on %s/%d\n", highWatermark, topic, partition)
  326. if highWatermark == 0 {
  327. break
  328. }
  329. }
  330. }
  331. continue
  332. }
  333. }
  334. // if we made it this far then the current msg contains real data, and can be sent to the next goroutine
  335. // without breaking any of our ordering guarantees
  336. if output == nil {
  337. if err := breaker.Run(doUpdate); err != nil {
  338. p.returnError(msg, err)
  339. time.Sleep(p.conf.Producer.Retry.Backoff)
  340. continue
  341. }
  342. Logger.Printf("producer/leader selected broker %d on %s/%d\n", leader.ID(), topic, partition)
  343. }
  344. output <- msg
  345. }
  346. if output != nil {
  347. p.unrefBrokerProducer(leader, output)
  348. }
  349. p.retries <- &ProducerMessage{flags: unref}
  350. }
  351. // one per broker
  352. // groups messages together into appropriately-sized batches for sending to the broker
  353. // based on https://godoc.org/github.com/eapache/channels#BatchingChannel
  354. func (p *asyncProducer) messageAggregator(broker *Broker, input chan *ProducerMessage) {
  355. var (
  356. timer <-chan time.Time
  357. buffer []*ProducerMessage
  358. flushTriggered chan []*ProducerMessage
  359. bytesAccumulated int
  360. defaultFlush bool
  361. )
  362. if p.conf.Producer.Flush.Frequency == 0 && p.conf.Producer.Flush.Bytes == 0 && p.conf.Producer.Flush.Messages == 0 {
  363. defaultFlush = true
  364. }
  365. output := make(chan []*ProducerMessage)
  366. go withRecover(func() { p.flusher(broker, output) })
  367. for {
  368. select {
  369. case msg := <-input:
  370. if msg == nil {
  371. goto shutdown
  372. }
  373. if (bytesAccumulated+msg.byteSize() >= forceFlushThreshold()) ||
  374. (p.conf.Producer.Compression != CompressionNone && bytesAccumulated+msg.byteSize() >= p.conf.Producer.MaxMessageBytes) ||
  375. (p.conf.Producer.Flush.MaxMessages > 0 && len(buffer) >= p.conf.Producer.Flush.MaxMessages) {
  376. Logger.Println("producer/aggregator maximum request accumulated, forcing blocking flush")
  377. output <- buffer
  378. timer = nil
  379. buffer = nil
  380. flushTriggered = nil
  381. bytesAccumulated = 0
  382. }
  383. buffer = append(buffer, msg)
  384. bytesAccumulated += msg.byteSize()
  385. if defaultFlush ||
  386. msg.flags&chaser == chaser ||
  387. (p.conf.Producer.Flush.Messages > 0 && len(buffer) >= p.conf.Producer.Flush.Messages) ||
  388. (p.conf.Producer.Flush.Bytes > 0 && bytesAccumulated >= p.conf.Producer.Flush.Bytes) {
  389. flushTriggered = output
  390. } else if p.conf.Producer.Flush.Frequency > 0 && timer == nil {
  391. timer = time.After(p.conf.Producer.Flush.Frequency)
  392. }
  393. case <-timer:
  394. flushTriggered = output
  395. case flushTriggered <- buffer:
  396. timer = nil
  397. buffer = nil
  398. flushTriggered = nil
  399. bytesAccumulated = 0
  400. }
  401. }
  402. shutdown:
  403. if len(buffer) > 0 {
  404. output <- buffer
  405. }
  406. close(output)
  407. }
  408. // one per broker
  409. // takes a batch at a time from the messageAggregator and sends to the broker
  410. func (p *asyncProducer) flusher(broker *Broker, input chan []*ProducerMessage) {
  411. var closing error
  412. currentRetries := make(map[string]map[int32]error)
  413. Logger.Printf("producer/flusher/%d starting up\n", broker.ID())
  414. for batch := range input {
  415. if closing != nil {
  416. p.retryMessages(batch, closing)
  417. continue
  418. }
  419. // group messages by topic/partition
  420. msgSets := make(map[string]map[int32][]*ProducerMessage)
  421. for i, msg := range batch {
  422. if currentRetries[msg.Topic] != nil && currentRetries[msg.Topic][msg.Partition] != nil {
  423. if msg.flags&chaser == chaser {
  424. // we can start processing this topic/partition again
  425. Logger.Printf("producer/flusher/%d state change to [normal] on %s/%d\n",
  426. broker.ID(), msg.Topic, msg.Partition)
  427. currentRetries[msg.Topic][msg.Partition] = nil
  428. }
  429. p.retryMessages([]*ProducerMessage{msg}, currentRetries[msg.Topic][msg.Partition])
  430. batch[i] = nil // to prevent it being returned/retried twice
  431. continue
  432. }
  433. partitionSet := msgSets[msg.Topic]
  434. if partitionSet == nil {
  435. partitionSet = make(map[int32][]*ProducerMessage)
  436. msgSets[msg.Topic] = partitionSet
  437. }
  438. partitionSet[msg.Partition] = append(partitionSet[msg.Partition], msg)
  439. }
  440. request := p.buildRequest(msgSets)
  441. if request == nil {
  442. continue
  443. }
  444. response, err := broker.Produce(request)
  445. switch err.(type) {
  446. case nil:
  447. break
  448. case PacketEncodingError:
  449. p.returnErrors(batch, err)
  450. continue
  451. default:
  452. Logger.Printf("producer/flusher/%d state change to [closing] because %s\n", broker.ID(), err)
  453. p.abandonBrokerConnection(broker)
  454. p.retryMessages(batch, err)
  455. _ = broker.Close()
  456. closing = err
  457. continue
  458. }
  459. if response == nil {
  460. // this only happens when RequiredAcks is NoResponse, so we have to assume success
  461. if p.conf.Producer.Return.Successes {
  462. p.returnSuccesses(batch)
  463. }
  464. continue
  465. }
  466. // we iterate through the blocks in the request, not the response, so that we notice
  467. // if the response is missing a block completely
  468. for topic, partitionSet := range msgSets {
  469. for partition, msgs := range partitionSet {
  470. block := response.GetBlock(topic, partition)
  471. if block == nil {
  472. p.returnErrors(msgs, ErrIncompleteResponse)
  473. continue
  474. }
  475. switch block.Err {
  476. case ErrNoError:
  477. // All the messages for this topic-partition were delivered successfully!
  478. if p.conf.Producer.Return.Successes {
  479. for i := range msgs {
  480. msgs[i].Offset = block.Offset + int64(i)
  481. }
  482. p.returnSuccesses(msgs)
  483. }
  484. case ErrUnknownTopicOrPartition, ErrNotLeaderForPartition, ErrLeaderNotAvailable,
  485. ErrRequestTimedOut, ErrNotEnoughReplicas, ErrNotEnoughReplicasAfterAppend:
  486. Logger.Printf("producer/flusher/%d state change to [retrying] on %s/%d because %v\n",
  487. broker.ID(), topic, partition, block.Err)
  488. if currentRetries[topic] == nil {
  489. currentRetries[topic] = make(map[int32]error)
  490. }
  491. currentRetries[topic][partition] = block.Err
  492. p.retryMessages(msgs, block.Err)
  493. default:
  494. p.returnErrors(msgs, block.Err)
  495. }
  496. }
  497. }
  498. }
  499. Logger.Printf("producer/flusher/%d shut down\n", broker.ID())
  500. p.retries <- &ProducerMessage{flags: unref}
  501. }
  502. // singleton
  503. // effectively a "bridge" between the flushers and the topicDispatcher in order to avoid deadlock
  504. // based on https://godoc.org/github.com/eapache/channels#InfiniteChannel
  505. func (p *asyncProducer) retryHandler() {
  506. var (
  507. msg *ProducerMessage
  508. buf = queue.New()
  509. refs = 0
  510. shuttingDown = false
  511. )
  512. for {
  513. if buf.Length() == 0 {
  514. msg = <-p.retries
  515. } else {
  516. select {
  517. case msg = <-p.retries:
  518. case p.input <- buf.Peek().(*ProducerMessage):
  519. buf.Remove()
  520. continue
  521. }
  522. }
  523. if msg.flags&ref != 0 {
  524. refs++
  525. } else if msg.flags&unref != 0 {
  526. refs--
  527. if refs == 0 && shuttingDown {
  528. break
  529. }
  530. } else if msg.flags&shutdown != 0 {
  531. shuttingDown = true
  532. if refs == 0 {
  533. break
  534. }
  535. } else {
  536. buf.Add(msg)
  537. }
  538. }
  539. close(p.retries)
  540. for buf.Length() != 0 {
  541. p.input <- buf.Peek().(*ProducerMessage)
  542. buf.Remove()
  543. }
  544. close(p.input)
  545. }
  546. ///////////////////////////////////////////
  547. ///////////////////////////////////////////
  548. // utility functions
  549. func (p *asyncProducer) assignPartition(partitioner Partitioner, msg *ProducerMessage) error {
  550. var partitions []int32
  551. var err error
  552. if partitioner.RequiresConsistency() {
  553. partitions, err = p.client.Partitions(msg.Topic)
  554. } else {
  555. partitions, err = p.client.WritablePartitions(msg.Topic)
  556. }
  557. if err != nil {
  558. return err
  559. }
  560. numPartitions := int32(len(partitions))
  561. if numPartitions == 0 {
  562. return ErrLeaderNotAvailable
  563. }
  564. choice, err := partitioner.Partition(msg, numPartitions)
  565. if err != nil {
  566. return err
  567. } else if choice < 0 || choice >= numPartitions {
  568. return ErrInvalidPartition
  569. }
  570. msg.Partition = partitions[choice]
  571. return nil
  572. }
  573. func (p *asyncProducer) buildRequest(batch map[string]map[int32][]*ProducerMessage) *ProduceRequest {
  574. req := &ProduceRequest{RequiredAcks: p.conf.Producer.RequiredAcks, Timeout: int32(p.conf.Producer.Timeout / time.Millisecond)}
  575. empty := true
  576. for topic, partitionSet := range batch {
  577. for partition, msgSet := range partitionSet {
  578. setToSend := new(MessageSet)
  579. setSize := 0
  580. for _, msg := range msgSet {
  581. var keyBytes, valBytes []byte
  582. var err error
  583. if msg.Key != nil {
  584. if keyBytes, err = msg.Key.Encode(); err != nil {
  585. p.returnError(msg, err)
  586. continue
  587. }
  588. }
  589. if msg.Value != nil {
  590. if valBytes, err = msg.Value.Encode(); err != nil {
  591. p.returnError(msg, err)
  592. continue
  593. }
  594. }
  595. if p.conf.Producer.Compression != CompressionNone && setSize+msg.byteSize() > p.conf.Producer.MaxMessageBytes {
  596. // compression causes message-sets to be wrapped as single messages, which have tighter
  597. // size requirements, so we have to respect those limits
  598. valBytes, err := encode(setToSend)
  599. if err != nil {
  600. Logger.Println(err) // if this happens, it's basically our fault.
  601. panic(err)
  602. }
  603. req.AddMessage(topic, partition, &Message{Codec: p.conf.Producer.Compression, Key: nil, Value: valBytes})
  604. setToSend = new(MessageSet)
  605. setSize = 0
  606. }
  607. setSize += msg.byteSize()
  608. setToSend.addMessage(&Message{Codec: CompressionNone, Key: keyBytes, Value: valBytes})
  609. empty = false
  610. }
  611. if p.conf.Producer.Compression == CompressionNone {
  612. req.AddSet(topic, partition, setToSend)
  613. } else {
  614. valBytes, err := encode(setToSend)
  615. if err != nil {
  616. Logger.Println(err) // if this happens, it's basically our fault.
  617. panic(err)
  618. }
  619. req.AddMessage(topic, partition, &Message{Codec: p.conf.Producer.Compression, Key: nil, Value: valBytes})
  620. }
  621. }
  622. }
  623. if empty {
  624. return nil
  625. }
  626. return req
  627. }
  628. func (p *asyncProducer) returnError(msg *ProducerMessage, err error) {
  629. msg.flags = 0
  630. msg.retries = 0
  631. pErr := &ProducerError{Msg: msg, Err: err}
  632. if p.conf.Producer.Return.Errors {
  633. p.errors <- pErr
  634. } else {
  635. Logger.Println(pErr)
  636. }
  637. }
  638. func (p *asyncProducer) returnErrors(batch []*ProducerMessage, err error) {
  639. for _, msg := range batch {
  640. if msg != nil {
  641. p.returnError(msg, err)
  642. }
  643. }
  644. }
  645. func (p *asyncProducer) returnSuccesses(batch []*ProducerMessage) {
  646. for _, msg := range batch {
  647. if msg != nil {
  648. msg.flags = 0
  649. p.successes <- msg
  650. }
  651. }
  652. }
  653. func (p *asyncProducer) retryMessages(batch []*ProducerMessage, err error) {
  654. for _, msg := range batch {
  655. if msg == nil {
  656. continue
  657. }
  658. if msg.retries >= p.conf.Producer.Retry.Max {
  659. p.returnError(msg, err)
  660. } else {
  661. msg.retries++
  662. p.retries <- msg
  663. }
  664. }
  665. }
  666. func (p *asyncProducer) getBrokerProducer(broker *Broker) chan *ProducerMessage {
  667. p.brokerLock.Lock()
  668. defer p.brokerLock.Unlock()
  669. bp := p.brokers[broker]
  670. if bp == nil {
  671. p.retries <- &ProducerMessage{flags: ref}
  672. bp = make(chan *ProducerMessage)
  673. p.brokers[broker] = bp
  674. p.brokerRefs[bp] = 0
  675. go withRecover(func() { p.messageAggregator(broker, bp) })
  676. }
  677. p.brokerRefs[bp]++
  678. return bp
  679. }
  680. func (p *asyncProducer) unrefBrokerProducer(broker *Broker, bp chan *ProducerMessage) {
  681. p.brokerLock.Lock()
  682. defer p.brokerLock.Unlock()
  683. p.brokerRefs[bp]--
  684. if p.brokerRefs[bp] == 0 {
  685. close(bp)
  686. delete(p.brokerRefs, bp)
  687. if p.brokers[broker] == bp {
  688. delete(p.brokers, broker)
  689. }
  690. }
  691. }
  692. func (p *asyncProducer) abandonBrokerConnection(broker *Broker) {
  693. p.brokerLock.Lock()
  694. defer p.brokerLock.Unlock()
  695. delete(p.brokers, broker)
  696. }