consumer_group.go 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864
  1. package sarama
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "sort"
  7. "sync"
  8. "time"
  9. )
// ErrClosedConsumerGroup is the error returned when a method is called on a
// consumer group that has already been closed.
var ErrClosedConsumerGroup = errors.New("kafka: tried to use a consumer group that was closed")
// ConsumerGroup is responsible for dividing up processing of topics and partitions
// over a collection of processes (the members of the consumer group).
type ConsumerGroup interface {
	// Consume joins a cluster of consumers for a given list of topics and
	// starts a blocking ConsumerGroupSession through the ConsumerGroupHandler.
	//
	// The life-cycle of a session is represented by the following steps:
	//
	// 1. The consumers join the group (as explained in https://kafka.apache.org/documentation/#intro_consumers)
	//    and are assigned their "fair share" of partitions, aka 'claims'.
	// 2. Before processing starts, the handler's Setup() hook is called to notify the user
	//    of the claims and allow any necessary preparation or alteration of state.
	// 3. For each of the assigned claims the handler's ConsumeClaim() function is then called
	//    in a separate goroutine, which requires it to be thread-safe. Any state must be
	//    carefully protected from concurrent reads/writes.
	// 4. The session will persist until one of the ConsumeClaim() functions exits. This can be
	//    either when the parent context is cancelled or when a server-side rebalance cycle
	//    is initiated.
	// 5. Once all the ConsumeClaim() loops have exited, the handler's Cleanup() hook is called
	//    to allow the user to perform any final tasks before a rebalance.
	// 6. Finally, marked offsets are committed one last time before claims are released.
	//
	// Please note that once a rebalance is triggered, sessions must be completed within
	// Config.Consumer.Group.Rebalance.Timeout. This means that ConsumeClaim() functions must exit
	// as quickly as possible to allow time for Cleanup() and the final offset commit. If the timeout
	// is exceeded, the consumer will be removed from the group by Kafka, which will cause offset
	// commit failures.
	Consume(ctx context.Context, topics []string, handler ConsumerGroupHandler) error

	// Errors returns a read channel of errors that occurred during the consumer life-cycle.
	// By default, errors are logged and not returned over this channel.
	// If you want to implement any custom error handling, set your config's
	// Consumer.Return.Errors setting to true, and read from this channel.
	Errors() <-chan error

	// Close stops the ConsumerGroup and detaches any running sessions. It is required to call
	// this function before the object passes out of scope, as it will otherwise leak memory.
	Close() error
}
// consumerGroup is the ConsumerGroup implementation built by newConsumerGroup.
type consumerGroup struct {
	client   Client   // used for metadata refreshes, coordinator lookups and offset management
	config   *Config  // obtained from client.Config() at construction time
	consumer Consumer // creates the partition consumers that back each claim

	groupID  string
	memberID string // set after a successful join; reset to "" on leave or when the broker rejects it

	errors chan error // buffered channel exposed through Errors()

	lock      sync.Mutex // held by Consume for the whole session, and by leave
	closed    chan none  // closed by Close to signal shutdown to running goroutines
	closeOnce sync.Once  // ensures the body of Close runs only once

	userData []byte // user data from the latest sync assignment; re-sent on the next join unless static user data is configured
}
  60. // NewConsumerGroup creates a new consumer group the given broker addresses and configuration.
  61. func NewConsumerGroup(addrs []string, groupID string, config *Config) (ConsumerGroup, error) {
  62. client, err := NewClient(addrs, config)
  63. if err != nil {
  64. return nil, err
  65. }
  66. c, err := newConsumerGroup(groupID, client)
  67. if err != nil {
  68. _ = client.Close()
  69. }
  70. return c, err
  71. }
  72. // NewConsumerGroupFromClient creates a new consumer group using the given client. It is still
  73. // necessary to call Close() on the underlying client when shutting down this consumer.
  74. // PLEASE NOTE: consumer groups can only re-use but not share clients.
  75. func NewConsumerGroupFromClient(groupID string, client Client) (ConsumerGroup, error) {
  76. // For clients passed in by the client, ensure we don't
  77. // call Close() on it.
  78. cli := &nopCloserClient{client}
  79. return newConsumerGroup(groupID, cli)
  80. }
  81. func newConsumerGroup(groupID string, client Client) (ConsumerGroup, error) {
  82. config := client.Config()
  83. if !config.Version.IsAtLeast(V0_10_2_0) {
  84. return nil, ConfigurationError("consumer groups require Version to be >= V0_10_2_0")
  85. }
  86. consumer, err := NewConsumerFromClient(client)
  87. if err != nil {
  88. return nil, err
  89. }
  90. return &consumerGroup{
  91. client: client,
  92. consumer: consumer,
  93. config: config,
  94. groupID: groupID,
  95. errors: make(chan error, config.ChannelBufferSize),
  96. closed: make(chan none),
  97. }, nil
  98. }
  99. // Errors implements ConsumerGroup.
  100. func (c *consumerGroup) Errors() <-chan error { return c.errors }
  101. // Close implements ConsumerGroup.
  102. func (c *consumerGroup) Close() (err error) {
  103. c.closeOnce.Do(func() {
  104. close(c.closed)
  105. // leave group
  106. if e := c.leave(); e != nil {
  107. err = e
  108. }
  109. // drain errors
  110. go func() {
  111. close(c.errors)
  112. }()
  113. for e := range c.errors {
  114. err = e
  115. }
  116. if e := c.client.Close(); e != nil {
  117. err = e
  118. }
  119. })
  120. return
  121. }
  122. // Consume implements ConsumerGroup.
  123. func (c *consumerGroup) Consume(ctx context.Context, topics []string, handler ConsumerGroupHandler) error {
  124. // Ensure group is not closed
  125. select {
  126. case <-c.closed:
  127. return ErrClosedConsumerGroup
  128. default:
  129. }
  130. c.lock.Lock()
  131. defer c.lock.Unlock()
  132. // Quick exit when no topics are provided
  133. if len(topics) == 0 {
  134. return fmt.Errorf("no topics provided")
  135. }
  136. // Refresh metadata for requested topics
  137. if err := c.client.RefreshMetadata(topics...); err != nil {
  138. return err
  139. }
  140. // Init session
  141. sess, err := c.newSession(ctx, topics, handler, c.config.Consumer.Group.Rebalance.Retry.Max)
  142. if err == ErrClosedClient {
  143. return ErrClosedConsumerGroup
  144. } else if err != nil {
  145. return err
  146. }
  147. // loop check topic partition numbers changed
  148. // will trigger rebalance when any topic partitions number had changed
  149. // avoid Consume function called again that will generate more than loopCheckPartitionNumbers coroutine
  150. go c.loopCheckPartitionNumbers(topics, sess)
  151. // Wait for session exit signal
  152. <-sess.ctx.Done()
  153. // Gracefully release session claims
  154. return sess.release(true)
  155. }
  156. func (c *consumerGroup) retryNewSession(ctx context.Context, topics []string, handler ConsumerGroupHandler, retries int, refreshCoordinator bool) (*consumerGroupSession, error) {
  157. select {
  158. case <-c.closed:
  159. return nil, ErrClosedConsumerGroup
  160. case <-time.After(c.config.Consumer.Group.Rebalance.Retry.Backoff):
  161. }
  162. if refreshCoordinator {
  163. err := c.client.RefreshCoordinator(c.groupID)
  164. if err != nil {
  165. return c.retryNewSession(ctx, topics, handler, retries, true)
  166. }
  167. }
  168. return c.newSession(ctx, topics, handler, retries-1)
  169. }
  170. func (c *consumerGroup) newSession(ctx context.Context, topics []string, handler ConsumerGroupHandler, retries int) (*consumerGroupSession, error) {
  171. coordinator, err := c.client.Coordinator(c.groupID)
  172. if err != nil {
  173. if retries <= 0 {
  174. return nil, err
  175. }
  176. return c.retryNewSession(ctx, topics, handler, retries, true)
  177. }
  178. // Join consumer group
  179. join, err := c.joinGroupRequest(coordinator, topics)
  180. if err != nil {
  181. _ = coordinator.Close()
  182. return nil, err
  183. }
  184. switch join.Err {
  185. case ErrNoError:
  186. c.memberID = join.MemberId
  187. case ErrUnknownMemberId, ErrIllegalGeneration: // reset member ID and retry immediately
  188. c.memberID = ""
  189. return c.newSession(ctx, topics, handler, retries)
  190. case ErrNotCoordinatorForConsumer: // retry after backoff with coordinator refresh
  191. if retries <= 0 {
  192. return nil, join.Err
  193. }
  194. return c.retryNewSession(ctx, topics, handler, retries, true)
  195. case ErrRebalanceInProgress: // retry after backoff
  196. if retries <= 0 {
  197. return nil, join.Err
  198. }
  199. return c.retryNewSession(ctx, topics, handler, retries, false)
  200. default:
  201. return nil, join.Err
  202. }
  203. // Prepare distribution plan if we joined as the leader
  204. var plan BalanceStrategyPlan
  205. if join.LeaderId == join.MemberId {
  206. members, err := join.GetMembers()
  207. if err != nil {
  208. return nil, err
  209. }
  210. plan, err = c.balance(members)
  211. if err != nil {
  212. return nil, err
  213. }
  214. }
  215. // Sync consumer group
  216. sync, err := c.syncGroupRequest(coordinator, plan, join.GenerationId)
  217. if err != nil {
  218. _ = coordinator.Close()
  219. return nil, err
  220. }
  221. switch sync.Err {
  222. case ErrNoError:
  223. case ErrUnknownMemberId, ErrIllegalGeneration: // reset member ID and retry immediately
  224. c.memberID = ""
  225. return c.newSession(ctx, topics, handler, retries)
  226. case ErrNotCoordinatorForConsumer: // retry after backoff with coordinator refresh
  227. if retries <= 0 {
  228. return nil, sync.Err
  229. }
  230. return c.retryNewSession(ctx, topics, handler, retries, true)
  231. case ErrRebalanceInProgress: // retry after backoff
  232. if retries <= 0 {
  233. return nil, sync.Err
  234. }
  235. return c.retryNewSession(ctx, topics, handler, retries, false)
  236. default:
  237. return nil, sync.Err
  238. }
  239. // Retrieve and sort claims
  240. var claims map[string][]int32
  241. if len(sync.MemberAssignment) > 0 {
  242. members, err := sync.GetMemberAssignment()
  243. if err != nil {
  244. return nil, err
  245. }
  246. claims = members.Topics
  247. c.userData = members.UserData
  248. for _, partitions := range claims {
  249. sort.Sort(int32Slice(partitions))
  250. }
  251. }
  252. return newConsumerGroupSession(ctx, c, claims, join.MemberId, join.GenerationId, handler)
  253. }
  254. func (c *consumerGroup) joinGroupRequest(coordinator *Broker, topics []string) (*JoinGroupResponse, error) {
  255. req := &JoinGroupRequest{
  256. GroupId: c.groupID,
  257. MemberId: c.memberID,
  258. SessionTimeout: int32(c.config.Consumer.Group.Session.Timeout / time.Millisecond),
  259. ProtocolType: "consumer",
  260. }
  261. if c.config.Version.IsAtLeast(V0_10_1_0) {
  262. req.Version = 1
  263. req.RebalanceTimeout = int32(c.config.Consumer.Group.Rebalance.Timeout / time.Millisecond)
  264. }
  265. // use static user-data if configured, otherwise use consumer-group userdata from the last sync
  266. userData := c.config.Consumer.Group.Member.UserData
  267. if len(userData) == 0 {
  268. userData = c.userData
  269. }
  270. meta := &ConsumerGroupMemberMetadata{
  271. Topics: topics,
  272. UserData: userData,
  273. }
  274. strategy := c.config.Consumer.Group.Rebalance.Strategy
  275. if err := req.AddGroupProtocolMetadata(strategy.Name(), meta); err != nil {
  276. return nil, err
  277. }
  278. return coordinator.JoinGroup(req)
  279. }
  280. func (c *consumerGroup) syncGroupRequest(coordinator *Broker, plan BalanceStrategyPlan, generationID int32) (*SyncGroupResponse, error) {
  281. req := &SyncGroupRequest{
  282. GroupId: c.groupID,
  283. MemberId: c.memberID,
  284. GenerationId: generationID,
  285. }
  286. strategy := c.config.Consumer.Group.Rebalance.Strategy
  287. for memberID, topics := range plan {
  288. assignment := &ConsumerGroupMemberAssignment{Topics: topics}
  289. userDataBytes, err := strategy.AssignmentData(memberID, topics, generationID)
  290. if err != nil {
  291. return nil, err
  292. }
  293. assignment.UserData = userDataBytes
  294. if err := req.AddGroupAssignmentMember(memberID, assignment); err != nil {
  295. return nil, err
  296. }
  297. }
  298. return coordinator.SyncGroup(req)
  299. }
  300. func (c *consumerGroup) heartbeatRequest(coordinator *Broker, memberID string, generationID int32) (*HeartbeatResponse, error) {
  301. req := &HeartbeatRequest{
  302. GroupId: c.groupID,
  303. MemberId: memberID,
  304. GenerationId: generationID,
  305. }
  306. return coordinator.Heartbeat(req)
  307. }
  308. func (c *consumerGroup) balance(members map[string]ConsumerGroupMemberMetadata) (BalanceStrategyPlan, error) {
  309. topics := make(map[string][]int32)
  310. for _, meta := range members {
  311. for _, topic := range meta.Topics {
  312. topics[topic] = nil
  313. }
  314. }
  315. for topic := range topics {
  316. partitions, err := c.client.Partitions(topic)
  317. if err != nil {
  318. return nil, err
  319. }
  320. topics[topic] = partitions
  321. }
  322. strategy := c.config.Consumer.Group.Rebalance.Strategy
  323. return strategy.Plan(members, topics)
  324. }
  325. // Leaves the cluster, called by Close.
  326. func (c *consumerGroup) leave() error {
  327. c.lock.Lock()
  328. defer c.lock.Unlock()
  329. if c.memberID == "" {
  330. return nil
  331. }
  332. coordinator, err := c.client.Coordinator(c.groupID)
  333. if err != nil {
  334. return err
  335. }
  336. resp, err := coordinator.LeaveGroup(&LeaveGroupRequest{
  337. GroupId: c.groupID,
  338. MemberId: c.memberID,
  339. })
  340. if err != nil {
  341. _ = coordinator.Close()
  342. return err
  343. }
  344. // Unset memberID
  345. c.memberID = ""
  346. // Check response
  347. switch resp.Err {
  348. case ErrRebalanceInProgress, ErrUnknownMemberId, ErrNoError:
  349. return nil
  350. default:
  351. return resp.Err
  352. }
  353. }
  354. func (c *consumerGroup) handleError(err error, topic string, partition int32) {
  355. if _, ok := err.(*ConsumerError); !ok && topic != "" && partition > -1 {
  356. err = &ConsumerError{
  357. Topic: topic,
  358. Partition: partition,
  359. Err: err,
  360. }
  361. }
  362. if !c.config.Consumer.Return.Errors {
  363. Logger.Println(err)
  364. return
  365. }
  366. select {
  367. case <-c.closed:
  368. //consumer is closed
  369. return
  370. default:
  371. }
  372. select {
  373. case c.errors <- err:
  374. default:
  375. // no error listener
  376. }
  377. }
  378. func (c *consumerGroup) loopCheckPartitionNumbers(topics []string, session *consumerGroupSession) {
  379. pause := time.NewTicker(c.config.Metadata.RefreshFrequency)
  380. defer session.cancel()
  381. defer pause.Stop()
  382. var oldTopicToPartitionNum map[string]int
  383. var err error
  384. if oldTopicToPartitionNum, err = c.topicToPartitionNumbers(topics); err != nil {
  385. return
  386. }
  387. for {
  388. if newTopicToPartitionNum, err := c.topicToPartitionNumbers(topics); err != nil {
  389. return
  390. } else {
  391. for topic, num := range oldTopicToPartitionNum {
  392. if newTopicToPartitionNum[topic] != num {
  393. return // trigger the end of the session on exit
  394. }
  395. }
  396. }
  397. select {
  398. case <-pause.C:
  399. case <-session.ctx.Done():
  400. Logger.Printf("loop check partition number coroutine will exit, topics %s", topics)
  401. // if session closed by other, should be exited
  402. return
  403. case <-c.closed:
  404. return
  405. }
  406. }
  407. }
  408. func (c *consumerGroup) topicToPartitionNumbers(topics []string) (map[string]int, error) {
  409. topicToPartitionNum := make(map[string]int, len(topics))
  410. for _, topic := range topics {
  411. if partitionNum, err := c.client.Partitions(topic); err != nil {
  412. Logger.Printf("Consumer Group topic %s get partition number failed %v", topic, err)
  413. return nil, err
  414. } else {
  415. topicToPartitionNum[topic] = len(partitionNum)
  416. }
  417. }
  418. return topicToPartitionNum, nil
  419. }
  420. // --------------------------------------------------------------------
// ConsumerGroupSession represents a consumer group member session.
type ConsumerGroupSession interface {
	// Claims returns information about the claimed partitions by topic.
	Claims() map[string][]int32

	// MemberID returns the cluster member ID.
	MemberID() string

	// GenerationID returns the current generation ID.
	GenerationID() int32

	// MarkOffset marks the provided offset, alongside a metadata string
	// that represents the state of the partition consumer at that point in time. The
	// metadata string can be used by another consumer to restore that state, so it
	// can resume consumption.
	//
	// To follow upstream conventions, you are expected to mark the offset of the
	// next message to read, not the last message read. Thus, when calling `MarkOffset`
	// you should typically add one to the offset of the last consumed message.
	//
	// Note: calling MarkOffset does not necessarily commit the offset to the backend
	// store immediately for efficiency reasons, and it may never be committed if
	// your application crashes. This means that you may end up processing the same
	// message twice, and your processing should ideally be idempotent.
	MarkOffset(topic string, partition int32, offset int64, metadata string)

	// ResetOffset resets to the provided offset, alongside a metadata string that
	// represents the state of the partition consumer at that point in time. Reset
	// acts as a counterpart to MarkOffset, the difference being that it allows
	// resetting an offset to an earlier or smaller value, whereas MarkOffset only
	// allows incrementing the offset. See MarkOffset for more details.
	ResetOffset(topic string, partition int32, offset int64, metadata string)

	// MarkMessage marks a message as consumed.
	MarkMessage(msg *ConsumerMessage, metadata string)

	// Context returns the session context.
	Context() context.Context
}
// consumerGroupSession is the ConsumerGroupSession implementation created by
// newConsumerGroupSession for one generation of group membership.
type consumerGroupSession struct {
	parent       *consumerGroup
	memberID     string
	generationID int32
	handler      ConsumerGroupHandler

	claims  map[string][]int32 // claimed partitions by topic
	offsets *offsetManager     // manages the offsets of every claimed partition
	ctx     context.Context    // session context, derived from the context passed to Consume
	cancel  func()             // cancels ctx, which ends the session

	waitGroup       sync.WaitGroup // tracks the per-claim consume goroutines
	releaseOnce     sync.Once      // ensures the release steps run only once
	hbDying, hbDead chan none      // hbDying asks the heartbeat loop to stop; hbDead is closed when it has exited
}
  467. func newConsumerGroupSession(ctx context.Context, parent *consumerGroup, claims map[string][]int32, memberID string, generationID int32, handler ConsumerGroupHandler) (*consumerGroupSession, error) {
  468. // init offset manager
  469. offsets, err := newOffsetManagerFromClient(parent.groupID, memberID, generationID, parent.client)
  470. if err != nil {
  471. return nil, err
  472. }
  473. // init context
  474. ctx, cancel := context.WithCancel(ctx)
  475. // init session
  476. sess := &consumerGroupSession{
  477. parent: parent,
  478. memberID: memberID,
  479. generationID: generationID,
  480. handler: handler,
  481. offsets: offsets,
  482. claims: claims,
  483. ctx: ctx,
  484. cancel: cancel,
  485. hbDying: make(chan none),
  486. hbDead: make(chan none),
  487. }
  488. // start heartbeat loop
  489. go sess.heartbeatLoop()
  490. // create a POM for each claim
  491. for topic, partitions := range claims {
  492. for _, partition := range partitions {
  493. pom, err := offsets.ManagePartition(topic, partition)
  494. if err != nil {
  495. _ = sess.release(false)
  496. return nil, err
  497. }
  498. // handle POM errors
  499. go func(topic string, partition int32) {
  500. for err := range pom.Errors() {
  501. sess.parent.handleError(err, topic, partition)
  502. }
  503. }(topic, partition)
  504. }
  505. }
  506. // perform setup
  507. if err := handler.Setup(sess); err != nil {
  508. _ = sess.release(true)
  509. return nil, err
  510. }
  511. // start consuming
  512. for topic, partitions := range claims {
  513. for _, partition := range partitions {
  514. sess.waitGroup.Add(1)
  515. go func(topic string, partition int32) {
  516. defer sess.waitGroup.Done()
  517. // cancel the as session as soon as the first
  518. // goroutine exits
  519. defer sess.cancel()
  520. // consume a single topic/partition, blocking
  521. sess.consume(topic, partition)
  522. }(topic, partition)
  523. }
  524. }
  525. return sess, nil
  526. }
  527. func (s *consumerGroupSession) Claims() map[string][]int32 { return s.claims }
  528. func (s *consumerGroupSession) MemberID() string { return s.memberID }
  529. func (s *consumerGroupSession) GenerationID() int32 { return s.generationID }
  530. func (s *consumerGroupSession) MarkOffset(topic string, partition int32, offset int64, metadata string) {
  531. if pom := s.offsets.findPOM(topic, partition); pom != nil {
  532. pom.MarkOffset(offset, metadata)
  533. }
  534. }
  535. func (s *consumerGroupSession) ResetOffset(topic string, partition int32, offset int64, metadata string) {
  536. if pom := s.offsets.findPOM(topic, partition); pom != nil {
  537. pom.ResetOffset(offset, metadata)
  538. }
  539. }
  540. func (s *consumerGroupSession) MarkMessage(msg *ConsumerMessage, metadata string) {
  541. s.MarkOffset(msg.Topic, msg.Partition, msg.Offset+1, metadata)
  542. }
  543. func (s *consumerGroupSession) Context() context.Context {
  544. return s.ctx
  545. }
  546. func (s *consumerGroupSession) consume(topic string, partition int32) {
  547. // quick exit if rebalance is due
  548. select {
  549. case <-s.ctx.Done():
  550. return
  551. case <-s.parent.closed:
  552. return
  553. default:
  554. }
  555. // get next offset
  556. offset := s.parent.config.Consumer.Offsets.Initial
  557. if pom := s.offsets.findPOM(topic, partition); pom != nil {
  558. offset, _ = pom.NextOffset()
  559. }
  560. // create new claim
  561. claim, err := newConsumerGroupClaim(s, topic, partition, offset)
  562. if err != nil {
  563. s.parent.handleError(err, topic, partition)
  564. return
  565. }
  566. // handle errors
  567. go func() {
  568. for err := range claim.Errors() {
  569. s.parent.handleError(err, topic, partition)
  570. }
  571. }()
  572. // trigger close when session is done
  573. go func() {
  574. select {
  575. case <-s.ctx.Done():
  576. case <-s.parent.closed:
  577. }
  578. claim.AsyncClose()
  579. }()
  580. // start processing
  581. if err := s.handler.ConsumeClaim(s, claim); err != nil {
  582. s.parent.handleError(err, topic, partition)
  583. }
  584. // ensure consumer is closed & drained
  585. claim.AsyncClose()
  586. for _, err := range claim.waitClosed() {
  587. s.parent.handleError(err, topic, partition)
  588. }
  589. }
  590. func (s *consumerGroupSession) release(withCleanup bool) (err error) {
  591. // signal release, stop heartbeat
  592. s.cancel()
  593. // wait for consumers to exit
  594. s.waitGroup.Wait()
  595. // perform release
  596. s.releaseOnce.Do(func() {
  597. if withCleanup {
  598. if e := s.handler.Cleanup(s); e != nil {
  599. s.parent.handleError(e, "", -1)
  600. err = e
  601. }
  602. }
  603. if e := s.offsets.Close(); e != nil {
  604. err = e
  605. }
  606. close(s.hbDying)
  607. <-s.hbDead
  608. })
  609. return
  610. }
  611. func (s *consumerGroupSession) heartbeatLoop() {
  612. defer close(s.hbDead)
  613. defer s.cancel() // trigger the end of the session on exit
  614. pause := time.NewTicker(s.parent.config.Consumer.Group.Heartbeat.Interval)
  615. defer pause.Stop()
  616. retries := s.parent.config.Metadata.Retry.Max
  617. for {
  618. coordinator, err := s.parent.client.Coordinator(s.parent.groupID)
  619. if err != nil {
  620. if retries <= 0 {
  621. s.parent.handleError(err, "", -1)
  622. return
  623. }
  624. select {
  625. case <-s.hbDying:
  626. return
  627. case <-time.After(s.parent.config.Metadata.Retry.Backoff):
  628. retries--
  629. }
  630. continue
  631. }
  632. resp, err := s.parent.heartbeatRequest(coordinator, s.memberID, s.generationID)
  633. if err != nil {
  634. _ = coordinator.Close()
  635. if retries <= 0 {
  636. s.parent.handleError(err, "", -1)
  637. return
  638. }
  639. retries--
  640. continue
  641. }
  642. switch resp.Err {
  643. case ErrNoError:
  644. retries = s.parent.config.Metadata.Retry.Max
  645. case ErrRebalanceInProgress, ErrUnknownMemberId, ErrIllegalGeneration:
  646. return
  647. default:
  648. s.parent.handleError(err, "", -1)
  649. return
  650. }
  651. select {
  652. case <-pause.C:
  653. case <-s.hbDying:
  654. return
  655. }
  656. }
  657. }
  658. // --------------------------------------------------------------------
// ConsumerGroupHandler instances are used to handle individual topic/partition claims.
// It also provides hooks for your consumer group session life-cycle and allows you to
// trigger logic before or after the consume loop(s).
//
// PLEASE NOTE that handlers are likely to be called from several goroutines concurrently,
// ensure that all state is safely protected against race conditions.
type ConsumerGroupHandler interface {
	// Setup is run at the beginning of a new session, before ConsumeClaim.
	Setup(ConsumerGroupSession) error

	// Cleanup is run at the end of a session, once all ConsumeClaim goroutines have exited
	// but before the offsets are committed for the very last time.
	Cleanup(ConsumerGroupSession) error

	// ConsumeClaim must start a consumer loop of ConsumerGroupClaim's Messages().
	// Once the Messages() channel is closed, the Handler must finish its processing
	// loop and exit.
	ConsumeClaim(ConsumerGroupSession, ConsumerGroupClaim) error
}
// ConsumerGroupClaim processes Kafka messages from a given topic and partition within a consumer group.
type ConsumerGroupClaim interface {
	// Topic returns the consumed topic name.
	Topic() string

	// Partition returns the consumed partition.
	Partition() int32

	// InitialOffset returns the initial offset that was used as a starting point for this claim.
	InitialOffset() int64

	// HighWaterMarkOffset returns the high water mark offset of the partition,
	// i.e. the offset that will be used for the next message that will be produced.
	// You can use this to determine how far behind the processing is.
	HighWaterMarkOffset() int64

	// Messages returns the read channel for the messages that are returned by
	// the broker. The messages channel will be closed when a new rebalance cycle
	// is due. You must finish processing and mark offsets within
	// Config.Consumer.Group.Session.Timeout before the topic/partition is eventually
	// re-assigned to another group member.
	Messages() <-chan *ConsumerMessage
}
// consumerGroupClaim is a claim on a single topic/partition. It embeds the
// PartitionConsumer it was created with, so that consumer's methods (such as
// Messages, Errors and AsyncClose) are available on the claim directly.
type consumerGroupClaim struct {
	topic     string
	partition int32
	offset    int64 // offset consumption was started from; returned by InitialOffset
	PartitionConsumer
}
  701. func newConsumerGroupClaim(sess *consumerGroupSession, topic string, partition int32, offset int64) (*consumerGroupClaim, error) {
  702. pcm, err := sess.parent.consumer.ConsumePartition(topic, partition, offset)
  703. if err == ErrOffsetOutOfRange {
  704. offset = sess.parent.config.Consumer.Offsets.Initial
  705. pcm, err = sess.parent.consumer.ConsumePartition(topic, partition, offset)
  706. }
  707. if err != nil {
  708. return nil, err
  709. }
  710. go func() {
  711. for err := range pcm.Errors() {
  712. sess.parent.handleError(err, topic, partition)
  713. }
  714. }()
  715. return &consumerGroupClaim{
  716. topic: topic,
  717. partition: partition,
  718. offset: offset,
  719. PartitionConsumer: pcm,
  720. }, nil
  721. }
  722. func (c *consumerGroupClaim) Topic() string { return c.topic }
  723. func (c *consumerGroupClaim) Partition() int32 { return c.partition }
  724. func (c *consumerGroupClaim) InitialOffset() int64 { return c.offset }
  725. // Drains messages and errors, ensures the claim is fully closed.
  726. func (c *consumerGroupClaim) waitClosed() (errs ConsumerErrors) {
  727. go func() {
  728. for range c.Messages() {
  729. }
  730. }()
  731. for err := range c.Errors() {
  732. errs = append(errs, err)
  733. }
  734. return
  735. }