consumer_group.go 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870
  1. package sarama
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "sort"
  7. "sync"
  8. "time"
  9. )
// ErrClosedConsumerGroup is the error returned when a method is called on a consumer group that has been closed.
// In this file it is returned by Consume when the group's closed channel has
// already been closed, when session setup fails with ErrClosedClient, and when
// the group is closed while waiting to retry a session.
var ErrClosedConsumerGroup = errors.New("kafka: tried to use a consumer group that was closed")
// ConsumerGroup is responsible for dividing up processing of topics and partitions
// over a collection of processes (the members of the consumer group).
type ConsumerGroup interface {
	// Consume joins a cluster of consumers for a given list of topics and
	// starts a blocking ConsumerGroupSession through the ConsumerGroupHandler.
	//
	// The life-cycle of a session is represented by the following steps:
	//
	// 1. The consumers join the group (as explained in https://kafka.apache.org/documentation/#intro_consumers)
	//    and are assigned their "fair share" of partitions, aka 'claims'.
	// 2. Before processing starts, the handler's Setup() hook is called to notify the user
	//    of the claims and allow any necessary preparation or alteration of state.
	// 3. For each of the assigned claims the handler's ConsumeClaim() function is then called
	//    in a separate goroutine which requires it to be thread-safe. Any state must be carefully protected
	//    from concurrent reads/writes.
	// 4. The session will persist until one of the ConsumeClaim() functions exits. This can be either when the
	//    parent context is cancelled or when a server-side rebalance cycle is initiated.
	// 5. Once all the ConsumeClaim() loops have exited, the handler's Cleanup() hook is called
	//    to allow the user to perform any final tasks before a rebalance.
	// 6. Finally, marked offsets are committed one last time before claims are released.
	//
	// Please note that once a rebalance is triggered, sessions must be completed within
	// Config.Consumer.Group.Rebalance.Timeout. This means that ConsumeClaim() functions must exit
	// as quickly as possible to allow time for Cleanup() and the final offset commit. If the timeout
	// is exceeded, the consumer will be removed from the group by Kafka, which will cause offset
	// commit failures.
	//
	// The implementation in this file returns an error when topics is empty and
	// ErrClosedConsumerGroup when the group has already been closed.
	Consume(ctx context.Context, topics []string, handler ConsumerGroupHandler) error

	// Errors returns a read channel of errors that occurred during the consumer life-cycle.
	// By default, errors are logged and not returned over this channel.
	// If you want to implement any custom error handling, set your config's
	// Consumer.Return.Errors setting to true, and read from this channel.
	Errors() <-chan error

	// Close stops the ConsumerGroup and detaches any running sessions. It is required to call
	// this function before the object passes out of scope, as it will otherwise leak memory.
	Close() error
}
// consumerGroup is the ConsumerGroup implementation built by newConsumerGroup.
type consumerGroup struct {
	client   Client   // used for metadata, partition and coordinator lookups; closed by Close
	config   *Config  // obtained from client.Config() at construction
	consumer Consumer // created from client; opens the partition consumers for claims
	groupID  string
	memberID string // member ID from the last successful join; reset to "" on leave or stale membership

	errors chan error // buffered with config.ChannelBufferSize; closed by Close

	lock      sync.Mutex // held for the whole of Consume and of leave
	closed    chan none  // closed by Close to signal shutdown to all goroutines
	closeOnce sync.Once  // ensures the body of Close runs only once

	userData []byte // user data taken from the most recent sync's member assignment
}
  60. // NewConsumerGroup creates a new consumer group the given broker addresses and configuration.
  61. func NewConsumerGroup(addrs []string, groupID string, config *Config) (ConsumerGroup, error) {
  62. client, err := NewClient(addrs, config)
  63. if err != nil {
  64. return nil, err
  65. }
  66. c, err := newConsumerGroup(groupID, client)
  67. if err != nil {
  68. _ = client.Close()
  69. }
  70. return c, err
  71. }
  72. // NewConsumerGroupFromClient creates a new consumer group using the given client. It is still
  73. // necessary to call Close() on the underlying client when shutting down this consumer.
  74. // PLEASE NOTE: consumer groups can only re-use but not share clients.
  75. func NewConsumerGroupFromClient(groupID string, client Client) (ConsumerGroup, error) {
  76. // For clients passed in by the client, ensure we don't
  77. // call Close() on it.
  78. cli := &nopCloserClient{client}
  79. return newConsumerGroup(groupID, cli)
  80. }
  81. func newConsumerGroup(groupID string, client Client) (ConsumerGroup, error) {
  82. config := client.Config()
  83. if !config.Version.IsAtLeast(V0_10_2_0) {
  84. return nil, ConfigurationError("consumer groups require Version to be >= V0_10_2_0")
  85. }
  86. consumer, err := NewConsumerFromClient(client)
  87. if err != nil {
  88. return nil, err
  89. }
  90. return &consumerGroup{
  91. client: client,
  92. consumer: consumer,
  93. config: config,
  94. groupID: groupID,
  95. errors: make(chan error, config.ChannelBufferSize),
  96. closed: make(chan none),
  97. }, nil
  98. }
  99. // Errors implements ConsumerGroup.
  100. func (c *consumerGroup) Errors() <-chan error { return c.errors }
  101. // Close implements ConsumerGroup.
  102. func (c *consumerGroup) Close() (err error) {
  103. c.closeOnce.Do(func() {
  104. close(c.closed)
  105. // leave group
  106. if e := c.leave(); e != nil {
  107. err = e
  108. }
  109. // drain errors
  110. go func() {
  111. close(c.errors)
  112. }()
  113. for e := range c.errors {
  114. err = e
  115. }
  116. if e := c.client.Close(); e != nil {
  117. err = e
  118. }
  119. })
  120. return
  121. }
  122. // Consume implements ConsumerGroup.
  123. func (c *consumerGroup) Consume(ctx context.Context, topics []string, handler ConsumerGroupHandler) error {
  124. // Ensure group is not closed
  125. select {
  126. case <-c.closed:
  127. return ErrClosedConsumerGroup
  128. default:
  129. }
  130. c.lock.Lock()
  131. defer c.lock.Unlock()
  132. // Quick exit when no topics are provided
  133. if len(topics) == 0 {
  134. return fmt.Errorf("no topics provided")
  135. }
  136. // Refresh metadata for requested topics
  137. if err := c.client.RefreshMetadata(topics...); err != nil {
  138. return err
  139. }
  140. // Init session
  141. sess, err := c.newSession(ctx, topics, handler, c.config.Consumer.Group.Rebalance.Retry.Max)
  142. if err == ErrClosedClient {
  143. return ErrClosedConsumerGroup
  144. } else if err != nil {
  145. return err
  146. }
  147. // loop check topic partition numbers changed
  148. // will trigger rebalance when any topic partitions number had changed
  149. // avoid Consume function called again that will generate more than loopCheckPartitionNumbers coroutine
  150. go c.loopCheckPartitionNumbers(topics, sess)
  151. // Wait for session exit signal
  152. <-sess.ctx.Done()
  153. // Gracefully release session claims
  154. return sess.release(true)
  155. }
  156. func (c *consumerGroup) retryNewSession(ctx context.Context, topics []string, handler ConsumerGroupHandler, retries int, refreshCoordinator bool) (*consumerGroupSession, error) {
  157. select {
  158. case <-c.closed:
  159. return nil, ErrClosedConsumerGroup
  160. case <-time.After(c.config.Consumer.Group.Rebalance.Retry.Backoff):
  161. }
  162. if refreshCoordinator {
  163. err := c.client.RefreshCoordinator(c.groupID)
  164. if err != nil {
  165. return c.retryNewSession(ctx, topics, handler, retries, true)
  166. }
  167. }
  168. return c.newSession(ctx, topics, handler, retries-1)
  169. }
  170. func (c *consumerGroup) newSession(ctx context.Context, topics []string, handler ConsumerGroupHandler, retries int) (*consumerGroupSession, error) {
  171. coordinator, err := c.client.Coordinator(c.groupID)
  172. if err != nil {
  173. if retries <= 0 {
  174. return nil, err
  175. }
  176. return c.retryNewSession(ctx, topics, handler, retries, true)
  177. }
  178. // Join consumer group
  179. join, err := c.joinGroupRequest(coordinator, topics)
  180. if err != nil {
  181. _ = coordinator.Close()
  182. return nil, err
  183. }
  184. switch join.Err {
  185. case ErrNoError:
  186. c.memberID = join.MemberId
  187. case ErrUnknownMemberId, ErrIllegalGeneration: // reset member ID and retry immediately
  188. c.memberID = ""
  189. return c.newSession(ctx, topics, handler, retries)
  190. case ErrNotCoordinatorForConsumer: // retry after backoff with coordinator refresh
  191. if retries <= 0 {
  192. return nil, join.Err
  193. }
  194. return c.retryNewSession(ctx, topics, handler, retries, true)
  195. case ErrRebalanceInProgress: // retry after backoff
  196. if retries <= 0 {
  197. return nil, join.Err
  198. }
  199. return c.retryNewSession(ctx, topics, handler, retries, false)
  200. default:
  201. return nil, join.Err
  202. }
  203. // Prepare distribution plan if we joined as the leader
  204. var plan BalanceStrategyPlan
  205. if join.LeaderId == join.MemberId {
  206. members, err := join.GetMembers()
  207. if err != nil {
  208. return nil, err
  209. }
  210. plan, err = c.balance(members)
  211. if err != nil {
  212. return nil, err
  213. }
  214. }
  215. // Sync consumer group
  216. sync, err := c.syncGroupRequest(coordinator, plan, join.GenerationId)
  217. if err != nil {
  218. _ = coordinator.Close()
  219. return nil, err
  220. }
  221. switch sync.Err {
  222. case ErrNoError:
  223. case ErrUnknownMemberId, ErrIllegalGeneration: // reset member ID and retry immediately
  224. c.memberID = ""
  225. return c.newSession(ctx, topics, handler, retries)
  226. case ErrNotCoordinatorForConsumer: // retry after backoff with coordinator refresh
  227. if retries <= 0 {
  228. return nil, sync.Err
  229. }
  230. return c.retryNewSession(ctx, topics, handler, retries, true)
  231. case ErrRebalanceInProgress: // retry after backoff
  232. if retries <= 0 {
  233. return nil, sync.Err
  234. }
  235. return c.retryNewSession(ctx, topics, handler, retries, false)
  236. default:
  237. return nil, sync.Err
  238. }
  239. // Retrieve and sort claims
  240. var claims map[string][]int32
  241. if len(sync.MemberAssignment) > 0 {
  242. members, err := sync.GetMemberAssignment()
  243. if err != nil {
  244. return nil, err
  245. }
  246. claims = members.Topics
  247. c.userData = members.UserData
  248. for _, partitions := range claims {
  249. sort.Sort(int32Slice(partitions))
  250. }
  251. }
  252. return newConsumerGroupSession(ctx, c, claims, join.MemberId, join.GenerationId, handler)
  253. }
  254. func (c *consumerGroup) joinGroupRequest(coordinator *Broker, topics []string) (*JoinGroupResponse, error) {
  255. req := &JoinGroupRequest{
  256. GroupId: c.groupID,
  257. MemberId: c.memberID,
  258. SessionTimeout: int32(c.config.Consumer.Group.Session.Timeout / time.Millisecond),
  259. ProtocolType: "consumer",
  260. }
  261. if c.config.Version.IsAtLeast(V0_10_1_0) {
  262. req.Version = 1
  263. req.RebalanceTimeout = int32(c.config.Consumer.Group.Rebalance.Timeout / time.Millisecond)
  264. }
  265. // use static user-data if configured, otherwise use consumer-group userdata from the last sync
  266. userData := c.config.Consumer.Group.Member.UserData
  267. if len(userData) == 0 {
  268. userData = c.userData
  269. }
  270. meta := &ConsumerGroupMemberMetadata{
  271. Topics: topics,
  272. UserData: userData,
  273. }
  274. strategy := c.config.Consumer.Group.Rebalance.Strategy
  275. if err := req.AddGroupProtocolMetadata(strategy.Name(), meta); err != nil {
  276. return nil, err
  277. }
  278. return coordinator.JoinGroup(req)
  279. }
  280. func (c *consumerGroup) syncGroupRequest(coordinator *Broker, plan BalanceStrategyPlan, generationID int32) (*SyncGroupResponse, error) {
  281. req := &SyncGroupRequest{
  282. GroupId: c.groupID,
  283. MemberId: c.memberID,
  284. GenerationId: generationID,
  285. }
  286. for memberID, topics := range plan {
  287. assignment := &ConsumerGroupMemberAssignment{Topics: topics}
  288. // Include topic assignments in group-assignment userdata for each consumer-group member
  289. if c.config.Consumer.Group.Rebalance.Strategy.Name() == StickyBalanceStrategyName {
  290. userDataBytes, err := encode(&StickyAssignorUserDataV1{
  291. Topics: topics,
  292. Generation: generationID,
  293. }, nil)
  294. if err != nil {
  295. return nil, err
  296. }
  297. assignment.UserData = userDataBytes
  298. }
  299. if err := req.AddGroupAssignmentMember(memberID, assignment); err != nil {
  300. return nil, err
  301. }
  302. }
  303. return coordinator.SyncGroup(req)
  304. }
  305. func (c *consumerGroup) heartbeatRequest(coordinator *Broker, memberID string, generationID int32) (*HeartbeatResponse, error) {
  306. req := &HeartbeatRequest{
  307. GroupId: c.groupID,
  308. MemberId: memberID,
  309. GenerationId: generationID,
  310. }
  311. return coordinator.Heartbeat(req)
  312. }
  313. func (c *consumerGroup) balance(members map[string]ConsumerGroupMemberMetadata) (BalanceStrategyPlan, error) {
  314. topics := make(map[string][]int32)
  315. for _, meta := range members {
  316. for _, topic := range meta.Topics {
  317. topics[topic] = nil
  318. }
  319. }
  320. for topic := range topics {
  321. partitions, err := c.client.Partitions(topic)
  322. if err != nil {
  323. return nil, err
  324. }
  325. topics[topic] = partitions
  326. }
  327. strategy := c.config.Consumer.Group.Rebalance.Strategy
  328. return strategy.Plan(members, topics)
  329. }
  330. // Leaves the cluster, called by Close.
  331. func (c *consumerGroup) leave() error {
  332. c.lock.Lock()
  333. defer c.lock.Unlock()
  334. if c.memberID == "" {
  335. return nil
  336. }
  337. coordinator, err := c.client.Coordinator(c.groupID)
  338. if err != nil {
  339. return err
  340. }
  341. resp, err := coordinator.LeaveGroup(&LeaveGroupRequest{
  342. GroupId: c.groupID,
  343. MemberId: c.memberID,
  344. })
  345. if err != nil {
  346. _ = coordinator.Close()
  347. return err
  348. }
  349. // Unset memberID
  350. c.memberID = ""
  351. // Check response
  352. switch resp.Err {
  353. case ErrRebalanceInProgress, ErrUnknownMemberId, ErrNoError:
  354. return nil
  355. default:
  356. return resp.Err
  357. }
  358. }
  359. func (c *consumerGroup) handleError(err error, topic string, partition int32) {
  360. if _, ok := err.(*ConsumerError); !ok && topic != "" && partition > -1 {
  361. err = &ConsumerError{
  362. Topic: topic,
  363. Partition: partition,
  364. Err: err,
  365. }
  366. }
  367. if !c.config.Consumer.Return.Errors {
  368. Logger.Println(err)
  369. return
  370. }
  371. select {
  372. case <-c.closed:
  373. //consumer is closed
  374. return
  375. default:
  376. }
  377. select {
  378. case c.errors <- err:
  379. default:
  380. // no error listener
  381. }
  382. }
  383. func (c *consumerGroup) loopCheckPartitionNumbers(topics []string, session *consumerGroupSession) {
  384. pause := time.NewTicker(c.config.Metadata.RefreshFrequency)
  385. defer session.cancel()
  386. defer pause.Stop()
  387. var oldTopicToPartitionNum map[string]int
  388. var err error
  389. if oldTopicToPartitionNum, err = c.topicToPartitionNumbers(topics); err != nil {
  390. return
  391. }
  392. for {
  393. if newTopicToPartitionNum, err := c.topicToPartitionNumbers(topics); err != nil {
  394. return
  395. } else {
  396. for topic, num := range oldTopicToPartitionNum {
  397. if newTopicToPartitionNum[topic] != num {
  398. return // trigger the end of the session on exit
  399. }
  400. }
  401. }
  402. select {
  403. case <-pause.C:
  404. case <-session.ctx.Done():
  405. Logger.Printf("loop check partition number coroutine will exit, topics %s", topics)
  406. // if session closed by other, should be exited
  407. return
  408. case <-c.closed:
  409. return
  410. }
  411. }
  412. }
  413. func (c *consumerGroup) topicToPartitionNumbers(topics []string) (map[string]int, error) {
  414. topicToPartitionNum := make(map[string]int, len(topics))
  415. for _, topic := range topics {
  416. if partitionNum, err := c.client.Partitions(topic); err != nil {
  417. Logger.Printf("Consumer Group topic %s get partition number failed %v", topic, err)
  418. return nil, err
  419. } else {
  420. topicToPartitionNum[topic] = len(partitionNum)
  421. }
  422. }
  423. return topicToPartitionNum, nil
  424. }
  425. // --------------------------------------------------------------------
// ConsumerGroupSession represents a consumer group member session.
// A session is handed to the ConsumerGroupHandler hooks (Setup, ConsumeClaim
// and Cleanup).
type ConsumerGroupSession interface {
	// Claims returns information about the claimed partitions by topic.
	Claims() map[string][]int32

	// MemberID returns the cluster member ID.
	MemberID() string

	// GenerationID returns the current generation ID.
	GenerationID() int32

	// MarkOffset marks the provided offset, alongside a metadata string
	// that represents the state of the partition consumer at that point in time. The
	// metadata string can be used by another consumer to restore that state, so it
	// can resume consumption.
	//
	// To follow upstream conventions, you are expected to mark the offset of the
	// next message to read, not the last message read. Thus, when calling `MarkOffset`
	// you should typically add one to the offset of the last consumed message.
	//
	// Note: calling MarkOffset does not necessarily commit the offset to the backend
	// store immediately for efficiency reasons, and it may never be committed if
	// your application crashes. This means that you may end up processing the same
	// message twice, and your processing should ideally be idempotent.
	MarkOffset(topic string, partition int32, offset int64, metadata string)

	// ResetOffset resets to the provided offset, alongside a metadata string that
	// represents the state of the partition consumer at that point in time. Reset
	// acts as a counterpart to MarkOffset, the difference being that it allows
	// resetting an offset to an earlier or smaller value, where MarkOffset only
	// allows incrementing the offset. See MarkOffset for more details.
	ResetOffset(topic string, partition int32, offset int64, metadata string)

	// MarkMessage marks a message as consumed. The implementation in this file
	// marks msg.Offset+1 for the message's topic and partition.
	MarkMessage(msg *ConsumerMessage, metadata string)

	// Context returns the session context.
	Context() context.Context
}
// consumerGroupSession is the ConsumerGroupSession implementation created by
// newConsumerGroupSession.
type consumerGroupSession struct {
	parent       *consumerGroup // owning group; used for config, client and error reporting
	memberID     string
	generationID int32
	handler      ConsumerGroupHandler

	claims  map[string][]int32 // claimed partitions by topic
	offsets *offsetManager     // provides a partition offset manager for each claim
	ctx     context.Context    // done when the session should end
	cancel  func()             // cancels ctx

	waitGroup       sync.WaitGroup // tracks the per-claim consume goroutines
	releaseOnce     sync.Once      // ensures the body of release runs only once
	hbDying, hbDead chan none      // release closes hbDying to stop the heartbeat loop; the loop closes hbDead on exit
}
  472. func newConsumerGroupSession(ctx context.Context, parent *consumerGroup, claims map[string][]int32, memberID string, generationID int32, handler ConsumerGroupHandler) (*consumerGroupSession, error) {
  473. // init offset manager
  474. offsets, err := newOffsetManagerFromClient(parent.groupID, memberID, generationID, parent.client)
  475. if err != nil {
  476. return nil, err
  477. }
  478. // init context
  479. ctx, cancel := context.WithCancel(ctx)
  480. // init session
  481. sess := &consumerGroupSession{
  482. parent: parent,
  483. memberID: memberID,
  484. generationID: generationID,
  485. handler: handler,
  486. offsets: offsets,
  487. claims: claims,
  488. ctx: ctx,
  489. cancel: cancel,
  490. hbDying: make(chan none),
  491. hbDead: make(chan none),
  492. }
  493. // start heartbeat loop
  494. go sess.heartbeatLoop()
  495. // create a POM for each claim
  496. for topic, partitions := range claims {
  497. for _, partition := range partitions {
  498. pom, err := offsets.ManagePartition(topic, partition)
  499. if err != nil {
  500. _ = sess.release(false)
  501. return nil, err
  502. }
  503. // handle POM errors
  504. go func(topic string, partition int32) {
  505. for err := range pom.Errors() {
  506. sess.parent.handleError(err, topic, partition)
  507. }
  508. }(topic, partition)
  509. }
  510. }
  511. // perform setup
  512. if err := handler.Setup(sess); err != nil {
  513. _ = sess.release(true)
  514. return nil, err
  515. }
  516. // start consuming
  517. for topic, partitions := range claims {
  518. for _, partition := range partitions {
  519. sess.waitGroup.Add(1)
  520. go func(topic string, partition int32) {
  521. defer sess.waitGroup.Done()
  522. // cancel the as session as soon as the first
  523. // goroutine exits
  524. defer sess.cancel()
  525. // consume a single topic/partition, blocking
  526. sess.consume(topic, partition)
  527. }(topic, partition)
  528. }
  529. }
  530. return sess, nil
  531. }
  532. func (s *consumerGroupSession) Claims() map[string][]int32 { return s.claims }
  533. func (s *consumerGroupSession) MemberID() string { return s.memberID }
  534. func (s *consumerGroupSession) GenerationID() int32 { return s.generationID }
  535. func (s *consumerGroupSession) MarkOffset(topic string, partition int32, offset int64, metadata string) {
  536. if pom := s.offsets.findPOM(topic, partition); pom != nil {
  537. pom.MarkOffset(offset, metadata)
  538. }
  539. }
  540. func (s *consumerGroupSession) ResetOffset(topic string, partition int32, offset int64, metadata string) {
  541. if pom := s.offsets.findPOM(topic, partition); pom != nil {
  542. pom.ResetOffset(offset, metadata)
  543. }
  544. }
  545. func (s *consumerGroupSession) MarkMessage(msg *ConsumerMessage, metadata string) {
  546. s.MarkOffset(msg.Topic, msg.Partition, msg.Offset+1, metadata)
  547. }
  548. func (s *consumerGroupSession) Context() context.Context {
  549. return s.ctx
  550. }
  551. func (s *consumerGroupSession) consume(topic string, partition int32) {
  552. // quick exit if rebalance is due
  553. select {
  554. case <-s.ctx.Done():
  555. return
  556. case <-s.parent.closed:
  557. return
  558. default:
  559. }
  560. // get next offset
  561. offset := s.parent.config.Consumer.Offsets.Initial
  562. if pom := s.offsets.findPOM(topic, partition); pom != nil {
  563. offset, _ = pom.NextOffset()
  564. }
  565. // create new claim
  566. claim, err := newConsumerGroupClaim(s, topic, partition, offset)
  567. if err != nil {
  568. s.parent.handleError(err, topic, partition)
  569. return
  570. }
  571. // handle errors
  572. go func() {
  573. for err := range claim.Errors() {
  574. s.parent.handleError(err, topic, partition)
  575. }
  576. }()
  577. // trigger close when session is done
  578. go func() {
  579. select {
  580. case <-s.ctx.Done():
  581. case <-s.parent.closed:
  582. }
  583. claim.AsyncClose()
  584. }()
  585. // start processing
  586. if err := s.handler.ConsumeClaim(s, claim); err != nil {
  587. s.parent.handleError(err, topic, partition)
  588. }
  589. // ensure consumer is closed & drained
  590. claim.AsyncClose()
  591. for _, err := range claim.waitClosed() {
  592. s.parent.handleError(err, topic, partition)
  593. }
  594. }
  595. func (s *consumerGroupSession) release(withCleanup bool) (err error) {
  596. // signal release, stop heartbeat
  597. s.cancel()
  598. // wait for consumers to exit
  599. s.waitGroup.Wait()
  600. // perform release
  601. s.releaseOnce.Do(func() {
  602. if withCleanup {
  603. if e := s.handler.Cleanup(s); e != nil {
  604. s.parent.handleError(e, "", -1)
  605. err = e
  606. }
  607. }
  608. if e := s.offsets.Close(); e != nil {
  609. err = e
  610. }
  611. close(s.hbDying)
  612. <-s.hbDead
  613. })
  614. return
  615. }
  616. func (s *consumerGroupSession) heartbeatLoop() {
  617. defer close(s.hbDead)
  618. defer s.cancel() // trigger the end of the session on exit
  619. pause := time.NewTicker(s.parent.config.Consumer.Group.Heartbeat.Interval)
  620. defer pause.Stop()
  621. retries := s.parent.config.Metadata.Retry.Max
  622. for {
  623. coordinator, err := s.parent.client.Coordinator(s.parent.groupID)
  624. if err != nil {
  625. if retries <= 0 {
  626. s.parent.handleError(err, "", -1)
  627. return
  628. }
  629. select {
  630. case <-s.hbDying:
  631. return
  632. case <-time.After(s.parent.config.Metadata.Retry.Backoff):
  633. retries--
  634. }
  635. continue
  636. }
  637. resp, err := s.parent.heartbeatRequest(coordinator, s.memberID, s.generationID)
  638. if err != nil {
  639. _ = coordinator.Close()
  640. if retries <= 0 {
  641. s.parent.handleError(err, "", -1)
  642. return
  643. }
  644. retries--
  645. continue
  646. }
  647. switch resp.Err {
  648. case ErrNoError:
  649. retries = s.parent.config.Metadata.Retry.Max
  650. case ErrRebalanceInProgress, ErrUnknownMemberId, ErrIllegalGeneration:
  651. return
  652. default:
  653. s.parent.handleError(err, "", -1)
  654. return
  655. }
  656. select {
  657. case <-pause.C:
  658. case <-s.hbDying:
  659. return
  660. }
  661. }
  662. }
  663. // --------------------------------------------------------------------
// ConsumerGroupHandler instances are used to handle individual topic/partition claims.
// It also provides hooks for your consumer group session life-cycle and allows you to
// trigger logic before or after the consume loop(s).
//
// PLEASE NOTE that handlers are likely to be called from several goroutines concurrently,
// ensure that all state is safely protected against race conditions.
type ConsumerGroupHandler interface {
	// Setup is run at the beginning of a new session, before ConsumeClaim.
	// A non-nil error aborts the session before any claim is consumed.
	Setup(ConsumerGroupSession) error

	// Cleanup is run at the end of a session, once all ConsumeClaim goroutines have exited
	// but before the offsets are committed for the very last time.
	Cleanup(ConsumerGroupSession) error

	// ConsumeClaim must start a consumer loop of ConsumerGroupClaim's Messages().
	// Once the Messages() channel is closed, the Handler must finish its processing
	// loop and exit.
	ConsumeClaim(ConsumerGroupSession, ConsumerGroupClaim) error
}
// ConsumerGroupClaim processes Kafka messages from a given topic and partition within a consumer group.
// A claim is passed to ConsumerGroupHandler.ConsumeClaim for each assigned
// topic/partition.
type ConsumerGroupClaim interface {
	// Topic returns the consumed topic name.
	Topic() string

	// Partition returns the consumed partition.
	Partition() int32

	// InitialOffset returns the initial offset that was used as a starting point for this claim.
	InitialOffset() int64

	// HighWaterMarkOffset returns the high water mark offset of the partition,
	// i.e. the offset that will be used for the next message that will be produced.
	// You can use this to determine how far behind the processing is.
	HighWaterMarkOffset() int64

	// Messages returns the read channel for the messages that are returned by
	// the broker. The messages channel will be closed when a new rebalance cycle
	// is due. You must finish processing and mark offsets within
	// Config.Consumer.Group.Session.Timeout before the topic/partition is eventually
	// re-assigned to another group member.
	Messages() <-chan *ConsumerMessage
}
// consumerGroupClaim is the claim type created by newConsumerGroupClaim. It
// records the claim's topic, partition and starting offset and embeds the
// PartitionConsumer opened for it, whose Messages, Errors and AsyncClose
// methods this file calls on the claim.
type consumerGroupClaim struct {
	topic     string
	partition int32
	offset    int64 // offset consumption actually started from
	PartitionConsumer
}
  706. func newConsumerGroupClaim(sess *consumerGroupSession, topic string, partition int32, offset int64) (*consumerGroupClaim, error) {
  707. pcm, err := sess.parent.consumer.ConsumePartition(topic, partition, offset)
  708. if err == ErrOffsetOutOfRange {
  709. offset = sess.parent.config.Consumer.Offsets.Initial
  710. pcm, err = sess.parent.consumer.ConsumePartition(topic, partition, offset)
  711. }
  712. if err != nil {
  713. return nil, err
  714. }
  715. go func() {
  716. for err := range pcm.Errors() {
  717. sess.parent.handleError(err, topic, partition)
  718. }
  719. }()
  720. return &consumerGroupClaim{
  721. topic: topic,
  722. partition: partition,
  723. offset: offset,
  724. PartitionConsumer: pcm,
  725. }, nil
  726. }
  727. func (c *consumerGroupClaim) Topic() string { return c.topic }
  728. func (c *consumerGroupClaim) Partition() int32 { return c.partition }
  729. func (c *consumerGroupClaim) InitialOffset() int64 { return c.offset }
  730. // Drains messages and errors, ensures the claim is fully closed.
  731. func (c *consumerGroupClaim) waitClosed() (errs ConsumerErrors) {
  732. go func() {
  733. for range c.Messages() {
  734. }
  735. }()
  736. for err := range c.Errors() {
  737. errs = append(errs, err)
  738. }
  739. return
  740. }