Browse Source

First crack at a MultiProducer

Burke Libbey 12 years ago
parent
commit
586f9e155c
3 changed files with 443 additions and 105 deletions
  1. 332 0
      multiproducer.go
  2. 93 0
      multiproducer_test.go
  3. 18 105
      producer.go

+ 332 - 0
multiproducer.go

@@ -0,0 +1,332 @@
+package sarama
+
+import (
+	"sync"
+	"time"
+)
+
+// MultiProducerConfig is used to pass multiple configuration options to NewMultiProducer.
+type MultiProducerConfig struct {
+	Partitioner    Partitioner      // Chooses the partition to send messages to, or randomly if this is nil.
+	RequiredAcks   RequiredAcks     // The level of acknowledgement reliability needed from the broker (defaults to no acknowledgement).
+	Timeout        int32            // The maximum time in ms the broker will wait the receipt of the number of RequiredAcks.
+	Compression    CompressionCodec // The type of compression to use on messages (defaults to no compression).
+	// MaxBufferBytes is the number of buffered key+value bytes per broker above which a flush is requested.
+	// NewMultiProducer replaces a value of 0 with 1.
+	MaxBufferBytes uint32
+	// MaxBufferTime is the interval, in ms, of the timer that flushes each broker's buffer.
+	MaxBufferTime  uint32
+}
+
+// brokerProducer holds the messages buffered for a single broker together with the
+// channels used to talk to the goroutine that flushes them.
+type brokerProducer struct {
+	// The embedded mutex is held while request and bufferedBytes are read or modified.
+	sync.Mutex
+	// broker is the broker that buffered requests are sent to.
+	broker        *Broker
+	// request is the pending produce request; it is nil when nothing is buffered.
+	request       *ProduceRequest
+	// bufferedBytes is the sum of key and value lengths added to request since the last flush.
+	bufferedBytes uint32
+	// flushNow receives a value when the buffer has grown past the configured size limit.
+	flushNow      chan bool
+	// stopper is closed to tell the flushing goroutine to flush one last time and exit.
+	stopper       chan bool
+}
+
+// MultiProducer publishes Kafka messages to any topic. It routes messages to the correct broker, buffering them per broker,
+// and parses responses for errors. You must call Close() on a MultiProducer to avoid leaks, it may not be garbage-collected automatically when
+// it passes out of scope (this is in addition to calling Close on the underlying client, which is still necessary).
+type MultiProducer struct {
+	// m guards brokerProducers.
+	m               sync.RWMutex
+	// client is used to look up partitions and partition leaders.
+	client          *Client
+	// config is the producer's own copy of the configuration passed to NewMultiProducer.
+	config          MultiProducerConfig
+	// brokerProducers maps each broker in use to its buffer and flushing goroutine.
+	brokerProducers map[*Broker]*brokerProducer
+	// errors carries the outcome of each flushed topic-partition (nil on success).
+	errors          chan error
+}
+
+// NewMultiProducer creates a new MultiProducer using the given client. The resulting object will buffer/flush Produce messages to Kafka.
+func NewMultiProducer(client *Client, config *MultiProducerConfig) (*MultiProducer, error) {
+	if config == nil {
+		config = new(MultiProducerConfig)
+	}
+
+	if config.RequiredAcks < -1 {
+		return nil, ConfigurationError("Invalid RequiredAcks")
+	}
+
+	if config.Timeout < 0 {
+		return nil, ConfigurationError("Invalid Timeout")
+	}
+
+	if config.Partitioner == nil {
+		config.Partitioner = NewRandomPartitioner()
+	}
+
+	if config.MaxBufferBytes == 0 {
+		config.MaxBufferBytes = 1
+	}
+
+	p := new(MultiProducer)
+	p.client = client
+	p.config = *config
+	p.errors = make(chan error, 16)
+	p.brokerProducers = make(map[*Broker]*brokerProducer)
+
+	return p, nil
+}
+
+// Close shuts down the MultiProducer by signalling every per-broker goroutine to stop. Each goroutine
+// flushes whatever it has buffered before exiting; Close itself does not wait for those flushes to finish.
+// You must call this function before a MultiProducer object passes out of scope, as it may otherwise leak
+// memory. You must call this before calling Close on the underlying client. It always returns nil.
+func (p *MultiProducer) Close() error {
+	p.m.Lock()
+	defer p.m.Unlock()
+
+	for broker := range p.brokerProducers {
+		p.brokerProducers[broker].Close()
+	}
+
+	return nil
+}
+
+// SendMessage sends a message with the given topic, key, and value. The partition to send to is selected by the
+// MultiProducer's Partitioner. To send strings as either key or value, see the StringEncoder type.
+// Errors from choosing the partition, encoding the key or value, or finding the partition leader are always returned directly.
+// If operating in synchronous mode (MaxBufferTime == 0 and MaxBufferBytes < 2), the result of the send is returned as well.
+// Otherwise nil is returned once the message is buffered and you must listen on the channel returned by Errors() to
+// asynchronously receive error replies.
+func (p *MultiProducer) SendMessage(topic string, key, value Encoder) error {
+	// The final argument is safeSendMessage's retry flag.
+	return p.safeSendMessage(topic, key, value, true)
+}
+
+// choosePartition asks the configured Partitioner to pick one of the topic's partitions for key
+// and returns the chosen partition's ID.
+//
+// It returns -1 and the client's error when the partition list cannot be fetched, and -1 with
+// InvalidPartition when the topic has no partitions or the Partitioner answers outside [0, numPartitions).
+func (p *MultiProducer) choosePartition(topic string, key Encoder) (int32, error) {
+	partitions, err := p.client.Partitions(topic)
+	if err != nil {
+		return -1, err
+	}
+
+	numPartitions := int32(len(partitions))
+
+	// With no partitions every possible choice fails the range check below, so report that
+	// directly instead of handing the Partitioner a zero count it may not be prepared for.
+	if numPartitions == 0 {
+		return -1, InvalidPartition
+	}
+
+	choice := p.config.Partitioner.Partition(key, numPartitions)
+
+	if choice < 0 || choice >= numPartitions {
+		return -1, InvalidPartition
+	}
+
+	return partitions[choice], nil
+}
+
+// newBrokerProducer creates the buffer for broker and starts the goroutine that flushes it.
+//
+// The goroutine flushes when a value arrives on flushNow, when the MaxBufferTime timer fires,
+// and one final time when stopper is closed. On stop it also removes the brokerProducer from
+// p.brokerProducers, disconnects the broker through the client, and exits. newBrokerProducer
+// returns only after the goroutine has started and created its timer.
+//
+// NOTE(review): with MaxBufferTime == 0 the timer fires immediately on every iteration, so the
+// goroutine loops without pausing. Synchronous sends of very small messages appear to rely on
+// that timer to be flushed, so the behaviour is kept here — confirm before changing it.
+func (p *MultiProducer) newBrokerProducer(broker *Broker) *brokerProducer {
+	bp := &brokerProducer{
+		broker: broker,
+		// One slot of buffering keeps a flush request from being dropped while the goroutine is busy flushing.
+		flushNow: make(chan bool, 1),
+		stopper:  make(chan bool),
+	}
+
+	maxBufferTime := time.Duration(p.config.MaxBufferTime) * time.Millisecond
+
+	initNow := make(chan bool)
+	go func() {
+		timer := time.NewTimer(maxBufferTime)
+		// Release the timer when the goroutine exits.
+		defer timer.Stop()
+		close(initNow)
+		for {
+			select {
+			case <-bp.flushNow:
+				p.flush(bp)
+				// The timer may have fired in the meantime; stop it and drain its channel
+				// so that the Reset below starts from a clean state.
+				if !timer.Stop() {
+					select {
+					case <-timer.C:
+					default:
+					}
+				}
+			case <-timer.C:
+				p.flush(bp)
+			case <-bp.stopper:
+				p.m.Lock()
+				delete(p.brokerProducers, bp.broker)
+				p.m.Unlock()
+				p.flush(bp)
+				p.client.disconnectBroker(bp.broker)
+				// flushNow is deliberately left open: a caller that still holds bp may try to
+				// send on it, and sending on a closed channel panics.
+				return
+			}
+			timer.Reset(maxBufferTime)
+		}
+	}()
+	<-initNow
+
+	return bp
+}
+
+// brokerProducerFor returns the brokerProducer registered for broker, creating and registering
+// one if none exists yet. The lookup is first tried under the read lock; only on a miss is the
+// write lock taken, and the map is checked again before a new brokerProducer is created.
+func (p *MultiProducer) brokerProducerFor(broker *Broker) *brokerProducer {
+	p.m.RLock()
+	existing, found := p.brokerProducers[broker]
+	p.m.RUnlock()
+	if found {
+		return existing
+	}
+
+	p.m.Lock()
+	defer p.m.Unlock()
+
+	// Another caller may have registered one between the two locks.
+	if existing, found = p.brokerProducers[broker]; found {
+		return existing
+	}
+
+	created := p.newBrokerProducer(broker)
+	p.brokerProducers[broker] = created
+	return created
+}
+
+// isSynchronous reports whether the producer is configured for synchronous operation, i.e.
+// MaxBufferTime is 0 and MaxBufferBytes is below 2 (NewMultiProducer turns a MaxBufferBytes of 0 into 1).
+func (p *MultiProducer) isSynchronous() bool {
+	return p.config.MaxBufferTime == 0 && p.config.MaxBufferBytes < 2
+}
+
+// Errors returns the channel on which the outcome of each flushed topic-partition is delivered
+// (nil for success). It shouldn't be used if operating in synchronous mode, where results are
+// returned by SendMessage instead; calling it in that mode panics.
+func (p *MultiProducer) Errors() <-chan error {
+	if !p.isSynchronous() {
+		return p.errors
+	}
+	panic("you can't use Errors() when operating in synchronous mode")
+}
+
+// addMessage appends message to the pending request and adds its key and value lengths to the
+// buffered byte count. Once the count exceeds maxBytes it tries to signal flushNow without
+// blocking; if the signal cannot be delivered right away it is skipped.
+// bp.request must be non-nil; the caller in this file holds bp's lock around the call.
+func (bp *brokerProducer) addMessage(topic string, partition int32, message *Message, maxBytes uint32) {
+	bp.request.AddMessage(topic, partition, message)
+
+	size := len(message.Key) + len(message.Value)
+	bp.bufferedBytes += uint32(size)
+
+	if bp.bufferedBytes <= maxBytes {
+		return
+	}
+
+	select {
+	case bp.flushNow <- true:
+	default:
+	}
+}
+
+// newProduceRequest returns an empty ProduceRequest carrying the producer's configured
+// RequiredAcks and Timeout.
+func (p *MultiProducer) newProduceRequest() *ProduceRequest {
+	return &ProduceRequest{RequiredAcks: p.config.RequiredAcks, Timeout: p.config.Timeout}
+}
+
+// addMessageForBroker buffers one message, built from keyBytes and valBytes with the configured
+// compression codec, on the brokerProducer for broker. In synchronous mode it then waits for the
+// next value on p.errors and returns it; otherwise it returns nil immediately.
+func (p *MultiProducer) addMessageForBroker(broker *Broker, topic string, partition int32, keyBytes, valBytes []byte) error {
+	target := p.brokerProducerFor(broker)
+	message := &Message{Codec: p.config.Compression, Key: keyBytes, Value: valBytes}
+
+	target.Lock()
+	// The pending request is created lazily; flush sets it back to nil.
+	if target.request == nil {
+		target.request = p.newProduceRequest()
+	}
+	target.addMessage(topic, partition, message, p.config.MaxBufferBytes)
+	target.Unlock()
+
+	if !p.isSynchronous() {
+		return nil
+	}
+	return <-p.errors
+}
+
+// safeSendMessage chooses a partition for key, encodes key and value, looks up the partition's
+// leader and hands the message to that broker's buffer.
+//
+// A nil key or a nil value is sent as nil bytes rather than being encoded. Any error from
+// partition selection, encoding, or the leader lookup is returned immediately; otherwise the
+// result of addMessageForBroker is returned. The retry parameter is currently not consulted here.
+func (p *MultiProducer) safeSendMessage(topic string, key, value Encoder, retry bool) error {
+	partition, err := p.choosePartition(topic, key)
+	if err != nil {
+		return err
+	}
+
+	var keyBytes []byte
+	var valBytes []byte
+
+	if key != nil {
+		keyBytes, err = key.Encode()
+		if err != nil {
+			return err
+		}
+	}
+	// Guard the value like the key: calling Encode on a nil Encoder would panic.
+	if value != nil {
+		valBytes, err = value.Encode()
+		if err != nil {
+			return err
+		}
+	}
+
+	broker, err := p.client.Leader(topic, partition)
+	if err != nil {
+		return err
+	}
+
+	return p.addMessageForBroker(broker, topic, partition, keyBytes, valBytes)
+}
+
+// Close tells the brokerProducer's goroutine to stop by closing the stopper channel.
+// It is safe to call more than once and from several goroutines: only the first call closes
+// the channel, later calls do nothing. It always returns nil.
+func (bp *brokerProducer) Close() error {
+	bp.Lock()
+	defer bp.Unlock()
+
+	select {
+	case <-bp.stopper:
+		// Nothing is ever sent on stopper, so a successful receive means it is already closed.
+	default:
+		close(bp.stopper)
+	}
+	return nil
+}
+
+// flush takes whatever request bp has buffered, resets bp's buffer under its lock, and sends
+// the request (with retry enabled) outside the lock. It does nothing when the buffer is empty.
+func (p *MultiProducer) flush(bp *brokerProducer) {
+	bp.Lock()
+	pending := bp.request
+	bp.request, bp.bufferedBytes = nil, 0
+	bp.Unlock()
+
+	if pending == nil {
+		return
+	}
+	p.flushRequest(bp, true, pending)
+}
+
+// flushRequest sends request to bp's broker and reports the outcome on p.errors.
+// flushRequest must push one and exactly one message onto p.errors when given only one topic-partition.
+//
+// When the send itself fails with anything other than EncodingError and retry is true, bp is
+// closed and every topic-partition of the request is re-sent once to its current leader; the
+// results of those re-sends are the only values reported. When the broker answers, one value is
+// reported per topic-partition block in the response; blocks carrying UnknownTopicOrPartition,
+// NotLeaderForPartition or LeaderNotAvailable are re-sent once when retry is true.
+func (p *MultiProducer) flushRequest(bp *brokerProducer, retry bool, request *ProduceRequest) {
+
+	response, err := bp.broker.Produce(p.client.id, request)
+
+	switch err {
+	case nil:
+		// Sent successfully; the response is examined below.
+	case EncodingError:
+		p.errors <- err
+		return
+	default:
+		if !retry {
+			p.errors <- err
+			return
+		}
+
+		bp.Close()
+
+		for topic, d := range request.msgSets {
+			for partition := range d {
+				p.retryPartition(request, topic, partition)
+			}
+		}
+		// The retries above have already reported a result for each topic-partition;
+		// falling through would push an additional, spurious nil.
+		return
+	}
+
+	if response == nil {
+		p.errors <- nil
+		return
+	}
+
+	for topic, d := range response.Blocks {
+		for partition, block := range d {
+			if block == nil {
+				p.errors <- IncompleteResponse
+				continue
+			}
+
+			switch block.Err {
+			case NoError:
+				p.errors <- nil
+
+			case UnknownTopicOrPartition, NotLeaderForPartition, LeaderNotAvailable:
+				if retry {
+					p.retryPartition(request, topic, partition)
+				} else {
+					p.errors <- block.Err
+				}
+			default:
+				p.errors <- block.Err
+			}
+		}
+	}
+
+}
+
+// retryPartition re-sends the messages that request holds for topic/partition to the
+// partition's current leader, with further retries disabled. It reports exactly one failure
+// on p.errors if the leader cannot be determined; otherwise the nested flushRequest reports.
+//
+// NOTE(review): the leader is looked up without refreshing topic metadata first — confirm
+// that the client can return a different leader here.
+func (p *MultiProducer) retryPartition(request *ProduceRequest, topic string, partition int32) {
+	otherBroker, err := p.client.Leader(topic, partition)
+	if err != nil {
+		p.errors <- err
+		return
+	}
+	otherBp := p.brokerProducerFor(otherBroker)
+
+	retryReq := p.newProduceRequest()
+	for _, msgBlock := range request.msgSets[topic][partition].Messages {
+		retryReq.AddMessage(topic, partition, msgBlock.Msg)
+	}
+	p.flushRequest(otherBp, false, retryReq)
+}

+ 93 - 0
multiproducer_test.go

@@ -0,0 +1,93 @@
+package sarama
+
+import (
+	"encoding/binary"
+	"fmt"
+	"testing"
+	"time"
+)
+
+// TestSimpleMultiProducer sends ten messages through a MultiProducer whose byte limit is crossed
+// by the tenth message, and checks that exactly one successful result is reported.
+func TestSimpleMultiProducer(t *testing.T) {
+	responses := make(chan []byte, 1)
+	extraResponses := make(chan []byte)
+	mockBroker := NewMockBroker(t, responses)
+	mockExtra := NewMockBroker(t, extraResponses)
+	defer mockBroker.Close()
+	defer mockExtra.Close()
+
+	// return the extra mock as another available broker
+	response := []byte{
+		0x00, 0x00, 0x00, 0x01,
+		0x00, 0x00, 0x00, 0x01,
+		0x00, 0x09, 'l', 'o', 'c', 'a', 'l', 'h', 'o', 's', 't',
+		0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x01,
+		0x00, 0x00,
+		0x00, 0x08, 'm', 'y', '_', 't', 'o', 'p', 'i', 'c',
+		0x00, 0x00, 0x00, 0x01,
+		0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x01,
+		0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00}
+	// The broker's port is the 4-byte field right after the "localhost" string.
+	binary.BigEndian.PutUint32(response[19:], uint32(mockExtra.Port()))
+	responses <- response
+	go func() {
+		msg := []byte{
+			0x00, 0x00, 0x00, 0x01,
+			0x00, 0x08, 'm', 'y', '_', 't', 'o', 'p', 'i', 'c',
+			0x00, 0x00, 0x00, 0x01,
+			0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
+		// The trailing 8 bytes start at index 24; index 23 is still part of the preceding 2-byte field.
+		binary.BigEndian.PutUint64(msg[24:], 0)
+		extraResponses <- msg
+	}()
+
+	client, err := NewClient("client_id", []string{mockBroker.Addr()}, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	producer, err := NewMultiProducer(client, &MultiProducerConfig{
+		RequiredAcks:  WaitForLocal,
+		MaxBufferTime: 1000000, // "never"
+		// So that we flush once, after the 10th message.
+		MaxBufferBytes: uint32((len("ABC THE MESSAGE") * 10) - 1),
+	})
+	// producer is nil when construction fails, so check before deferring Close.
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer producer.Close()
+
+	for i := 0; i < 10; i++ {
+		err = producer.SendMessage("my_topic", nil, StringEncoder("ABC THE MESSAGE"))
+		if err != nil {
+			t.Error(err)
+		}
+	}
+
+	select {
+	case err = <-producer.Errors():
+		if err != nil {
+			t.Error(err)
+		}
+	case <-time.After(1 * time.Second):
+		t.Error(fmt.Errorf("Message was never received"))
+	}
+
+	select {
+	case <-producer.Errors():
+		t.Error(fmt.Errorf("too many values returned"))
+	default:
+	}
+
+	// TODO: This doesn't really test that we ONLY flush once.
+	// For example, change the MaxBufferBytes to be much lower.
+}
+
+// TestMultipleMultiProducer is a placeholder; it currently contains no test logic.
+func TestMultipleMultiProducer(t *testing.T) {
+
+	// TODO: Submit events to 3 different topics on 2 different brokers.
+	// Need to figure out how the request format works to return the broker
+	// info for those two new brokers, and how to return multiple blocks in
+	// a ProduceResponse.
+
+}

+ 18 - 105
producer.go

@@ -12,9 +12,8 @@ type ProducerConfig struct {
 // and parses responses for errors. You must call Close() on a producer to avoid leaks, it may not be garbage-collected automatically when
 // it passes out of scope (this is in addition to calling Close on the underlying client, which is still necessary).
 type Producer struct {
-	client *Client
-	topic  string
-	config ProducerConfig
+	mp    MultiProducer
+	topic string
 }
 
 // NewProducer creates a new Producer using the given client. The resulting producer will publish messages on the given topic.
@@ -23,26 +22,27 @@ func NewProducer(client *Client, topic string, config *ProducerConfig) (*Produce
 		config = new(ProducerConfig)
 	}
 
-	if config.RequiredAcks < -1 {
-		return nil, ConfigurationError("Invalid RequiredAcks")
+	mpc := MultiProducerConfig{
+		Partitioner:    config.Partitioner,
+		RequiredAcks:   config.RequiredAcks,
+		Timeout:        config.Timeout,
+		Compression:    config.Compression,
+		MaxBufferBytes: 0, // synchronous
+		MaxBufferTime:  0, // synchronous
 	}
-
-	if config.Timeout < 0 {
-		return nil, ConfigurationError("Invalid Timeout")
-	}
-
-	if config.Partitioner == nil {
-		config.Partitioner = NewRandomPartitioner()
+	mp, err := NewMultiProducer(client, &mpc)
+	if err != nil {
+		return nil, err
 	}
 
 	if topic == "" {
 		return nil, ConfigurationError("Empty topic")
 	}
 
-	p := new(Producer)
-	p.client = client
-	p.topic = topic
-	p.config = *config
+	p := &Producer{
+		topic: topic,
+		mp:    *mp,
+	}
 
 	return p, nil
 }
@@ -51,98 +51,11 @@ func NewProducer(client *Client, topic string, config *ProducerConfig) (*Produce
 // a producer object passes out of scope, as it may otherwise leak memory. You must call this before calling Close
 // on the underlying client.
 func (p *Producer) Close() error {
-	// no-op for now, adding for consistency and so the API doesn't change when we add buffering
-	// (which will require a goroutine, which will require a close method in order to flush the buffer).
-	return nil
+	return p.mp.Close()
 }
 
 // SendMessage sends a message with the given key and value. The partition to send to is selected by the Producer's Partitioner.
 // To send strings as either key or value, see the StringEncoder type.
 func (p *Producer) SendMessage(key, value Encoder) error {
-	return p.safeSendMessage(key, value, true)
-}
-
-func (p *Producer) choosePartition(key Encoder) (int32, error) {
-	partitions, err := p.client.Partitions(p.topic)
-	if err != nil {
-		return -1, err
-	}
-
-	numPartitions := int32(len(partitions))
-
-	choice := p.config.Partitioner.Partition(key, numPartitions)
-
-	if choice < 0 || choice >= numPartitions {
-		return -1, InvalidPartition
-	}
-
-	return partitions[choice], nil
-}
-
-func (p *Producer) safeSendMessage(key, value Encoder, retry bool) error {
-	partition, err := p.choosePartition(key)
-	if err != nil {
-		return err
-	}
-
-	var keyBytes []byte
-	var valBytes []byte
-
-	if key != nil {
-		keyBytes, err = key.Encode()
-		if err != nil {
-			return err
-		}
-	}
-	valBytes, err = value.Encode()
-	if err != nil {
-		return err
-	}
-
-	broker, err := p.client.Leader(p.topic, partition)
-	if err != nil {
-		return err
-	}
-
-	request := &ProduceRequest{RequiredAcks: p.config.RequiredAcks, Timeout: p.config.Timeout}
-	request.AddMessage(p.topic, partition, &Message{Codec: p.config.Compression, Key: keyBytes, Value: valBytes})
-
-	response, err := broker.Produce(p.client.id, request)
-	switch err {
-	case nil:
-		break
-	case EncodingError:
-		return err
-	default:
-		if !retry {
-			return err
-		}
-		p.client.disconnectBroker(broker)
-		return p.safeSendMessage(key, value, false)
-	}
-
-	if response == nil {
-		return nil
-	}
-
-	block := response.GetBlock(p.topic, partition)
-	if block == nil {
-		return IncompleteResponse
-	}
-
-	switch block.Err {
-	case NoError:
-		return nil
-	case UnknownTopicOrPartition, NotLeaderForPartition, LeaderNotAvailable:
-		if !retry {
-			return block.Err
-		}
-		err = p.client.RefreshTopicMetadata(p.topic)
-		if err != nil {
-			return err
-		}
-		return p.safeSendMessage(key, value, false)
-	}
-
-	return block.Err
+	return p.mp.SendMessage(p.topic, key, value)
 }