message.go

package sarama

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"io/ioutil"
	"time"

	"github.com/DataDog/zstd"
	"github.com/eapache/go-xerial-snappy"
	"github.com/pierrec/lz4"
)
// CompressionCodec represents the various compression codecs recognized by Kafka in messages.
type CompressionCodec int8

// The lowest 3 bits contain the compression codec used for the message
const compressionCodecMask int8 = 0x07

const (
	CompressionNone   CompressionCodec = 0
	CompressionGZIP   CompressionCodec = 1
	CompressionSnappy CompressionCodec = 2
	CompressionLZ4    CompressionCodec = 3
	CompressionZSTD   CompressionCodec = 4
)
func (cc CompressionCodec) String() string {
	return []string{
		"none",
		"gzip",
		"snappy",
		"lz4",
		"zstd", // without this entry, String() panics with an out-of-range index for CompressionZSTD
	}[int(cc)]
}
// CompressionLevelDefault is the constant to use in CompressionLevel
// to have the default compression level for any codec. The value is chosen
// so that it does not collide with any existing compression levels.
const CompressionLevelDefault = -1000
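
// Illustrative usage sketch: the codec constants and CompressionLevelDefault above are
// normally selected through sarama's public producer configuration rather than by
// populating Message directly; Producer.Compression and Producer.CompressionLevel are
// fields on sarama's Config:
//
//	cfg := NewConfig()
//	cfg.Producer.Compression = CompressionGZIP
//	cfg.Producer.CompressionLevel = CompressionLevelDefault // use the codec's own default level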
type Message struct {
	Codec            CompressionCodec // codec used to compress the message contents
	CompressionLevel int              // compression level
	Key              []byte           // the message key, may be nil
	Value            []byte           // the message contents
	Set              *MessageSet      // the message set a message might wrap
	Version          int8             // v1 requires Kafka 0.10
	Timestamp        time.Time        // the timestamp of the message (version 1+ only)

	compressedCache []byte
	compressedSize  int // used for computing the compression ratio metrics
}
func (m *Message) encode(pe packetEncoder) error {
	pe.push(newCRC32Field(crcIEEE))

	pe.putInt8(m.Version)

	attributes := int8(m.Codec) & compressionCodecMask
	pe.putInt8(attributes)

	if m.Version >= 1 {
		if err := (Timestamp{&m.Timestamp}).encode(pe); err != nil {
			return err
		}
	}

	err := pe.putBytes(m.Key)
	if err != nil {
		return err
	}

	var payload []byte

	if m.compressedCache != nil {
		payload = m.compressedCache
		m.compressedCache = nil
	} else if m.Value != nil {
		switch m.Codec {
		case CompressionNone:
			payload = m.Value
		case CompressionGZIP:
			var buf bytes.Buffer
			var writer *gzip.Writer
			if m.CompressionLevel != CompressionLevelDefault {
				writer, err = gzip.NewWriterLevel(&buf, m.CompressionLevel)
				if err != nil {
					return err
				}
			} else {
				writer = gzip.NewWriter(&buf)
			}
			if _, err = writer.Write(m.Value); err != nil {
				return err
			}
			if err = writer.Close(); err != nil {
				return err
			}
			m.compressedCache = buf.Bytes()
			payload = m.compressedCache
		case CompressionSnappy:
			tmp := snappy.Encode(m.Value)
			m.compressedCache = tmp
			payload = m.compressedCache
		case CompressionLZ4:
			var buf bytes.Buffer
			writer := lz4.NewWriter(&buf)
			if _, err = writer.Write(m.Value); err != nil {
				return err
			}
			if err = writer.Close(); err != nil {
				return err
			}
			m.compressedCache = buf.Bytes()
			payload = m.compressedCache
		case CompressionZSTD:
			if len(m.Value) == 0 {
				// Hardcoded empty ZSTD frame, see: https://github.com/DataDog/zstd/issues/41
				m.compressedCache = []byte{0x28, 0xb5, 0x2f, 0xfd, 0x24, 0x00, 0x01, 0x00, 0x00, 0x99, 0xe9, 0xd8, 0x51}
			} else {
				c, err := zstd.CompressLevel(nil, m.Value, m.CompressionLevel)
				if err != nil {
					return err
				}
				m.compressedCache = c
			}
			payload = m.compressedCache
		default:
			return PacketEncodingError{fmt.Sprintf("unsupported compression codec (%d)", m.Codec)}
		}
		// Keep in mind the compressed payload size for metric gathering
		m.compressedSize = len(payload)
	}

	if err = pe.putBytes(payload); err != nil {
		return err
	}

	return pe.pop()
}
func (m *Message) decode(pd packetDecoder) (err error) {
	err = pd.push(newCRC32Field(crcIEEE))
	if err != nil {
		return err
	}

	m.Version, err = pd.getInt8()
	if err != nil {
		return err
	}

	if m.Version > 1 {
		return PacketDecodingError{fmt.Sprintf("unknown magic byte (%v)", m.Version)}
	}

	attribute, err := pd.getInt8()
	if err != nil {
		return err
	}
	m.Codec = CompressionCodec(attribute & compressionCodecMask)

	if m.Version == 1 {
		if err := (Timestamp{&m.Timestamp}).decode(pd); err != nil {
			return err
		}
	}

	m.Key, err = pd.getBytes()
	if err != nil {
		return err
	}

	m.Value, err = pd.getBytes()
	if err != nil {
		return err
	}

	// Required for deep equal assertion during tests but might be useful
	// for future metrics about the compression ratio in fetch requests
	m.compressedSize = len(m.Value)

	switch m.Codec {
	case CompressionNone:
		// nothing to do
	case CompressionGZIP:
		if m.Value == nil {
			break
		}
		reader, err := gzip.NewReader(bytes.NewReader(m.Value))
		if err != nil {
			return err
		}
		if m.Value, err = ioutil.ReadAll(reader); err != nil {
			return err
		}
		if err := m.decodeSet(); err != nil {
			return err
		}
	case CompressionSnappy:
		if m.Value == nil {
			break
		}
		if m.Value, err = snappy.Decode(m.Value); err != nil {
			return err
		}
		if err := m.decodeSet(); err != nil {
			return err
		}
	case CompressionLZ4:
		if m.Value == nil {
			break
		}
		reader := lz4.NewReader(bytes.NewReader(m.Value))
		if m.Value, err = ioutil.ReadAll(reader); err != nil {
			return err
		}
		if err := m.decodeSet(); err != nil {
			return err
		}
	case CompressionZSTD:
		if m.Value == nil {
			break
		}
		if m.Value, err = zstd.Decompress(nil, m.Value); err != nil {
			return err
		}
		if err := m.decodeSet(); err != nil {
			return err
		}
	default:
		return PacketDecodingError{fmt.Sprintf("invalid compression specified (%d)", m.Codec)}
	}

	return pd.pop()
}
// decodes a message set from a previously encoded bulk-message
func (m *Message) decodeSet() (err error) {
	pd := realDecoder{raw: m.Value}
	m.Set = &MessageSet{}
	return m.Set.decode(&pd)
}