浪院长 浪尖聊大数据
对于kafka 0.8.2版本,kafka的消费者API有
高阶API (例子:https://cwiki.apache.org/confluence/display/KAFKA/Consumer+Group+Example)
和
低阶API (例子:https://cwiki.apache.org/confluence/display/KAFKA/0.8.0+SimpleConsumer+Example)
之分。
两者的细节,可以对比上面链接的例子。
高阶API消费者在配置auto.commit.enable=true时,会有一个单独的后台线程,
按照auto.commit.interval.ms={时间间隔}指定的间隔周期性地提交offset到zk。
zk保存的offset信息如下(路径形如/consumers/{group}/offsets/{topic}/{partition},节点的值即为已提交的offset):
kafka 0.10版本的例子链接:
http://kafka.apache.org/0102/javadoc/index.html?org/apache/kafka/clients/consumer/KafkaConsumer.html
在kafka 0.10版本中,zookeeper上已经不再保存consumers的信息。
消费者的组信息和offset信息都提交到了broker上的一个内部topic中,topic名字是__consumer_offsets。
kafka 0.10的__consumer_offsets topic中,offset提交消息的value的schema信息如下:
// Version-1 value schema of an offset-commit message, quoted for illustration.
// It declares four fields: "offset" (INT64), "metadata" (STRING, documented as
// "Associated metadata." with an empty-string default), "commit_timestamp" (INT64)
// and "expire_timestamp" (INT64).
val OFFSET_COMMIT_VALUE_SCHEMA_V1 = new Schema(new Field("offset", INT64),
new Field("metadata", STRING, "Associated metadata.", ""),
new Field("commit_timestamp", INT64),
new Field("expire_timestamp", INT64))
topic具体数据如下:
(test1,test,0)
[OffsetMetadata[26231,NO_METADATA],CommitTime 1537587480006,ExpirationTime 1537673880006]
获取消费者offset的信息,主要是为了监控kafka消费者消费的lag,进而把控消费者的处理情况。本文主要是帮助大家获取kafka 0.10及以上版本的消费者已经提交的offset信息,后面会再出文章帮助大家获取broker上指定topic分区的最大offset。
主函数完整代码。
package bigdata.spark.kafka
import java.nio.ByteBuffer
import java.util.Properties
import java.util.concurrent.TimeUnit
import kafka.common.{KafkaException, OffsetAndMetadata}
import org.apache.kafka.clients.consumer.{Consumer, ConsumerRecords, KafkaConsumer}
import org.apache.kafka.common.TopicPartition
import bigdata.spark.kafka.GroupMetadataManager._
import com.github.benmanes.caffeine.cache.{Cache, Caffeine, RemovalCause, RemovalListener}
/**
 * Small monitor that reads Kafka's internal `__consumer_offsets` topic, decodes every
 * offset-commit record and keeps the most recent committed offset per
 * (group, topic, partition) in an in-memory Caffeine cache.
 */
object monitor {

  /**
   * Creates a consumer that reads raw key/value bytes.
   *
   * Auto-commit is disabled and `auto.offset.reset` is `earliest`; nothing in this
   * object commits offsets for the monitor's own group.
   *
   * @param bootstrapServers value for `bootstrap.servers`; defaults to the address that
   *                         was previously hard-coded
   * @param groupId          value for `group.id`; defaults to the previously hard-coded group
   * @return a new consumer that is not yet subscribed; the caller must close it
   */
  def createKafkaConsumer(
      bootstrapServers: String = "mt-mdh.local:9093",
      groupId: String = "test2"): Consumer[Array[Byte], Array[Byte]] = {
    val props: Properties = new Properties()
    props.put("bootstrap.servers", bootstrapServers)
    props.put("group.id", groupId)
    props.put("enable.auto.commit", "false")
    props.put("auto.offset.reset", "earliest")
    props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer")
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer")
    new KafkaConsumer[Array[Byte], Array[Byte]](props)
  }

  /**
   * Decodes the key of a record read from `__consumer_offsets`.
   *
   * The buffer starts with a 16-bit schema version; the remainder is read with the
   * schema returned by `schemaForKey` for that version.
   *
   * @param buffer the serialized key, positioned at the version field
   * @return an `OffsetKey` for versions up to `CURRENT_OFFSET_KEY_SCHEMA_VERSION`, or a
   *         `GroupMetadataKey` for `CURRENT_GROUP_KEY_SCHEMA_VERSION`
   * @throws IllegalStateException if the version matches neither kind of key
   */
  def readMessageKey(buffer: ByteBuffer): BaseKey = {
    val version = buffer.getShort
    val keySchema = schemaForKey(version)
    val key = keySchema.read(buffer)

    if (version <= CURRENT_OFFSET_KEY_SCHEMA_VERSION) {
      // Versions 0 and 1 are offset-commit keys: (group, topic, partition).
      val group = key.get("group").asInstanceOf[String]
      val topic = key.get("topic").asInstanceOf[String]
      val partition = key.get("partition").asInstanceOf[Int]
      OffsetKey(version, GroupTopicPartition(group, new TopicPartition(topic, partition)))
    } else if (version == CURRENT_GROUP_KEY_SCHEMA_VERSION) {
      // Version 2 is a group-metadata key: it carries only the group id.
      val group = key.get("group").asInstanceOf[String]
      GroupMetadataKey(version, group)
    } else {
      throw new IllegalStateException("Unknown version " + version + " for group metadata message")
    }
  }

  /**
   * Decodes the value of an offset-commit record.
   *
   * @param buffer the serialized value, or `null` for a tombstone record
   * @return the decoded offset and metadata, or `null` when `buffer` is `null`
   * @throws IllegalStateException if the value's schema version is neither 0 nor 1
   */
  def readOffsetMessageValue(buffer: ByteBuffer): OffsetAndMetadata = {
    if (buffer == null) { // tombstone
      null
    } else {
      val version = buffer.getShort
      val valueSchema = schemaForOffset(version)
      val value = valueSchema.read(buffer)

      if (version == 0) {
        // Version 0 carries a single "timestamp" field.
        val offset = value.get("offset").asInstanceOf[Long]
        val metadata = value.get("metadata").asInstanceOf[String]
        val timestamp = value.get("timestamp").asInstanceOf[Long]
        OffsetAndMetadata(offset, metadata, timestamp)
      } else if (version == 1) {
        // Version 1 splits the timestamp into commit and expiration times.
        val offset = value.get("offset").asInstanceOf[Long]
        val metadata = value.get("metadata").asInstanceOf[String]
        val commitTimestamp = value.get("commit_timestamp").asInstanceOf[Long]
        val expireTimestamp = value.get("expire_timestamp").asInstanceOf[Long]
        OffsetAndMetadata(offset, metadata, commitTimestamp, expireTimestamp)
      } else {
        throw new IllegalStateException("Unknown offset message version")
      }
    }
  }

  // NOTE(review): the original (Chinese) comment here read "the main class is
  // kafkaStateActor" — presumably a pointer to the code this was adapted from; confirm.
  /**
   * Polls `__consumer_offsets` forever, printing each decoded offset commit and storing
   * it in a cache keyed by (group, topic, partition).
   *
   * The loop never terminates normally; the consumer is closed if an exception escapes it.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val groupTopicPartitionOffsetMap: Cache[(String, String, Int), OffsetAndMetadata] = Caffeine
      .newBuilder()
      .maximumSize(1025)
      .expireAfterAccess(10, TimeUnit.DAYS)
      // .removalListener(new RemovalListener[(String, String, Int), OffsetAndMetadata] {
      //   override def onRemoval(key: (String, String, Int), value: OffsetAndMetadata, cause: RemovalCause): Unit = {
      //     println("remove !")
      //   }
      // })
      .build[(String, String, Int), OffsetAndMetadata]()

    val consumer = createKafkaConsumer()
    try {
      consumer.subscribe(java.util.Arrays.asList("__consumer_offsets"))
      while (true) {
        val records: ConsumerRecords[Array[Byte], Array[Byte]] = consumer.poll(100)
        val iterator = records.iterator()
        while (iterator.hasNext) {
          val record = iterator.next()
          // Option(...) maps a null key to None so ByteBuffer.wrap never receives null.
          Option(record.key()).foreach { keyBytes =>
            readMessageKey(ByteBuffer.wrap(keyBytes)) match {
              case OffsetKey(_, key) =>
                // A null value is a tombstone for this key; there is no offset to record.
                Option(record.value()).foreach { valueBytes =>
                  val value: OffsetAndMetadata = readOffsetMessageValue(ByteBuffer.wrap(valueBytes))
                  val newKey = (key.group, key.topicPartition.topic, key.topicPartition.partition)
                  println(newKey)
                  println(value)
                  groupTopicPartitionOffsetMap.put(newKey, value)
                }
              case GroupMetadataKey(_, _) =>
                // Group-metadata records carry no committed offsets; ignore them.
                ()
            }
          }
        }
      }
    } finally {
      // Release the consumer's resources when the poll loop is aborted by an exception.
      consumer.close()
    }
  }
}
原文:https://blog.51cto.com/15127544/2665111