From a7f606492f9dc973614554a9e7f22ceee55b68c5 Mon Sep 17 00:00:00 2001
From: jazzl0ver
Date: Tue, 25 Dec 2018 17:33:12 +0300
Subject: [PATCH] Upgrading Kafka to the latest (#84)

Exporting JMX_PORT environment var (fix for #70)
---
 catalog/kafka/2.1/dockerfile/Dockerfile       |  37 +++++
 .../kafka/2.1/dockerfile/docker-entrypoint.sh | 110 ++++++++++++++
 catalog/kafka/2.1/dockerfile/java.env         |   2 +
 catalog/kafka/2.1/dockerfile/log4j.properties |  92 ++++++++++++
 .../kafka/2.1/dockerfile/server.properties    | 139 ++++++++++++++++++
 catalog/kafka/kafkacatalog.go                 |   6 +-
 6 files changed, 385 insertions(+), 1 deletion(-)
 create mode 100644 catalog/kafka/2.1/dockerfile/Dockerfile
 create mode 100755 catalog/kafka/2.1/dockerfile/docker-entrypoint.sh
 create mode 100644 catalog/kafka/2.1/dockerfile/java.env
 create mode 100644 catalog/kafka/2.1/dockerfile/log4j.properties
 create mode 100644 catalog/kafka/2.1/dockerfile/server.properties

diff --git a/catalog/kafka/2.1/dockerfile/Dockerfile b/catalog/kafka/2.1/dockerfile/Dockerfile
new file mode 100644
index 00000000..d7404d80
--- /dev/null
+++ b/catalog/kafka/2.1/dockerfile/Dockerfile
@@ -0,0 +1,37 @@
+FROM openjdk:8-jre-alpine
+
+# Install required packages
+RUN apk add --no-cache \
+    bash \
+    su-exec
+
+ENV KAFKA_USER=kafka
+
+RUN set -x \
+    && adduser -D "$KAFKA_USER"
+
+ENV KAFKA_VERSION 2.1.0
+ENV KAFKA_SCALA_VERSION 2.12
+ENV KAFKA_RELEASE kafka_${KAFKA_SCALA_VERSION}-${KAFKA_VERSION}
+
+# Download the Kafka release, untar it and clean up
+RUN set -x \
+    && wget -q http://www.us.apache.org/dist/kafka/${KAFKA_VERSION}/${KAFKA_RELEASE}.tgz \
+    && tar -zx -C / -f ${KAFKA_RELEASE}.tgz \
+    && mv /${KAFKA_RELEASE} /kafka \
+    && chown -R $KAFKA_USER /kafka \
+    && rm -f ${KAFKA_RELEASE}.tgz
+
+ENV PATH=$PATH:/kafka/bin
+
+# Set the JVM DNS cache TTL.
+# https://www.confluent.io/blog/design-and-deployment-considerations-for-deploying-apache-kafka-on-aws/
+RUN sed -i 's/#networkaddress.cache.ttl=-1/networkaddress.cache.ttl=10/g' $JAVA_HOME/lib/security/java.security
+
+COPY docker-entrypoint.sh /
+ENTRYPOINT ["/docker-entrypoint.sh", "kafka-server-start.sh", "/kafka/config/server.properties"]
+
+# Kafka listen port
+EXPOSE 9092
+# Kafka JMX port
+EXPOSE 9093
diff --git a/catalog/kafka/2.1/dockerfile/docker-entrypoint.sh b/catalog/kafka/2.1/dockerfile/docker-entrypoint.sh
new file mode 100755
index 00000000..4e18ab89
--- /dev/null
+++ b/catalog/kafka/2.1/dockerfile/docker-entrypoint.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+set -e
+
+rootdir=/data
+datadir=/data/kafka
+confdir=/data/conf
+
+# After release 0.9.5, the sys.conf and java.env files are no longer created.
+# Instead, service.conf and member.conf are created for the common service configs
+# and the service member configs. The default server.properties will be updated with
+# the configs in service.conf and member.conf.
+syscfgfile=$confdir/sys.conf
+javaenvfile=$confdir/java.env
+
+serverpropfile=$confdir/server.properties
+logcfgfile=$confdir/log4j.properties
+servicecfgfile=$confdir/service.conf
+membercfgfile=$confdir/member.conf
+
+kafkacfgdir=/kafka/config
+kafkaserverpropfile=$kafkacfgdir/server.properties
+
+# sanity check to make sure the volume is mounted to /data.
+if [ ! -d "$rootdir" ]; then
+    echo "error: $rootdir does not exist. Please make sure the volume is mounted to $rootdir." >&2
+    exit 1
+fi
+if [ ! -d "$datadir" ]; then
+    mkdir "$datadir"
+fi
+if [ ! -d "$confdir" ]; then
+    echo "error: $confdir does not exist." >&2
+    exit 1
+fi
+# sanity check to make sure the config files are created.
+if [ ! -f "$serverpropfile" ]; then
+    echo "error: $serverpropfile does not exist." >&2
+    exit 1
+fi
+if [ ! -f "$logcfgfile" ]; then
+    echo "error: $logcfgfile does not exist." >&2
+    exit 1
+fi
+
+# allow the container to be started with `--user`
+if [ "$1" = 'kafka-server-start.sh' -a "$(id -u)" = '0' ]; then
+    rootdiruser=$(stat -c "%U" $rootdir)
+    if [ "$rootdiruser" != "$KAFKA_USER" ]; then
+        echo "chown -R $KAFKA_USER $rootdir"
+        chown -R "$KAFKA_USER" "$rootdir"
+    fi
+    diruser=$(stat -c "%U" $datadir)
+    if [ "$diruser" != "$KAFKA_USER" ]; then
+        chown -R "$KAFKA_USER" "$datadir"
+    fi
+    chown -R "$KAFKA_USER" "$confdir"
+
+    exec su-exec "$KAFKA_USER" "$BASH_SOURCE" "$@"
+fi
+
+# copy config files to /kafka/config
+cp $serverpropfile $kafkacfgdir
+cp $logcfgfile $kafkacfgdir
+
+# after release 0.9.5
+if [ -f $servicecfgfile ]; then
+    # load service and member configs
+    . $servicecfgfile
+    . $membercfgfile
+
+    # update server.properties file
+    sed -i 's/broker.id=0/broker.id='$SERVICE_MEMBER_INDEX'/g' $kafkaserverpropfile
+    sed -i 's/broker.rack=rack/broker.rack='$AVAILABILITY_ZONE'/g' $kafkaserverpropfile
+    sed -i 's/delete.topic.enable=true/delete.topic.enable='$ALLOW_TOPIC_DEL'/g' $kafkaserverpropfile
+    sed -i 's/num.partitions=8/num.partitions='$NUMBER_PARTITIONS'/g' $kafkaserverpropfile
+    sed -i 's/bindip/'$BIND_IP'/g' $kafkaserverpropfile
+    sed -i 's/advertisedip/'$SERVICE_MEMBER'/g' $kafkaserverpropfile
+    sed -i 's/replication.factor=3/replication.factor='$REPLICATION_FACTOR'/g' $kafkaserverpropfile
+    sed -i 's/log.min.isr=2/log.min.isr='$MIN_INSYNC_REPLICAS'/g' $kafkaserverpropfile
+    sed -i 's/min.insync.replicas=2/min.insync.replicas='$MIN_INSYNC_REPLICAS'/g' $kafkaserverpropfile
+    sed -i 's/retention.hours=168/retention.hours='$RETENTION_HOURS'/g' $kafkaserverpropfile
+
+    # create jmx remote password and access files
+    echo "$JMX_REMOTE_USER $JMX_REMOTE_PASSWD" > $kafkacfgdir/jmxremote.password
+    echo "$JMX_REMOTE_USER $JMX_REMOTE_ACCESS" > $kafkacfgdir/jmxremote.access
+    chmod 0400 $kafkacfgdir/jmxremote.password
+    chmod 0400 $kafkacfgdir/jmxremote.access
+
+    # set java options
+    export KAFKA_HEAP_OPTS="-Xmx${HEAP_SIZE_MB}m -Xms${HEAP_SIZE_MB}m"
+    export KAFKA_JVM_PERFORMANCE_OPTS="-server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -XX:G1HeapRegionSize=16M -XX:MetaspaceSize=96m -XX:MinMetaspaceFreeRatio=50 -XX:MaxMetaspaceFreeRatio=80"
+    export KAFKA_JMX_OPTS="-Djava.rmi.server.hostname=$SERVICE_MEMBER -Dcom.sun.management.jmxremote.rmi.port=9093 -Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.password.file=$kafkacfgdir/jmxremote.password -Dcom.sun.management.jmxremote.access.file=$kafkacfgdir/jmxremote.access -Dcom.sun.management.jmxremote.authenticate=true -Dcom.sun.management.jmxremote.ssl=false"
+
+else
+    # load the sys config file. the syscfgfile exists before 0.9.6
+    . $syscfgfile
+    # load the java env file
+    . $javaenvfile
+fi
+
+echo $SERVICE_MEMBER
+echo ""
+
+echo $KAFKA_HEAP_OPTS
+echo $KAFKA_JVM_PERFORMANCE_OPTS
+export KAFKA_HEAP_OPTS=$KAFKA_HEAP_OPTS
+export KAFKA_JVM_PERFORMANCE_OPTS=$KAFKA_JVM_PERFORMANCE_OPTS
+
+echo "$@"
+exec "$@"
diff --git a/catalog/kafka/2.1/dockerfile/java.env b/catalog/kafka/2.1/dockerfile/java.env
new file mode 100644
index 00000000..1aec4453
--- /dev/null
+++ b/catalog/kafka/2.1/dockerfile/java.env
@@ -0,0 +1,2 @@
+KAFKA_HEAP_OPTS="-Xmx6144m -Xms6144m"
+KAFKA_JVM_PERFORMANCE_OPTS="-server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -XX:G1HeapRegionSize=16M -XX:MetaspaceSize=96m -XX:MinMetaspaceFreeRatio=50 -XX:MaxMetaspaceFreeRatio=80"
diff --git a/catalog/kafka/2.1/dockerfile/log4j.properties b/catalog/kafka/2.1/dockerfile/log4j.properties
new file mode 100644
index 00000000..0a1ec4f6
--- /dev/null
+++ b/catalog/kafka/2.1/dockerfile/log4j.properties
@@ -0,0 +1,92 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Unspecified loggers and loggers with additivity=true output to server.log and stdout
+# Note that INFO only applies to unspecified loggers, the log level of the child logger is used otherwise
+log4j.rootLogger=INFO, stdout, kafkaAppender
+
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+log4j.appender.kafkaAppender=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.kafkaAppender.DatePattern='.'yyyy-MM-dd-HH
+log4j.appender.kafkaAppender.File=${kafka.logs.dir}/server.log
+log4j.appender.kafkaAppender.layout=org.apache.log4j.PatternLayout
+log4j.appender.kafkaAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+log4j.appender.stateChangeAppender=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.stateChangeAppender.DatePattern='.'yyyy-MM-dd-HH
+log4j.appender.stateChangeAppender.File=${kafka.logs.dir}/state-change.log
+log4j.appender.stateChangeAppender.layout=org.apache.log4j.PatternLayout
+log4j.appender.stateChangeAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+log4j.appender.requestAppender=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.requestAppender.DatePattern='.'yyyy-MM-dd-HH
+log4j.appender.requestAppender.File=${kafka.logs.dir}/kafka-request.log
+log4j.appender.requestAppender.layout=org.apache.log4j.PatternLayout
+log4j.appender.requestAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+log4j.appender.cleanerAppender=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.cleanerAppender.DatePattern='.'yyyy-MM-dd-HH
+log4j.appender.cleanerAppender.File=${kafka.logs.dir}/log-cleaner.log
+log4j.appender.cleanerAppender.layout=org.apache.log4j.PatternLayout
+log4j.appender.cleanerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+log4j.appender.controllerAppender=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.controllerAppender.DatePattern='.'yyyy-MM-dd-HH
+log4j.appender.controllerAppender.File=${kafka.logs.dir}/controller.log
+log4j.appender.controllerAppender.layout=org.apache.log4j.PatternLayout
+log4j.appender.controllerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+log4j.appender.authorizerAppender=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.authorizerAppender.DatePattern='.'yyyy-MM-dd-HH
+log4j.appender.authorizerAppender.File=${kafka.logs.dir}/kafka-authorizer.log
+log4j.appender.authorizerAppender.layout=org.apache.log4j.PatternLayout
+log4j.appender.authorizerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+# Change the two lines below to adjust ZK client logging
+log4j.logger.org.I0Itec.zkclient.ZkClient=INFO
+log4j.logger.org.apache.zookeeper=INFO
+
+# Change the two lines below to adjust the general broker logging level (output to server.log and stdout)
+log4j.logger.kafka=INFO
+log4j.logger.org.apache.kafka=INFO
+
+# Change to DEBUG or TRACE to enable request logging
+log4j.logger.kafka.request.logger=WARN, requestAppender
+log4j.additivity.kafka.request.logger=false
+
+# Uncomment the lines below and change log4j.logger.kafka.network.RequestChannel$ to TRACE for additional output
+# related to the handling of requests
+#log4j.logger.kafka.network.Processor=TRACE, requestAppender
+#log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender
+#log4j.additivity.kafka.server.KafkaApis=false
+log4j.logger.kafka.network.RequestChannel$=WARN, requestAppender
+log4j.additivity.kafka.network.RequestChannel$=false
+
+log4j.logger.kafka.controller=TRACE, controllerAppender
+log4j.additivity.kafka.controller=false
+
+log4j.logger.kafka.log.LogCleaner=INFO, cleanerAppender
+log4j.additivity.kafka.log.LogCleaner=false
+
+log4j.logger.state.change.logger=TRACE, stateChangeAppender
+log4j.additivity.state.change.logger=false
+
+# Change to DEBUG to enable audit log for the authorizer
+log4j.logger.kafka.authorizer.logger=WARN, authorizerAppender
+log4j.additivity.kafka.authorizer.logger=false
+
diff --git a/catalog/kafka/2.1/dockerfile/server.properties b/catalog/kafka/2.1/dockerfile/server.properties
new file mode 100644
index 00000000..4a9a5506
--- /dev/null
+++ b/catalog/kafka/2.1/dockerfile/server.properties
@@ -0,0 +1,139 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# see kafka.server.KafkaConfig for additional details and defaults
+
+############################# Server Basics #############################
+
+# The id of the broker. This must be set to a unique integer for each broker.
+broker.id=0
+
+# Switch to enable topic deletion or not, default value is false
+#delete.topic.enable=true
+
+############################# Socket Server Settings #############################
+
+# The address the socket server listens on. It will get the value returned from
+# java.net.InetAddress.getCanonicalHostName() if not configured.
+#   FORMAT:
+#     listeners = listener_name://host_name:port
+#   EXAMPLE:
+#     listeners = PLAINTEXT://your.host.name:9092
+#listeners=PLAINTEXT://:9092
+
+# Hostname and port the broker will advertise to producers and consumers. If not set,
+# it uses the value for "listeners" if configured. Otherwise, it will use the value
+# returned from java.net.InetAddress.getCanonicalHostName().
+#advertised.listeners=PLAINTEXT://your.host.name:9092
+
+# Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details
+#listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL
+
+# The number of threads that the server uses for receiving requests from the network and sending responses to the network
+num.network.threads=3
+
+# The number of threads that the server uses for processing requests, which may include disk I/O
+num.io.threads=8
+
+# The send buffer (SO_SNDBUF) used by the socket server
+socket.send.buffer.bytes=102400
+
+# The receive buffer (SO_RCVBUF) used by the socket server
+socket.receive.buffer.bytes=102400
+
+# The maximum size of a request that the socket server will accept (protection against OOM)
+socket.request.max.bytes=104857600
+
+
+############################# Log Basics #############################
+
+# A comma seperated list of directories under which to store log files
+log.dirs=/tmp/kafka-logs
+
+# The default number of log partitions per topic. More partitions allow greater
+# parallelism for consumption, but this will also result in more files across
+# the brokers.
+num.partitions=1
+
+# The number of threads per data directory to be used for log recovery at startup and flushing at shutdown.
+# This value is recommended to be increased for installations with data dirs located in RAID array.
+num.recovery.threads.per.data.dir=1
+
+############################# Internal Topic Settings #############################
+# The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state"
+# For anything other than development testing, a value greater than 1 is recommended for to ensure availability such as 3.
+offsets.topic.replication.factor=1
+transaction.state.log.replication.factor=1
+transaction.state.log.min.isr=1
+
+############################# Log Flush Policy #############################
+
+# Messages are immediately written to the filesystem but by default we only fsync() to sync
+# the OS cache lazily. The following configurations control the flush of data to disk.
+# There are a few important trade-offs here:
+#    1. Durability: Unflushed data may be lost if you are not using replication.
+#    2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
+#    3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks.
+# The settings below allow one to configure the flush policy to flush data after a period of time or
+# every N messages (or both). This can be done globally and overridden on a per-topic basis.
+
+# The number of messages to accept before forcing a flush of data to disk
+#log.flush.interval.messages=10000
+
+# The maximum amount of time a message can sit in a log before we force a flush
+#log.flush.interval.ms=1000
+
+############################# Log Retention Policy #############################
+
+# The following configurations control the disposal of log segments. The policy can
+# be set to delete segments after a period of time, or after a given size has accumulated.
+# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens
+# from the end of the log.
+
+# The minimum age of a log file to be eligible for deletion due to age
+log.retention.hours=168
+
+# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining
+# segments don't drop below log.retention.bytes. Functions independently of log.retention.hours.
+#log.retention.bytes=1073741824
+
+# The maximum size of a log segment file. When this size is reached a new log segment will be created.
+log.segment.bytes=1073741824
+
+# The interval at which log segments are checked to see if they can be deleted according
+# to the retention policies
+log.retention.check.interval.ms=300000
+
+############################# Zookeeper #############################
+
+# Zookeeper connection string (see zookeeper docs for details).
+# This is a comma separated host:port pairs, each corresponding to a zk
+# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
+# You can also append an optional chroot string to the urls to specify the
+# root directory for all kafka znodes.
+zookeeper.connect=localhost:2181
+
+# Timeout in ms for connecting to zookeeper
+zookeeper.connection.timeout.ms=6000
+
+
+############################# Group Coordinator Settings #############################
+
+# The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance.
+# The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms.
+# The default value for this is 3 seconds.
+# We override this to 0 here as it makes for a better out-of-the-box experience for development and testing.
+# However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup.
+group.initial.rebalance.delay.ms=0
\ No newline at end of file
diff --git a/catalog/kafka/kafkacatalog.go b/catalog/kafka/kafkacatalog.go
index 8d28cee2..95f72cff 100644
--- a/catalog/kafka/kafkacatalog.go
+++ b/catalog/kafka/kafkacatalog.go
@@ -14,7 +14,7 @@ import (
 )
 
 const (
-	defaultVersion = "1.0"
+	defaultVersion = "2.1"
 
 	// ContainerImage is the main running container.
 	ContainerImage = common.ContainerNamePrefix + "kafka:" + defaultVersion
@@ -83,6 +83,10 @@ func GenDefaultCreateServiceRequest(platform string, region string, azs []string
 	// generate member ReplicaConfigs
 	replicaCfgs := genMemberConfigs(platform, cluster, service, azs, opts)
 
+	envkvs := []*common.EnvKeyValuePair{
+		&common.EnvKeyValuePair{Name: ENV_JMX_PORT, Value: jmxPort},
+	}
+
 	portMappings := []common.PortMapping{
 		{ContainerPort: ListenPort, HostPort: ListenPort, IsServicePort: true},
 		{ContainerPort: jmxPort, HostPort: jmxPort},
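Context for the #70 fix: Kafka's stock launcher only opens the remote JMX port when a JMX_PORT variable is present in the broker's environment, which is why the catalog now passes it in through the new EnvKeyValuePair. The sketch below is a paraphrase of that behaviour (simplified, not copied verbatim from the kafka-run-class.sh shipped with 2.1.0) and shows how it combines with the KAFKA_JMX_OPTS exported by docker-entrypoint.sh above:

```bash
#!/bin/bash
# Paraphrased sketch of the JMX handling in Kafka's kafka-run-class.sh
# (simplified; not verbatim from the 2.1.0 distribution).

# JMX_PORT is injected by the catalog via the ENV_JMX_PORT env pair added above.
if [ -n "$JMX_PORT" ]; then
    # The remote JMX port is appended only when JMX_PORT is set; without the
    # exported variable the broker starts with remote JMX disabled (the #70 symptom).
    KAFKA_JMX_OPTS="$KAFKA_JMX_OPTS -Dcom.sun.management.jmxremote.port=$JMX_PORT"
fi

echo "effective JMX options: $KAFKA_JMX_OPTS"
```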