Apache Kafka 是一個分散式的訊息佇列框架,由 LinkedIn 公司使用 Scala 語言開發,具備高吞吐量且容易水平擴展的特性,因此被廣泛使用。目前許多巨量資料運算框架也都有整合 Kafka,諸如:Spark、Cloudera、Apache Storm 等。
為保證集群高可用,Zookeeper 集群的節點數最好是奇數,最少有三個節點,所以這裡搭建一個三個節點的集群。
實驗機用三台
ZK01 192.168.0.204
ZK02 192.168.0.206
ZK03 192.168.0.218
----------------------------
ZK01機
zookeeper docker設定檔
mkdir zookeeper
cd zookeeper
# Save the script below as run.sh (not the hidden file .run.sh), because
# run.sh is the name that gets chmod'ed and executed afterwards.
vi run.sh
#!/bin/sh
# Recreate the ZooKeeper container on ZK01 (192.168.0.204).
# This node uses ZOOKEEPER_ID=1; the other two machines use 2 and 3.
# ZOOKEEPER_SERVER_1..3 list all three machines and are identical on each.
# The container runs with host networking, so 2181/2888/3888 are bound
# directly on the host. No -p options are given: Docker ignores published
# ports in host network mode.
docker rm -f zookeeper
docker run -d \
  --name zookeeper \
  --restart=always \
  --net=host \
  -e ZOOKEEPER_ID=1 \
  -e ZOOKEEPER_SERVER_1=192.168.0.204 \
  -e ZOOKEEPER_SERVER_2=192.168.0.206 \
  -e ZOOKEEPER_SERVER_3=192.168.0.218 \
  digitalwonderland/zookeeper
chmod +x run.sh
./run.sh
kafka docker設定檔
mkdir kafka
cd kafka
# Save the script below as run.sh (not the hidden file .run.sh), because
# run.sh is the name that gets chmod'ed and executed afterwards.
vi run.sh
#!/bin/sh
# Recreate the Kafka broker container on ZK01 (192.168.0.204).
# KAFKA_ID and KAFKA_HOST are the per-machine values: this machine is
# broker 1 at 192.168.0.204 (ZK02 is 2 / .206, ZK03 is 3 / .218).
# ZOOKEEPER_CONNECT lists all three ZooKeeper nodes and is identical on
# every machine.
# NOTE(review): KAFKA_LOG_retention_check_interval_ms=3600 is 3.6 seconds
# if the unit is milliseconds as the name suggests - confirm this is intended.
docker rm -f kafka
docker run -d \
  --name kafka \
  --restart=always \
  -p 9092:9092 \
  -e KAFKA_ID=1 \
  -e KAFKA_HOST=192.168.0.204 \
  -e KAFKA_PORT=9092 \
  -e ZOOKEEPER_CONNECT=192.168.0.204:2181,192.168.0.206:2181,192.168.0.218:2181 \
  -e KAFKA_LOG_RETENTION_BYTES=1073741824 \
  -e KAFKA_LOG_retention_hours=24 \
  -e KAFKA_LOG_segment_bytes=1048576 \
  -e KAFKA_LOG_retention_check_interval_ms=3600 \
  -e KAFKA_LOG_cleaner_enable=true \
  jeygeethan/kafka-cluster
chmod +x run.sh
執行./run.sh
#Kafka的集群狀態工具
vi ma-kafka.sh
#!/bin/sh
# Recreate the Kafdrop container (Kafka cluster status web UI).
# It connects to the ZooKeeper node on ZK01 and is published on host
# port 9010, the same port passed to the container through LISTEN.
docker rm -f ma-kafka
docker run -d \
  --name ma-kafka \
  -p 9010:9010 \
  -e ZK_HOSTS=192.168.0.204:2181 \
  -e LISTEN=9010 \
  thomsch98/kafdrop
# The script must be executable before it can be started with ./
chmod +x ma-kafka.sh
./ma-kafka.sh
mkdir kafka-manage
cd kafka-manage
vi kafka-manage.sh
#!/bin/sh
# Recreate the kafka-manager web UI container, published on host port 9000.
# ZK_HOSTS points at the ZooKeeper node on ZK01; the port is written out
# explicitly to match the other scripts.
# The image name is given exactly once, as the last argument: anything
# after the image name is passed to the container as its command.
docker rm -f kafka-manage
docker run -d \
  --name kafka-manage \
  --restart=always \
  -p 9000:9000 \
  -e ZK_HOSTS="192.168.0.204:2181" \
  -e APPLICATION_SECRET=letmein \
  sheepkiller/kafka-manager
# Run the script file itself (kafka-manage.sh), not the directory name.
chmod +x kafka-manage.sh
./kafka-manage.sh
檢查docker服務是否都正常運作

ZK02機
zookeeper docker設定檔
mkdir zookeeper
cd zookeeper
# Save the script below as run.sh (not the hidden file .run.sh), because
# run.sh is the name that gets chmod'ed and executed afterwards.
vi run.sh
#!/bin/sh
# Recreate the ZooKeeper container on ZK02 (192.168.0.206).
# This node uses ZOOKEEPER_ID=2; the other two machines use 1 and 3.
# ZOOKEEPER_SERVER_1..3 list all three machines and are identical on each.
# The container runs with host networking, so 2181/2888/3888 are bound
# directly on the host. No -p options are given: Docker ignores published
# ports in host network mode.
docker rm -f zookeeper
docker run -d \
  --name zookeeper \
  --restart=always \
  --net=host \
  -e ZOOKEEPER_ID=2 \
  -e ZOOKEEPER_SERVER_1=192.168.0.204 \
  -e ZOOKEEPER_SERVER_2=192.168.0.206 \
  -e ZOOKEEPER_SERVER_3=192.168.0.218 \
  digitalwonderland/zookeeper
chmod +x run.sh
./run.sh
kafka docker設定檔
mkdir kafka
cd kafka
# Save the script below as run.sh (not the hidden file .run.sh), because
# run.sh is the name that gets chmod'ed and executed afterwards.
vi run.sh
#!/bin/sh
# Recreate the Kafka broker container on ZK02 (192.168.0.206).
# KAFKA_ID and KAFKA_HOST are the per-machine values: this machine is
# broker 2 at 192.168.0.206.
# ZOOKEEPER_CONNECT lists all three ZooKeeper nodes and is identical on
# every machine.
# NOTE(review): KAFKA_LOG_retention_check_interval_ms=3600 is 3.6 seconds
# if the unit is milliseconds as the name suggests - confirm this is intended.
docker rm -f kafka
docker run -d \
  --name kafka \
  --restart=always \
  -p 9092:9092 \
  -e KAFKA_ID=2 \
  -e KAFKA_HOST=192.168.0.206 \
  -e KAFKA_PORT=9092 \
  -e ZOOKEEPER_CONNECT=192.168.0.204:2181,192.168.0.206:2181,192.168.0.218:2181 \
  -e KAFKA_LOG_RETENTION_BYTES=1073741824 \
  -e KAFKA_LOG_retention_hours=24 \
  -e KAFKA_LOG_segment_bytes=1048576 \
  -e KAFKA_LOG_retention_check_interval_ms=3600 \
  -e KAFKA_LOG_cleaner_enable=true \
  jeygeethan/kafka-cluster
chmod +x run.sh
執行./run.sh
ZK03機
zookeeper docker設定檔
mkdir zookeeper
cd zookeeper
# Save the script below as run.sh (not the hidden file .run.sh), because
# run.sh is the name that gets chmod'ed and executed afterwards.
vi run.sh
#!/bin/sh
# Recreate the ZooKeeper container on ZK03 (192.168.0.218).
# This node uses ZOOKEEPER_ID=3; the other two machines use 1 and 2.
# ZOOKEEPER_SERVER_1..3 list all three machines and are identical on each.
# The container runs with host networking, so 2181/2888/3888 are bound
# directly on the host. No -p options are given: Docker ignores published
# ports in host network mode.
docker rm -f zookeeper
docker run -d \
  --name zookeeper \
  --restart=always \
  --net=host \
  -e ZOOKEEPER_ID=3 \
  -e ZOOKEEPER_SERVER_1=192.168.0.204 \
  -e ZOOKEEPER_SERVER_2=192.168.0.206 \
  -e ZOOKEEPER_SERVER_3=192.168.0.218 \
  digitalwonderland/zookeeper
chmod +x run.sh
./run.sh
kafka docker設定檔
mkdir kafka
cd kafka
# Save the script below as run.sh (not the hidden file .run.sh), because
# run.sh is the name that gets chmod'ed and executed afterwards.
vi run.sh
#!/bin/sh
# Recreate the Kafka broker container on ZK03 (192.168.0.218).
# KAFKA_ID and KAFKA_HOST are the per-machine values: this machine is
# broker 3 at 192.168.0.218.
# ZOOKEEPER_CONNECT lists all three ZooKeeper nodes and is identical on
# every machine.
# NOTE(review): KAFKA_LOG_retention_check_interval_ms=3600 is 3.6 seconds
# if the unit is milliseconds as the name suggests - confirm this is intended.
docker rm -f kafka
docker run -d \
  --name kafka \
  --restart=always \
  -p 9092:9092 \
  -e KAFKA_ID=3 \
  -e KAFKA_HOST=192.168.0.218 \
  -e KAFKA_PORT=9092 \
  -e ZOOKEEPER_CONNECT=192.168.0.204:2181,192.168.0.206:2181,192.168.0.218:2181 \
  -e KAFKA_LOG_RETENTION_BYTES=1073741824 \
  -e KAFKA_LOG_retention_hours=24 \
  -e KAFKA_LOG_segment_bytes=1048576 \
  -e KAFKA_LOG_retention_check_interval_ms=3600 \
  -e KAFKA_LOG_cleaner_enable=true \
  jeygeethan/kafka-cluster
chmod +x run.sh
執行./run.sh
檢查三台的zookeeper叢集狀態
docker exec -it zookeeper /opt/zookeeper/bin/zkServer.sh status

Mode: leader # Master
Mode: follower # Slave
Mode: standalone # 單機
檢查id
docker exec -it zookeeper cat /data/myid
查看設定檔
docker exec -it zookeeper cat /zookeeper-3.4.13/conf/zoo.cfg
查看容器根目錄下的 zookeeper 安裝目錄(用來確認設定檔所在的路徑)
docker exec -it zookeeper ls -l / | grep zook
開啟在 ZK01 建立的 kafka-manager Web UI
http://192.168.0.204:9000/

建立一個叫log-topic的Topics

Kafdrop(Kafka 的集群狀態工具)的 Web UI:
http://192.168.0.204:9010/

參考資料:
http://xstarcd.github.io/wiki/Cloud/kafka_Working_Principles.html