#!/bin/bash
#
# ZooKeeper container startup script for a Kubernetes deployment.
#
# Environment:
#   ZOO_SERVICE_NAME  name of the Kubernetes headless Service
#   ZOO_COUNT         number of replicas, i.e. the number of ZooKeeper nodes

# Abort on the first unhandled command failure.
set -e
# Allow the container to be started with `--user`.
#
# When this script is invoked as root (uid 0) with zkServer.sh as its first
# argument, it prepares the data directories, hands them to the zookeeper
# user, and then re-executes itself as that user through gosu.
if [[ "$1" == 'zkServer.sh' && "$(id -u)" == '0' ]]; then
  # Directories are only created for a non-default data location.
  # NOTE(review): presumably /data is already provided by the image — confirm.
  if [[ "$ZOO_DATA_DIR" != "/data" ]]; then
    mkdir -p -- "$ZOO_DATA_DIR"
    mkdir -p -- "$ZOO_DATA_LOG_DIR"
  fi
  chown -R zookeeper "$ZOO_DATA_DIR" "$ZOO_DATA_LOG_DIR" "$ZOO_LOG_DIR"
  # Replace this process with the same script and arguments, run as zookeeper.
  exec gosu zookeeper "$0" "$@"
fi
# Generate the config only if it doesn't exist.
#
# Reads:  ZOO_CONF_DIR, ZOO_DATA_DIR, ZOO_DATA_LOG_DIR, ZOO_COUNT, HOSTNAME,
#         and optionally ZOO_SERVICE_NAME, ZOO_ELECTION_PORT_BIND_RETRY,
#         ZOO_4LW_COMMANDS_WHITELIST, ZOO_CFG_EXTRA.
# Writes: $ZOO_CONF_DIR/zoo.cfg (appended), and sets ZOO_SERVICE_NAME when it
#         was empty.
# Exits with status 1 when ZOO_COUNT is not a positive number.
if [[ ! -f "$ZOO_CONF_DIR/zoo.cfg" ]]; then
  CONFIG="$ZOO_CONF_DIR/zoo.cfg"

  # Validate the node count before anything is written, so that a failure
  # does not leave a partial zoo.cfg behind that a later run would accept
  # as already generated.
  if ! [[ "$ZOO_COUNT" -gt 0 ]]; then
    echo "ZOO_COUNT not exist." >&2
    exit 1
  fi

  # Pod hostnames are expected to look like <name>-<ordinal>. Strip only the
  # trailing ordinal so a <name> that itself contains hyphens stays intact.
  name=${HOSTNAME%-*}
  if [[ -z "$ZOO_SERVICE_NAME" ]]; then
    # Fall back to the pod name prefix as the service name.
    ZOO_SERVICE_NAME=$name
  fi

  {
    echo "dataDir=$ZOO_DATA_DIR"
    echo "dataLogDir=$ZOO_DATA_LOG_DIR"
    # NOTE(review): this excerpt looks like it omits further settings at this
    # point (the original group was never closed) — confirm against the full
    # script before relying on the generated file.

    if [[ -n "$ZOO_ELECTION_PORT_BIND_RETRY" ]]; then
      echo "electionPortBindRetry=$ZOO_ELECTION_PORT_BIND_RETRY"
    fi

    if [[ -n "$ZOO_4LW_COMMANDS_WHITELIST" ]]; then
      echo "4lw.commands.whitelist=$ZOO_4LW_COMMANDS_WHITELIST"
    fi

    # ZOO_CFG_EXTRA is a whitespace-separated list of key=value entries, so
    # the expansion is deliberately left unquoted to split it into words.
    # shellcheck disable=SC2086
    for cfg_extra_entry in $ZOO_CFG_EXTRA; do
      echo "$cfg_extra_entry"
    done

    # Derive the ensemble members from the node count: server ids run from
    # 1 to N while the matching pod ordinals run from 0 to N-1.
    for ((id = 1; id <= ZOO_COUNT; id++)); do
      podid=$((id - 1))
      server="server.${id}=${name}-${podid}.${ZOO_SERVICE_NAME}:2888:3888;2181"
      echo "$server"
    done
  } >> "$CONFIG"
fi
# Write myid only if it doesn't exist.
#
# myid = pod ordinal + 1, where the ordinal is the text after the last hyphen
# of HOSTNAME. This matches the server.<id> numbering, which starts at 1 while
# pod ordinals start at 0.
PODID=${HOSTNAME##*-}
if [[ "$PODID" =~ ^[0-9]+$ ]]; then
  # Force base 10 so an ordinal with a leading zero is not parsed as octal.
  ZOO_MY_ID=$((10#$PODID + 1))
else
  # The hostname carries no numeric ordinal: keep a preset ZOO_MY_ID if there
  # is one, otherwise default to 1.
  ZOO_MY_ID=${ZOO_MY_ID:-1}
fi

if [[ ! -f "$ZOO_DATA_DIR/myid" ]]; then
  echo "$ZOO_MY_ID" > "$ZOO_DATA_DIR/myid"
fi

# NOTE(review): nothing visible in this excerpt launches "$@" after the setup
# above; a script of this shape normally finishes with `exec "$@"` — confirm
# that the final line was not lost.
NAME   READY   STATUS        RESTARTS   AGE
zk-0   1/1     Running       0          39m
zk-1   1/1     Running       0          39m
zk-2   1/1     Running       0          40m
zk-3   0/1     Terminating   0          41m
依次重启剩下的节点
1 2 3 4 5 6 7 8 9 10 11
ubuntu@k8s-dev-m1:~/k8s/zkcluster$ kubectl get po
NAME   READY   STATUS              RESTARTS   AGE
zk-0   1/1     Running             0          39m
zk-1   1/1     Running             0          40m
zk-2   0/1     ContainerCreating   0          7s
ubuntu@k8s-dev-m1:~/k8s/zkcluster$ kubectl get po
NAME   READY   STATUS    RESTARTS   AGE
zk-0   1/1     Running   0          79s
zk-1   1/1     Running   0          2m2s
zk-2   1/1     Running   0          2m39s
模拟主机故障
使用 cordon 命令冻结主机,然后 delete pod,观察集群状态。
原始状态:
1 2 3 4 5
ubuntu@k8s-dev-m1:~$ kubectl get po -o wide
NAME   READY   STATUS    RESTARTS   AGE     IP             NODE            NOMINATED NODE   READINESS GATES
zk-0   1/1     Running   0          3h17m   172.31.0.57    k8s-dev-node4   <none>           <none>
zk-1   1/1     Running   0          3h18m   172.31.0.184   k8s-dev-node6   <none>           <none>
zk-2   1/1     Running   0          3h18m   172.31.0.43    k8s-dev-node4   <none>           <none>
模拟node6故障下线后,zk-1顺利漂移到了其他node
1 2 3 4
NAME   READY   STATUS    RESTARTS   AGE     IP             NODE            NOMINATED NODE   READINESS GATES
zk-0   1/1     Running   0          4h41m   172.31.0.57    k8s-dev-node4   <none>           <none>
zk-1   1/1     Running   0          65s     172.31.0.113   k8s-dev-node3   <none>           <none>
zk-2   1/1     Running   0          4h42m   172.31.0.43    k8s-dev-node4   <none>           <none>