本实践将在 Ubuntu 22.04.3 LTS 系统上进行安装测试
docker 版本:24.0.6
kubeadm 版本:v1.28.2
版本
背景
集群中一个主节点因硬盘故障无法恢复,进而影响了整个 k8s 集群。
最初搭建时资源有限,集群采用了双主节点的方式搭建。
集群无法启动的根本原因
双主集群中 etcd 为 2 个节点,一个主节点挂掉后,etcd 无法完成过半选举,处于不可用状态,进而影响整个集群的运行。
原创 · 2024年9月23日 · 大约 2 分钟
本实践将在 Ubuntu 22.04.3 LTS 系统上进行安装测试
docker 版本:24.0.6
kubeadm 版本:v1.28.2
版本
集群中一个主节点因硬盘故障无法恢复,进而影响了整个 k8s 集群。
最初搭建时资源有限,集群采用了双主节点的方式搭建。
双主集群中 etcd 为 2 个节点,一个主节点挂掉后,etcd 无法完成过半选举,处于不可用状态,进而影响整个集群的运行。
原
docker部署脚本
#!/usr/bin/env bash
# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#######################################
# Create and start the "milvus-standalone" container with embedded etcd.
#
# Writes the embedded-etcd client URL settings to
# /milvus/configs/embedEtcd.yaml, then launches milvusdb/milvus:v2.3.9 in
# detached mode with /milvus/data and /milvus/configs bind-mounted and
# ports 19530, 9091 and 2379 published. The container health check polls
# http://localhost:9091/healthz every 30s (90s start period, 20s timeout,
# 3 retries).
#
# Arguments: none
# Outputs:   nothing on stdout (docker's container id is discarded)
# Returns:   1 if the config directory or file cannot be written,
#            otherwise the exit status of `docker run`
#######################################
run_embed() {
  local -r config_dir=/milvus/configs
  local -r config_file="${config_dir}/embedEtcd.yaml"

  # The directory has to exist before the file is written; on a fresh host
  # it does not, and the container would otherwise start without its etcd
  # configuration.
  sudo mkdir -p "${config_dir}" || return 1

  # Written through sudo, like every other privileged step in this script,
  # so it also works when the caller cannot write below /milvus directly.
  sudo tee "${config_file}" > /dev/null << EOF || return 1
listen-client-urls: http://0.0.0.0:2379
advertise-client-urls: http://0.0.0.0:2379
EOF

  sudo docker run -d \
    --name milvus-standalone \
    --security-opt seccomp:unconfined \
    -e ETCD_USE_EMBED=true \
    -e ETCD_DATA_DIR=/var/lib/milvus/etcd \
    -e ETCD_CONFIG_PATH=/milvus/configs/embedEtcd.yaml \
    -e COMMON_STORAGETYPE=local \
    -v /milvus/data:/var/lib/milvus \
    -v /milvus/configs:/milvus/configs \
    -p 19530:19530 \
    -p 9091:9091 \
    -p 2379:2379 \
    --health-cmd="curl -f http://localhost:9091/healthz" \
    --health-interval=30s \
    --health-start-period=90s \
    --health-timeout=20s \
    --health-retries=3 \
    milvusdb/milvus:v2.3.9 \
    milvus run standalone 1> /dev/null
}
#######################################
# Block until the "milvus-standalone" container reports a healthy state.
#
# Polls the container once per second. The state is read with
# `docker container inspect` and compared exactly, so an "unhealthy"
# container is never mistaken for a healthy one (a plain substring match on
# "healthy" would accept it).
#
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0 once the container is running and healthy,
#            1 if the container is found to be no longer running
#######################################
wait_for_milvus_running() {
  local state

  echo "Wait for Milvus Starting..."
  while true; do
    # Prints e.g. "true healthy", "true starting" or "false unhealthy".
    # Inspect errors leave the value empty and simply trigger another poll.
    state=$(sudo docker container inspect \
      --format '{{.State.Running}} {{.State.Health.Status}}' \
      milvus-standalone 2> /dev/null)

    case "${state}" in
      "true healthy")
        echo "Start successfully."
        return 0
        ;;
      false*)
        # The container has exited; it will never become healthy, so stop
        # waiting instead of looping forever.
        echo "Start failed."
        return 1
        ;;
    esac

    sleep 1
  done
}
#######################################
# Start Milvus: reuse the existing "milvus-standalone" container when there
# is one, otherwise create it with run_embed, then wait until it is healthy.
#
# The health state is read with `docker container inspect` and compared
# exactly, so an "unhealthy" container is not reported as running, and
# other containers whose listing merely contains the name are ignored.
#
# Arguments: none
# Outputs:   status messages on stdout
# Returns:   exits 0 if Milvus is already running and healthy; exits 1 if
#            the container cannot be started or created; otherwise returns
#            the status of wait_for_milvus_running
#######################################
start() {
  local state
  local rc=0

  state=$(sudo docker container inspect \
    --format '{{.State.Running}} {{.State.Health.Status}}' \
    milvus-standalone 2> /dev/null)
  if [[ "${state}" == "true healthy" ]]; then
    echo "Milvus is running."
    exit 0
  fi

  # A successful inspect means the container exists (running or stopped).
  if sudo docker container inspect milvus-standalone > /dev/null 2>&1; then
    sudo docker start milvus-standalone 1> /dev/null || rc=$?
  else
    run_embed || rc=$?
  fi

  if ((rc != 0)); then
    echo "Start failed."
    exit 1
  fi

  wait_for_milvus_running
}
#######################################
# Stop the "milvus-standalone" container.
#
# Arguments: none
# Outputs:   "Stop successfully." or "Stop failed." on stdout
# Returns:   exits the script with status 1 when `docker stop` fails
#######################################
stop() {
  # docker prints the container name on success; that output is discarded.
  if ! sudo docker stop milvus-standalone > /dev/null; then
    echo "Stop failed."
    exit 1
  fi

  echo "Stop successfully."
}
delete() {
res=`sudo docker ps|grep milvus-standalone|wc -l`
if [ $res -eq 1 ]
then
echo "Please stop Milvus service before delete."
exit 1
fi
sudo docker rm milvus-standalone 1> /dev/null
if [ $? -ne 0 ]
then
echo "Delete failed."
exit 1
fi
sudo rm -rf $(pwd)/volumes
sudo rm -rf $(pwd)/embedEtcd.yaml
echo "Delete successfully."
}
# Command dispatch: the first argument selects the function of the same
# name (start, stop or delete); anything else prints the usage hint.
case "${1:-}" in
  start | stop | delete)
    "$1"
    ;;
  *)
    echo "please use bash standalone_embed.sh start|stop|delete"
    ;;
esac