前言
环境:centos7.9 docker-ce-20.10.9 kubernetes-version v1.23.1
本篇来讲解如何在centos下安装部署高可用k8s集群
服务器准备
#准备5台服务器,角色分配如下
192.168.6.23 master01、etcd01、keepalived+nginx(vip:192.168.6.20)
192.168.6.24 master02、etcd02、keepalived+nginx(vip:192.168.6.20)
192.168.6.25 master03、etcd03、keepalived+nginx(vip:192.168.6.20)
192.168.6.26 node01
192.168.6.27 node02
将keepalived+nginx外置
如果服务器足够,建议将keepalived+nginx单独准备两台服务器,如下:
#准备8台服务器,角色分配如下
192.168.6.21 keepalived+nginx(vip:192.168.6.20)2核 4G
192.168.6.22 keepalived+nginx(vip:192.168.6.20)2核 4G
192.168.6.23 master01、etcd01 2核 4G
192.168.6.24 master02、etcd02 2核 4G
192.168.6.25 master03、etcd03 2核 4G
192.168.6.26 node01 16核 32G
192.168.6.27 node02 16核 32G
192.168.6.28 node03 16核 32G
架构讲解
keepalived+nginx实现高可用+反向代理,这里将keepalived+nginx单独部署。
keepalived会虚拟一个vip,vip任意绑定在一台nginx节点上,使用nginx对3台master节点进行反向代理。在初始化k8s集群的时候,IP填写的是vip,这样安装好k8s集群之后,对kubectl客户端而言,访问的是vip:6443端口,该端口是nginx监听的端口,nginx会将请求反向代理到3个master节点上的6443端口。
环境初始化
#yum install ntp -y && systemctl start ntpd && systemctl enable ntpd;
mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
mv /etc/yum.repos.d/epel.repo /etc/yum.repos.d/epel.repo.backup
curl -o /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
yum install chrony -y && systemctl enable --now chronyd
yum install epel-release -y && yum install jq -y
yum install vim lsof net-tools zip unzip tree wget curl bash-completion pciutils gcc make lrzsz tcpdump bind-utils -y
sed -ri 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
setenforce 0
echo "检查是否关闭selinux:";getenforce && grep 'SELINUX=disabled' /etc/selinux/config
systemctl stop firewalld.service && systemctl disable firewalld.service
echo "检查是否关闭防火墙:";systemctl status firewalld.service | grep -E 'Active|disabled'
sed -ri 's/.*swap.*/#&/' /etc/fstab
swapoff -a
echo "检查swap是否关闭:";grep -i 'swap' /etc/fstab;free -h | grep -i 'swap'
systemctl stop NetworkManager.service && systemctl disable NetworkManager.service
echo "检查是否关闭NetworkManager:";systemctl status NetworkManager.service | grep -E 'Active|disabled'
#每台主机设置自己的主机名
hostnamectl set-hostname nginx01
hostnamectl set-hostname nginx02
hostnamectl set-hostname master01
hostnamectl set-hostname master02
hostnamectl set-hostname master03
hostnamectl set-hostname node01
hostnamectl set-hostname node02
hostnamectl set-hostname node03
#写入/etc/hosts文件
cat >> /etc/hosts <<EOF
192.168.6.21 nginx01
192.168.6.22 nginx02
192.168.6.23 master01
192.168.6.24 master02
192.168.6.25 master03
192.168.6.26 node01
192.168.6.27 node02
192.168.6.28 node03
EOF
安装keepalived软件
2台nginx节点都要安装keepalived软件:
#2台nginx节点操作
yum install keepalived -y
cp /etc/keepalived/keepalived.conf /etc/keepalived/keepalived.conf_bak
#keepalived配置文件的参数含义可以参考:https://blog.csdn.net/MssGuo/article/details/127330115
#master01节点的keepalived配置文件内容
#这里没有配置Keepalived对nginx监听端口(6443)或nginx进程挂掉的检测,有需要自行添加即可
[root@master01 ~]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
vrrp_skip_check_adv_addr
vrrp_strict
vrrp_version 3
vrrp_garp_interval 0
vrrp_gna_interval 0
}
vrrp_instance VI_1 {
state MASTER
accept
interface ens192
virtual_router_id 51
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
192.168.6.20
}
}
#master02节点的keepalived配置文件内容
[root@master02 ~]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
vrrp_skip_check_adv_addr
vrrp_strict
vrrp_garp_interval 0
vrrp_gna_interval 0
}
vrrp_instance VI_1 {
state BACKUP
interface ens192
virtual_router_id 51
priority 60
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
192.168.6.20
}
}
#依次启动keepalived
systemctl start keepalived.service && systemctl enable keepalived.service
systemctl status keepalived.service
#查看vip,发现vip现在是在master01上,master02和master03均没有vip
ip a | grep '192.168.6.20'
#检测vip是否会漂移,关闭master01节点的keepalived
systemctl stop keepalived.service
#这时发现vip漂移到了master02上,master01和master03均没有vip
#重启keepalived服务之后vip又回到了master01节点,因为默认配置的是vip抢占模式,符合设计逻辑
# Keepalived 配置完成后 VIP ping 不通的解决方案 https://blog.51cto.com/sparkgo/6127764
tcpdump -n -i ens192 icmp
tcpdump -n -i ens32 icmp
安装nginx软件
在3台master节点上安装nginx软件:
#nginx需要用到pcre库,pcre库全称是Perl Compatible Regular Expressions,翻译为Perl兼容正则表达式,
#用于让Nginx支持URL重写(rewrite模块);若不安装pcre库,则Nginx无法使用rewrite模块。
#安装nginx的依赖
yum -y install gcc gcc-c++ make pcre pcre-devel zlib-devel zlib openssl-devel openssl
#参照官网安装nginx,官网地址:http://nginx.org/en/linux_packages.html#RHEL
yum install yum-utils
cat >/etc/yum.repos.d/nginx.repo<<'EOF'
[nginx-stable]
name=nginx stable repo
baseurl=http://nginx.org/packages/centos/$releasever/$basearch/
gpgcheck=1
enabled=1
gpgkey=https://nginx.org/keys/nginx_signing.key
module_hotfixes=true
[nginx-mainline]
name=nginx mainline repo
baseurl=http://nginx.org/packages/mainline/centos/$releasever/$basearch/
gpgcheck=1
enabled=0
gpgkey=https://nginx.org/keys/nginx_signing.key
module_hotfixes=true
EOF
yum-config-manager --enable nginx-mainline
yum install nginx -y
#注意:nginx配置为4层(四层)反向代理,配置7层反向代理的话好像在协议方面存在问题,暂未解决,配置4层就没有问题
#直接修改主配置文件,添加下面的这段stream内容
[root@master01 nginx]# cat /etc/nginx/nginx.conf
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log notice;
pid /var/run/nginx.pid;
events {
worker_connections 1024;
}
#添加了stream 这一段,其他的保持默认即可
stream {
log_format main '$remote_addr $upstream_addr - [$time_local] $status $upstream_bytes_sent';
access_log /var/log/nginx/k8s-access.log main;
upstream k8s-apiserver {
server 192.168.6.23:6443; #master01的IP和6443端口
server 192.168.6.24:6443; #master02的IP和6443端口
server 192.168.6.25:6443; #master03的IP和6443端口
}
server {
listen 6443; #监听的是6443端口;注意:如果nginx和master复用同一台机器,则不能监听6443端口(会与kube-apiserver冲突),需改用其他端口
proxy_pass k8s-apiserver; #使用proxy_pass模块进行反向代理
}
}
#http模块保持默认即可
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
sendfile on;
#tcp_nopush on;
keepalive_timeout 65;
#gzip on;
include /etc/nginx/conf.d/*.conf;
}
[root@master01 nginx]#
systemctl enable --now nginx
systemctl status nginx
netstat -lntup| grep 6443
#将nginx配置文件发送到其他需要运行nginx的节点
scp /etc/nginx/nginx.conf root@nginx01:/etc/nginx/
scp /etc/nginx/nginx.conf root@master03:/etc/nginx/
#同样启动master02、master03上的nginx
systemctl enable --now nginx
systemctl status nginx
netstat -lntup| grep 6443
初始化k8s节点
#master节点和node节点都要配置
touch /etc/sysctl.d/k8s.conf
cat >> /etc/sysctl.d/k8s.conf <<EOF
net.bridge.bridge-nf-call-ip6tables=1
net.bridge.bridge-nf-call-iptables=1
net.ipv4.ip_forward=1
vm.swappiness=0
EOF
sysctl --system
#配置k8s的yum源,master节点和node节点都要配置
cat >/etc/yum.repos.d/kubernetes.repo <<'EOF'
[kubernetes]
name = Kubernetes
baseurl = https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled = 1
gpgcheck = 0
repo_gpgcheck = 0
gpgkey = https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
安装docker
每台k8s节点都要安装docker:
#在所有k8s节点上,包含master节点和node节点上都要安装docker
yum remove docker \
docker-client \
docker-client-latest \
docker-common \
docker-latest \
docker-latest-logrotate \
docker-logrotate \
docker-engine \
docker-ce
yum install -y yum-utils
yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
yum list docker-ce --showduplicates | sort -r
#yum -y install docker-ce docker-ce-cli containerd.io
#安装docker-ce-20.10而不是安装最新的docker版本,因为k8s 1.23.1不一定支持最新的docker版本
yum -y install docker-ce-20.10.9 docker-ce-cli-20.10.9 containerd.io
sudo mkdir -p /etc/docker
sudo tee /etc/docker/daemon.json <<-'EOF'
{
"registry-mirrors": [
"https://docker.m.daocloud.io",
"https://huecker.io",
"https://dockerhub.timeweb.cloud",
"https://noohub.ru"
],
"exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
#注意,上面这两句是添加镜像加速器地址和修改docker的cgroupdriver为systemd,镜像加速器可以去阿里云获取
#每个人的阿里云账号里面的镜像加速器都是不同的,不要使用我这个,当然也可以不配置镜像加速器
systemctl enable --now docker
systemctl status docker
#检查加速器配置和cgroup是否配置成功
docker info |grep 'Cgroup Driver' ;docker info | grep -A 1 'Registry Mirrors'
#master节点和node节点都安装kubeadm、kubelet、kubectl
yum list --showduplicates | grep kubeadm
#正常情况下kubectl只是master节点安装,但是这里因为如果不安装kubectl的话yum会默认作为依赖安装,而安装的版本可能不是1.23.1
#所以干脆所有节点都安装了
yum -y install kubelet-1.23.1 kubeadm-1.23.1 kubectl-1.23.1
systemctl enable kubelet
初始化master01节点的控制平面
#仅在master01节点执行初始化
#注意
#apiserver-advertise-address设置master01本机的ip地址
#apiserver-bind-port是api-server的6443端口,默认也是6443端口
#control-plane-endpoint设置为vip+nginx的端口
#可以使用kubeadm init --help查看一下命令帮助
#模拟执行,这里加了--dry-run只是模拟执行看看有没有报错,并未真正安装
kubeadm init \
--apiserver-advertise-address=192.168.6.23 \
--apiserver-bind-port=6443 \
--control-plane-endpoint=192.168.6.20:6443 \
--image-repository registry.aliyuncs.com/google_containers \
--kubernetes-version v1.23.1 \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16 --dry-run
#如果输出没有报错,去掉--dry-run参数,开始真正执行:
kubeadm init \
--apiserver-advertise-address=192.168.6.23 \
--apiserver-bind-port=6443 \
--control-plane-endpoint=192.168.6.20:6443 \
--image-repository registry.aliyuncs.com/google_containers \
--kubernetes-version v1.23.1 \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16
#这时再开一个终端执行docker images就可以看到拉取了很多k8s的镜像
#如果报错了,需要排查错误,然后清空环境
kubeadm reset
rm -rf /etc/cni
iptables -F
yum install ipvsadm -y
ipvsadm --clear
rm -rf $HOME/.kube/config
#然后重新执行kubeadm init命令初始化即可
curl -k https://192.168.6.23:6443/healthz
curl -k https://192.168.6.20:6443/healthz
#最终初始化成功后会输出以下信息
.......................................
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:
kubeadm join 192.168.6.20:6443 --token hx6bp9.ygm4vsgbfp6lc1us \
--discovery-token-ca-cert-hash sha256:107bc95e7deef1b0f2c4544ada5540f7501633ddbc335593fc25b43ccef8b980 \
--control-plane
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.6.20:6443 --token hx6bp9.ygm4vsgbfp6lc1us \
--discovery-token-ca-cert-hash sha256:107bc95e7deef1b0f2c4544ada5540f7501633ddbc335593fc25b43ccef8b980
#按照上面的信息提示,执行对应的步骤即可
#上面初始化完成master01节点之后会提示你在master节点或node节点执行对应的命令来将master节点或node节点加入k8s集群
#注意:这段kubeadm join命令的token有效期只有24h,过期后需要执行kubeadm token create --print-join-command 重新生成token,但是
#要注意,重新生成的加入集群命令默认是node节点角色加入的,如果新节点是作为master角色加入集群,需要在打印出来的命令后面添加--control-plane 参数再执行。
master02、master03节点加入集群
#首先需要在master02和master03上下载镜像
#可以在master01上看看需要下载哪些镜像
[root@master01 ~]# docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
registry.aliyuncs.com/google_containers/kube-apiserver v1.23.1 b6d7abedde39 2 years ago 135MB
registry.aliyuncs.com/google_containers/kube-proxy v1.23.1 b46c42588d51 2 years ago 112MB
registry.aliyuncs.com/google_containers/kube-controller-manager v1.23.1 f51846a4fd28 2 years ago 125MB
registry.aliyuncs.com/google_containers/kube-scheduler v1.23.1 71d575efe628 2 years ago 53.5MB
registry.aliyuncs.com/google_containers/etcd 3.5.1-0 25f8c7f3da61 2 years ago 293MB
registry.aliyuncs.com/google_containers/coredns v1.8.6 a4ca41631cc7 2 years ago 46.8MB
registry.aliyuncs.com/google_containers/pause 3.6 6270bb605e12 2 years ago 683kB
[root@master01 ~]#
#然后去master02和master03上下载这些镜像即可
docker pull registry.aliyuncs.com/google_containers/kube-apiserver:v1.23.1
docker pull registry.aliyuncs.com/google_containers/kube-controller-manager:v1.23.1
docker pull registry.aliyuncs.com/google_containers/kube-scheduler:v1.23.1
docker pull registry.aliyuncs.com/google_containers/kube-proxy:v1.23.1
docker pull registry.aliyuncs.com/google_containers/etcd:3.5.1-0
docker pull registry.aliyuncs.com/google_containers/coredns:v1.8.6
docker pull registry.aliyuncs.com/google_containers/pause:3.6
#master02、master03节点上创建目录
mkdir /etc/kubernetes/pki/etcd -p
# 在master01节点上,将master01节点上的证书拷贝到master02、master03节点上
scp -rp /etc/kubernetes/pki/ca.* master02:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/sa.* master02:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/front-proxy-ca.* master02:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/etcd/ca.* master02:/etc/kubernetes/pki/etcd/
scp -rp /etc/kubernetes/admin.conf master02:/etc/kubernetes/
scp -rp /etc/kubernetes/pki/ca.* master03:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/sa.* master03:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/front-proxy-ca.* master03:/etc/kubernetes/pki/
scp -rp /etc/kubernetes/pki/etcd/ca.* master03:/etc/kubernetes/pki/etcd/
scp -rp /etc/kubernetes/admin.conf master03:/etc/kubernetes/
#根据上面初始化成功的信息提示,复制粘贴命令到master02、master03节点执行即可
kubeadm join 192.168.6.20:6443 --token hx6bp9.ygm4vsgbfp6lc1us \
--discovery-token-ca-cert-hash sha256:107bc95e7deef1b0f2c4544ada5540f7501633ddbc335593fc25b43ccef8b980 \
--control-plane
#执行成功如下,按照提示操作即可
[mark-control-plane] Marking the node master02 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
[root@master02 pki]# mkdir -p $HOME/.kube
[root@master02 pki]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master02 pki]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
node01、node02节点加入集群
#node节点直接执行命令即可,不需要做什么配置
#在node01、node02节点执行下面命令
kubeadm join 192.168.6.20:6443 --token hx6bp9.ygm4vsgbfp6lc1us \
--discovery-token-ca-cert-hash sha256:107bc95e7deef1b0f2c4544ada5540f7501633ddbc335593fc25b43ccef8b980
检查集群
以上,就创建了3个master节点+2个node节点的k8s集群,在任意一个master节点检查集群:
[root@master01 ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
master01 NotReady control-plane,master 50m v1.23.1
master02 NotReady control-plane,master 6m58s v1.23.1
master03 NotReady control-plane,master 6m10s v1.23.1
node01 NotReady <none> 39s v1.23.1
node02 NotReady <none> 12s v1.23.1
[root@master01 ~]#
[root@master01 ~]# kubectl config view
apiVersion: v1
clusters:
- cluster:
certificate-authority-data: DATA+OMITTED
server: https://192.168.6.20:6443 #可以看到,监听在vip和6443端口上
name: kubernetes
contexts:
- context:
cluster: kubernetes
user: kubernetes-admin
name: kubernetes-admin@kubernetes
current-context: kubernetes-admin@kubernetes
kind: Config
preferences: {}
users:
- name: kubernetes-admin
user:
client-certificate-data: DATA+OMITTED
client-key-data: DATA+OMITTED
#安装flannel网络
wget https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml
kubectl apply -f kube-flannel.yml
[root@master01 nginx]# kubectl get pod -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-flannel kube-flannel-ds-6tzzk 1/1 Running 0 5m43s
kube-flannel kube-flannel-ds-8n6nc 1/1 Running 0 5m43s
kube-flannel kube-flannel-ds-8rtgx 1/1 Running 0 5m43s
kube-flannel kube-flannel-ds-bwwrv 1/1 Running 0 5m43s
kube-flannel kube-flannel-ds-nmbzq 1/1 Running 0 5m43s
kube-system coredns-7f6cbbb7b8-mf22c 1/1 Running 0 60m
kube-system coredns-7f6cbbb7b8-n2w94 1/1 Running 0 60m
kube-system etcd-master01 1/1 Running 4 60m
kube-system etcd-master02 1/1 Running 0 17m
kube-system etcd-master03 1/1 Running 0 16m
kube-system kube-apiserver-master01 1/1 Running 4 60m
kube-system kube-apiserver-master02 1/1 Running 0 17m
kube-system kube-apiserver-master03 1/1 Running 1 (16m ago) 16m
kube-system kube-controller-manager-master01 1/1 Running 5 (17m ago) 60m
kube-system kube-controller-manager-master02 1/1 Running 0 17m
kube-system kube-controller-manager-master03 1/1 Running 0 15m
kube-system kube-proxy-6lzs9 1/1 Running 0 11m
kube-system kube-proxy-9tljk 1/1 Running 0 17m
kube-system kube-proxy-jzq49 1/1 Running 0 60m
kube-system kube-proxy-mk5w8 1/1 Running 0 10m
kube-system kube-proxy-rhmnv 1/1 Running 0 16m
kube-system kube-scheduler-master01 1/1 Running 5 (17m ago) 60m
kube-system kube-scheduler-master02 1/1 Running 0 17m
kube-system kube-scheduler-master03 1/1 Running 0 16m
[root@master01 nginx]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 Ready control-plane,master 61m v1.23.1
master02 Ready control-plane,master 17m v1.23.1
master03 Ready control-plane,master 16m v1.23.1
node01 Ready <none> 11m v1.23.1
node02 Ready <none> 11m v1.23.1
[root@master01 nginx]#
配置docker和kubectl命令补全
#每个节点都配置docker命令自动补全功能
yum install bash-completion -y
curl -L https://raw.githubusercontent.com/docker/compose/1.24.1/contrib/completion/bash/docker-compose -o /etc/bash_completion.d/docker-compose
source /etc/bash_completion.d/docker-compose
#master节点配置kubectl命令补全功能
yum install -y bash-completion
echo 'source /usr/share/bash-completion/bash_completion' >>/root/.bashrc
echo 'source <(kubectl completion bash)' >>/root/.bashrc
source /root/.bashrc
验证master节点高可用
#把master01节点关机测试
#发现关掉任意一台master节点后,在其余master节点上执行kubectl get nodes时而成功、时而失败,原因未知:不知是否与etcd有关(因为etcd都是安装在master节点上),也有可能是nginx仍然把请求转发给已关机的master节点,导致无法响应。