
K8s Deployment and Installation

Virtual machine environment preparation

Host preparation

VM hostname      Role
k8s-master01     master
k8s-node01       node
k8s-node02       node

Set the hostname

#Set the hostname
hostnamectl set-hostname <new-hostname>
#Reboot for the change to take effect
reboot
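For example, using the hostnames from the table above, the concrete commands are:

#On the master
hostnamectl set-hostname k8s-master01
#On the first worker node
hostnamectl set-hostname k8s-node01
#On the second worker node
hostnamectl set-hostname k8s-node02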

Configure the IP address

Open /etc/sysconfig/network-scripts/ifcfg-ens33 with vim and add the host IP and the gateway IP:

TYPE=Ethernet
PROXY_METHOD=none
BROWSER_ONLY=no
BOOTPROTO=static
DEFROUTE=yes
IPV4_FAILURE_FATAL=no
IPV6INIT=yes
IPV6_AUTOCONF=yes
IPV6_DEFROUTE=yes
IPV6_FAILURE_FATAL=no
IPV6_ADDR_GEN_MODE=stable-privacy
NAME=ens33
UUID=eb71b764-a576-4cd4-8513-975a5cb1c5c5
DEVICE=ens33
ONBOOT=yes
IPV6_PRIVACY=no
IPADDR="192.168.1.19"
GATEWAY="192.168.1.1"
DNS1=8.8.8.8
DNS2=4.2.2.2

Restart the network service: service network restart

Disable the firewall

#Disable the firewall from starting at boot
systemctl disable firewalld
#Stop the firewall
systemctl stop firewalld
#Check the firewall state; the output should now be "not running"
firewall-cmd --state

SELinux configuration

Set the SELINUX parameter in /etc/selinux/config to disabled:

sed -ri 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config

(screenshot)

Host time synchronization

yum install ntpdate
ntpdate ntp1.aliyun.com
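A one-off ntpdate only syncs the clock once. Optionally, a cron entry keeps the hosts in sync over time; a minimal sketch (the /usr/sbin/ntpdate path is the usual CentOS 7 location):

#Sync against the Aliyun NTP server every 30 minutes
(crontab -l 2>/dev/null; echo "*/30 * * * * /usr/sbin/ntpdate ntp1.aliyun.com") | crontab -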

Upgrade the operating system kernel

#1. Import the elrepo GPG key
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org

#2. Install the elrepo YUM repository
yum -y install https://www.elrepo.org/elrepo-release-7.0-4.el7.elrepo.noarch.rpm

#3. Install the kernel-ml package; ml is the mainline (latest) kernel, lt is the long-term support kernel
yum --enablerepo="elrepo-kernel" -y install kernel-ml.x86_64

#4. Set the default grub2 boot entry to 0
grub2-set-default 0

#5. Regenerate the grub2 boot config file
grub2-mkconfig -o /boot/grub2/grub.cfg

#6. Reboot so the upgraded kernel takes effect
reboot

#After the reboot, verify that the running kernel is the upgraded version
uname -r

Kernel version before and after the upgrade:

(screenshot)

Configure kernel forwarding and bridge filtering

#1. Create the bridge-filter and kernel-forwarding config file
vim /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
vm.swappiness = 0

#2. Load the br_netfilter module
modprobe br_netfilter

#3. Check that it is loaded
lsmod | grep br_netfilter

#4. Apply the bridge-filter and kernel-forwarding settings
sysctl -p /etc/sysctl.d/k8s.conf
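To confirm the settings are active, they can be read back after loading:

#All three sysctls should print 1, and vm.swappiness should print 0
sysctl net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables net.ipv4.ip_forward vm.swappiness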

Install ipset and ipvsadm

These are mainly used to implement Service forwarding.

#1. Install ipset and ipvsadm
yum -y install ipset ipvsadm

#2. Configure how the ipvs modules are loaded
#Add the modules that need to be loaded
cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack
EOF

#3. Make the script executable, run it, and check that the modules are loaded
chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack

Disable the swap partition

#Permanently disabling the swap partition requires a reboot
vim /etc/fstab

#Comment out the line below
#/dev/mapper/centos-swap swap swap defaults 0 0

After editing the file, reboot the operating system for the change to take effect. If you do not reboot, swap can be disabled temporarily with swapoff -a; see the sketch below.
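A minimal sketch combining the temporary and permanent steps (the sed pattern assumes the swap entry in /etc/fstab contains the word swap, as in the example above):

#Turn swap off right away
swapoff -a
#Comment out the swap entry so it stays off after a reboot
sed -ri 's/.*swap.*/#&/' /etc/fstab
#Verify: the Swap line should show 0 total
free -m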

Docker preparation

Get the YUM repo

wget https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo -O /etc/yum.repos.d/docker-ce.repo

List the installable versions

yum list docker-ce.x86_64 --showduplicates | sort -r

Install a specific version, start it, and enable it at boot

yum -y install --setopt=obsoletes=0 docker-ce-20.10.9-3.el7
systemctl enable docker
systemctl start docker

Change the cgroup driver

#Add the following to /etc/docker/daemon.json

vim /etc/docker/daemon.json
{
  "exec-opts": ["native.cgroupdriver=systemd"]
}

#Restart docker
systemctl restart docker
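After the restart, you can confirm that Docker picked up the systemd cgroup driver:

#Should print: Cgroup Driver: systemd
docker info | grep -i "cgroup driver"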

Kubernetes installation

Add the K8s YUM repo

#Pick one of the two: add either the Google YUM repo or the Aliyun YUM repo to the file
vim /etc/yum.repos.d/k8s.repo

#Google YUM repo
[kubernetes]
name=Kubernetes
baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg

#Aliyun YUM repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg

Check the repo with yum repolist. In this setup it failed to load with the signature error shown below; the workaround is to change repo_gpgcheck to 0 in /etc/yum.repos.d/k8s.repo so the repository signature check is skipped (a one-liner for this follows the error output).

Loaded plugins: fastestmirror, langpacks
kubernetes/signature | 844 B 00:00:00
Retrieving key from https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg
Retrieving key from https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
kubernetes/signature | 1.4 kB 00:00:00 !!!
https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/repodata/repomd.xml: [Errno -1] repomd.xml signature could not be verified for kubernetes
Trying other mirror.

One of the configured repositories failed (Kubernetes),
and yum doesn't have enough cached data to continue. At this point the only
safe thing yum can do is fail. There are a few ways to work "fix" this:
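A one-liner that applies the workaround described above by flipping repo_gpgcheck in the repo file created earlier:

#Skip repository metadata signature verification for the kubernetes repo
sed -ri 's/repo_gpgcheck=1/repo_gpgcheck=0/' /etc/yum.repos.d/k8s.repo
#Refresh the cache and check again
yum clean all && yum repolist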

After applying the workaround, yum repolist loads the repo normally.

(screenshot)

Cluster installation

#List the available versions
yum list kubeadm.x86_64 --showduplicates | sort -r
yum list kubelet.x86_64 --showduplicates | sort -r
yum list kubectl.x86_64 --showduplicates | sort -r

#Install the specified versions of kubeadm, kubelet, and kubectl
yum -y install --setopt=obsoletes=0 kubeadm-1.21.0-0 kubelet-1.21.0-0 kubectl-1.21.0-0

Installed:
  kubeadm.x86_64 0:1.21.0-0    kubectl.x86_64 0:1.21.0-0    kubelet.x86_64 0:1.21.0-0

Installed as dependencies:
  conntrack-tools.x86_64 0:1.4.4-7.el7        cri-tools.x86_64 0:1.24.2-0
  kubernetes-cni.x86_64 0:0.8.7-0             libnetfilter_cthelper.x86_64 0:1.0.0-11.el7
  libnetfilter_cttimeout.x86_64 0:1.0.0-7.el7 libnetfilter_queue.x86_64 0:1.0.2-2.el7_2
  socat.x86_64 0:1.7.3.2-2.el7

Complete!

Configure kubelet

#To keep the cgroup driver used by kubelet consistent with the cgroupdriver used by docker, it is recommended to edit the following file.
vim /etc/sysconfig/kubelet
KUBELET_EXTRA_ARGS="--cgroup-driver=systemd"

#Only enable kubelet at boot; since no config file has been generated yet, it will start automatically after cluster initialization
systemctl enable kubelet

Prepare the cluster images

#List the required image versions; --kubernetes-version=v1.21.0 queries a specific version
kubeadm config images list --kubernetes-version=v1.21.0
k8s.gcr.io/kube-apiserver:v1.21.0
k8s.gcr.io/kube-controller-manager:v1.21.0
k8s.gcr.io/kube-scheduler:v1.21.0
k8s.gcr.io/kube-proxy:v1.21.0
k8s.gcr.io/pause:3.4.1
k8s.gcr.io/etcd:3.4.13-0
k8s.gcr.io/coredns/coredns:v1.8.0

#Create an image download script; it pulls the images and saves them to a tar archive. Inside mainland China the pulls may fail, so use a VPN or replace k8s.gcr.io with mirrorgcrio when pulling
#Alternative registry swap: k8s.gcr.io => registry.cn-hangzhou.aliyuncs.com/google_containers
vim image_download.sh
#!/bin/bash
images_list='
k8s.gcr.io/kube-apiserver:v1.21.0
k8s.gcr.io/kube-controller-manager:v1.21.0
k8s.gcr.io/kube-scheduler:v1.21.0
k8s.gcr.io/kube-proxy:v1.21.0
k8s.gcr.io/pause:3.4.1
k8s.gcr.io/etcd:3.4.13-0
k8s.gcr.io/coredns/coredns:v1.8.0'

for i in $images_list
do
  docker pull $i
done

docker save -o k8s-1-21-0.tar $images_list

#Run the script
sh image_download.sh
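The resulting k8s-1-21-0.tar can be copied to the worker nodes and loaded there so they do not need to pull from the Internet themselves. A minimal sketch, assuming the node hostnames (or their IPs) are reachable over SSH:

#Copy the image bundle from the master to a worker node and load it into its local Docker
scp k8s-1-21-0.tar root@k8s-node01:/root/
ssh root@k8s-node01 "docker load -i /root/k8s-1-21-0.tar"
#Repeat for k8s-node02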

Working around the k8s.gcr.io image pull problem

docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.21.0
docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.21.0
docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.21.0
docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.21.0
docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.4.1
docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.4.13-0
docker pull registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:v1.8.0

#Remember to re-tag the images to the names kubeadm expects (see the image list above); otherwise kubeadm init will try to pull them again
docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.21.0 k8s.gcr.io/kube-apiserver:v1.21.0
docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.21.0 k8s.gcr.io/kube-controller-manager:v1.21.0
docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.21.0 k8s.gcr.io/kube-scheduler:v1.21.0
docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.21.0 k8s.gcr.io/kube-proxy:v1.21.0
docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.4.1 k8s.gcr.io/pause:3.4.1
docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.4.13-0 k8s.gcr.io/etcd:3.4.13-0
docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:v1.8.0 k8s.gcr.io/coredns/coredns:v1.8.0

Cluster initialization

#Run the initialization command on the k8s-master01 host
#The host that runs the init command becomes the master
kubeadm init --kubernetes-version=v1.21.0 --pod-network-cidr=10.244.0.0/16 --apiserver-advertise-address=192.168.1.19

Remember to save the output:

[root@k8s-master01 ~]# kubeadm init --kubernetes-version=v1.21.0 --pod-network-cidr=10.244.0.0/16 --apiserver-advertise-address=192.168.1.19
[init] Using Kubernetes version: v1.21.0
[preflight] Running pre-flight checks
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [k8s-master01 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.1.19]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [k8s-master01 localhost] and IPs [192.168.1.19 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [k8s-master01 localhost] and IPs [192.168.1.19 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[kubelet-check] Initial timeout of 40s passed.
[apiclient] All control plane components are healthy after 62.001527 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config-1.21" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Skipping phase. Please see --upload-certs
[mark-control-plane] Marking the node k8s-master01 as control-plane by adding the labels: [node-role.kubernetes.io/master(deprecated) node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node k8s-master01 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
[bootstrap-token] Using token: 37exic.msprw2ejmhr9sgnm
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.1.19:6443 --token 37exic.msprw2ejmhr9sgnm \
--discovery-token-ca-cert-hash sha256:55753d0edf239649301c3463300e1e5777340926b0c1f1bbc1cf86034bac4c4e

Prepare the kubeconfig file for managing the cluster with kubectl

The following instructions come from the console output of the init command:

#To start using the cluster
#As a regular user, run the following commands
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

#Or, if you are the root user, you can run
export KUBECONFIG=/etc/kubernetes/admin.conf

After running the commands above, the kubectl command can be used.

(screenshot)

Because no pod network add-on is installed yet, the coredns pods are stuck in Pending state.

Prepare the cluster network

https://projectcalico.docs.tigera.io/about/about-calico

Calico provides the basic network infrastructure for the k8s cluster.

(screenshot)

Install Calico

1. Install the Tigera Calico operator and custom resource definitions.

kubectl create -f https://docs.projectcalico.org/manifests/tigera-operator.yaml

2. Install Calico by creating the necessary custom resources.

#Because the default IP pool CIDR has to be changed to match the pod network CIDR, it is not recommended to apply the manifest directly with
#kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.24.1/manifests/custom-resources.yaml

#Instead, download and edit it yourself
mkdir /usr/local/calicofir
cd /usr/local/calicofir
wget https://docs.projectcalico.org/manifests/custom-resources.yaml
#Change the cidr in the file to 10.244.0.0/16, the value passed to --pod-network-cidr during cluster initialization
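The part of custom-resources.yaml that needs editing looks roughly like the excerpt below (field names as in the Calico Installation custom resource; only the cidr value is changed to match --pod-network-cidr):

# custom-resources.yaml (excerpt): set cidr to the cluster's pod network CIDR
apiVersion: operator.tigera.io/v1
kind: Installation
metadata:
  name: default
spec:
  calicoNetwork:
    ipPools:
    - blockSize: 26
      cidr: 10.244.0.0/16
      encapsulation: VXLANCrossSubnet
      natOutgoing: Enabled
      nodeSelector: all()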

(screenshot)

kubectl apply -f custom-resources.yaml

(screenshot)

#List the namespaces
[root@k8s-master01 calicofir]# kubectl get ns
NAME STATUS AGE
calico-system Active 79s
default Active 10h
kube-node-lease Active 10h
kube-public Active 10h
kube-system Active 10h
tigera-operator Active 9m3s

#List the pods in the calico-system namespace
[root@k8s-master01 calicofir]# kubectl get pods -n calico-system
NAME READY STATUS RESTARTS AGE
calico-kube-controllers-78687bb75f-46l2t 0/1 Pending 0 94s
calico-node-xfr2n 0/1 PodInitializing 0 94s
calico-typha-75444c4b8-nmmj5 1/1 Running 0 94s
csi-node-driver-92q4k 0/2 ContainerCreating 0 4s

#Watch the calico-system pods while they come up
watch kubectl get pods -n calico-system
Every 2.0s: kubectl get pods -n calico-system Sun Sep 11 09:39:03 2022

NAME READY STATUS RESTARTS AGE
calico-kube-controllers-78687bb75f-46l2t 1/1 Running 0 8m26s
calico-node-xfr2n 1/1 Running 0 8m26s
calico-typha-75444c4b8-nmmj5 1/1 Running 0 8m26s
csi-node-driver-92q4k 2/2 Running 0 6m56s

At this point the coredns pods are in Running state, which shows the cluster network is working.

(screenshot)

Install the Calico client (calicoctl)

#Download the binary
cd /usr/local/calicofir
curl -L https://github.com/projectcalico/calico/releases/download/v3.21.4/calicoctl-linux-amd64 -o calicoctl
#Make it executable
chmod +x /usr/local/calicofir/calicoctl
./calicoctl version
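As a quick sanity check, calicoctl can list the Calico node objects. Run it from /usr/local/calicofir and point it at the Kubernetes API datastore (a sketch, assuming admin.conf is used as the kubeconfig):

#Tell calicoctl to use the Kubernetes API as its datastore
export DATASTORE_TYPE=kubernetes
export KUBECONFIG=/etc/kubernetes/admin.conf
./calicoctl get nodes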

Join the worker nodes to the cluster

#The init command printed the lines below; paste them onto each host that should join the cluster and run them as root
Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.1.19:6443 --token 37exic.msprw2ejmhr9sgnm \
--discovery-token-ca-cert-hash sha256:55753d0edf239649301c3463300e1e5777340926b0c1f1bbc1cf86034bac4c4e

Run it once on k8s-node01 and once on k8s-node02.
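The bootstrap token from kubeadm init is only valid for 24 hours. If it has expired by the time a node joins, a fresh join command can be generated on the master:

#Run on k8s-master01 to print a new kubeadm join command with a fresh token
kubeadm token create --print-join-command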

(screenshot)

Back on k8s-master01, the nodes show up as added; wait while they pull images and finish setting up.

(screenshot)

Verify cluster availability

#List all nodes
[root@k8s-master01 calicofir]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8s-master01 Ready control-plane,master 10h v1.21.0
k8s-node01 Ready <none> 9m31s v1.21.0
k8s-node02 Ready <none> 8m59s v1.21.0

#Check cluster component health
[root@k8s-master01 calicofir]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME STATUS MESSAGE ERROR
controller-manager Unhealthy Get "http://127.0.0.1:10252/healthz": dial tcp 127.0.0.1:10252: connect: connection refused
scheduler Unhealthy Get "http://127.0.0.1:10251/healthz": dial tcp 127.0.0.1:10251: connect: connection refused
etcd-0 Healthy {"health":"true"}


#Check the k8s system pods
[root@k8s-master01 calicofir]# kubectl get pods -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-558bd4d5db-5j4gq 1/1 Running 0 10h
coredns-558bd4d5db-79qsm 1/1 Running 0 10h
etcd-k8s-master01 1/1 Running 0 10h
kube-apiserver-k8s-master01 1/1 Running 0 10h
kube-controller-manager-k8s-master01 1/1 Running 0 10h
kube-proxy-v2ltp 1/1 Running 0 10h
kube-proxy-wzxps 1/1 Running 0 11m
kube-proxy-zr77d 1/1 Running 0 11m
kube-scheduler-k8s-master01 1/1 Running 0 10h

#Check the Calico networking pods
[root@k8s-master01 calicofir]# kubectl get pods -n calico-system
NAME READY STATUS RESTARTS AGE
calico-kube-controllers-78687bb75f-46l2t 1/1 Running 0 43m
calico-node-f9gx6 1/1 Running 0 12m
calico-node-rsk4w 1/1 Running 0 13m
calico-node-xfr2n 1/1 Running 0 43m
calico-typha-75444c4b8-4x7pp 1/1 Running 0 12m
calico-typha-75444c4b8-nmmj5 1/1 Running 0 43m
csi-node-driver-4l2ts 2/2 Running 0 10m
csi-node-driver-92q4k 2/2 Running 0 42m
csi-node-driver-kcw5b 2/2 Running 0 11m
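Beyond the listings above, a short in-cluster DNS lookup is a simple end-to-end check that CoreDNS and the pod network work together (a sketch; the busybox:1.28 image is an assumption, chosen because its nslookup works):

#Run a throwaway pod and resolve the kubernetes Service name; the answer should include the Service IP 10.96.0.1
kubectl run dns-test --rm -it --image=busybox:1.28 --restart=Never -- nslookup kubernetes.default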