Mô hình cài đặt
Mô hình cài đặt tôi lựa chọn triển khai gồm 6 node với các thông tin như sau:
- Control-Plane-1 => IP: 172.20.0.104 , OS: Ubuntu 22.04 , Hostname: master01
- Control-Plane-2 => IP: 172.20.0.105 , OS: Ubuntu 22.04, Hostname: master02
- Control-Plane-3 => IP: 172.20.0.106 , OS: Ubuntu 22.04, Hostname: master03
- Worker-1 => IP: 172.20.0.107 , OS: Ubuntu 22.04, Hostname: worker01
- Worker-2 => IP: 172.20.0.108 , OS: Ubuntu 22.04 , Hostname: worker02
- L4 Load Balancer => IP: 172.20.0.109, OS: Ubuntu 22.04 , Hostname: gateway01
cat inventory.ini
[windows]
#window01 ansible_host=10.1.1.31

[windows:vars]
ansible_connection=winrm
# BUGFIX: the WinRM auth option is 'ansible_winrm_transport'; the original
# 'ansible_winrm_authentication' is not a recognized variable, so NTLM was
# never actually selected (alternative value: basic).
ansible_winrm_transport=ntlm
ansible_winrm_scheme=https
ansible_winrm_server_cert_validation=ignore
ansible_winrm_port=5986
ansible_user="Administrator"
# NOTE(review): empty password committed here — supply it via Ansible Vault
# or --extra-vars instead of the inventory.
ansible_password=""

[gateway]
gateway01 ansible_host=172.20.0.109

[deployment]
#deployment01 ansible_host=172.20.0.104

[masters]
master01 ansible_host=172.20.0.104
master02 ansible_host=172.20.0.105
master03 ansible_host=172.20.0.106

[workers]
worker01 ansible_host=172.20.0.107
worker02 ansible_host=172.20.0.108
#worker03 ansible_host=172.20.0.109

[k8s:children]
masters
workers

[nfs-server]
nfs-server01 ansible_host=172.20.0.109

[nfs-client:children]
k8s

[nfs:children]
nfs-server
nfs-client
Tạo tài khoản, disable firewall, đặt hostname và cài các gói cần thiết
cat 01.createuser.yml
---
# Node bootstrap play for all cluster hosts: creates the deploy user with an
# SSH key, disables the firewall and swap, sets hostname/timezone, installs
# base packages, and applies kernel (sysctl) and PAM limits tuning.
# (Indentation restored — the pasted original was flattened.)
- hosts: all
  become: true
  gather_facts: no
  vars:
    created_username: rke2
    # NOTE(review): plain-text password in the playbook — move to Ansible Vault.
    upassword: '123456'
  tasks:
    - name: Setup passwordless sudo
      lineinfile:
        path: /etc/sudoers
        state: present
        regexp: '^%sudo'
        line: '%sudo ALL=(ALL) NOPASSWD: ALL'
        # visudo syntax check prevents writing a broken sudoers file.
        validate: '/usr/sbin/visudo -cf %s'

    - name: Create a new regular user with sudo privileges
      user:
        name: "{{ created_username }}"
        password: "{{ upassword | password_hash('sha512') }}"
        state: present
        groups: sudo
        append: true
        create_home: true
        shell: /bin/bash

    - name: Set authorized key for remote user
      ansible.posix.authorized_key:
        user: "{{ created_username }}"
        state: present
        key: "{{ lookup('file', lookup('env', 'HOME') + '/.ssh/id_rsa.pub') }}"

    # BUGFIX: the ufw module accepts state=disabled/enabled/reloaded/reset —
    # the original 'state: disable' is rejected by the module. The task name
    # now matches what it actually does (the firewall is turned OFF).
    - name: UFW - Disable firewall
      ufw:
        state: disabled

    - name: Set FQDN to match inventory
      ansible.builtin.command: "/usr/bin/hostnamectl --static set-hostname {{ OVERRIDE_FQDN | default(inventory_hostname) | lower }}"
      changed_when: false

    - name: Set timezone to Asia/Saigon
      community.general.timezone:
        # Asia/Saigon is a deprecated tzdata alias of Asia/Ho_Chi_Minh.
        name: Asia/Saigon

    - name: Disable swap immediately
      command: swapoff -a

    - name: Disable SWAP in fstab since kubernetes can't work with swap enabled
      replace:
        path: /etc/fstab
        regexp: '^([^#].*?\sswap\s+sw\s+.*)$'
        replace: '# \1'

    - name: Install aptitude
      apt:
        name: aptitude
        state: latest
        update_cache: true

    - name: Update apt and install required system packages
      apt:
        pkg:
          # BUGFIX: 'curl' was listed twice in the original list.
          - curl
          - vim
          - git
          - net-tools
          - apt-transport-https
          - ca-certificates
          - gnupg-agent
          - software-properties-common
          - jq
        state: latest
        update_cache: true

    - name: Uninstall unneeded system packages
      apt:
        pkg:
          - mlocate
        state: absent

    - name: Disable APT::Periodic::Update-Package-Lists (20auto-upgrades)
      shell: sed -i 's/APT::Periodic::Update-Package-Lists "1"/APT::Periodic::Update-Package-Lists "0"/' /etc/apt/apt.conf.d/20auto-upgrades

    - name: Disable APT::Periodic::Unattended-Upgrade (20auto-upgrades)
      shell: sed -i 's/APT::Periodic::Unattended-Upgrade "1"/APT::Periodic::Unattended-Upgrade "0"/' /etc/apt/apt.conf.d/20auto-upgrades

    - name: Disable APT::Periodic::Update-Package-Lists (10periodic)
      shell: sed -i 's/APT::Periodic::Update-Package-Lists "1"/APT::Periodic::Update-Package-Lists "0"/' /etc/apt/apt.conf.d/10periodic

    # BUGFIX: systemctl exits non-zero for units not installed on a given
    # node (e.g. docker/postfix on a clean host), which aborted the play —
    # both loops are best-effort cleanup, so ignore individual failures.
    - name: Disable startup services
      command: "systemctl disable {{ item }}"
      ignore_errors: yes
      with_items:
        - postfix
        - snapd.service
        - snapd.seeded.service
        - snap.lxd.activate.service
        - snapd.apparmor.service
        - ModemManager.service
        - ipvsadm.service
        - apparmor.service
        - apport.service
        - ubuntu-fan.service

    - name: Stop services
      command: "systemctl stop {{ item }}"
      ignore_errors: yes
      with_items:
        - postfix
        - snapd.service
        - snapd.seeded.service
        - snap.lxd.activate.service
        - snapd.apparmor.service
        - ModemManager.service
        - docker.service
        - containerd.service
        - ipvsadm.service
        - apparmor.service
        - apport.service
        - ubuntu-fan.service

    # mv fails on repeat runs (file already moved), hence ignore_errors.
    - name: Back up /etc/sysctl.conf
      command: mv /etc/sysctl.conf /etc/sysctl.conf.bak
      ignore_errors: yes

    - name: Deploy sysctl.conf from template
      template:
        src: template/sysctl.conf
        dest: /etc/sysctl.conf
        owner: root
        group: root
        mode: "0644"
      notify: Reload sysctl

    - name: Back up /etc/pam.d/common-session
      command: cp -rp /etc/pam.d/common-session /etc/pam.d/common-session.bak
      ignore_errors: yes

    - name: Deploy template/common-session to /etc/pam.d/common-session
      template:
        src: template/common-session
        dest: /etc/pam.d/common-session
        owner: root
        group: root
        mode: "0644"

    - name: Back up /etc/pam.d/common-session-noninteractive
      command: cp -rp /etc/pam.d/common-session-noninteractive /etc/pam.d/common-session-noninteractive.bak
      ignore_errors: yes

    - name: Deploy template/common-session-noninteractive
      template:
        src: template/common-session-noninteractive
        dest: /etc/pam.d/common-session-noninteractive
        owner: root
        group: root
        mode: "0644"

    - name: Back up /etc/security/limits.conf
      command: cp -rp /etc/security/limits.conf /etc/security/limits.conf.bak
      ignore_errors: yes

    - name: Deploy template/limits.conf to /etc/security/limits.conf
      template:
        src: template/limits.conf
        dest: /etc/security/limits.conf
        owner: root
        group: root
        mode: "0644"

    - name: Load overlay kernel module
      command: modprobe overlay

    - name: Load br_netfilter kernel module
      command: modprobe br_netfilter

    - name: Enable IPv4 forwarding
      sysctl:
        name: net.ipv4.ip_forward
        # Quoted so YAML does not retype the value as an integer.
        value: "1"

    - name: Apply sysctl params without reboot
      command: sysctl --system

    - name: Cap systemd DefaultTimeoutStopSec at 5s
      ansible.builtin.lineinfile:
        path: /etc/systemd/system.conf
        search_string: 'DefaultTimeoutStopSec'
        line: 'DefaultTimeoutStopSec=5s'

  handlers:
    - name: Reload sysctl
      command: sysctl --system
    # BUGFIX: the original 'Reload PAM' handler tried to reload a service
    # named 'pam_limits', which does not exist (pam_limits is a PAM module
    # applied at the next login, not a daemon). It was never notified and
    # would have failed if it had been, so it is removed.
cat ./template/sysctl.conf
# Kernel tuning template deployed to /etc/sysctl.conf by 01.createuser.yml.
# maximum number of open files/file descriptors
fs.file-max = 4194303
fs.nr_open = 4194303
fs.aio-max-nr = 1048576
# use as little swap space as possible
vm.swappiness = 0
# prioritize application RAM against disk/swap cache
vm.vfs_cache_pressure = 50
# minimum free memory
vm.min_free_kbytes = 1000000
# follow mellanox best practices https://community.mellanox.com/s/article/linux-sysctl-tuning
# the following changes are recommended for improving IPv4 traffic
# performance by Mellanox
# (BUGFIX: the line above was missing its leading '#', which made it an
# invalid sysctl entry)
# disable the TCP timestamps option for better CPU utilization
net.ipv4.tcp_timestamps = 0
# enable the TCP selective acks option for better throughput
net.ipv4.tcp_sack = 1
# maximum number of packets queued on processor input
# (BUGFIX: this key appeared twice — 250000 and later 10000; sysctl keeps
# the last value, so the effective 10000 is preserved as the single entry)
net.core.netdev_max_backlog = 10000
# increase the TCP maximum and default buffer sizes using setsockopt()
net.core.rmem_max = 4194304
net.core.wmem_max = 4194304
net.core.rmem_default = 4194304
net.core.wmem_default = 4194304
net.core.optmem_max = 4194304
# increase memory thresholds to prevent packet dropping:
net.ipv4.tcp_rmem = 4096 87380 4194304
net.ipv4.tcp_wmem = 4096 65536 4194304
# enable low latency mode for TCP:
net.ipv4.tcp_low_latency = 1
# the following variable is used to tell the kernel how much of the socket buffer
# space should be used for TCP window size, and how much to save for an application
# buffer. A value of 1 means the socket buffer will be divided evenly between.
# TCP windows size and application.
net.ipv4.tcp_adv_win_scale = 1
# maximum number of incoming connections
net.core.somaxconn = 65535
# queue length of completely established sockets waiting for accept
net.ipv4.tcp_max_syn_backlog = 4096
# time to wait (seconds) for FIN packet
net.ipv4.tcp_fin_timeout = 15
# MTU discovery, only enable when ICMP blackhole detected
net.ipv4.tcp_mtu_probing = 1
# --- kubernetes / hardening settings ---
net.ipv4.ip_forward=1
# disable icmp send redirects
# (duplicate net.ipv4.conf.all entries with identical values were collapsed)
net.ipv4.conf.all.send_redirects=0
net.ipv4.conf.default.send_redirects=0
# drop packets with LSR or SSR
net.ipv4.conf.all.accept_source_route=0
net.ipv4.conf.default.accept_source_route=0
# disable icmp accept redirect
net.ipv4.conf.all.accept_redirects=0
net.ipv4.conf.default.accept_redirects=0
net.ipv4.conf.all.log_martians=1
net.ipv4.conf.default.log_martians=1
net.ipv4.conf.all.rp_filter=1
net.ipv4.conf.default.rp_filter=1
net.ipv6.conf.all.accept_ra=0
net.ipv6.conf.default.accept_ra=0
net.ipv6.conf.all.accept_redirects=0
net.ipv6.conf.default.accept_redirects=0
kernel.keys.root_maxbytes=25000000
kernel.keys.root_maxkeys=1000000
kernel.panic=10
kernel.panic_on_oops=1
vm.overcommit_memory=1
vm.panic_on_oom=0
net.ipv4.ip_local_reserved_ports=8000-32767
# the net.bridge.* keys require the br_netfilter module to be loaded first
# (the playbook runs 'modprobe br_netfilter' before applying sysctl)
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-arptables=1
net.bridge.bridge-nf-call-ip6tables=1
cat ./template/limits.conf
# Deployed to /etc/security/limits.conf (read by pam_limits at login).
# Raise open-file (nofile) and process (nproc) limits for root and all users.
root soft nofile 1048576
root hard nofile 1048576
* soft nofile 1048576
* hard nofile 1048576
root soft nproc 1048576
root hard nproc 1048576
* soft nproc 1048576
* hard nproc 1048576
# Disable core dumps for all users.
root soft core 0
root hard core 0
* soft core 0
* hard core 0
cat ./template/common-session |grep -v ^#
session [default=1] pam_permit.so
session requisite pam_deny.so
session required pam_permit.so
session optional pam_umask.so
session required pam_unix.so
session optional pam_systemd.so
session required pam_limits.so
Cài đặt:
modprobe br_netfilter
modprobe overlay
cat <<EOF | tee /etc/modules-load.d/k8s.conf
br_netfilter
overlay
EOF
cat <<EOF | tee /etc/sysctl.conf
net.ipv4.ip_forward=1
net.ipv4.conf.all.send_redirects=0
net.ipv4.conf.default.send_redirects=0
net.ipv4.conf.default.accept_source_route=0
net.ipv4.conf.all.accept_redirects=0
net.ipv4.conf.default.accept_redirects=0
net.ipv4.conf.all.log_martians=1
net.ipv4.conf.default.log_martians=1
net.ipv4.conf.all.rp_filter=1
net.ipv4.conf.default.rp_filter=1
net.ipv6.conf.all.accept_ra=0
net.ipv6.conf.default.accept_ra=0
net.ipv6.conf.all.accept_redirects=0
net.ipv6.conf.default.accept_redirects=0
kernel.keys.root_maxbytes=25000000
kernel.keys.root_maxkeys=1000000
kernel.panic=10
kernel.panic_on_oops=1
vm.overcommit_memory=1
vm.panic_on_oom=0
net.ipv4.ip_local_reserved_ports=30000-32767
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-arptables=1
net.bridge.bridge-nf-call-ip6tables=1
EOF
sysctl --system
Cài đặt Haproxy trên 172.20.0.109
cat ./template/haproxy.cfg
# HAProxy config for the L4 load balancer in front of the three RKE2
# control-plane nodes (172.20.0.104-106).
global
daemon
log 127.0.0.1 local0
log 127.0.0.1 local1 notice
maxconn 4096
tune.ssl.default-dh-param 2048
# default settings common to all HTTP proxies below
# NOTE(review): named defaults sections ("defaults http") require
# HAProxy >= 2.4 (shipped with Ubuntu 22.04) — confirm the installed version.
defaults http
mode http
option httplog
log global
timeout client 1m
timeout server 1m
timeout connect 10s
timeout http-keep-alive 2m
timeout queue 15s
timeout tunnel 4h # for websocket
# provide a stats page on port 9000
frontend stats
bind :9000
# provide advanced stats (ssl, h2, ...)
stats uri /
stats show-modules
# some users may want to protect the access to their stats and/or to
# enable admin mode on the page from local networks
stats auth admin:admin
# stats admin if { src 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 127.0.0.0/8
# TCP passthrough for the Kubernetes API server (kubectl / kubeconfig, 6443)
frontend kubernetes6443
bind 0.0.0.0:6443
option tcplog
mode tcp
default_backend kubernetes-master-nodes6443
backend kubernetes-master-nodes6443
mode tcp
balance roundrobin
option tcp-check
server kubernetes-master1 172.20.0.104:6443 check fall 3 rise 2
server kubernetes-master2 172.20.0.105:6443 check fall 3 rise 2
server kubernetes-master3 172.20.0.106:6443 check fall 3 rise 2
# TCP passthrough for the RKE2 supervisor/registration port (9345)
frontend kubernetes9345
bind 0.0.0.0:9345
option tcplog
mode tcp
default_backend kubernetes-master-nodes9345
backend kubernetes-master-nodes9345
mode tcp
balance roundrobin
option tcp-check
server kubernetes-master1 172.20.0.104:9345 check fall 3 rise 2
server kubernetes-master2 172.20.0.105:9345 check fall 3 rise 2
server kubernetes-master3 172.20.0.106:9345 check fall 3 rise 2
cat haproxy.yml
---
# Installs and configures HAProxy as the L4 load balancer for the cluster.
# BUGFIX: limited to the [gateway] inventory group — with 'hosts: all',
# HAProxy would have been installed on every node, while the document states
# it should run only on 172.20.0.109 (gateway01).
- hosts: gateway
  become: true
  tasks:
    - name: Enable IP forwarding
      sysctl:
        name: net.ipv4.ip_forward
        # Quoted so YAML does not retype the value as an integer.
        value: "1"
        state: present
        reload: yes

    - name: Update apt and install haproxy package
      apt:
        pkg:
          - haproxy
        state: latest
        update_cache: true

    - name: Deploy template/haproxy.cfg to /etc/haproxy/haproxy.cfg
      template:
        src: template/haproxy.cfg
        dest: /etc/haproxy/haproxy.cfg
        owner: root
        group: root
        mode: "0644"
        # Refuse to install a config that haproxy itself cannot parse.
        validate: haproxy -c -f %s

    # One task restarts (picking up config changes) AND enables at boot.
    # The original additionally ran raw 'systemctl restart/enable' command
    # tasks that duplicated this service task — removed as redundant.
    - name: Restart and enable haproxy
      service:
        name: haproxy
        state: restarted
        enabled: yes
Install Rancher rke2
mkdir -p /etc/rancher/rke2/
vi /etc/rancher/rke2/config.yaml
# RKE2 server configuration for the first control-plane node
# (/etc/rancher/rke2/config.yaml).
write-kubeconfig-mode: "0644"
# NOTE(review): 172.20.0.109 is the load balancer's IP, not this node's own
# address (172.20.0.104) — confirm the LB IP is intended here.
advertise-address: 172.20.0.109
# NOTE(review): name differs from the inventory hostname (master01) — verify.
node-name: kuber-master-1
# Extra SANs for the API server certificate (LB IP + DNS names).
tls-san:
- 172.20.0.109
- rancher.domain.com
- kubernetes.domain.com
- rke2.domain.com
- k8s.domain.com
# No bundled CNI — Cilium is installed separately later in this document.
cni: none
cluster-cidr: 10.100.0.0/16
service-cidr: 10.110.0.0/16
cluster-dns: 10.110.0.10
cluster-domain: arman-projects.com
# Raise the etcd DB size quota to ~2 GB.
etcd-arg: "--quota-backend-bytes 2048000000"
# Daily etcd snapshot at 03:00, keeping the last 10.
etcd-snapshot-schedule-cron: "0 3 * * *"
etcd-snapshot-retention: 10
disable:
- rke2-ingress-nginx
# kube-proxy is disabled; its function must be provided by the CNI.
disable-kube-proxy: true
# Evict pods from failed nodes faster than the defaults.
kube-apiserver-arg:
- '--default-not-ready-toleration-seconds=30'
- '--default-unreachable-toleration-seconds=30'
kube-controller-manager-arg:
- '--node-monitor-period=4s'
kubelet-arg:
- '--node-status-update-frequency=4s'
- '--max-pods=100'
egress-selector-mode: disabled
protect-kernel-defaults: true
Cài online
curl -sfL https://get.rke2.io > install_rke2.sh
chmod ug+x install_rke2.sh
INSTALL_RKE2_TYPE="server" ./install_rke2.sh
hoặc cài offline
Go to "https://github.com/rancher/rke2/releases" and select a release. After that, you should download rke2-images-core.linux-amd64.tar.gz.
mkdir -p /var/lib/rancher/rke2/agent/images
mv rke2-images-core.linux-amd64.tar.gz /var/lib/rancher/rke2/agent/images/
systemctl disable rke2-agent && systemctl mask rke2-agent
systemctl enable --now rke2-server.service
Kiểm tra
systemctl status rke2-server
journalctl -u rke2-server -f
echo 'PATH=$PATH:/var/lib/rancher/rke2/bin' >> ~/.bashrc
source ~/.bashrc
mkdir ~/.kube
cp /etc/rancher/rke2/rke2.yaml ~/.kube/config
sed -i 's/127.0.0.1/172.20.0.109/g' ~/.kube/config
NOTE: If you use kubectl, you’ll see that nodes are in NotReady state and the reason for that, is due to the absence of a CNI plugin.
CÀI CILIUM CNI
Cài cilium client
CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
CLI_ARCH=amd64
curl -L --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
sha256sum --check cilium-linux-${CLI_ARCH}.tar.gz.sha256sum
tar xzvfC cilium-linux-${CLI_ARCH}.tar.gz /usr/local/bin
cat cilium.yaml
# Helm values for Cilium (passed to `cilium install -f ./cilium.yaml`).
# BUGFIX: the pasted original lost all indentation, which made every key
# top-level (and `cluster:` null). Nesting reconstructed per the Cilium
# Helm values layout — NOTE(review): verify against the chart's values.yaml.
cluster:
  name: cluster-rke
  id: 0
prometheus:
  enabled: true
  serviceMonitor:
    enabled: false
dashboards:
  enabled: true
hubble:
  metrics:
    enabled:
      - dns
      - drop
      - tcp
      - flow
      - icmp
      - http
    dashboards:
      enabled: true
  relay:
    enabled: true
    prometheus:
      enabled: true
  ui:
    enabled: true
    baseUrl: "/"
version: 1.17.1
operator:
  prometheus:
    enabled: true
  dashboards:
    enabled: true
cilium install --version 1.17.1 -f ./cilium.yaml --set gatewayAPI.enabled=true \
--set l2announcements.enabled=true \
--set l2announcements.leaseDuration=3s \
--set l2announcements.leaseRenewDeadline=1s \
--set l2announcements.leaseRetryPeriod=200ms
cilium status
kubectl get nodes
kubectl -n kube-system edit cm cilium-config
There is a line like this:
ipam: cluster
Change the value to “kubernetes”:
ipam: kubernetes
kubectl -n kube-system rollout restart deployment cilium-operator
kubectl -n kube-system rollout restart ds cilium
Control Plane 2 & Control Plane 3
mkdir -p /etc/rancher/rke2/
vim /etc/rancher/rke2/config.yaml
Thêm vào trên đầu file các thông tin
server: https://172.20.0.109:9345
token: XXXXXXXXXX
node-name: <node-name>
....
Với token lấy trên master01 tại: /var/lib/rancher/rke2/server/node-token
curl -sfL https://get.rke2.io > install_rke2.sh
chmod ug+x install_rke2.sh
INSTALL_RKE2_TYPE="server" ./install_rke2.sh
systemctl disable rke2-agent && systemctl mask rke2-agent
systemctl enable --now rke2-server.service
systemctl status rke2-server
journalctl -u rke2-server -f
Install worker
mkdir -p /etc/rancher/rke2/
vim /etc/rancher/rke2/config.yaml
File config gồm các thông tin
server: https://172.20.0.109:9345
token: XXXXXXXXXX
node-name: <node-name>
kubelet-arg:
- '--node-status-update-frequency=4s'
- '--max-pods=100'
Với token lấy trên master01 tại: /var/lib/rancher/rke2/server/node-token
curl -sfL https://get.rke2.io > install_rke2.sh
chmod ug+x install_rke2.sh
INSTALL_RKE2_TYPE="agent" ./install_rke2.sh
systemctl disable rke2-server && systemctl mask rke2-server
systemctl enable --now rke2-agent.service
systemctl status rke2-agent
journalctl -u rke2-agent -f
Các worker khác làm tương tự.
Như vậy là cơ bản ta đã có hệ thống K8s với 3 node master, 2 node worker và 1 node cài đặt Haproxy để làm load balancer
Sau này ta có thể cài thêm 1 máy chủ haproxy nữa cài đặt keepalive làm VIP IP cũng như cài thêm RANCHER để quản lý.