mirror of
https://github.com/kubernetes-sigs/kubespray.git
synced 2026-02-18 03:30:07 -03:30
* etcd: throttle restart for availability. During an upgrade, etcd members are restarted all at once. This can impact the availability of the etcd cluster and, subsequently, of the Kubernetes cluster. Limit the number of concurrent restarts so that the etcd cluster can keep quorum. * Simplify etcd handlers
55 lines
1.7 KiB
YAML
55 lines
1.7 KiB
YAML
---
|
|
# Handler: statically imports the tasks defined in backup.yml.
- name: Backup etcd
  import_tasks: backup.yml
|
|
|
|
# Handler: restart the `etcd` systemd unit (after a daemon-reload) on hosts
# that are members of the 'etcd' inventory group.
- name: Restart etcd
  systemd_service:
    name: etcd
    state: restarted
    daemon_reload: true
  when: ('etcd' in group_names)
  # Limit how many members restart concurrently so the cluster keeps quorum.
  # With n members, quorum is (n // 2) + 1, so at most (n - 1) // 2 members
  # may be down at the same time.
  #   - odd n  (recommended): (n - 1) // 2 == n // 2 == majority - 1
  #   - even n: (n - 1) // 2 is one lower than n // 2, which would otherwise
  #     take down half the members and lose quorum
  # The floor of 1 keeps the restart serialized for 1- and 2-member clusters
  # instead of handing Ansible a throttle of 0.
  throttle: "{{ [((groups['etcd'] | length) - 1) // 2, 1] | max }}"
|
|
|
|
# Handler: restart the `etcd-events` systemd unit (after a daemon-reload) on
# hosts that are members of the 'etcd' inventory group.
- name: Restart etcd-events
  systemd_service:
    name: etcd-events
    state: restarted
    daemon_reload: true
  # TODO: this seems odd. etcd-events should possibly be a different group?
  when: ('etcd' in group_names)
  # Limit how many members restart concurrently so the cluster keeps quorum.
  # With n members, quorum is (n // 2) + 1, so at most (n - 1) // 2 members
  # may be down at the same time. For odd n this equals n // 2; for even n it
  # is one lower, which avoids taking down half the members at once.
  # The floor of 1 keeps the restart serialized for 1- and 2-member clusters
  # instead of handing Ansible a throttle of 0.
  throttle: "{{ [((groups['etcd'] | length) - 1) // 2, 1] | max }}"
|
|
|
|
# Handler: runs whenever "Restart etcd" is notified (via `listen`) and polls
# the health endpoint on port 2379 until it answers with HTTP 200.
- name: Wait for etcd up
  listen: Restart etcd
  uri:
    # Members of the 'etcd' group are probed on etcd_address; any other host
    # probes 127.0.0.1.
    url: "https://{% if 'etcd' in group_names %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2379/health"
    # The request authenticates with this host's member certificate/key pair.
    client_cert: "{{ etcd_cert_dir }}/member-{{ inventory_hostname }}.pem"
    client_key: "{{ etcd_cert_dir }}/member-{{ inventory_hostname }}-key.pem"
    # The server certificate is not verified for this health probe.
    validate_certs: false
  register: result
  # `status` may be absent when the request itself fails, hence the
  # `is defined` guard. Retried up to 60 times, 1 second apart.
  until: result.status is defined and result.status == 200
  retries: 60
  delay: 1
|
|
|
|
# Handler: statically imports the tasks defined in backup_cleanup.yml.
- name: Cleanup etcd backups
  import_tasks: backup_cleanup.yml
|
|
|
|
# Handler: runs whenever "Restart etcd-events" is notified (via `listen`) and
# polls the health endpoint on port 2383 until it answers with HTTP 200.
- name: Wait for etcd-events up
  listen: Restart etcd-events
  uri:
    # Members of the 'etcd' group are probed on etcd_address; any other host
    # probes 127.0.0.1.
    url: "https://{% if 'etcd' in group_names %}{{ etcd_address }}{% else %}127.0.0.1{% endif %}:2383/health"
    # The request authenticates with this host's member certificate/key pair.
    client_cert: "{{ etcd_cert_dir }}/member-{{ inventory_hostname }}.pem"
    client_key: "{{ etcd_cert_dir }}/member-{{ inventory_hostname }}-key.pem"
    # The server certificate is not verified for this health probe.
    validate_certs: false
  register: result
  # `status` may be absent when the request itself fails, hence the
  # `is defined` guard. Retried up to 60 times, 1 second apart.
  until: result.status is defined and result.status == 200
  retries: 60
  delay: 1
|
|
|
|
# Handler: sets the host fact `etcd_secret_changed` to true when notified.
- name: Set etcd_secret_changed
  set_fact:
    etcd_secret_changed: true
|