Wait for control plane node to become ready after joining (#12922)

When joining a control plane node and "upgrading" the cluster setup (for
example, to update etcd addresses after adding a new etcd node) in the
same playbook run, the node can take some time to become Ready after
joining.
This trips the ControlPlaneNodesReady preflight check in kubeadm upgrade,
which runs directly after the join tasks.

Add a configurable wait for the control plane node to become Ready to
fix this race condition.
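
A minimal sketch of the ordering that hits the race (task names and
commands are illustrative, not the actual kubespray task list):

    # Hypothetical single-run ordering; the joined node is not yet Ready
    - name: Join new control plane node
      command: kubeadm join <endpoint> --control-plane ...

    - name: Upgrade cluster configuration  # runs immediately after the join
      command: kubeadm upgrade apply ...   # ControlPlaneNodesReady preflight fails here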

Co-authored-by: Max Gautier <mg@max.gautier.name>
Author: k8s-infra-cherrypick-robot
Date: 2026-01-29 02:09:49 -08:00
Committed by: GitHub
Commit: 03828c9ffa (parent: 7f915b333b)
2 changed files with 18 additions and 0 deletions


@@ -2,6 +2,9 @@
 # disable upgrade cluster
 upgrade_cluster_setup: false
+# Number of retries (with a 5 second interval) to check that new control plane nodes
+# are in Ready condition after joining
+control_plane_node_become_ready_tries: 24
 # By default the external API listens on all interfaces; this can be changed to
 # listen on a specific address/interface.
 # NOTE: If you use a specific address/interface and use loadbalancer_apiserver_localhost
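
With the default of 24 tries at a 5 second delay, the wait allows roughly
two minutes (24 x 5 s = 120 s). On slower nodes this can be raised through
inventory group vars; a minimal sketch, with the file path being illustrative:

    # e.g. inventory/mycluster/group_vars/k8s_cluster/k8s-cluster.yml
    # Allow up to 5 minutes (60 tries x 5 seconds) for newly joined control
    # plane nodes to report Ready before kubeadm upgrade runs
    control_plane_node_become_ready_tries: 60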


@@ -99,3 +99,18 @@
   when:
     - inventory_hostname != first_kube_control_plane
    - kubeadm_already_run is not defined or not kubeadm_already_run.stat.exists
+
+- name: Wait for new control plane nodes to be Ready
+  when: kubeadm_already_run.stat.exists
+  run_once: true
+  command: >
+    {{ kubectl }} get nodes --selector node-role.kubernetes.io/control-plane
+    -o jsonpath-as-json="{.items[*].status.conditions[?(@.type == 'Ready')]}"
+  register: control_plane_node_ready_conditions
+  retries: "{{ control_plane_node_become_ready_tries }}"
+  delay: 5
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"
+  until: >
+    control_plane_node_ready_conditions.stdout
+    | from_json | selectattr('status', '==', 'True')
+    | list | length == (groups['kube_control_plane'] | length)
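
For reference, the jsonpath query returns one Ready condition object per
control plane node, and the until expression counts how many report status
'True'. An abridged, illustrative sketch of the registered stdout for a
two-node control plane where one node is still coming up (real conditions
carry more fields, such as timestamps and messages):

    [
      {"type": "Ready", "status": "True", "reason": "KubeletReady"},
      {"type": "Ready", "status": "False", "reason": "KubeletNotReady"}
    ]

Here selectattr('status', '==', 'True') | list | length evaluates to 1,
which is less than the two hosts in kube_control_plane, so the task
retries after the 5 second delay.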