From fcecaf6943a4055cab6baf00f5198f020aaaff91 Mon Sep 17 00:00:00 2001
From: Max Gautier
Date: Wed, 28 Jan 2026 16:45:51 +0000
Subject: [PATCH] wait for control plane node to become ready after joining (#12794)

When joining a control plane node and "upgrading" the cluster setup (for
example, to update etcd addresses after adding a new etcd) in the same
playbook run, the node can take a bit of time to become ready after
joining.
This triggers a kubeadm preflight check (ControlPlaneNodesReady) in
kubeadm upgrade, which is run directly after the join tasks.

Add a configurable wait for the control plane node to become Ready to
fix this race condition.
---
 .../control-plane/defaults/main/main.yml      |  3 +++
 .../control-plane/tasks/kubeadm-secondary.yml | 25 +++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/roles/kubernetes/control-plane/defaults/main/main.yml b/roles/kubernetes/control-plane/defaults/main/main.yml
index b94121dc2..d3582fb00 100644
--- a/roles/kubernetes/control-plane/defaults/main/main.yml
+++ b/roles/kubernetes/control-plane/defaults/main/main.yml
@@ -2,6 +2,9 @@
 # disable upgrade cluster
 upgrade_cluster_setup: false
 
+# Number of retries (5 seconds apart, so the default waits up to about 2 minutes)
+# to check that new control plane nodes are in Ready condition after joining
+control_plane_node_become_ready_tries: 24
 # By default the external API listens on all interfaces, this can be changed to
 # listen on a specific address/interface.
 # NOTE: If you specific address/interface and use loadbalancer_apiserver_localhost
diff --git a/roles/kubernetes/control-plane/tasks/kubeadm-secondary.yml b/roles/kubernetes/control-plane/tasks/kubeadm-secondary.yml
index d562d3019..35972d19d 100644
--- a/roles/kubernetes/control-plane/tasks/kubeadm-secondary.yml
+++ b/roles/kubernetes/control-plane/tasks/kubeadm-secondary.yml
@@ -98,3 +98,28 @@
   when:
     - inventory_hostname != first_kube_control_plane
     - kubeadm_already_run is not defined or not kubeadm_already_run.stat.exists
+
+# `kubeadm upgrade` runs directly after the join tasks and its
+# ControlPlaneNodesReady preflight check fails while a freshly joined node is
+# still starting up, so poll until every control plane node reports Ready.
+- name: Wait for new control plane nodes to be Ready
+  # An unregistered stat result counts as "kubeadm has not run yet", matching the
+  # guard on the preceding task.
+  when: kubeadm_already_run is defined and kubeadm_already_run.stat.exists
+  run_once: true
+  command: >
+    {{ kubectl }} get nodes --selector node-role.kubernetes.io/control-plane
+    -o jsonpath-as-json="{.items[*].status.conditions[?(@.type == 'Ready')]}"
+  register: control_plane_node_ready_conditions
+  # Read-only query, so never report a change.
+  changed_when: false
+  retries: "{{ control_plane_node_become_ready_tries }}"
+  delay: 5
+  delegate_to: "{{ groups['kube_control_plane'][0] }}"
+  # Check the command result first: parsing the empty stdout of a failed kubectl
+  # call would abort the task instead of letting it retry.
+  until: >
+    control_plane_node_ready_conditions is succeeded and
+    (control_plane_node_ready_conditions.stdout
+    | from_json | selectattr('status', '==', 'True')
+    | length) == (groups['kube_control_plane'] | length)