From efdff890eda8bad1463930bfe6bd1a51b226c90b Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Mon, 19 Jan 2026 11:13:57 +0100 Subject: [PATCH 1/2] Introduce a timeout for package installation Sometimes package installations can get into a weird state and get stuck for a very long time. Time out the tasks to fail early, with a customizable timeout duration. --- roles/system_packages/defaults/main.yml | 1 + roles/system_packages/tasks/main.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/roles/system_packages/defaults/main.yml b/roles/system_packages/defaults/main.yml index 62704067a..79c9836a4 100644 --- a/roles/system_packages/defaults/main.yml +++ b/roles/system_packages/defaults/main.yml @@ -1,4 +1,5 @@ --- # number of times package install task should be retried pkg_install_retries: 4 +pkg_install_timeout: "{{ 5 * 60 }}" yum_repo_dir: /etc/yum.repos.d diff --git a/roles/system_packages/tasks/main.yml b/roles/system_packages/tasks/main.yml index f0bc875df..ebb98147a 100644 --- a/roles/system_packages/tasks/main.yml +++ b/roles/system_packages/tasks/main.yml @@ -63,3 +63,4 @@ label: "{{ item.action_label }}" tags: - bootstrap_os + timeout: "{{ pkg_install_timeout }}" From 8c128771d71e1a23faaebec685341f88a1388589 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Mon, 19 Jan 2026 11:21:51 +0100 Subject: [PATCH 2/2] CI: create pr-flakey for jobs prone to failure We have a lot of PRs where we endlessly retry the same flakey jobs, which is useless and frustrating for everyone. Put those jobs into a separate matrix with 1 retry to mitigate the issue.
--- .gitlab-ci/kubevirt.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci/kubevirt.yml b/.gitlab-ci/kubevirt.yml index ce580fe07..dd3e3dec7 100644 --- a/.gitlab-ci/kubevirt.yml +++ b/.gitlab-ci/kubevirt.yml @@ -43,7 +43,6 @@ pr: - fedora39-kube-router - fedora41-kube-router - fedora42-calico - - openeuler24-calico - rockylinux9-cilium - rockylinux10-cilium - ubuntu22-calico-all-in-one @@ -57,9 +56,19 @@ pr: - ubuntu24-kube-router-sep - ubuntu24-kube-router-svc-proxy - ubuntu24-ha-separate-etcd - - flatcar4081-calico - fedora40-flannel-crio-collection-scale +# This is for flakey tests so they don't disrupt the PR workflow too much. +# Jobs here MUST have an open issue so we don't lose sight of them +pr-flakey: + extends: pr + retry: 1 + parallel: + matrix: + - TESTCASE: + - flatcar4081-calico # https://github.com/kubernetes-sigs/kubespray/issues/12309 + - openeuler24-calico # https://github.com/kubernetes-sigs/kubespray/issues/12877 + # The ubuntu24-calico-all-in-one jobs are meant as early stages to prevent running the full CI if something is horribly broken ubuntu24-calico-all-in-one: stage: deploy-part1