[Operator] Make UpgradeTest stable

Fixes #37690

Signed-off-by: Pedro Ruivo <pruivo@redhat.com>
This commit is contained in:
Pedro Ruivo 2025-03-05 22:01:07 +00:00 committed by GitHub
parent 83adc99ef7
commit 14c5e2454e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 114 additions and 91 deletions

View File

@ -36,6 +36,8 @@ public final class Constants {
public static final String MANAGED_BY_VALUE = "keycloak-operator";
public static final String COMPONENT_LABEL = "app.kubernetes.io/component";
public static final String KEYCLOAK_MIGRATING_ANNOTATION = "operator.keycloak.org/migrating";
public static final String KEYCLOAK_RECREATE_UPDATE_ANNOTATION = "operator.keycloak.org/recreate-update";
public static final String KEYCLOAK_UPDATE_REASON_ANNOTATION = "operator.keycloak.org/update-reason";
public static final String DEFAULT_LABELS_AS_STRING = "app=keycloak,app.kubernetes.io/managed-by=keycloak-operator";

View File

@ -31,6 +31,7 @@ public final class ContextUtils {
public static final String OLD_DEPLOYMENT_KEY = "current_stateful_set";
public static final String NEW_DEPLOYMENT_KEY = "desired_new_stateful_set";
public static final String UPGRADE_TYPE_KEY = "upgrade_type";
public static final String UPGRADE_REASON_KEY = "upgrade_reason";
public static final String OPERATOR_CONFIG_KEY = "operator_config";
public static final String WATCHED_RESOURCES_KEY = "watched_resources";
public static final String DIST_CONFIGURATOR_KEY = "dist_configurator";
@ -54,14 +55,19 @@ public final class ContextUtils {
return context.managedWorkflowAndDependentResourceContext().getMandatory(NEW_DEPLOYMENT_KEY, StatefulSet.class);
}
public static void storeUpgradeType(Context<?> context, UpgradeType upgradeType) {
public static void storeUpgradeType(Context<?> context, UpgradeType upgradeType, String reason) {
context.managedWorkflowAndDependentResourceContext().put(UPGRADE_TYPE_KEY, upgradeType);
context.managedWorkflowAndDependentResourceContext().put(UPGRADE_REASON_KEY, reason);
}
public static Optional<UpgradeType> getUpgradeType(Context<?> context) {
return context.managedWorkflowAndDependentResourceContext().get(UPGRADE_TYPE_KEY, UpgradeType.class);
}
public static String getUpgradeReason(Context<?> context) {
return context.managedWorkflowAndDependentResourceContext().getMandatory(UPGRADE_REASON_KEY, String.class);
}
public static void storeOperatorConfig(Context<?> context, Config operatorConfig) {
context.managedWorkflowAndDependentResourceContext().put(OPERATOR_CONFIG_KEY, operatorConfig);
}

View File

@ -159,8 +159,8 @@ public class KeycloakDeploymentDependentResource extends CRUDKubernetesDependent
}
return switch (upgradeType.get()) {
case ROLLING -> handleRollingUpdate(baseDeployment);
case RECREATE -> handleRecreateUpdate(existingDeployment, baseDeployment);
case ROLLING -> handleRollingUpdate(baseDeployment, context);
case RECREATE -> handleRecreateUpdate(existingDeployment, baseDeployment, context);
};
}
@ -542,13 +542,18 @@ public class KeycloakDeploymentDependentResource extends CRUDKubernetesDependent
}));
}
private static StatefulSet handleRollingUpdate(StatefulSet desired) {
private static StatefulSet handleRollingUpdate(StatefulSet desired, Context<Keycloak> context) {
// return the desired stateful set since Kubernetes does a rolling in-place upgrade by default.
Log.debug("Performing a rolling upgrade");
return desired;
var builder = desired.toBuilder()
.editMetadata()
.addToAnnotations(Constants.KEYCLOAK_RECREATE_UPDATE_ANNOTATION, "false")
.addToAnnotations(Constants.KEYCLOAK_UPDATE_REASON_ANNOTATION, ContextUtils.getUpgradeReason(context))
.endMetadata();
return builder.build();
}
private static StatefulSet handleRecreateUpdate(StatefulSet actual, StatefulSet desired) {
private static StatefulSet handleRecreateUpdate(StatefulSet actual, StatefulSet desired, Context<Keycloak> context) {
if (actual.getStatus().getReplicas() == 0) {
Log.debug("Performing a recreate upgrade - scaling up the stateful set");
return desired;
@ -563,6 +568,8 @@ public class KeycloakDeploymentDependentResource extends CRUDKubernetesDependent
builder.withMetadata(desired.getMetadata());
builder.editMetadata()
.addToAnnotations(Constants.KEYCLOAK_MIGRATING_ANNOTATION, Boolean.TRUE.toString())
.addToAnnotations(Constants.KEYCLOAK_RECREATE_UPDATE_ANNOTATION, Boolean.TRUE.toString())
.addToAnnotations(Constants.KEYCLOAK_UPDATE_REASON_ANNOTATION, ContextUtils.getUpgradeReason(context))
.endMetadata();
return builder.build();
}

View File

@ -114,6 +114,14 @@ public final class CRDUtils {
.map(PodSpec::getVolumes)
.stream()
.flatMap(Collection::stream);
}
public static Optional<Boolean> fetchIsRecreateUpdate(StatefulSet statefulSet) {
var value = statefulSet.getMetadata().getAnnotations().get(Constants.KEYCLOAK_RECREATE_UPDATE_ANNOTATION);
return Optional.ofNullable(value).map(Boolean::parseBoolean);
}
public static Optional<String> findUpdateReason(StatefulSet statefulSet) {
return Optional.ofNullable(statefulSet.getMetadata().getAnnotations().get(Constants.KEYCLOAK_UPDATE_REASON_ANNOTATION));
}
}

View File

@ -109,6 +109,12 @@ public class KeycloakStatusAggregator {
updateType.setMessage(message);
}
public void resetUpgradeType() {
updateType.setStatus(null);
updateType.setObservedGeneration(observedGeneration);
updateType.setMessage(null);
}
/**
* Apply non-condition changes to the status
*/

View File

@ -48,7 +48,7 @@ abstract class BaseUpgradeLogic implements UpgradeLogic {
protected final Context<Keycloak> context;
protected final Keycloak keycloak;
private Consumer<KeycloakStatusAggregator> statusConsumer = unused -> {};
private Consumer<KeycloakStatusAggregator> statusConsumer = KeycloakStatusAggregator::resetUpgradeType;
BaseUpgradeLogic(Context<Keycloak> context, Keycloak keycloak) {
this.context = context;
@ -63,6 +63,8 @@ abstract class BaseUpgradeLogic implements UpgradeLogic {
Log.debug("New deployment - skipping upgrade logic");
return Optional.empty();
}
copyStatusFromExistStatefulSet(existing.get());
var desiredStatefulSet = ContextUtils.getDesiredStatefulSet(context);
var desiredContainer = CRDUtils.firstContainerOf(desiredStatefulSet).orElseThrow(BaseUpgradeLogic::containerNotFound);
var actualContainer = CRDUtils.firstContainerOf(existing.get()).orElseThrow(BaseUpgradeLogic::containerNotFound);
@ -96,16 +98,26 @@ abstract class BaseUpgradeLogic implements UpgradeLogic {
*/
abstract Optional<UpdateControl<Keycloak>> onUpgrade();
private void copyStatusFromExistStatefulSet(StatefulSet current) {
var maybeRecreate = CRDUtils.fetchIsRecreateUpdate(current);
if (maybeRecreate.isEmpty()) {
return;
}
var reason = CRDUtils.findUpdateReason(current).orElseThrow();
var recreate = maybeRecreate.get();
statusConsumer = statusAggregator -> statusAggregator.addUpgradeType(recreate, reason);
}
void decideRollingUpgrade(String reason) {
Log.debugf("Decided rolling upgrade type. Reason: %s", reason);
statusConsumer = status -> status.addUpgradeType(false, reason);
ContextUtils.storeUpgradeType(context, UpgradeType.ROLLING);
ContextUtils.storeUpgradeType(context, UpgradeType.ROLLING, reason);
}
void decideRecreateUpgrade(String reason) {
Log.debugf("Decided recreate upgrade type. Reason: %s", reason);
statusConsumer = status -> status.addUpgradeType(true, reason);
ContextUtils.storeUpgradeType(context, UpgradeType.RECREATE);
ContextUtils.storeUpgradeType(context, UpgradeType.RECREATE, reason);
}
static IllegalStateException containerNotFound() {

View File

@ -314,6 +314,8 @@ public class BaseOperatorTest implements QuarkusTestAfterEachCallback {
savePodLogs();
// provide some helpful entries in the main log as well
logFailedKeycloaks();
k8sclient.resources(Keycloak.class).list().getItems()
.forEach(keycloak -> Log.infof("Keycloak '%s' status:%n%s", keycloak.getMetadata().getName(), Serialization.asYaml(keycloak.getStatus())));
if (operatorDeployment == OperatorDeployment.remote) {
log(k8sclient.apps().deployments().withName("keycloak-operator"), Deployment::getStatus, false);
}

View File

@ -27,8 +27,6 @@ import java.util.concurrent.TimeoutException;
import io.fabric8.kubernetes.api.model.batch.v1.Job;
import io.fabric8.kubernetes.api.model.batch.v1.JobStatus;
import io.quarkus.test.junit.QuarkusTest;
import org.awaitility.Awaitility;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
import org.junit.jupiter.params.ParameterizedTest;
@ -48,54 +46,31 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.keycloak.operator.testsuite.utils.CRAssert.assertKeycloakStatusCondition;
import static org.keycloak.operator.testsuite.utils.CRAssert.eventuallyRecreateUpgradeStatus;
import static org.keycloak.operator.testsuite.utils.CRAssert.eventuallyRollingUpgradeStatus;
import static org.keycloak.operator.testsuite.utils.K8sUtils.deployKeycloak;
@QuarkusTest
@Disabled(value = "unstable")
public class UpgradeTest extends BaseOperatorTest {
@ParameterizedTest(name = "testImageChange-{0}")
@EnumSource(UpdateStrategy.class)
public void testImageChange(UpdateStrategy updateStrategy) throws InterruptedException {
var kc = createInitialDeployment(updateStrategy);
var upgradeCondition = assertUnknownUpdateTypeStatus(kc);
deployKeycloak(k8sclient, kc, true);
assertUnknownUpdateTypeStatus(kc);
var stsGetter = k8sclient.apps().statefulSets().inNamespace(namespace).withName(kc.getMetadata().getName());
final String newImage = "quay.io/keycloak/non-existing-keycloak";
await(upgradeCondition);
// changing the image to non-existing will always use the recreate upgrade type.
kc.getSpec().setImage(newImage);
var upgradeCondition = eventuallyRecreateUpgradeStatus(k8sclient, kc);
kc.getSpec().setImage("quay.io/keycloak/non-existing-keycloak");
disableProbes(kc);
upgradeCondition = switch (updateStrategy) {
case AUTO -> eventuallyRecreateUpgradeStatus(k8sclient, kc, "Unexpected update-compatibility command");
case RECREATE_ON_IMAGE_CHANGE -> eventuallyRecreateUpgradeStatus(k8sclient, kc, "Image changed");
};
deployKeycloak(k8sclient, kc, false);
await(upgradeCondition);
switch (updateStrategy) {
case AUTO:
// Sometimes the pod disappears, and the status could be:
// - Unexpected update-compatibility command error.
// - Unexpected update-compatibility command exit code.
assertRecreateUpdateTypeStatus(kc, "Unexpected update-compatibility command");
break;
case RECREATE_ON_IMAGE_CHANGE:
assertRecreateUpdateTypeStatus(kc, "Image changed");
break;
}
Awaitility.await()
.ignoreExceptions()
.untilAsserted(() -> {
var sts = stsGetter.get();
assertEquals(kc.getSpec().getInstances(), sts.getSpec().getReplicas()); // just checking specs as we're using a non-existing image
assertEquals(newImage, sts.getSpec().getTemplate().getSpec().getContainers().get(0).getImage());
var currentKc = k8sclient.resources(Keycloak.class)
.inNamespace(namespace).withName(kc.getMetadata().getName()).get();
assertKeycloakStatusCondition(currentKc, KeycloakStatusCondition.READY, false, "Waiting for more replicas");
});
if (updateStrategy == UpdateStrategy.AUTO) {
assertUpdateJobExists(kc);
@ -106,23 +81,20 @@ public class UpgradeTest extends BaseOperatorTest {
@EnumSource(UpdateStrategy.class)
public void testCacheMaxCount(UpdateStrategy updateStrategy) throws InterruptedException {
var kc = createInitialDeployment(updateStrategy);
var upgradeCondition = assertUnknownUpdateTypeStatus(kc);
deployKeycloak(k8sclient, kc, true);
assertUnknownUpdateTypeStatus(kc);
await(upgradeCondition);
// changing the local cache max-count should never use the recreate upgrade type
kc.getSpec().getAdditionalOptions().add(new ValueOrSecret("cache-embedded-authorization-max-count", "10"));
var upgradeCondition = eventuallyRollingUpgradeStatus(k8sclient, kc);
upgradeCondition = switch (updateStrategy) {
case AUTO -> eventuallyRollingUpgradeStatus(k8sclient, kc, "Compatible changes detected.");
case RECREATE_ON_IMAGE_CHANGE -> eventuallyRollingUpgradeStatus(k8sclient, kc, "Image unchanged");
};
deployKeycloak(k8sclient, kc, true);
await(upgradeCondition);
switch (updateStrategy) {
case AUTO:
assertRollingUpdateTypeStatus(kc, "Compatible changes detected.");
break;
case RECREATE_ON_IMAGE_CHANGE:
assertRollingUpdateTypeStatus(kc, "Image unchanged");
break;
}
if (updateStrategy == UpdateStrategy.AUTO) {
assertUpdateJobExists(kc);
@ -138,25 +110,19 @@ public class UpgradeTest extends BaseOperatorTest {
var kc = createInitialDeployment(updateStrategy);
// use the base image
kc.getSpec().setImage(null);
var upgradeCondition = assertUnknownUpdateTypeStatus(kc);
deployKeycloak(k8sclient, kc, true);
assertUnknownUpdateTypeStatus(kc);
await(upgradeCondition);
// use the optimized image, auto strategy should use a rolling upgrade
kc.getSpec().setImage(getTestCustomImage());
var upgradeCondition = updateStrategy == UpdateStrategy.AUTO ?
eventuallyRollingUpgradeStatus(k8sclient, kc) :
eventuallyRecreateUpgradeStatus(k8sclient, kc);
upgradeCondition = switch (updateStrategy) {
case AUTO -> eventuallyRollingUpgradeStatus(k8sclient, kc, "Compatible changes detected.");
case RECREATE_ON_IMAGE_CHANGE -> eventuallyRecreateUpgradeStatus(k8sclient, kc, "Image changed");
};
deployKeycloak(k8sclient, kc, true);
await(upgradeCondition);
switch (updateStrategy) {
case AUTO:
assertRollingUpdateTypeStatus(kc, "Compatible changes detected.");
break;
case RECREATE_ON_IMAGE_CHANGE:
assertRecreateUpdateTypeStatus(kc, "Image changed");
break;
}
if (updateStrategy == UpdateStrategy.AUTO) {
assertUpdateJobExists(kc);
@ -171,29 +137,29 @@ public class UpgradeTest extends BaseOperatorTest {
var kc = createInitialDeployment(UpdateStrategy.AUTO);
// use the base image
kc.getSpec().setImage(null);
var upgradeCondition = assertUnknownUpdateTypeStatus(kc);
deployKeycloak(k8sclient, kc, true);
assertUnknownUpdateTypeStatus(kc);
await(upgradeCondition);
// let's trigger a rolling upgrade
var upgradeCondition = eventuallyRollingUpgradeStatus(k8sclient, kc);
upgradeCondition = eventuallyRollingUpgradeStatus(k8sclient, kc, "Compatible changes detected.");
kc.getSpec().setImage(getTestCustomImage());
deployKeycloak(k8sclient, kc, true);
await(upgradeCondition);
assertRollingUpdateTypeStatus(kc, "Compatible changes detected.");
var job = assertUpdateJobExists(kc);
var hash = job.getMetadata().getAnnotations().get(KeycloakUpdateJobDependentResource.KEYCLOAK_CR_HASH_ANNOTATION);
assertEquals(0, containerExitCode(job));
//let's trigger a recreate
upgradeCondition = eventuallyRecreateUpgradeStatus(k8sclient, kc);
upgradeCondition = eventuallyRecreateUpgradeStatus(k8sclient, kc, "Unexpected update-compatibility command");
// enough to crash the Pod and return exit code != 0
kc.getSpec().setImage("quay.io/keycloak/non-existing-keycloak");
disableProbes(kc);
deployKeycloak(k8sclient, kc, false);
await(upgradeCondition);
assertRecreateUpdateTypeStatus(kc, "Unexpected update-compatibility command");
job = assertUpdateJobExists(kc);
var newHash = job.getMetadata().getAnnotations().get(KeycloakUpdateJobDependentResource.KEYCLOAK_CR_HASH_ANNOTATION);
@ -222,27 +188,23 @@ public class UpgradeTest extends BaseOperatorTest {
return maybeExitCode.get();
}
private void assertUnknownUpdateTypeStatus(Keycloak keycloak) {
var current = k8sclient.resource(keycloak).get();
CRAssert.assertKeycloakStatusCondition(current, KeycloakStatusCondition.UPDATE_TYPE, null);
}
private void assertRecreateUpdateTypeStatus(Keycloak keycloak, String reason) {
var current = k8sclient.resource(keycloak).get();
CRAssert.assertKeycloakStatusCondition(current, KeycloakStatusCondition.UPDATE_TYPE, true, reason);
}
private void assertRollingUpdateTypeStatus(Keycloak keycloak, String reason) {
var current = k8sclient.resource(keycloak).get();
CRAssert.assertKeycloakStatusCondition(current, KeycloakStatusCondition.UPDATE_TYPE, false, reason);
private CompletableFuture<Void> assertUnknownUpdateTypeStatus(Keycloak keycloak) {
return k8sclient.resource(keycloak).informOnCondition(kcs -> {
if (kcs.isEmpty() || kcs.get(0).getStatus() == null) {
return false;
}
try {
CRAssert.assertKeycloakStatusCondition(kcs.get(0), KeycloakStatusCondition.UPDATE_TYPE, null);
return true;
} catch (AssertionError e) {
return false;
}
}).thenAccept(unused -> {});
}
private static Keycloak createInitialDeployment(UpdateStrategy updateStrategy) {
var kc = getTestKeycloakDeployment(true);
kc.getSpec().setInstances(3);
if (updateStrategy == null) {
return kc;
}
var kc = getTestKeycloakDeployment(false);
kc.getSpec().setInstances(2);
var updateSpec = new UpdateSpec();
updateSpec.setStrategy(updateStrategy);
kc.getSpec().setUpdateSpec(updateSpec);

View File

@ -288,8 +288,8 @@ public final class CRAssert {
.findFirst();
}
public static CompletableFuture<List<Keycloak>> eventuallyRollingUpgradeStatus(KubernetesClient client, Keycloak keycloak) {
return client.resource(keycloak).informOnCondition(kcs -> {
public static CompletableFuture<Void> eventuallyRollingUpgradeStatus(KubernetesClient client, Keycloak keycloak, String reason) {
var cf1 = client.resource(keycloak).informOnCondition(kcs -> {
try {
assertKeycloakStatusCondition(kcs.get(0), KeycloakStatusCondition.ROLLING_UPDATE, true, "Rolling out deployment update");
return true;
@ -297,10 +297,19 @@ public final class CRAssert {
return false;
}
});
var cf2 = client.resource(keycloak).informOnCondition(kcs -> {
try {
assertKeycloakStatusCondition(kcs.get(0), KeycloakStatusCondition.UPDATE_TYPE, false, reason);
return true;
} catch (AssertionError e) {
return false;
}
});
return CompletableFuture.allOf(cf1, cf2);
}
public static CompletableFuture<List<Keycloak>> eventuallyRecreateUpgradeStatus(KubernetesClient client, Keycloak keycloak) {
return client.resource(keycloak).informOnCondition(kcs -> {
public static CompletableFuture<Void> eventuallyRecreateUpgradeStatus(KubernetesClient client, Keycloak keycloak, String reason) {
var cf1 = client.resource(keycloak).informOnCondition(kcs -> {
try {
assertKeycloakStatusCondition(kcs.get(0), KeycloakStatusCondition.READY, false, "Performing Keycloak upgrade");
return true;
@ -308,5 +317,14 @@ public final class CRAssert {
return false;
}
});
var cf2 = client.resource(keycloak).informOnCondition(kcs -> {
try {
assertKeycloakStatusCondition(kcs.get(0), KeycloakStatusCondition.UPDATE_TYPE, true, reason);
return true;
} catch (AssertionError e) {
return false;
}
});
return CompletableFuture.allOf(cf1, cf2);
}
}