diff --git a/.github/workflows/operator-ci.yml b/.github/workflows/operator-ci.yml index c25a72c6440..5777bad3f44 100644 --- a/.github/workflows/operator-ci.yml +++ b/.github/workflows/operator-ci.yml @@ -141,6 +141,7 @@ jobs: -Dkc.operator.keycloak.image=keycloak:${{ env.version_remote }} \ -Dquarkus.kubernetes.env.vars.kc-operator-keycloak-image-pull-policy=Never \ -Dtest.operator.custom.image=custom-keycloak:${{ env.version_remote }} \ + -Dquarkus.kubernetes.env.vars.kc-operator-keycloak-update-pod-deadline-seconds=60 \ --no-transfer-progress -Dtest.operator.deployment=remote test-olm: diff --git a/docs/guides/operator/advanced-configuration.adoc b/docs/guides/operator/advanced-configuration.adoc index 6db681c8d24..87116817ad4 100644 --- a/docs/guides/operator/advanced-configuration.adoc +++ b/docs/guides/operator/advanced-configuration.adoc @@ -495,6 +495,10 @@ When the image field changes, the StatefulSet is scaled down before applying the |On any configuration or image change |The StatefulSet is scaled down before applying the new configuration or image. +|`Auto` +|On incompatible changes +|The {project_name} Operator detects if a rolling or recreate upgrade is possible. + |=== diff --git a/operator/README.md b/operator/README.md index 893a2d7b04e..cac338a0a48 100644 --- a/operator/README.md +++ b/operator/README.md @@ -117,7 +117,7 @@ To avoid skipping tests that are depending on custom Keycloak images, you need t And run the tests passing an extra Java property: ```bash --Dtest.kc.operator.custom.image=custom-keycloak:latest +-Dtest.operator.custom.image=custom-keycloak:latest ``` ### Testing using a pre-built operator image from a remote registry diff --git a/operator/src/main/java/org/keycloak/operator/Config.java b/operator/src/main/java/org/keycloak/operator/Config.java index 4f54d14951e..34b24b164a4 100644 --- a/operator/src/main/java/org/keycloak/operator/Config.java +++ b/operator/src/main/java/org/keycloak/operator/Config.java @@ -34,6 +34,7 @@ public interface Config { String imagePullPolicy(); boolean startOptimized(); int pollIntervalSeconds(); + long updatePodDeadlineSeconds(); ResourceRequirements resources(); Map podLabels(); diff --git a/operator/src/main/java/org/keycloak/operator/Utils.java b/operator/src/main/java/org/keycloak/operator/Utils.java index 430ca6e5424..7d8de6ef7df 100644 --- a/operator/src/main/java/org/keycloak/operator/Utils.java +++ b/operator/src/main/java/org/keycloak/operator/Utils.java @@ -17,23 +17,30 @@ package org.keycloak.operator; +import io.fabric8.kubernetes.api.model.ConfigMap; import io.fabric8.kubernetes.api.model.Container; import io.fabric8.kubernetes.api.model.HasMetadata; import io.fabric8.kubernetes.api.model.ResourceRequirements; +import io.fabric8.kubernetes.api.model.Secret; import io.fabric8.kubernetes.client.KubernetesClient; +import io.fabric8.kubernetes.client.utils.Serialization; import io.javaoperatorsdk.operator.api.reconciler.Context; import io.javaoperatorsdk.operator.processing.event.ResourceID; import io.javaoperatorsdk.operator.processing.event.source.informer.InformerEventSource; import io.quarkus.logging.Log; import org.keycloak.operator.crds.v2alpha1.deployment.Keycloak; +import java.math.BigInteger; import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.time.ZoneOffset; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.util.Base64; import java.util.HashMap; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.function.Function; @@ -111,4 +118,39 @@ public final class Utils { kcContainer.setResources(resourcesSpec); } + public static String hash(List current) { + var messageDigest = getMessageDigest(); + + current.stream() + .map(Utils::getData) + .map(Serialization::asYaml) + .map(Utils::utf8Bytes) + .forEachOrdered(messageDigest::update); + + return new BigInteger(1, messageDigest.digest()).toString(16); + } + + private static MessageDigest getMessageDigest() { + // Uses a fips compliant hash + try { + return MessageDigest.getInstance("SHA-256"); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + } + + private static Object getData(Object object) { + if (object instanceof Secret) { + return ((Secret) object).getData(); + } + if (object instanceof ConfigMap) { + return ((ConfigMap) object).getData(); + } + return object; + } + + private static byte[] utf8Bytes(String string) { + return string.getBytes(StandardCharsets.UTF_8); + } + } diff --git a/operator/src/main/java/org/keycloak/operator/controllers/KeycloakController.java b/operator/src/main/java/org/keycloak/operator/controllers/KeycloakController.java index 0137c9f86a8..9333f8d7fd8 100644 --- a/operator/src/main/java/org/keycloak/operator/controllers/KeycloakController.java +++ b/operator/src/main/java/org/keycloak/operator/controllers/KeycloakController.java @@ -86,6 +86,7 @@ public class KeycloakController implements Reconciler, EventSourceInit UpgradeLogicFactory upgradeLogicFactory; volatile KeycloakDeploymentDependentResource deploymentDependentResource; + volatile KeycloakUpdateJobDependentResource updateJobDependentResource; @Override public Map prepareEventSources(EventSourceContext context) { @@ -106,6 +107,9 @@ public class KeycloakController implements Reconciler, EventSourceInit this.deploymentDependentResource = new KeycloakDeploymentDependentResource(config, watchedResources, distConfigurator); sources.putAll(EventSourceInitializer.nameEventSourcesFromDependentResource(context, this.deploymentDependentResource)); + updateJobDependentResource = new KeycloakUpdateJobDependentResource(config); + sources.putAll(EventSourceInitializer.nameEventSourcesFromDependentResource(context, updateJobDependentResource)); + return sources; } @@ -140,7 +144,7 @@ public class KeycloakController implements Reconciler, EventSourceInit return UpdateControl.updateResource(kc); } - var upgradeLogicControl = upgradeLogicFactory.create(kc, context, deploymentDependentResource) + var upgradeLogicControl = upgradeLogicFactory.create(kc, context, deploymentDependentResource, updateJobDependentResource) .decideUpgrade(); if (upgradeLogicControl.isPresent()) { Log.debug("--- Reconciliation interrupted due to upgrade logic"); diff --git a/operator/src/main/java/org/keycloak/operator/controllers/KeycloakUpdateJobDependentResource.java b/operator/src/main/java/org/keycloak/operator/controllers/KeycloakUpdateJobDependentResource.java new file mode 100644 index 00000000000..41421c18b48 --- /dev/null +++ b/operator/src/main/java/org/keycloak/operator/controllers/KeycloakUpdateJobDependentResource.java @@ -0,0 +1,220 @@ +/* + * Copyright 2025 Red Hat, Inc. and/or its affiliates + * and other contributors as indicated by the @author tags. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.keycloak.operator.controllers; + +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; + +import io.fabric8.kubernetes.api.model.ContainerFluent; +import io.fabric8.kubernetes.api.model.ObjectMeta; +import io.fabric8.kubernetes.api.model.ObjectMetaBuilder; +import io.fabric8.kubernetes.api.model.PodSpec; +import io.fabric8.kubernetes.api.model.PodSpecBuilder; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.batch.v1.Job; +import io.fabric8.kubernetes.api.model.batch.v1.JobBuilder; +import io.fabric8.kubernetes.api.model.batch.v1.JobSpecFluent; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.CRUDKubernetesDependentResource; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependentResourceConfigBuilder; +import org.keycloak.operator.Config; +import org.keycloak.operator.Constants; +import org.keycloak.operator.ContextUtils; +import org.keycloak.operator.Utils; +import org.keycloak.operator.crds.v2alpha1.CRDUtils; +import org.keycloak.operator.crds.v2alpha1.deployment.Keycloak; + +public class KeycloakUpdateJobDependentResource extends CRUDKubernetesDependentResource { + + // shared volume configuration + private static final String WORK_DIR_VOLUME_NAME = "workdir"; + private static final String WORK_DIR_VOLUME_MOUNT_PATH = "/mnt/workdir"; + private static final String UPDATES_FILE_PATH = WORK_DIR_VOLUME_MOUNT_PATH + "/updates.json"; + + // Annotations + public static final String KEYCLOAK_CR_HASH_ANNOTATION = "operator.keycloak.org/keycloak-hash"; + + // container configuration + private static final String INIT_CONTAINER_NAME = "actual"; + private static final String CONTAINER_NAME = "desired"; + private static final List INIT_CONTAINER_ARGS = List.of("update-compatibility", "metadata", "--file", UPDATES_FILE_PATH); + private static final List CONTAINER_ARGS = List.of("update-compatibility", "check", "--file", UPDATES_FILE_PATH); + + // Job and Pod defaults + // Pod is restarted if it fails with an exit code != 0, and we don't want that. + private static final int JOB_RETRIES = 0; + // Job time to live + private static final int JOB_TIME_TO_LIVE_SECONDS = (int) TimeUnit.MINUTES.toSeconds(30); + + // container args to replace + private static final Set START_ARGS = Set.of("start", "start-dev"); + + private final Config operatorConfig; + + public KeycloakUpdateJobDependentResource(Config config) { + super(Job.class); + operatorConfig = config; + this.configureWith(new KubernetesDependentResourceConfigBuilder() + .withLabelSelector(Constants.DEFAULT_LABELS_AS_STRING) + .build()); + } + + @Override + protected Job desired(Keycloak primary, Context context) { + var builder = new JobBuilder(); + builder.withMetadata(createMetadata(jobName(primary), primary)); + var specBuilder = builder.withNewSpec(); + addPodSpecTemplate(specBuilder, primary, context); + // we don't need retries; we use exit code != 1 to signal the upgrade decision. + specBuilder.withBackoffLimit(JOB_RETRIES); + // Remove the job after 30 minutes. + specBuilder.withTtlSecondsAfterFinished(JOB_TIME_TO_LIVE_SECONDS); + specBuilder.endSpec(); + return builder.build(); + } + + public static boolean isJobFromCurrentKeycloakCr(Job job, Keycloak keycloak) { + var annotations = job.getMetadata().getAnnotations(); + var hash = annotations.get(KEYCLOAK_CR_HASH_ANNOTATION); + return Objects.equals(hash, keycloakHash(keycloak)); + } + + public static String jobName(Keycloak keycloak) { + return keycloak.getMetadata().getName() + "-update-job"; + } + + private static String podName(Keycloak keycloak) { + return keycloak.getMetadata().getName() + "-update-pod"; + } + + private static ObjectMeta createMetadata(String name, Keycloak keycloak) { + var builder = new ObjectMetaBuilder(); + builder.withName(name) + .withNamespace(keycloak.getMetadata().getNamespace()) + .withLabels(Utils.allInstanceLabels(keycloak)) + .withAnnotations(Map.of(KEYCLOAK_CR_HASH_ANNOTATION, keycloakHash(keycloak))); + return builder.build(); + } + + private void addPodSpecTemplate(JobSpecFluent builder, Keycloak keycloak, Context context) { + var podTemplate = builder.withNewTemplate(); + podTemplate.withMetadata(createMetadata(podName(keycloak), keycloak)); + podTemplate.withSpec(createPodSpec(context)); + podTemplate.endTemplate(); + } + + private PodSpec createPodSpec(Context context) { + var allVolumes = getAllVolumes(context); + Collection requiredVolumes = new HashSet<>(); + var builder = new PodSpecBuilder(); + builder.withRestartPolicy("Never"); + addInitContainer(builder, context, allVolumes.keySet(), requiredVolumes); + addContainer(builder, context, allVolumes.keySet(), requiredVolumes); + builder.addNewVolume() + .withName(WORK_DIR_VOLUME_NAME) + .withNewEmptyDir() + .endEmptyDir() + .endVolume(); + // add volumes to the pod + requiredVolumes.stream() + .map(allVolumes::get) + .forEach(volume -> builder.addNewVolumeLike(volume).endVolume()); + // For test KeycloakDeploymentTest#testDeploymentDurability + // it uses a pause image, which never ends. + // After this seconds, the job is terminated allowing the test to complete. + builder.withActiveDeadlineSeconds(operatorConfig.keycloak().updatePodDeadlineSeconds()); + return builder.build(); + } + + private static void addInitContainer(PodSpecBuilder builder, Context context, Collection availableVolumes, Collection requiredVolumes) { + var existing = CRDUtils.firstContainerOf(ContextUtils.getCurrentStatefulSet(context)).orElseThrow(); + var containerBuilder = builder.addNewInitContainerLike(existing); + configureContainer(containerBuilder, INIT_CONTAINER_NAME, INIT_CONTAINER_ARGS, availableVolumes, requiredVolumes); + containerBuilder.endInitContainer(); + } + + private static void addContainer(PodSpecBuilder builder, Context context, Collection availableVolumes, Collection requiredVolumes) { + var existing = CRDUtils.firstContainerOf(ContextUtils.getDesiredStatefulSet(context)).orElseThrow(); + var containerBuilder = builder.addNewContainerLike(existing); + configureContainer(containerBuilder, CONTAINER_NAME, CONTAINER_ARGS, availableVolumes, requiredVolumes); + containerBuilder.endContainer(); + } + + private static void configureContainer(ContainerFluent containerBuilder, String name, List args, Collection availableVolumes, Collection requiredVolumes) { + containerBuilder.withName(name); + containerBuilder.withArgs(replaceStartWithUpdateCommand(containerBuilder.getArgs(), args)); + + // remove volume devices + containerBuilder.withVolumeDevices(); + + // add existing volume mounts + var volumeMounts = containerBuilder.buildVolumeMounts(); + if (volumeMounts != null) { + var newVolumeMounts = volumeMounts.stream() + .filter(volumeMount -> availableVolumes.contains(volumeMount.getName())) + .filter(volumeMount -> !volumeMount.getName().startsWith("kube-api")) + .peek(volumeMount -> requiredVolumes.add(volumeMount.getName())) + .toList(); + containerBuilder.withVolumeMounts(newVolumeMounts); + } + + // remove restart policy and probes + containerBuilder.withRestartPolicy(null); + containerBuilder.withReadinessProbe(null); + containerBuilder.withLivenessProbe(null); + containerBuilder.withStartupProbe(null); + + // add the shared volume + containerBuilder.addNewVolumeMount() + .withName(WORK_DIR_VOLUME_NAME) + .withMountPath(WORK_DIR_VOLUME_MOUNT_PATH) + .endVolumeMount(); + } + + private Map getAllVolumes(Context context) { + Map allVolumes = new HashMap<>(); + Consumer volumeConsumer = volume -> allVolumes.put(volume.getName(), volume); + CRDUtils.volumesFromStatefulSet(ContextUtils.getCurrentStatefulSet(context)).forEach(volumeConsumer); + CRDUtils.volumesFromStatefulSet(ContextUtils.getDesiredStatefulSet(context)).forEach(volumeConsumer); + return allVolumes; + } + + + private static List replaceStartWithUpdateCommand(List currentArgs, List updateArgs) { + return currentArgs.stream(). + mapMulti((arg, downstream) -> { + if (START_ARGS.contains(arg)) { + updateArgs.forEach(downstream); + return; + } + downstream.accept(arg); + }).toList(); + } + + private static String keycloakHash(Keycloak keycloak) { + return Utils.hash(List.of(keycloak.getSpec())); + } + +} diff --git a/operator/src/main/java/org/keycloak/operator/controllers/WatchedResources.java b/operator/src/main/java/org/keycloak/operator/controllers/WatchedResources.java index 76e5b21fab9..2fd353a2716 100644 --- a/operator/src/main/java/org/keycloak/operator/controllers/WatchedResources.java +++ b/operator/src/main/java/org/keycloak/operator/controllers/WatchedResources.java @@ -17,24 +17,17 @@ package org.keycloak.operator.controllers; -import io.fabric8.kubernetes.api.model.ConfigMap; -import io.fabric8.kubernetes.api.model.HasMetadata; -import io.fabric8.kubernetes.api.model.Secret; -import io.fabric8.kubernetes.api.model.apps.StatefulSet; -import io.fabric8.kubernetes.client.KubernetesClient; -import io.fabric8.kubernetes.client.utils.Serialization; - -import java.math.BigInteger; -import java.nio.charset.StandardCharsets; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; import java.util.Arrays; import java.util.List; import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; +import io.fabric8.kubernetes.api.model.HasMetadata; +import io.fabric8.kubernetes.api.model.apps.StatefulSet; +import io.fabric8.kubernetes.client.KubernetesClient; import jakarta.enterprise.context.ApplicationScoped; +import org.keycloak.operator.Utils; @ApplicationScoped public class WatchedResources { @@ -53,25 +46,15 @@ public class WatchedResources { deployment.getMetadata().getAnnotations().put(WatchedResources.KEYCLOAK_MISSING_ANNOTATION_PREFIX + plural, Boolean.valueOf(current.size() < names.size()).toString()); deployment.getMetadata().getAnnotations().put(WatchedResources.KEYCLOAK_WATCHING_ANNOTATION_PREFIX + plural, - names.stream().collect(Collectors.joining(";"))); + String.join(";", names)); deployment.getSpec().getTemplate().getMetadata().getAnnotations() .put(WatchedResources.KEYCLOAK_WATCHED_HASH_ANNOTATION_PREFIX + HasMetadata.getKind(type).toLowerCase() + "-hash", getHash(current)); } - static Object getData(Object object) { - if (object instanceof Secret) { - return ((Secret) object).getData(); - } - if (object instanceof ConfigMap) { - return ((ConfigMap) object).getData(); - } - return object; - } - public boolean hasMissing(StatefulSet deployment) { return deployment.getMetadata().getAnnotations().entrySet().stream() .anyMatch(e -> e.getKey().startsWith(WatchedResources.KEYCLOAK_MISSING_ANNOTATION_PREFIX) - && Boolean.valueOf(e.getValue())); + && Boolean.parseBoolean(e.getValue())); } public boolean isWatching(StatefulSet deployment) { @@ -81,19 +64,7 @@ public class WatchedResources { } public String getHash(List current) { - try { - // using hashes as it's more robust than resource versions that can change e.g. - // just when adding a label - // Uses a fips compliant hash - var messageDigest = MessageDigest.getInstance("SHA-256"); - - current.stream().map(s -> Serialization.asYaml(getData(s)).getBytes(StandardCharsets.UTF_8)) - .forEachOrdered(s -> messageDigest.update(s)); - - return new BigInteger(1, messageDigest.digest()).toString(16); - } catch (NoSuchAlgorithmException e) { - throw new RuntimeException(e); - } + return Utils.hash(current); } private List fetch(List names, Class type, String namespace, @@ -109,4 +80,4 @@ public class WatchedResources { .map(watching -> watching.split(";")).map(Arrays::asList).orElse(List.of()); } -} \ No newline at end of file +} diff --git a/operator/src/main/java/org/keycloak/operator/crds/v2alpha1/CRDUtils.java b/operator/src/main/java/org/keycloak/operator/crds/v2alpha1/CRDUtils.java index 3580a4987cd..c01736b0392 100644 --- a/operator/src/main/java/org/keycloak/operator/crds/v2alpha1/CRDUtils.java +++ b/operator/src/main/java/org/keycloak/operator/crds/v2alpha1/CRDUtils.java @@ -30,6 +30,7 @@ import com.fasterxml.jackson.databind.JsonNode; import io.fabric8.kubernetes.api.model.Container; import io.fabric8.kubernetes.api.model.PodSpec; import io.fabric8.kubernetes.api.model.PodTemplateSpec; +import io.fabric8.kubernetes.api.model.Volume; import io.fabric8.kubernetes.api.model.apps.StatefulSet; import io.fabric8.kubernetes.api.model.apps.StatefulSetSpec; import io.javaoperatorsdk.operator.api.reconciler.Context; @@ -104,4 +105,15 @@ public final class CRDUtils { final var kubernetesSerialization = context.getClient().getKubernetesSerialization(); return kubernetesSerialization.convertValue(value, JsonNode.class); } + + public static Stream volumesFromStatefulSet(StatefulSet statefulSet) { + return Optional.of(statefulSet) + .map(StatefulSet::getSpec) + .map(StatefulSetSpec::getTemplate) + .map(PodTemplateSpec::getSpec) + .map(PodSpec::getVolumes) + .stream() + .flatMap(Collection::stream); + + } } diff --git a/operator/src/main/java/org/keycloak/operator/upgrade/UpdateStrategy.java b/operator/src/main/java/org/keycloak/operator/upgrade/UpdateStrategy.java index f04a9446255..d9ad176af53 100644 --- a/operator/src/main/java/org/keycloak/operator/upgrade/UpdateStrategy.java +++ b/operator/src/main/java/org/keycloak/operator/upgrade/UpdateStrategy.java @@ -23,5 +23,9 @@ import com.fasterxml.jackson.annotation.JsonPropertyDescription; public enum UpdateStrategy { @JsonPropertyDescription("Shutdown the Keycloak cluster before applying the new changes.") @JsonProperty("Recreate") - RECREATE + RECREATE, + + @JsonPropertyDescription("Automatically detects if the Keycloak CR changes requires a rolling or recreate update.") + @JsonProperty("Auto") + AUTO } diff --git a/operator/src/main/java/org/keycloak/operator/upgrade/UpgradeLogicFactory.java b/operator/src/main/java/org/keycloak/operator/upgrade/UpgradeLogicFactory.java index 5940685389e..606befc1daa 100644 --- a/operator/src/main/java/org/keycloak/operator/upgrade/UpgradeLogicFactory.java +++ b/operator/src/main/java/org/keycloak/operator/upgrade/UpgradeLogicFactory.java @@ -20,9 +20,11 @@ package org.keycloak.operator.upgrade; import io.javaoperatorsdk.operator.api.reconciler.Context; import jakarta.enterprise.context.ApplicationScoped; import org.keycloak.operator.controllers.KeycloakDeploymentDependentResource; +import org.keycloak.operator.controllers.KeycloakUpdateJobDependentResource; import org.keycloak.operator.crds.v2alpha1.deployment.Keycloak; import org.keycloak.operator.crds.v2alpha1.deployment.spec.UpdateSpec; import org.keycloak.operator.upgrade.impl.AlwaysRecreateUpgradeLogic; +import org.keycloak.operator.upgrade.impl.AutoUpgradeLogic; import org.keycloak.operator.upgrade.impl.RecreateOnImageChangeUpgradeLogic; /** @@ -32,13 +34,14 @@ import org.keycloak.operator.upgrade.impl.RecreateOnImageChangeUpgradeLogic; public class UpgradeLogicFactory { @SuppressWarnings("removal") - public UpgradeLogic create(Keycloak keycloak, Context context, KeycloakDeploymentDependentResource dependentResource) { + public UpgradeLogic create(Keycloak keycloak, Context context, KeycloakDeploymentDependentResource dependentResource, KeycloakUpdateJobDependentResource updateJobDependentResource) { var strategy = UpdateSpec.findUpdateStrategy(keycloak); if (strategy.isEmpty()) { return new RecreateOnImageChangeUpgradeLogic(context, keycloak, dependentResource); } return switch (strategy.get()) { case RECREATE -> new AlwaysRecreateUpgradeLogic(context, keycloak, dependentResource); + case AUTO -> new AutoUpgradeLogic(context, keycloak, dependentResource, updateJobDependentResource); }; } diff --git a/operator/src/main/java/org/keycloak/operator/upgrade/impl/AutoUpgradeLogic.java b/operator/src/main/java/org/keycloak/operator/upgrade/impl/AutoUpgradeLogic.java new file mode 100644 index 00000000000..8c018cc0e01 --- /dev/null +++ b/operator/src/main/java/org/keycloak/operator/upgrade/impl/AutoUpgradeLogic.java @@ -0,0 +1,184 @@ +/* + * Copyright 2025 Red Hat, Inc. and/or its affiliates + * and other contributors as indicated by the @author tags. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.keycloak.operator.upgrade.impl; + +import java.util.Collection; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Stream; + +import io.fabric8.kubernetes.api.model.ContainerState; +import io.fabric8.kubernetes.api.model.ContainerStateTerminated; +import io.fabric8.kubernetes.api.model.ContainerStatus; +import io.fabric8.kubernetes.api.model.Pod; +import io.fabric8.kubernetes.api.model.PodStatus; +import io.fabric8.kubernetes.api.model.batch.v1.Job; +import io.fabric8.kubernetes.client.KubernetesClient; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.UpdateControl; +import io.quarkus.logging.Log; +import org.keycloak.operator.controllers.KeycloakDeploymentDependentResource; +import org.keycloak.operator.controllers.KeycloakUpdateJobDependentResource; +import org.keycloak.operator.crds.v2alpha1.CRDUtils; +import org.keycloak.operator.crds.v2alpha1.deployment.Keycloak; + +public class AutoUpgradeLogic extends BaseUpgradeLogic { + + private final KeycloakUpdateJobDependentResource updateJobResource; + + public AutoUpgradeLogic(Context context, Keycloak keycloak, KeycloakDeploymentDependentResource statefulSetResource, KeycloakUpdateJobDependentResource updateJobResource) { + super(context, keycloak, statefulSetResource); + this.updateJobResource = updateJobResource; + } + + @Override + Optional> onUpgrade() { + var existingJob = context.getSecondaryResource(Job.class); + if (existingJob.isEmpty()) { + updateJobResource.reconcile(keycloak, context); + Log.debug("Creating Update Job"); + return Optional.of(UpdateControl.noUpdate()); + } + + if (isJobRunning(existingJob.get())) { + Log.debug("Update Job is running. Waiting until terminated."); + return Optional.of(UpdateControl.noUpdate()); + } + + // Keycloak CR may be updated while the job is running; we need to delete and start over. + if (!KeycloakUpdateJobDependentResource.isJobFromCurrentKeycloakCr(existingJob.get(), keycloak)) { + context.getClient().resource(existingJob.get()).lockResourceVersion().delete(); + return Optional.of(UpdateControl.noUpdate()); + } + + var pod = findPodForJob(context.getClient(), existingJob.get()); + if (pod.isEmpty()) { + // TODO some cases the pod is removed. Do we start over or use recreate update? + Log.warn("Pod for Update Job not found."); + decideRecreateUpgrade(); + return Optional.empty(); + } + + checkUpgradeType(pod.get()); + return Optional.empty(); + } + + private boolean isJobRunning(Job job) { + var status = job.getStatus(); + Log.debugf("Update Job Status:%n%s", CRDUtils.toJsonNode(status, context).toPrettyString()); + var completed = Optional.ofNullable(status).stream() + .mapMultiToInt((jobStatus, downstream) -> { + if (jobStatus.getSucceeded() != null) { + downstream.accept(jobStatus.getSucceeded()); + } + if (jobStatus.getFailed() != null) { + downstream.accept(jobStatus.getFailed()); + } + }).sum(); + // we only have a single pod, so completed will be zero if running or 1 if finished. + return completed == 0; + } + + private void checkUpgradeType(Pod pod) { + // check init container. + var initContainerExitCode = initContainer(pod) + .map(AutoUpgradeLogic::exitCode); + if (initContainerExitCode.isEmpty()) { + Log.warn("InitContainer not found for Update Job."); + decideRecreateUpgrade(); + return; + } + if (initContainerExitCode.get() != 0) { + Log.warn("InitContainer unexpectedly failed for Update Job."); + decideRecreateUpgrade(); + return; + } + + // check container. + var containerExitCode = container(pod) + .map(AutoUpgradeLogic::exitCode); + if (containerExitCode.isEmpty()) { + Log.warn("Container not found for Update Job."); + decideRecreateUpgrade(); + return; + } + switch (containerExitCode.get()) { + case 0: { + decideRollingUpgrade(); + return; + } + case 1: { + Log.warn("Container has an unexpected error for Update Job"); + decideRecreateUpgrade(); + return; + } + case 2: { + Log.warn("Container has an invalid arguments for Update Job."); + decideRecreateUpgrade(); + return; + } + case 3: { + Log.warn("Rolling Update not possible."); + decideRecreateUpgrade(); + return; + } + case 4: { + Log.warn("Feature 'rolling-update' not enabled."); + decideRecreateUpgrade(); + return; + } + default: { + Log.warnf("Unexpected Update Job exit code: " + containerExitCode.get()); + decideRecreateUpgrade(); + } + } + } + + public static Optional findPodForJob(KubernetesClient client, Job job) { + return client.pods() + .inNamespace(job.getMetadata().getNamespace()) + .withLabelSelector(Objects.requireNonNull(job.getSpec().getSelector())) + .list() + .getItems() + .stream() + .findFirst(); + } + + private static Optional initContainer(Pod pod) { + return java.util.Optional.ofNullable(pod.getStatus()) + .map(PodStatus::getInitContainerStatuses) + .map(Collection::stream) + .flatMap(Stream::findFirst); + } + + public static Optional container(Pod pod) { + return java.util.Optional.ofNullable(pod.getStatus()) + .map(PodStatus::getContainerStatuses) + .map(Collection::stream) + .flatMap(Stream::findFirst); + } + + public static int exitCode(ContainerStatus containerStatus) { + return Optional.ofNullable(containerStatus) + .map(ContainerStatus::getState) + .map(ContainerState::getTerminated) + .map(ContainerStateTerminated::getExitCode) + .orElse(1); + } + +} diff --git a/operator/src/main/resources/application.properties b/operator/src/main/resources/application.properties index c26236367b7..85f9b8558e1 100644 --- a/operator/src/main/resources/application.properties +++ b/operator/src/main/resources/application.properties @@ -9,6 +9,8 @@ kc.operator.keycloak.image=${RELATED_IMAGE_KEYCLOAK:quay.io/keycloak/keycloak:ni kc.operator.keycloak.image-pull-policy=Always kc.operator.keycloak.start-optimized=false kc.operator.keycloak.poll-interval-seconds=60 +# Update Pod timeout: 5min +kc.operator.keycloak.update-pod-deadline-seconds=300 # Keycloak container default requests/limits resources kc.operator.keycloak.resources.requests.memory=1700Mi kc.operator.keycloak.resources.limits.memory=2Gi diff --git a/operator/src/test/java/org/keycloak/operator/testsuite/integration/UpgradeTest.java b/operator/src/test/java/org/keycloak/operator/testsuite/integration/UpgradeTest.java index 589e6c97c6a..2d610a21b2f 100644 --- a/operator/src/test/java/org/keycloak/operator/testsuite/integration/UpgradeTest.java +++ b/operator/src/test/java/org/keycloak/operator/testsuite/integration/UpgradeTest.java @@ -18,25 +18,35 @@ package org.keycloak.operator.testsuite.integration; import java.util.List; +import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.stream.Stream; +import io.fabric8.kubernetes.api.model.batch.v1.Job; +import io.fabric8.kubernetes.api.model.batch.v1.JobStatus; import io.quarkus.test.junit.QuarkusTest; import org.awaitility.Awaitility; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfSystemProperty; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; import org.keycloak.common.Profile; +import org.keycloak.operator.controllers.KeycloakUpdateJobDependentResource; import org.keycloak.operator.crds.v2alpha1.deployment.Keycloak; import org.keycloak.operator.crds.v2alpha1.deployment.KeycloakStatusCondition; import org.keycloak.operator.crds.v2alpha1.deployment.ValueOrSecret; import org.keycloak.operator.crds.v2alpha1.deployment.spec.FeatureSpec; import org.keycloak.operator.crds.v2alpha1.deployment.spec.UpdateSpec; import org.keycloak.operator.upgrade.UpdateStrategy; +import org.keycloak.operator.upgrade.impl.AutoUpgradeLogic; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.keycloak.operator.testsuite.utils.CRAssert.assertKeycloakStatusCondition; import static org.keycloak.operator.testsuite.utils.CRAssert.eventuallyRecreateUpgradeStatus; import static org.keycloak.operator.testsuite.utils.CRAssert.eventuallyRollingUpgradeStatus; @@ -48,7 +58,8 @@ public class UpgradeTest extends BaseOperatorTest { private static Stream upgradeStrategy() { return Stream.of( null, - UpdateStrategy.RECREATE + UpdateStrategy.RECREATE, + UpdateStrategy.AUTO ); } @@ -79,6 +90,10 @@ public class UpgradeTest extends BaseOperatorTest { .inNamespace(namespace).withName(kc.getMetadata().getName()).get(); assertKeycloakStatusCondition(currentKc, KeycloakStatusCondition.READY, false, "Waiting for more replicas"); }); + + if (updateStrategy == UpdateStrategy.AUTO) { + assertUpdateJobExists(kc); + } } @ParameterizedTest(name = "testCacheMaxCount-{0}") @@ -97,6 +112,92 @@ public class UpgradeTest extends BaseOperatorTest { deployKeycloak(k8sclient, kc, true); await(upgradeCondition); + + if (updateStrategy == UpdateStrategy.AUTO) { + assertUpdateJobExists(kc); + } + } + + @ParameterizedTest(name = "testOptimizedImage-{0}") + @MethodSource("upgradeStrategy") + @EnabledIfSystemProperty(named = OPERATOR_CUSTOM_IMAGE, matches = ".+") + public void testOptimizedImage(UpdateStrategy updateStrategy) throws InterruptedException { + // In GHA, the custom image is an optimized image of the base image. + // We should be able to do a zero-downtime upgrade with Auto strategy. + var kc = createInitialDeployment(updateStrategy); + // use the base image + kc.getSpec().setImage(null); + deployKeycloak(k8sclient, kc, true); + + // use the optimized image, auto strategy should use a rolling upgrade + kc.getSpec().setImage(getTestCustomImage()); + var upgradeCondition = updateStrategy == UpdateStrategy.AUTO ? + eventuallyRollingUpgradeStatus(k8sclient, kc) : + eventuallyRecreateUpgradeStatus(k8sclient, kc); + + deployKeycloak(k8sclient, kc, true); + + await(upgradeCondition); + + if (updateStrategy == UpdateStrategy.AUTO) { + assertUpdateJobExists(kc); + } + } + + @EnabledIfSystemProperty(named = OPERATOR_CUSTOM_IMAGE, matches = ".+") + @Test + public void testNoJobReuse() throws InterruptedException { + // In GHA, the custom image is an optimized image of the base image. + // We should be able to do a zero-downtime upgrade with Auto strategy. + var kc = createInitialDeployment(UpdateStrategy.AUTO); + // use the base image + kc.getSpec().setImage(null); + deployKeycloak(k8sclient, kc, true); + + // let's trigger a rolling upgrade + var upgradeCondition = eventuallyRollingUpgradeStatus(k8sclient, kc); + kc.getSpec().setImage(getTestCustomImage()); + + deployKeycloak(k8sclient, kc, true); + + await(upgradeCondition); + var job = assertUpdateJobExists(kc); + var hash = job.getMetadata().getAnnotations().get(KeycloakUpdateJobDependentResource.KEYCLOAK_CR_HASH_ANNOTATION); + assertEquals(0, containerExitCode(job)); + + //let's trigger a recreate + upgradeCondition = eventuallyRecreateUpgradeStatus(k8sclient, kc); + // enough to crash the Pod and return exit code != 0 + kc.getSpec().setImage("quay.io/keycloak/non-existing-keycloak"); + + deployKeycloak(k8sclient, kc, false); + + await(upgradeCondition); + job = assertUpdateJobExists(kc); + var newHash = job.getMetadata().getAnnotations().get(KeycloakUpdateJobDependentResource.KEYCLOAK_CR_HASH_ANNOTATION); + assertNotEquals(hash, newHash); + assertNotEquals(0, containerExitCode(job)); + } + + private Job assertUpdateJobExists(Keycloak keycloak) { + var job = k8sclient.batch().v1().jobs() + .inNamespace(keycloak.getMetadata().getNamespace()) + .withName(KeycloakUpdateJobDependentResource.jobName(keycloak)) + .get(); + assertNotNull(job); + var maybeStatus = Optional.ofNullable(job.getStatus()); + var finished = maybeStatus.map(JobStatus::getSucceeded).orElse(0) + + maybeStatus.map(JobStatus::getFailed).orElse(0); + assertEquals(1, finished); + return job; + } + + private int containerExitCode(Job job) { + var maybeExitCode = AutoUpgradeLogic.findPodForJob(k8sclient, job) + .flatMap(AutoUpgradeLogic::container) + .map(AutoUpgradeLogic::exitCode); + assertTrue(maybeExitCode.isPresent()); + return maybeExitCode.get(); } private static Keycloak createInitialDeployment(UpdateStrategy updateStrategy) { diff --git a/operator/src/test/resources/application.properties b/operator/src/test/resources/application.properties index 680977d3c3c..0b08073e372 100644 --- a/operator/src/test/resources/application.properties +++ b/operator/src/test/resources/application.properties @@ -8,4 +8,6 @@ quarkus.log.level=INFO kc.operator.keycloak.pod-labels."test.label"=foobar kc.operator.keycloak.pod-labels."testLabelWithExpression"=${OPERATOR_TEST_LABEL_EXPRESSION} # allow the watching tests to complete more quickly -kc.operator.keycloak.poll-interval-seconds=10 \ No newline at end of file +kc.operator.keycloak.poll-interval-seconds=10 +# Update Pod timeout reduced to 1 min for testing +kc.operator.keycloak.update-pod-deadline-seconds=60 diff --git a/quarkus/runtime/src/main/java/org/keycloak/quarkus/runtime/cli/command/AbstractUpdatesCommand.java b/quarkus/runtime/src/main/java/org/keycloak/quarkus/runtime/cli/command/AbstractUpdatesCommand.java index 4dcd6f66566..fb422617119 100644 --- a/quarkus/runtime/src/main/java/org/keycloak/quarkus/runtime/cli/command/AbstractUpdatesCommand.java +++ b/quarkus/runtime/src/main/java/org/keycloak/quarkus/runtime/cli/command/AbstractUpdatesCommand.java @@ -21,10 +21,13 @@ import java.io.File; import java.util.List; import java.util.function.Predicate; +import org.keycloak.common.Profile; import org.keycloak.config.OptionCategory; +import org.keycloak.quarkus.runtime.Environment; import org.keycloak.quarkus.runtime.cli.PropertyException; import org.keycloak.quarkus.runtime.compatibility.CompatibilityManager; import org.keycloak.quarkus.runtime.compatibility.CompatibilityManagerImpl; +import org.keycloak.quarkus.runtime.compatibility.CompatibilityResult; import picocli.CommandLine; public abstract class AbstractUpdatesCommand extends AbstractCommand implements Runnable { @@ -45,12 +48,22 @@ public abstract class AbstractUpdatesCommand extends AbstractCommand implements .toList(); } - static void validateOptionIsPresent(String value, String option) { - if (value == null || value.isBlank()) { - throw new PropertyException("Missing required argument: " + option); + @Override + public void run() { + Environment.updateProfile(true); + if (!Profile.isFeatureEnabled(Profile.Feature.ROLLING_UPDATES)) { + printFeatureDisabled(); + picocli.exit(CompatibilityResult.FEATURE_DISABLED); + return; } + printPreviewWarning(); + validateConfig(); + var exitCode = executeAction(); + picocli.exit(exitCode); } + abstract int executeAction(); + static void validateFileIsNotDirectory(File file, String option) { if (file.isDirectory()) { throw new PropertyException("Incorrect argument %s. Path '%s' is not a valid file.".formatted(option, file.getAbsolutePath())); @@ -76,7 +89,7 @@ public abstract class AbstractUpdatesCommand extends AbstractCommand implements } } - void printPreviewWarning() { + private void printPreviewWarning() { printError("Warning! This command is preview and is not recommended for use in production. It may change or be removed at a future release."); } diff --git a/quarkus/runtime/src/main/java/org/keycloak/quarkus/runtime/cli/command/UpdateCompatibilityCheck.java b/quarkus/runtime/src/main/java/org/keycloak/quarkus/runtime/cli/command/UpdateCompatibilityCheck.java index 86d7740a59a..3b817b3f0bd 100644 --- a/quarkus/runtime/src/main/java/org/keycloak/quarkus/runtime/cli/command/UpdateCompatibilityCheck.java +++ b/quarkus/runtime/src/main/java/org/keycloak/quarkus/runtime/cli/command/UpdateCompatibilityCheck.java @@ -20,9 +20,7 @@ package org.keycloak.quarkus.runtime.cli.command; import java.io.File; import java.io.IOException; -import org.keycloak.common.Profile; import org.keycloak.quarkus.runtime.cli.PropertyException; -import org.keycloak.quarkus.runtime.compatibility.CompatibilityResult; import org.keycloak.quarkus.runtime.compatibility.ServerInfo; import org.keycloak.util.JsonSerialization; import picocli.CommandLine; @@ -42,19 +40,12 @@ public class UpdateCompatibilityCheck extends AbstractUpdatesCommand { String inputFile; @Override - public void run() { - if (!Profile.isFeatureEnabled(Profile.Feature.ROLLING_UPDATES)) { - printFeatureDisabled(); - picocli.exit(CompatibilityResult.FEATURE_DISABLED); - return; - } - printPreviewWarning(); - validateConfig(); + int executeAction() { var info = readServerInfo(); var result = compatibilityManager.isCompatible(info); result.errorMessage().ifPresent(this::printError); result.endMessage().ifPresent(this::printOut); - picocli.exit(result.exitCode()); + return result.exitCode(); } @Override @@ -74,7 +65,9 @@ public class UpdateCompatibilityCheck extends AbstractUpdatesCommand { } private void validateFileParameter() { - validateOptionIsPresent(inputFile, INPUT_OPTION_NAME); + if (inputFile == null || inputFile.isBlank()) { + throw new PropertyException("Missing required argument: " + INPUT_OPTION_NAME); + } var file = new File(inputFile); if (!file.exists()) { throw new PropertyException("Incorrect argument %s. Path '%s' not found".formatted(INPUT_OPTION_NAME, file.getAbsolutePath())); diff --git a/quarkus/runtime/src/main/java/org/keycloak/quarkus/runtime/cli/command/UpdateCompatibilityMetadata.java b/quarkus/runtime/src/main/java/org/keycloak/quarkus/runtime/cli/command/UpdateCompatibilityMetadata.java index 406afef9022..43f7ab82608 100644 --- a/quarkus/runtime/src/main/java/org/keycloak/quarkus/runtime/cli/command/UpdateCompatibilityMetadata.java +++ b/quarkus/runtime/src/main/java/org/keycloak/quarkus/runtime/cli/command/UpdateCompatibilityMetadata.java @@ -21,9 +21,7 @@ import java.io.File; import java.io.IOException; import com.fasterxml.jackson.core.JsonProcessingException; -import org.keycloak.common.Profile; import org.keycloak.quarkus.runtime.cli.PropertyException; -import org.keycloak.quarkus.runtime.compatibility.CompatibilityResult; import org.keycloak.quarkus.runtime.compatibility.ServerInfo; import org.keycloak.util.JsonSerialization; import picocli.CommandLine; @@ -42,17 +40,11 @@ public class UpdateCompatibilityMetadata extends AbstractUpdatesCommand { String outputFile; @Override - public void run() { - if (!Profile.isFeatureEnabled(Profile.Feature.ROLLING_UPDATES)) { - printFeatureDisabled(); - picocli.exit(CompatibilityResult.FEATURE_DISABLED); - return; - } - printPreviewWarning(); - validateConfig(); + int executeAction() { var info = compatibilityManager.current(); printToConsole(info); writeToFile(info); + return 0; } @Override