From f850f8d3e0ebc97e3efed7d5001395bae85c997b Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Thu, 21 Jan 2021 12:25:34 -0500 Subject: [PATCH] introduce a new global flag for denoting K8S-based deployments - In K8S-based installs, only container groups are intended to be used for playbook execution (JTs, adhoc, inventory updates), so in this scenario, other job types have a task impact of zero. - In K8S-based installs, traditional instances have *zero* capacity (because they're only members of the control plane where services - http/s, local control plane execution - run) - This commit also includes some changes that allow for the task manager to launch tasks with task_impact=0 on instances that have capacity=0 (previously, an instance with zero capacity would never be selected as the "execution node"). This means that when IS_K8S=True, any Job Template associated with an Instance Group will never actually go from pending -> running (because there's no capacity - all playbooks must run through Container Groups). For an improved UX, our intention is to introduce logic into the operator install process such that the *default* group that's created at install time is a *Container Group* that's configured to point at the K8S cluster where awx itself is deployed. 
--- awx/main/models/ha.py | 7 +++++++ awx/main/models/jobs.py | 2 ++ awx/main/models/projects.py | 2 ++ awx/main/scheduler/task_manager.py | 9 ++++++--- awx/settings/defaults.py | 7 +++++++ 5 files changed, 24 insertions(+), 3 deletions(-) diff --git a/awx/main/models/ha.py b/awx/main/models/ha.py index 4f96bdc5b1..6dd72861cc 100644 --- a/awx/main/models/ha.py +++ b/awx/main/models/ha.py @@ -147,6 +147,13 @@ class Instance(HasPolicyEditsMixin, BaseModel): return self.rampart_groups.filter(controller__isnull=False).exists() def refresh_capacity(self): + if settings.IS_K8S: + self.capacity = self.cpu = self.memory = self.cpu_capacity = self.mem_capacity = 0 # noqa + self.version = awx_application_version + self.save(update_fields=['capacity', 'version', 'modified', 'cpu', + 'memory', 'cpu_capacity', 'mem_capacity']) + return + cpu = get_cpu_capacity() mem = get_mem_capacity() if self.enabled: diff --git a/awx/main/models/jobs.py b/awx/main/models/jobs.py index 81e17cdebf..70cdfa363a 100644 --- a/awx/main/models/jobs.py +++ b/awx/main/models/jobs.py @@ -1286,6 +1286,8 @@ class SystemJob(UnifiedJob, SystemJobOptions, JobNotificationMixin): @property def task_impact(self): + if settings.IS_K8S: + return 0 return 5 @property diff --git a/awx/main/models/projects.py b/awx/main/models/projects.py index ec14a2ef76..fb948916d0 100644 --- a/awx/main/models/projects.py +++ b/awx/main/models/projects.py @@ -563,6 +563,8 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage @property def task_impact(self): + if settings.IS_K8S: + return 0 return 0 if self.job_type == 'run' else 1 @property diff --git a/awx/main/scheduler/task_manager.py b/awx/main/scheduler/task_manager.py index 50345e5bb7..8d8df5eee2 100644 --- a/awx/main/scheduler/task_manager.py +++ b/awx/main/scheduler/task_manager.py @@ -70,7 +70,7 @@ class TaskManager(): ''' Init AFTER we know this instance of the task manager will run because the lock is acquired. 
''' - instances = Instance.objects.filter(~Q(hostname=None), capacity__gt=0, enabled=True) + instances = Instance.objects.filter(~Q(hostname=None), enabled=True) self.real_instances = {i.hostname: i for i in instances} instances_partial = [SimpleNamespace(obj=instance, @@ -86,7 +86,7 @@ class TaskManager(): capacity_total=rampart_group.capacity, consumed_capacity=0, instances=[]) - for instance in rampart_group.instances.filter(capacity__gt=0, enabled=True).order_by('hostname'): + for instance in rampart_group.instances.filter(enabled=True).order_by('hostname'): if instance.hostname in instances_by_hostname: self.graph[rampart_group.name]['instances'].append(instances_by_hostname[instance.hostname]) @@ -528,7 +528,10 @@ class TaskManager(): break remaining_capacity = self.get_remaining_capacity(rampart_group.name) - if not rampart_group.is_container_group and self.get_remaining_capacity(rampart_group.name) <= 0: + if ( + task.task_impact > 0 and # project updates have a cost of zero + not rampart_group.is_container_group and + self.get_remaining_capacity(rampart_group.name) <= 0): logger.debug("Skipping group {}, remaining_capacity {} <= 0".format( rampart_group.name, remaining_capacity)) continue diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py index 9d21ac68bb..845bbe74d8 100644 --- a/awx/settings/defaults.py +++ b/awx/settings/defaults.py @@ -59,6 +59,13 @@ DATABASES = { } } +# Whether or not the deployment is a K8S-based deployment +# In K8S-based deployments, instances have zero capacity - all playbook +# automation is intended to flow through defined Container Groups that +# interface with some (or some set of) K8S api (which may or may not include +# the K8S cluster where awx itself is running) +IS_K8S = False + # TODO: remove this setting in favor of a default execution environment AWX_EXECUTION_ENVIRONMENT_DEFAULT_IMAGE = 'quay.io/ansible/awx-ee'