introduce a new global flag for denoting K8S-based deployments

- In K8S-based installs, only container groups are intended to be used
  for playbook execution (JTs, adhoc, inventory updates), so in this
  scenario, other job types have a task impact of zero.
- In K8S-based installs, traditional instances have *zero* capacity
  (because they're only members of the control plane where services
  - http/s, local control plane execution - run)
- This commit also includes some changes that allow for the task manager
  to launch tasks with task_impact=0 on instances that have capacity=0
  (previously, an instance with zero capacity would never be selected
  as the "execution node")

This means that when IS_K8S=True, any Job Template associated with an
Instance Group will never actually go from pending -> running (because
there's no capacity - all playbooks must run through Container Groups).
For an improved UX, our intention is to introduce logic into the
operator install process such that the *default* group that's created at
install time is a *Container Group* that's configured to point at the
K8S cluster where awx itself is deployed.
This commit is contained in:
Ryan Petrello
2021-01-21 12:25:34 -05:00
committed by Shane McDonald
parent c29d476919
commit f850f8d3e0
5 changed files with 24 additions and 3 deletions

View File

@@ -147,6 +147,13 @@ class Instance(HasPolicyEditsMixin, BaseModel):
         return self.rampart_groups.filter(controller__isnull=False).exists()

     def refresh_capacity(self):
+        if settings.IS_K8S:
+            self.capacity = self.cpu = self.memory = self.cpu_capacity = self.mem_capacity = 0  # noqa
+            self.version = awx_application_version
+            self.save(update_fields=['capacity', 'version', 'modified', 'cpu',
+                                     'memory', 'cpu_capacity', 'mem_capacity'])
+            return
         cpu = get_cpu_capacity()
         mem = get_mem_capacity()
         if self.enabled:

View File

@@ -1286,6 +1286,8 @@ class SystemJob(UnifiedJob, SystemJobOptions, JobNotificationMixin):
     @property
     def task_impact(self):
+        if settings.IS_K8S:
+            return 0
         return 5

     @property

View File

@@ -563,6 +563,8 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage
     @property
     def task_impact(self):
+        if settings.IS_K8S:
+            return 0
         return 0 if self.job_type == 'run' else 1

     @property

View File

@@ -70,7 +70,7 @@ class TaskManager():
         '''
         Init AFTER we know this instance of the task manager will run because the lock is acquired.
         '''
-        instances = Instance.objects.filter(~Q(hostname=None), capacity__gt=0, enabled=True)
+        instances = Instance.objects.filter(~Q(hostname=None), enabled=True)
         self.real_instances = {i.hostname: i for i in instances}
         instances_partial = [SimpleNamespace(obj=instance,
@@ -86,7 +86,7 @@ class TaskManager():
                                              capacity_total=rampart_group.capacity,
                                              consumed_capacity=0,
                                              instances=[])
-            for instance in rampart_group.instances.filter(capacity__gt=0, enabled=True).order_by('hostname'):
+            for instance in rampart_group.instances.filter(enabled=True).order_by('hostname'):
                 if instance.hostname in instances_by_hostname:
                     self.graph[rampart_group.name]['instances'].append(instances_by_hostname[instance.hostname])
@@ -528,7 +528,10 @@ class TaskManager():
                     break
                 remaining_capacity = self.get_remaining_capacity(rampart_group.name)
-                if not rampart_group.is_container_group and self.get_remaining_capacity(rampart_group.name) <= 0:
+                if (
+                    task.task_impact > 0 and  # project updates have a cost of zero
+                    not rampart_group.is_container_group and
+                    self.get_remaining_capacity(rampart_group.name) <= 0):
                     logger.debug("Skipping group {}, remaining_capacity {} <= 0".format(
                         rampart_group.name, remaining_capacity))
                     continue

View File

@@ -59,6 +59,13 @@ DATABASES = {
     }
 }

+# Whether or not the deployment is a K8S-based deployment
+# In K8S-based deployments, instances have zero capacity - all playbook
+# automation is intended to flow through defined Container Groups that
+# interface with some (or some set of) K8S api (which may or may not include
+# the K8S cluster where awx itself is running)
+IS_K8S = False
+
 # TODO: remove this setting in favor of a default execution environment
 AWX_EXECUTION_ENVIRONMENT_DEFAULT_IMAGE = 'quay.io/ansible/awx-ee'