introduce a new global flag for denoting K8S-based deployments

- In K8S-based installs, only container groups are intended to be used
  for playbook execution (JTs, adhoc, inventory updates), so in this
  scenario, other job types have a task impact of zero.
- In K8S-based installs, traditional instances have *zero* capacity
  (because they're only members of the control plane, where services
  such as http/s and local control plane execution run)
- This commit also includes some changes that allow for the task manager
  to launch tasks with task_impact=0 on instances that have capacity=0
  (previously, an instance with zero capacity would never be selected
  as the "execution node")

This means that when IS_K8S=True, any Job Template associated with an
Instance Group will never actually go from pending -> running (because
there's no capacity - all playbooks must run through Container Groups).
For an improved ux, our intention is to introduce logic into the
operator install process such that the *default* group that's created at
install time is a *Container Group* that's configured to point at the
K8S cluster where awx itself is deployed.
This commit is contained in:
Ryan Petrello 2021-01-21 12:25:34 -05:00 committed by Shane McDonald
parent c29d476919
commit f850f8d3e0
5 changed files with 24 additions and 3 deletions

View File

@ -147,6 +147,13 @@ class Instance(HasPolicyEditsMixin, BaseModel):
return self.rampart_groups.filter(controller__isnull=False).exists()
def refresh_capacity(self):
if settings.IS_K8S:
self.capacity = self.cpu = self.memory = self.cpu_capacity = self.mem_capacity = 0 # noqa
self.version = awx_application_version
self.save(update_fields=['capacity', 'version', 'modified', 'cpu',
'memory', 'cpu_capacity', 'mem_capacity'])
return
cpu = get_cpu_capacity()
mem = get_mem_capacity()
if self.enabled:

View File

@ -1286,6 +1286,8 @@ class SystemJob(UnifiedJob, SystemJobOptions, JobNotificationMixin):
@property
def task_impact(self):
    """Return the task impact of this system job.

    The value is 0 when ``settings.IS_K8S`` is truthy and 5 otherwise.
    """
    # With the K8S flag set this job is reported as having no impact;
    # otherwise it keeps its fixed impact of 5.
    return 0 if settings.IS_K8S else 5
@property

View File

@ -563,6 +563,8 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage
@property
def task_impact(self):
    """Return the task impact of this project update.

    The value is 0 when ``settings.IS_K8S`` is truthy or when
    ``self.job_type`` is ``'run'``; in every other case it is 1.
    """
    # The K8S flag is tested first, so ``job_type`` is only consulted
    # when the flag is off.
    if settings.IS_K8S or self.job_type == 'run':
        return 0
    return 1
@property

View File

@ -70,7 +70,7 @@ class TaskManager():
'''
Init AFTER we know this instance of the task manager will run because the lock is acquired.
'''
instances = Instance.objects.filter(~Q(hostname=None), capacity__gt=0, enabled=True)
instances = Instance.objects.filter(~Q(hostname=None), enabled=True)
self.real_instances = {i.hostname: i for i in instances}
instances_partial = [SimpleNamespace(obj=instance,
@ -86,7 +86,7 @@ class TaskManager():
capacity_total=rampart_group.capacity,
consumed_capacity=0,
instances=[])
for instance in rampart_group.instances.filter(capacity__gt=0, enabled=True).order_by('hostname'):
for instance in rampart_group.instances.filter(enabled=True).order_by('hostname'):
if instance.hostname in instances_by_hostname:
self.graph[rampart_group.name]['instances'].append(instances_by_hostname[instance.hostname])
@ -528,7 +528,10 @@ class TaskManager():
break
remaining_capacity = self.get_remaining_capacity(rampart_group.name)
if not rampart_group.is_container_group and self.get_remaining_capacity(rampart_group.name) <= 0:
if (
task.task_impact > 0 and # project updates have a cost of zero
not rampart_group.is_container_group and
self.get_remaining_capacity(rampart_group.name) <= 0):
logger.debug("Skipping group {}, remaining_capacity {} <= 0".format(
rampart_group.name, remaining_capacity))
continue

View File

@ -59,6 +59,13 @@ DATABASES = {
}
}
# Whether or not the deployment is a K8S-based deployment
# In K8S-based deployments, instances have zero capacity - all playbook
# automation is intended to flow through defined Container Groups that
# interface with some (or some set of) K8S api (which may or may not include
# the K8S cluster where awx itself is running)
IS_K8S = False
# TODO: remove this setting in favor of a default execution environment
AWX_EXECUTION_ENVIRONMENT_DEFAULT_IMAGE = 'quay.io/ansible/awx-ee'