Introduce distinct controlplane instance group

This commit is contained in:
Shane McDonald
2021-05-29 10:48:49 -04:00
parent 82c4f6bb88
commit ec8ac6f1a7
11 changed files with 51 additions and 33 deletions

View File

@@ -173,7 +173,7 @@ init:
. $(VENV_BASE)/awx/bin/activate; \ . $(VENV_BASE)/awx/bin/activate; \
fi; \ fi; \
$(MANAGEMENT_COMMAND) provision_instance --hostname=$(COMPOSE_HOST); \ $(MANAGEMENT_COMMAND) provision_instance --hostname=$(COMPOSE_HOST); \
$(MANAGEMENT_COMMAND) register_queue --queuename=tower --instance_percent=100; $(MANAGEMENT_COMMAND) register_queue --queuename=controlplane --instance_percent=100;
# Refresh development environment after pulling new code. # Refresh development environment after pulling new code.
refresh: clean requirements_dev version_file develop migrate refresh: clean requirements_dev version_file develop migrate

View File

@@ -247,7 +247,7 @@ class IsSuperUser(permissions.BasePermission):
class InstanceGroupTowerPermission(ModelAccessPermission): class InstanceGroupTowerPermission(ModelAccessPermission):
def has_object_permission(self, request, view, obj): def has_object_permission(self, request, view, obj):
if request.method == 'DELETE' and obj.name == settings.DEFAULT_QUEUE_NAME: if request.method == 'DELETE' and obj.name in [settings.DEFAULT_EXECUTION_QUEUE_NAME, settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME]:
return False return False
return super(InstanceGroupTowerPermission, self).has_object_permission(request, view, obj) return super(InstanceGroupTowerPermission, self).has_object_permission(request, view, obj)

View File

@@ -4918,8 +4918,12 @@ class InstanceGroupSerializer(BaseSerializer):
return value return value
def validate_name(self, value): def validate_name(self, value):
if self.instance and self.instance.name == settings.DEFAULT_QUEUE_NAME and value != settings.DEFAULT_QUEUE_NAME: if self.instance and self.instance.name == settings.DEFAULT_EXECUTION_QUEUE_NAME and value != settings.DEFAULT_EXECUTION_QUEUE_NAME:
raise serializers.ValidationError(_('%s instance group name may not be changed.' % settings.DEFAULT_QUEUE_NAME)) raise serializers.ValidationError(_('%s instance group name may not be changed.' % settings.DEFAULT_EXECUTION_QUEUE_NAME))
if self.instance and self.instance.name == settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME and value != settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME:
raise serializers.ValidationError(_('%s instance group name may not be changed.' % settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME))
return value return value
def validate_credential(self, value): def validate_credential(self, value):

View File

@@ -33,7 +33,7 @@ class HostManager(models.Manager):
- Only consider results that are unique - Only consider results that are unique
- Return the count of this query - Return the count of this query
""" """
return self.order_by().exclude(inventory_sources__source=settings.DEFAULT_QUEUE_NAME).values('name').distinct().count() return self.order_by().exclude(inventory_sources__source='tower').values('name').distinct().count()
def org_active_count(self, org_id): def org_active_count(self, org_id):
"""Return count of active, unique hosts used by an organization. """Return count of active, unique hosts used by an organization.
@@ -146,8 +146,8 @@ class InstanceManager(models.Manager):
pod_ip = os.environ.get('MY_POD_IP') pod_ip = os.environ.get('MY_POD_IP')
registered = self.register(ip_address=pod_ip) registered = self.register(ip_address=pod_ip)
is_container_group = settings.IS_K8S RegisterQueue(settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME, 100, 0, [], is_container_group=False).register()
RegisterQueue('tower', 100, 0, [], is_container_group).register() RegisterQueue(settings.DEFAULT_EXECUTION_QUEUE_NAME, 100, 0, [], is_container_group=True).register()
return registered return registered
else: else:
return (False, self.me()) return (False, self.me())
@@ -156,10 +156,6 @@ class InstanceManager(models.Manager):
"""Return count of active Tower nodes for licensing.""" """Return count of active Tower nodes for licensing."""
return self.all().count() return self.all().count()
def my_role(self):
# NOTE: TODO: Likely to repurpose this once standalone ramparts are a thing
return "tower"
class InstanceGroupManager(models.Manager): class InstanceGroupManager(models.Manager):
"""A custom manager class for the Instance model. """A custom manager class for the Instance model.

View File

@@ -130,12 +130,6 @@ class Instance(HasPolicyEditsMixin, BaseModel):
return self.modified < ref_time - timedelta(seconds=grace_period) return self.modified < ref_time - timedelta(seconds=grace_period)
def refresh_capacity(self): def refresh_capacity(self):
if settings.IS_K8S:
self.capacity = self.cpu = self.memory = self.cpu_capacity = self.mem_capacity = 0 # noqa
self.version = awx_application_version
self.save(update_fields=['capacity', 'version', 'modified', 'cpu', 'memory', 'cpu_capacity', 'mem_capacity'])
return
cpu = get_cpu_capacity() cpu = get_cpu_capacity()
mem = get_mem_capacity() mem = get_mem_capacity()
if self.enabled: if self.enabled:

View File

@@ -1265,10 +1265,12 @@ class SystemJob(UnifiedJob, SystemJobOptions, JobNotificationMixin):
return UnpartitionedSystemJobEvent return UnpartitionedSystemJobEvent
return SystemJobEvent return SystemJobEvent
@property
def can_run_on_control_plane(self):
    """Report whether this job may run on the control plane.

    System jobs unconditionally return True.
    """
    return True
@property @property
def task_impact(self): def task_impact(self):
if settings.IS_K8S:
return 0
return 5 return 5
@property @property

View File

@@ -553,6 +553,10 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage
websocket_data.update(dict(project_id=self.project.id)) websocket_data.update(dict(project_id=self.project.id))
return websocket_data return websocket_data
@property
def can_run_on_control_plane(self):
    """Report whether this job may run on the control plane.

    Project updates unconditionally return True.
    """
    return True
@property @property
def event_class(self): def event_class(self):
if self.has_unpartitioned_events: if self.has_unpartitioned_events:
@@ -561,8 +565,6 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage
@property @property
def task_impact(self): def task_impact(self):
if settings.IS_K8S:
return 0
return 0 if self.job_type == 'run' else 1 return 0 if self.job_type == 'run' else 1
@property @property
@@ -623,6 +625,8 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage
organization_groups = [] organization_groups = []
template_groups = [x for x in super(ProjectUpdate, self).preferred_instance_groups] template_groups = [x for x in super(ProjectUpdate, self).preferred_instance_groups]
selected_groups = template_groups + organization_groups selected_groups = template_groups + organization_groups
if not any([not group.is_container_group for group in selected_groups]):
selected_groups = selected_groups + list(self.control_plane_instance_group)
if not selected_groups: if not selected_groups:
return self.global_instance_groups return self.global_instance_groups
return selected_groups return selected_groups

View File

@@ -736,6 +736,13 @@ class UnifiedJob(
def _get_task_class(cls): def _get_task_class(cls):
raise NotImplementedError # Implement in subclasses. raise NotImplementedError # Implement in subclasses.
@property
def can_run_on_control_plane(self):
    """Report whether this job may run on the control plane.

    Returns False when ``settings.IS_K8S`` is truthy and True otherwise.
    """
    return not settings.IS_K8S
@property @property
def can_run_containerized(self): def can_run_containerized(self):
return False return False
@@ -1415,14 +1422,21 @@ class UnifiedJob(
return [] return []
return list(self.unified_job_template.instance_groups.all()) return list(self.unified_job_template.instance_groups.all())
@property
def control_plane_instance_group(self):
    """Return the control plane instance group(s) as a list.

    Looks up every ``InstanceGroup`` whose name equals
    ``settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME``. The result is a list
    (empty when no group matches), so callers can concatenate it with
    other lists of instance groups.
    """
    # Imported inside the property, as the original does.
    from awx.main.models.ha import InstanceGroup

    return list(InstanceGroup.objects.filter(name=settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME))
@property @property
def global_instance_groups(self): def global_instance_groups(self):
from awx.main.models.ha import InstanceGroup from awx.main.models.ha import InstanceGroup
default_instance_group = InstanceGroup.objects.filter(name=settings.DEFAULT_QUEUE_NAME) default_instance_groups = InstanceGroup.objects.filter(name__in=[settings.DEFAULT_EXECUTION_QUEUE_NAME, settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME])
if default_instance_group.exists():
return [default_instance_group.first()] return list(default_instance_groups)
return []
def awx_meta_vars(self): def awx_meta_vars(self):
""" """

View File

@@ -474,6 +474,7 @@ class TaskManager:
tasks_to_update_job_explanation.append(task) tasks_to_update_job_explanation.append(task)
continue continue
preferred_instance_groups = task.preferred_instance_groups preferred_instance_groups = task.preferred_instance_groups
found_acceptable_queue = False found_acceptable_queue = False
if isinstance(task, WorkflowJob): if isinstance(task, WorkflowJob):
if task.unified_job_template_id in running_workflow_templates: if task.unified_job_template_id in running_workflow_templates:
@@ -484,6 +485,7 @@ class TaskManager:
running_workflow_templates.add(task.unified_job_template_id) running_workflow_templates.add(task.unified_job_template_id)
self.start_task(task, None, task.get_jobs_fail_chain(), None) self.start_task(task, None, task.get_jobs_fail_chain(), None)
continue continue
for rampart_group in preferred_instance_groups: for rampart_group in preferred_instance_groups:
if task.can_run_containerized and rampart_group.is_container_group: if task.can_run_containerized and rampart_group.is_container_group:
self.graph[rampart_group.name]['graph'].add_job(task) self.graph[rampart_group.name]['graph'].add_job(task)
@@ -491,12 +493,12 @@ class TaskManager:
found_acceptable_queue = True found_acceptable_queue = True
break break
if not task.can_run_on_control_plane:
logger.debug("Skipping group {}, task cannot run on control plane".format(rampart_group.name))
continue
remaining_capacity = self.get_remaining_capacity(rampart_group.name) remaining_capacity = self.get_remaining_capacity(rampart_group.name)
if ( if task.task_impact > 0 and self.get_remaining_capacity(rampart_group.name) <= 0:
task.task_impact > 0
and not rampart_group.is_container_group # project updates have a cost of zero
and self.get_remaining_capacity(rampart_group.name) <= 0
):
logger.debug("Skipping group {}, remaining_capacity {} <= 0".format(rampart_group.name, remaining_capacity)) logger.debug("Skipping group {}, remaining_capacity {} <= 0".format(rampart_group.name, remaining_capacity))
continue continue

View File

@@ -104,7 +104,7 @@ class TestActiveCount:
def test_active_count_minus_tower(self, inventory): def test_active_count_minus_tower(self, inventory):
inventory.hosts.create(name='locally-managed-host') inventory.hosts.create(name='locally-managed-host')
source = inventory.inventory_sources.create(name='tower-source', source='default') source = inventory.inventory_sources.create(name='tower-source', source='tower')
source.hosts.create(name='remotely-managed-host', inventory=inventory) source.hosts.create(name='remotely-managed-host', inventory=inventory)
assert Host.objects.active_count() == 1 assert Host.objects.active_count() == 1

View File

@@ -946,5 +946,7 @@ BROADCAST_WEBSOCKET_STATS_POLL_RATE_SECONDS = 5
DJANGO_GUID = {'GUID_HEADER_NAME': 'X-API-Request-Id'} DJANGO_GUID = {'GUID_HEADER_NAME': 'X-API-Request-Id'}
# Default name of the task queue # Name of the default task queue
DEFAULT_QUEUE_NAME = 'default' DEFAULT_EXECUTION_QUEUE_NAME = 'default'
# Name of the default controlplane queue
DEFAULT_CONTROL_PLANE_QUEUE_NAME = 'controlplane'