From e06bf9f87efa8ad105bface2f0921e60258b646e Mon Sep 17 00:00:00 2001 From: Matthew Date: Mon, 14 Oct 2019 10:11:58 -0400 Subject: [PATCH 01/22] Change host counting for task impact Go through the job -> inventory module linkage to calculate the hosts for a more accurate view of the number of hosts that could be impacted. This also creates a bailout that will set count hosts to the forks rather than assuming some crazy low number in the case where we can't determine the actual number of hosts because we are missing the inventory --- awx/main/models/jobs.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/awx/main/models/jobs.py b/awx/main/models/jobs.py index 65be4db925..d573d1ed96 100644 --- a/awx/main/models/jobs.py +++ b/awx/main/models/jobs.py @@ -629,15 +629,17 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana @property def task_impact(self): - # NOTE: We sorta have to assume the host count matches and that forks default to 5 - from awx.main.models.inventory import Host if self.launch_type == 'callback': count_hosts = 2 else: - count_hosts = Host.objects.filter(inventory__jobs__pk=self.pk).count() - if self.job_slice_count > 1: - # Integer division intentional - count_hosts = (count_hosts + self.job_slice_count - self.job_slice_number) // self.job_slice_count + # If for some reason we can't count the hosts then lets assume the impact as forks + if self.inventory is not None: + count_hosts = self.inventory.hosts.count() + if self.job_slice_count > 1: + # Integer division intentional + count_hosts = (count_hosts + self.job_slice_count - self.job_slice_number) // self.job_slice_count + else: + count_hosts = 5 if self.forks == 0 else self.forks return min(count_hosts, 5 if self.forks == 0 else self.forks) + 1 @property From 9d81b0077219de1a226d2af3d939f7235043c6f9 Mon Sep 17 00:00:00 2001 From: Christian Adams Date: Wed, 16 Oct 2019 13:32:06 -0400 Subject: [PATCH 02/22] have analytics collections verify with system trusted CA list --- awx/main/analytics/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awx/main/analytics/core.py b/awx/main/analytics/core.py index 34e4fc9f90..df13ebd4fa 100644 --- a/awx/main/analytics/core.py +++ b/awx/main/analytics/core.py @@ -167,7 +167,7 @@ def ship(path): files = {'file': (os.path.basename(path), f, settings.INSIGHTS_AGENT_MIME)} response = requests.post(url, files=files, - verify=True, + verify="/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem", auth=(rh_user, rh_password), timeout=(31, 31)) if response.status_code != 202: From 11d39bd8cc43d2f0ea2a48f2cbe6a9045948b5fd Mon Sep 17 00:00:00 2001 From: Bill Nottingham Date: Wed, 16 Oct 2019 13:48:59 -0400 Subject: [PATCH 03/22] Blacklist rsa even more. --- Makefile | 2 +- requirements/requirements_tower_uninstall.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9b307613d4..0a93a8696b 100644 --- a/Makefile +++ b/Makefile @@ -189,7 +189,7 @@ requirements_awx: virtualenv_awx cat requirements/requirements.txt requirements/requirements_git.txt | $(VENV_BASE)/awx/bin/pip install $(PIP_OPTIONS) --no-binary $(SRC_ONLY_PKGS) --ignore-installed -r /dev/stdin ; \ fi echo "include-system-site-packages = true" >> $(VENV_BASE)/awx/lib/python$(PYTHON_VERSION)/pyvenv.cfg - #$(VENV_BASE)/awx/bin/pip uninstall --yes -r requirements/requirements_tower_uninstall.txt + $(VENV_BASE)/awx/bin/pip uninstall --yes -r requirements/requirements_tower_uninstall.txt requirements_awx_dev: $(VENV_BASE)/awx/bin/pip install -r requirements/requirements_dev.txt diff --git a/requirements/requirements_tower_uninstall.txt b/requirements/requirements_tower_uninstall.txt index dc3292cd0b..3769969f4d 100644 --- a/requirements/requirements_tower_uninstall.txt +++ b/requirements/requirements_tower_uninstall.txt @@ -1 +1,2 @@ enum34 +rsa # stop adding new crypto libs From 7df448a3484f20cc5541d41a1e4f2cb7e406711f Mon Sep 17 00:00:00 2001 From: Bill Nottingham Date: Wed, 16 Oct 2019 15:34:33 -0400 Subject: [PATCH 04/22] Remove removal requirement that isn't actually in the requirements --- requirements/requirements_tower_uninstall.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements/requirements_tower_uninstall.txt b/requirements/requirements_tower_uninstall.txt index 3769969f4d..56cbaa5f19 100644 --- a/requirements/requirements_tower_uninstall.txt +++ b/requirements/requirements_tower_uninstall.txt @@ -1,2 +1 @@ -enum34 rsa # stop adding new crypto libs From ce5bb9197e8c06d7a0b4e4ab06287740a116c2e4 Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Wed, 16 Oct 2019 15:58:35 -0400 Subject: [PATCH 05/22] rename the CyberArk AIM credential type see: https://github.com/ansible/awx/issues/4400 --- awx/main/credential_plugins/aim.py | 2 +- ...360_rename_cyberark_aim_credential_type.py | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 awx/main/migrations/0098_v360_rename_cyberark_aim_credential_type.py diff --git a/awx/main/credential_plugins/aim.py b/awx/main/credential_plugins/aim.py index a5bac71fdd..f9e0076b40 100644 --- a/awx/main/credential_plugins/aim.py +++ b/awx/main/credential_plugins/aim.py @@ -101,7 +101,7 @@ def aim_backend(**kwargs): aim_plugin = CredentialPlugin( - 'CyberArk AIM Secret Lookup', + 'CyberArk AIM Central Credential Provider Lookup', inputs=aim_inputs, backend=aim_backend ) diff --git a/awx/main/migrations/0098_v360_rename_cyberark_aim_credential_type.py b/awx/main/migrations/0098_v360_rename_cyberark_aim_credential_type.py new file mode 100644 index 0000000000..f63524d5c1 --- /dev/null +++ b/awx/main/migrations/0098_v360_rename_cyberark_aim_credential_type.py @@ -0,0 +1,20 @@ +# Generated by Django 2.2.4 on 2019-10-16 19:51 + +from django.db import migrations + + +def update_cyberark_aim_name(apps, schema_editor): + apps.get_model('main', 'CredentialType').objects.filter(namespace='aim').update( + name='CyberArk AIM Central Credential Provider Lookup' + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ('main', '0097_v360_workflowapproval_approved_or_denied_by'), + ] + + operations = [ + migrations.RunPython(update_cyberark_aim_name) + ] From 24b9a6a38d2ec459d15f4d6a05b5fe7141775c7b Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Thu, 17 Oct 2019 08:41:53 -0400 Subject: [PATCH 06/22] fix a minor bug in the notification templates UI see: https://github.com/ansible/awx/issues/5029 --- .../notifications/shared/message-utils.service.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/awx/ui/client/src/notifications/shared/message-utils.service.js b/awx/ui/client/src/notifications/shared/message-utils.service.js index 48711e5faf..f49b96f0ed 100644 --- a/awx/ui/client/src/notifications/shared/message-utils.service.js +++ b/awx/ui/client/src/notifications/shared/message-utils.service.js @@ -60,27 +60,27 @@ export default [function() { return; } let isCustomized = false; - if (messages.started.message) { + if (messages.started && messages.started.message) { isCustomized = true; $scope.started_message = messages.started.message; } - if (messages.started.body) { + if (messages.started && messages.started.body) { isCustomized = true; $scope.started_body = messages.started.body; } - if (messages.success.message) { + if (messages.success && messages.success.message) { isCustomized = true; $scope.success_message = messages.success.message; } - if (messages.success.body) { + if (messages.success && messages.success.body) { isCustomized = true; $scope.success_body = messages.success.body; } - if (messages.error.message) { + if (messages.error && messages.error.message) { isCustomized = true; $scope.error_message = messages.error.message; } - if (messages.error.body) { + if (messages.error && messages.error.body) { isCustomized = true; $scope.error_body = messages.error.body; } From 4c92e0af7714d9f6104ddfa73ae8acfba2e174d3 Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Thu, 17 Oct 2019 08:53:01 -0400 Subject: [PATCH 07/22] fix a 500 error when creating/editing notification templates see: https://github.com/ansible/awx/issues/5028 --- awx/api/serializers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/awx/api/serializers.py b/awx/api/serializers.py index bcca272a83..818160ac3b 100644 --- a/awx/api/serializers.py +++ b/awx/api/serializers.py @@ -4406,6 +4406,8 @@ class NotificationTemplateSerializer(BaseSerializer): for event in messages: if not messages[event]: continue + if not isinstance(messages[event], dict): + continue body = messages[event].get('body', {}) if body: try: From ffb1707e74a71a55e10e737fdec3e2e0db0dcbce Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Thu, 10 Oct 2019 17:06:12 -0400 Subject: [PATCH 08/22] add support for `awx-manage run_callback_receiver --status` --- awx/main/dispatch/control.py | 9 +++++---- awx/main/dispatch/pool.py | 5 +++++ awx/main/dispatch/worker/base.py | 12 +++++++++++- .../commands/run_callback_receiver.py | 18 +++++++++++++++++- awx/main/models/unified_jobs.py | 2 +- 5 files changed, 39 insertions(+), 7 deletions(-) diff --git a/awx/main/dispatch/control.py b/awx/main/dispatch/control.py index 5f081e84f2..83e2226012 100644 --- a/awx/main/dispatch/control.py +++ b/awx/main/dispatch/control.py @@ -15,18 +15,19 @@ class Control(object): services = ('dispatcher', 'callback_receiver') result = None - def __init__(self, service, host=None): + def __init__(self, service, queuename=None, routing_key=None): if service not in self.services: raise RuntimeError('{} must be in {}'.format(service, self.services)) self.service = service - self.queuename = host or get_local_queuename() - self.queue = Queue(self.queuename, Exchange(self.queuename), routing_key=self.queuename) + self.queuename = queuename or get_local_queuename() + self.routing_key = routing_key or self.queuename + self.queue = Queue(self.queuename, Exchange(self.queuename), routing_key=self.routing_key) def publish(self, msg, conn, **kwargs): producer = Producer( exchange=self.queue.exchange, channel=conn, - routing_key=self.queuename + routing_key=self.routing_key ) producer.publish(msg, expiration=5, **kwargs) diff --git a/awx/main/dispatch/pool.py b/awx/main/dispatch/pool.py index f5b92ca8f1..3fc502b33e 100644 --- a/awx/main/dispatch/pool.py +++ b/awx/main/dispatch/pool.py @@ -280,6 +280,11 @@ class WorkerPool(object): logger.exception('could not kill {}'.format(worker.pid)) + def cleanup(self): + for worker in self.workers: + worker.calculate_managed_tasks() + + class AutoscalePool(WorkerPool): ''' An extended pool implementation that automatically scales workers up and diff --git a/awx/main/dispatch/worker/base.py b/awx/main/dispatch/worker/base.py index e73ed4bade..bc440b831e 100644 --- a/awx/main/dispatch/worker/base.py +++ b/awx/main/dispatch/worker/base.py @@ -56,8 +56,18 @@ class AWXConsumer(ConsumerMixin): @property def listening_on(self): + def qname(q): + if q.routing_key != q.name: + return ':'.join([q.name, q.routing_key]) + return q.name + + def qtype(q): + if q.exchange.type != 'direct': + return ' [{}]'.format(q.exchange.type) + return '' + return 'listening on {}'.format([ - '{} [{}]'.format(q.name, q.exchange.type) for q in self.queues + '{}{}'.format(qname(q), qtype(q)) for q in self.queues ]) def control(self, body, message): diff --git a/awx/main/management/commands/run_callback_receiver.py b/awx/main/management/commands/run_callback_receiver.py index 51608a8b7a..8e706f5309 100644 --- a/awx/main/management/commands/run_callback_receiver.py +++ b/awx/main/management/commands/run_callback_receiver.py @@ -5,6 +5,8 @@ from django.conf import settings from django.core.management.base import BaseCommand from kombu import Exchange, Queue +from awx.main.dispatch import get_local_queuename +from awx.main.dispatch.control import Control from awx.main.dispatch.kombu import Connection from awx.main.dispatch.worker import AWXConsumer, CallbackBrokerWorker @@ -17,7 +19,20 @@ class Command(BaseCommand): ''' help = 'Launch the job callback receiver' + def add_arguments(self, parser): + parser.add_argument('--status', dest='status', action='store_true', + help='print the internal state of any running callback receiver') + def handle(self, *arg, **options): + control_routing_key = 'callback_receiver-{}-control'.format(get_local_queuename()) + if options.get('status'): + print(Control( + 'callback_receiver', + queuename=settings.CALLBACK_QUEUE, + routing_key=control_routing_key + ).status()) + return + with Connection(settings.BROKER_URL) as conn: consumer = None try: @@ -29,8 +44,9 @@ class Command(BaseCommand): Queue( settings.CALLBACK_QUEUE, Exchange(settings.CALLBACK_QUEUE, type='direct'), - routing_key=settings.CALLBACK_QUEUE + routing_key=key ) + for key in [settings.CALLBACK_QUEUE, control_routing_key] ] ) consumer.run() diff --git a/awx/main/models/unified_jobs.py b/awx/main/models/unified_jobs.py index 3613ac4d34..398f8bbe88 100644 --- a/awx/main/models/unified_jobs.py +++ b/awx/main/models/unified_jobs.py @@ -1319,7 +1319,7 @@ class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique timeout = 5 try: running = self.celery_task_id in ControlDispatcher( - 'dispatcher', self.execution_node + 'dispatcher', queuename=self.execution_node ).running(timeout=timeout) except socket.timeout: logger.error('could not reach dispatcher on {} within {}s'.format( From 7dd241fcff9d39736276e204ab7f4decb31219df Mon Sep 17 00:00:00 2001 From: Graham Mainwaring Date: Thu, 17 Oct 2019 13:54:13 -0400 Subject: [PATCH 09/22] Add a --dry-run option to gather analytics locally, even if analytics is disabled in settings. --- awx/main/analytics/core.py | 4 ++-- awx/main/management/commands/gather_analytics.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/awx/main/analytics/core.py b/awx/main/analytics/core.py index df13ebd4fa..02a586b7eb 100644 --- a/awx/main/analytics/core.py +++ b/awx/main/analytics/core.py @@ -88,8 +88,8 @@ def gather(dest=None, module=None, collection_type='scheduled'): logger.exception("Invalid License provided, or No License Provided") return "Error: Invalid License provided, or No License Provided" - if not settings.INSIGHTS_TRACKING_STATE: - logger.error("Automation Analytics not enabled") + if collection_type != 'dry-run' and not settings.INSIGHTS_TRACKING_STATE: + logger.error("Automation Analytics not enabled. Use --dry-run to gather locally without sending.") return if module is None: diff --git a/awx/main/management/commands/gather_analytics.py b/awx/main/management/commands/gather_analytics.py index 8f66b6f12a..aa096d6f28 100644 --- a/awx/main/management/commands/gather_analytics.py +++ b/awx/main/management/commands/gather_analytics.py @@ -11,6 +11,8 @@ class Command(BaseCommand): help = 'Gather AWX analytics data' def add_arguments(self, parser): + parser.add_argument('--dry-run', dest='dry-run', action='store_true', + help='Gather analytics without shipping. Works even if analytics are disabled in settings.') parser.add_argument('--ship', dest='ship', action='store_true', help='Enable to ship metrics to the Red Hat Cloud') @@ -23,9 +25,14 @@ class Command(BaseCommand): self.logger.propagate = False def handle(self, *args, **options): - tgz = gather(collection_type='manual') self.init_logging() + opt_ship = options.get('ship') + opt_dry_run = options.get('dry-run') + if opt_ship and opt_dry_run: + self.logger.error('Both --ship and --dry-run cannot be processed at the same time.') + return + tgz = gather(collection_type='manual' if not opt_dry_run else 'dry-run') if tgz: self.logger.debug(tgz) - if options.get('ship'): + if opt_ship: ship(tgz) From 2f350cfda7bd30255dbd5e987957883e341090a7 Mon Sep 17 00:00:00 2001 From: Bill Nottingham Date: Thu, 17 Oct 2019 14:56:29 -0400 Subject: [PATCH 10/22] Adjust description/help text for profiling features. Note that data is merely for sosreport collection for now, and warn against increasing collection frequency. --- awx/main/conf.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/awx/main/conf.py b/awx/main/conf.py index d75e254d1e..cce5e0a5de 100644 --- a/awx/main/conf.py +++ b/awx/main/conf.py @@ -351,8 +351,9 @@ register( 'AWX_RESOURCE_PROFILING_ENABLED', field_class=fields.BooleanField, default=False, - label=_('Enable resource profiling on all tower jobs'), - help_text=_('If set, resource profiling data will be collected on all jobs.'), # noqa + label=_('Enable detailed resource profiling on all playbook runs'), + help_text=_('If set, detailed resource profiling data will be collected on all jobs. ' + 'This data can be gathered with `sosreport`.'), # noqa category=_('Jobs'), category_slug='jobs', ) @@ -362,7 +363,8 @@ register( field_class=FloatField, default='0.25', label=_('Interval (in seconds) between polls for cpu usage.'), - help_text=_('Interval (in seconds) between polls for cpu usage.'), + help_text=_('Interval (in seconds) between polls for cpu usage. ' + 'Setting this lower than the default will affect playbook performance.'), category=_('Jobs'), category_slug='jobs', required=False, @@ -373,7 +375,8 @@ register( field_class=FloatField, default='0.25', label=_('Interval (in seconds) between polls for memory usage.'), - help_text=_('Interval (in seconds) between polls for memory usage.'), + help_text=_('Interval (in seconds) between polls for memory usage. ' + 'Setting this lower than the default will affect playbook performance.'), category=_('Jobs'), category_slug='jobs', required=False, @@ -384,7 +387,8 @@ register( field_class=FloatField, default='0.25', label=_('Interval (in seconds) between polls for PID count.'), - help_text=_('Interval (in seconds) between polls for PID count.'), + help_text=_('Interval (in seconds) between polls for PID count. ' + 'Setting this lower than the default will affect playbook performance.'), category=_('Jobs'), category_slug='jobs', required=False, From 1cf02e1e1726627b5c4df3623a1f02a6496e88d6 Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Thu, 17 Oct 2019 15:15:24 -0400 Subject: [PATCH 11/22] properly set execution_node for project and inv updates run "in k8s" see: https://github.com/ansible/awx/issues/4907 --- awx/main/scheduler/task_manager.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/awx/main/scheduler/task_manager.py b/awx/main/scheduler/task_manager.py index df23b7136e..4c8ca36960 100644 --- a/awx/main/scheduler/task_manager.py +++ b/awx/main/scheduler/task_manager.py @@ -253,6 +253,18 @@ class TaskManager(): task.log_format, task.execution_node, controller_node)) elif rampart_group.is_containerized: task.instance_group = rampart_group + if not task.supports_isolation(): + # project updates and inventory updates don't *actually* run in pods, + # so just pick *any* non-isolated, non-containerized host and use it + for group in InstanceGroup.objects.all(): + if group.is_containerized or group.controller_id: + continue + match = group.find_largest_idle_instance() + if match: + task.execution_node = match.hostname + logger.debug('Submitting containerized {} to queue {}.'.format( + task.log_format, task.execution_node)) + break else: task.instance_group = rampart_group if instance is not None: From 570ffad52b8d151711b4ea27d1734a55832cfdb9 Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Thu, 17 Oct 2019 15:29:44 -0400 Subject: [PATCH 12/22] clean up pods for all k8s execution, not just playbook runs see: https://github.com/ansible/awx/issues/4908 --- awx/main/tasks.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 5a7fea3bf3..10aac79dd9 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -1094,6 +1094,13 @@ class BaseTask(object): if os.path.isdir(job_profiling_dir): shutil.copytree(job_profiling_dir, os.path.join(awx_profiling_dir, str(instance.pk))) + if instance.is_containerized: + from awx.main.scheduler.kubernetes import PodManager # prevent circular import + pm = PodManager(instance) + logger.debug(f"Deleting pod {pm.pod_name}") + pm.delete() + + def event_handler(self, event_data): # # ⚠️ D-D-D-DANGER ZONE ⚠️ @@ -1841,13 +1848,6 @@ class RunJob(BaseTask): if isolated_manager_instance and not job.is_containerized: isolated_manager_instance.cleanup() - if job.is_containerized: - from awx.main.scheduler.kubernetes import PodManager # prevent circular import - pm = PodManager(job) - logger.debug(f"Deleting pod {pm.pod_name}") - pm.delete() - - try: inventory = job.inventory except Inventory.DoesNotExist: From 1fae3534a199ead71b39319781d1094514a7c6b2 Mon Sep 17 00:00:00 2001 From: Jake McDermott Date: Thu, 17 Oct 2019 15:52:13 -0400 Subject: [PATCH 13/22] Get the last two pages of events on page load When the page loads, we want to retrieve and initially display enough content for the scrollbar to show. If the very last page doesn't have enough content for the scrollbar to show, the user won't be able to scroll up to see more job history. To avoid this scenario, we always fetch the last _two_ pages when loading the view. --- awx/ui/client/features/output/index.controller.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/awx/ui/client/features/output/index.controller.js b/awx/ui/client/features/output/index.controller.js index 07124d8bd9..7f171f728a 100644 --- a/awx/ui/client/features/output/index.controller.js +++ b/awx/ui/client/features/output/index.controller.js @@ -392,7 +392,8 @@ function last () { return lastPage(); } - return lastRange(); + return lastRange() + .then(() => previousRange()); } function next () { From 16812542f88ac2a14057d4e258e22103e847deab Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Thu, 17 Oct 2019 16:51:30 -0400 Subject: [PATCH 14/22] implement a simple periodic pod reaper for container groups see: https://github.com/ansible/awx/issues/4911 --- awx/main/scheduler/kubernetes.py | 28 +++++++++++++++++++++++++++- awx/main/tasks.py | 19 +++++++++++++++++++ awx/settings/defaults.py | 5 +++++ 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/awx/main/scheduler/kubernetes.py b/awx/main/scheduler/kubernetes.py index 90f2849c3d..00f82a3859 100644 --- a/awx/main/scheduler/kubernetes.py +++ b/awx/main/scheduler/kubernetes.py @@ -1,3 +1,4 @@ +import collections import os import stat import time @@ -47,6 +48,27 @@ class PodManager(object): else: logger.warn(f"Pod {self.pod_name} did not start. Status is {pod.status.phase}.") + @classmethod + def list_active_jobs(self, instance_group): + task = collections.namedtuple('Task', 'id instance_group')( + id='', + instance_group=instance_group + ) + pm = PodManager(task) + try: + for pod in pm.kube_api.list_namespaced_pod( + pm.namespace, + label_selector='ansible-awx={}'.format(settings.INSTALL_UUID) + ).to_dict().get('items', []): + job = pod['metadata'].get('labels', {}).get('ansible-awx-job-id') + if job: + try: + yield int(job) + except ValueError: + pass + except Exception: + logger.exception('Failed to list pods for container group {}'.format(instance_group)) + def delete(self): return self.kube_api.delete_namespaced_pod(name=self.pod_name, namespace=self.namespace, @@ -71,7 +93,7 @@ class PodManager(object): @property def pod_name(self): - return f"job-{self.task.id}" + return f"awx-job-{self.task.id}" @property def pod_definition(self): @@ -102,6 +124,10 @@ class PodManager(object): if self.task: pod_spec['metadata']['name'] = self.pod_name + pod_spec['metadata']['labels'] = { + 'ansible-awx': settings.INSTALL_UUID, + 'ansible-awx-job-id': str(self.task.id) + } pod_spec['spec']['containers'][0]['name'] = self.pod_name return pod_spec diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 10aac79dd9..ff53cd00ac 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -458,6 +458,25 @@ def cluster_node_heartbeat(): logger.exception('Error marking {} as lost'.format(other_inst.hostname)) +@task(queue=get_local_queuename) +def awx_k8s_reaper(): + from awx.main.scheduler.kubernetes import PodManager # prevent circular import + for group in InstanceGroup.objects.filter(credential__isnull=False).iterator(): + if group.is_containerized: + logger.debug("Checking for orphaned k8s pods for {}.".format(group)) + for job in UnifiedJob.objects.filter( + pk__in=list(PodManager.list_active_jobs(group)) + ).exclude(status__in=ACTIVE_STATES): + logger.debug('{} is no longer active, reaping orphaned k8s pod'.format(job.log_format)) + try: + PodManager(job).delete() + except Exception: + logger.exception("Failed to delete orphaned pod {} from {}".format( + job.log_format, group + )) + + + @task(queue=get_local_queuename) def awx_isolated_heartbeat(): local_hostname = settings.CLUSTER_HOST_ID diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py index 07c76f8b01..658d41d6b3 100644 --- a/awx/settings/defaults.py +++ b/awx/settings/defaults.py @@ -479,6 +479,11 @@ CELERYBEAT_SCHEDULE = { 'schedule': timedelta(seconds=20), 'options': {'expires': 20} }, + 'k8s_reaper': { + 'task': 'awx.main.tasks.awx_k8s_reaper', + 'schedule': timedelta(seconds=60), + 'options': {'expires': 50,} + }, # 'isolated_heartbeat': set up at the end of production.py and development.py } AWX_INCONSISTENT_TASK_INTERVAL = 60 * 3 From 95c9e8e06870effbabdf09be2b6573ef2b553d2c Mon Sep 17 00:00:00 2001 From: Jake McDermott Date: Thu, 17 Oct 2019 12:22:39 -0400 Subject: [PATCH 15/22] Always disable search when processing events When jobs are still processing events, the UI uses numerical ranges based on job_event.counter instead of page numbers. We can't apply search filters in this state because then there would be no way to distinguish between events that are missing due to being filtered out by search and events that are missing because they're still being processed. The UI must be able to distinguish between the two types of missing events because their absence is presented differently. Events that are filtered out by a search query have no visual representation, while events that are missing due to event processing or other causes are displayed as clickable "..." segments. --- awx/ui/client/features/output/search.component.js | 12 +++++++----- awx/ui/client/features/output/search.partial.html | 2 +- awx/ui/client/features/output/status.service.js | 8 +++++++- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/awx/ui/client/features/output/search.component.js b/awx/ui/client/features/output/search.component.js index 09ef3e52f2..cb8299be89 100644 --- a/awx/ui/client/features/output/search.component.js +++ b/awx/ui/client/features/output/search.component.js @@ -1,3 +1,4 @@ +/* eslint camelcase: 0 */ import { OUTPUT_SEARCH_DOCLINK, OUTPUT_SEARCH_FIELDS, @@ -17,7 +18,7 @@ function toggleSearchKey () { } function getCurrentQueryset () { - const { job_event_search } = $state.params; // eslint-disable-line camelcase + const { job_event_search } = $state.params; return qs.decodeArr(job_event_search); } @@ -114,12 +115,13 @@ function JobSearchController (_$state_, _qs_, _strings_, { subscribe }) { vm.key = false; vm.rejected = false; vm.disabled = true; - vm.running = false; + vm.isJobActive = false; vm.tags = getSearchTags(getCurrentQueryset()); - unsubscribe = subscribe(({ running }) => { - vm.disabled = running; - vm.running = running; + unsubscribe = subscribe(({ running, event_processing_finished }) => { + const isJobActive = running || !event_processing_finished; + vm.disabled = isJobActive; + vm.isJobActive = isJobActive; }); }; diff --git a/awx/ui/client/features/output/search.partial.html b/awx/ui/client/features/output/search.partial.html index 602270bd95..b6e6a7342f 100644 --- a/awx/ui/client/features/output/search.partial.html +++ b/awx/ui/client/features/output/search.partial.html @@ -7,7 +7,7 @@ ng-disabled="vm.disabled" ng-class="{ 'at-Input--rejected': vm.rejected }" ng-model="vm.value" - ng-attr-placeholder="{{ vm.running ? + ng-attr-placeholder="{{ vm.isJobActive ? vm.strings.get('search.PLACEHOLDER_RUNNING') : vm.strings.get('search.PLACEHOLDER_DEFAULT') }}"> diff --git a/awx/ui/client/features/output/status.service.js b/awx/ui/client/features/output/status.service.js index ad3bbd8886..f661d5b491 100644 --- a/awx/ui/client/features/output/status.service.js +++ b/awx/ui/client/features/output/status.service.js @@ -50,7 +50,8 @@ function JobStatusService (moment, message) { inventoryScm: { id: model.get('source_project_update'), status: model.get('summary_fields.inventory_source.status') - } + }, + event_processing_finished: model.get('event_processing_finished'), }; this.initHostStatusCounts({ model }); @@ -309,6 +310,10 @@ function JobStatusService (moment, message) { this.state.resultTraceback = traceback; }; + this.setEventProcessingFinished = val => { + this.state.event_processing_finished = val; + }; + this.setHostStatusCounts = counts => { counts = counts || {}; @@ -348,6 +353,7 @@ function JobStatusService (moment, message) { this.setArtifacts(model.get('artifacts')); this.setExecutionNode(model.get('execution_node')); this.setResultTraceback(model.get('result_traceback')); + this.setEventProcessingFinished(model.get('event_processing_finished')); this.initHostStatusCounts({ model }); this.initPlaybookCounts({ model }); From 0ab44e70f9679b2bb2bfdba41f7500b72289a96e Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Thu, 17 Oct 2019 22:27:25 -0400 Subject: [PATCH 16/22] properly migrate the CyberArk AIM type to its new name --- ..._v360_rename_cyberark_aim_credential_type.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/awx/main/migrations/0098_v360_rename_cyberark_aim_credential_type.py b/awx/main/migrations/0098_v360_rename_cyberark_aim_credential_type.py index f63524d5c1..0bd03b94ba 100644 --- a/awx/main/migrations/0098_v360_rename_cyberark_aim_credential_type.py +++ b/awx/main/migrations/0098_v360_rename_cyberark_aim_credential_type.py @@ -1,12 +1,23 @@ # Generated by Django 2.2.4 on 2019-10-16 19:51 from django.db import migrations +from awx.main.models import CredentialType def update_cyberark_aim_name(apps, schema_editor): - apps.get_model('main', 'CredentialType').objects.filter(namespace='aim').update( - name='CyberArk AIM Central Credential Provider Lookup' - ) + CredentialType.setup_tower_managed_defaults() + aim_types = apps.get_model('main', 'CredentialType').objects.filter( + namespace='aim' + ).order_by('id') + + if aim_types.count() == 2: + original, renamed = aim_types.all() + apps.get_model('main', 'Credential').objects.filter( + credential_type_id=original.id + ).update( + credential_type_id=renamed.id + ) + original.delete() class Migration(migrations.Migration): From d01088d33eee7edc498a02d7a51c30bbfe577188 Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Fri, 18 Oct 2019 09:49:02 -0400 Subject: [PATCH 17/22] Revert "add support for `awx-manage run_callback_receiver --status`" --- awx/main/dispatch/control.py | 9 ++++----- awx/main/dispatch/pool.py | 5 ----- awx/main/dispatch/worker/base.py | 12 +----------- .../commands/run_callback_receiver.py | 18 +----------------- awx/main/models/unified_jobs.py | 2 +- 5 files changed, 7 insertions(+), 39 deletions(-) diff --git a/awx/main/dispatch/control.py b/awx/main/dispatch/control.py index 83e2226012..5f081e84f2 100644 --- a/awx/main/dispatch/control.py +++ b/awx/main/dispatch/control.py @@ -15,19 +15,18 @@ class Control(object): services = ('dispatcher', 'callback_receiver') result = None - def __init__(self, service, queuename=None, routing_key=None): + def __init__(self, service, host=None): if service not in self.services: raise RuntimeError('{} must be in {}'.format(service, self.services)) self.service = service - self.queuename = queuename or get_local_queuename() - self.routing_key = routing_key or self.queuename - self.queue = Queue(self.queuename, Exchange(self.queuename), routing_key=self.routing_key) + self.queuename = host or get_local_queuename() + self.queue = Queue(self.queuename, Exchange(self.queuename), routing_key=self.queuename) def publish(self, msg, conn, **kwargs): producer = Producer( exchange=self.queue.exchange, channel=conn, - routing_key=self.routing_key + routing_key=self.queuename ) producer.publish(msg, expiration=5, **kwargs) diff --git a/awx/main/dispatch/pool.py b/awx/main/dispatch/pool.py index 3fc502b33e..f5b92ca8f1 100644 --- a/awx/main/dispatch/pool.py +++ b/awx/main/dispatch/pool.py @@ -280,11 +280,6 @@ class WorkerPool(object): logger.exception('could not kill {}'.format(worker.pid)) - def cleanup(self): - for worker in self.workers: - worker.calculate_managed_tasks() - - class AutoscalePool(WorkerPool): ''' An extended pool implementation that automatically scales workers up and diff --git a/awx/main/dispatch/worker/base.py b/awx/main/dispatch/worker/base.py index bc440b831e..e73ed4bade 100644 --- a/awx/main/dispatch/worker/base.py +++ b/awx/main/dispatch/worker/base.py @@ -56,18 +56,8 @@ class AWXConsumer(ConsumerMixin): @property def listening_on(self): - def qname(q): - if q.routing_key != q.name: - return ':'.join([q.name, q.routing_key]) - return q.name - - def qtype(q): - if q.exchange.type != 'direct': - return ' [{}]'.format(q.exchange.type) - return '' - return 'listening on {}'.format([ - '{}{}'.format(qname(q), qtype(q)) for q in self.queues + '{} [{}]'.format(q.name, q.exchange.type) for q in self.queues ]) def control(self, body, message): diff --git a/awx/main/management/commands/run_callback_receiver.py b/awx/main/management/commands/run_callback_receiver.py index 8e706f5309..51608a8b7a 100644 --- a/awx/main/management/commands/run_callback_receiver.py +++ b/awx/main/management/commands/run_callback_receiver.py @@ -5,8 +5,6 @@ from django.conf import settings from django.core.management.base import BaseCommand from kombu import Exchange, Queue -from awx.main.dispatch import get_local_queuename -from awx.main.dispatch.control import Control from awx.main.dispatch.kombu import Connection from awx.main.dispatch.worker import AWXConsumer, CallbackBrokerWorker @@ -19,20 +17,7 @@ class Command(BaseCommand): ''' help = 'Launch the job callback receiver' - def add_arguments(self, parser): - parser.add_argument('--status', dest='status', action='store_true', - help='print the internal state of any running callback receiver') - def handle(self, *arg, **options): - control_routing_key = 'callback_receiver-{}-control'.format(get_local_queuename()) - if options.get('status'): - print(Control( - 'callback_receiver', - queuename=settings.CALLBACK_QUEUE, - routing_key=control_routing_key - ).status()) - return - with Connection(settings.BROKER_URL) as conn: consumer = None try: @@ -44,9 +29,8 @@ class Command(BaseCommand): Queue( settings.CALLBACK_QUEUE, Exchange(settings.CALLBACK_QUEUE, type='direct'), - routing_key=key + routing_key=settings.CALLBACK_QUEUE ) - for key in [settings.CALLBACK_QUEUE, control_routing_key] ] ) consumer.run() diff --git a/awx/main/models/unified_jobs.py b/awx/main/models/unified_jobs.py index 398f8bbe88..3613ac4d34 100644 --- a/awx/main/models/unified_jobs.py +++ b/awx/main/models/unified_jobs.py @@ -1319,7 +1319,7 @@ class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique timeout = 5 try: running = self.celery_task_id in ControlDispatcher( - 'dispatcher', queuename=self.execution_node + 'dispatcher', self.execution_node ).running(timeout=timeout) except socket.timeout: logger.error('could not reach dispatcher on {} within {}s'.format( From a664c5eabe70e3997a1c7668871767de0668463a Mon Sep 17 00:00:00 2001 From: Bill Nottingham Date: Fri, 18 Oct 2019 14:28:10 -0400 Subject: [PATCH 18/22] Log the remote IP for logged in users --- awx/api/generics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awx/api/generics.py b/awx/api/generics.py index be58b057d8..8b8fb55894 100644 --- a/awx/api/generics.py +++ b/awx/api/generics.py @@ -92,7 +92,7 @@ class LoggedLoginView(auth_views.LoginView): ret = super(LoggedLoginView, self).post(request, *args, **kwargs) current_user = getattr(request, 'user', None) if request.user.is_authenticated: - logger.info(smart_text(u"User {} logged in.".format(self.request.user.username))) + logger.info(smart_text(u"User {} logged in from {}".format(self.request.user.username,request.META.get('REMOTE_ADDR', None)))) ret.set_cookie('userLoggedIn', 'true') current_user = UserSerializer(self.request.user) current_user = smart_text(JSONRenderer().render(current_user.data)) From c6033399d0a00cc906d23fa3d2e9a1208b1c7ae8 Mon Sep 17 00:00:00 2001 From: Jake McDermott Date: Fri, 18 Oct 2019 18:28:59 -0400 Subject: [PATCH 19/22] Fix off-by-one errors --- awx/ui/client/features/output/render.service.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/awx/ui/client/features/output/render.service.js b/awx/ui/client/features/output/render.service.js index 161aebceb3..11d7108cd0 100644 --- a/awx/ui/client/features/output/render.service.js +++ b/awx/ui/client/features/output/render.service.js @@ -473,7 +473,7 @@ function JobRenderService ($q, $compile, $sce, $window) { this.shift = lines => { // We multiply by two here under the assumption that one element and one text node // is generated for each line of output. - const count = 2 * lines; + const count = (2 * lines) + 1; const elements = this.el.contents().slice(0, count); return this.remove(elements); @@ -482,7 +482,7 @@ function JobRenderService ($q, $compile, $sce, $window) { this.pop = lines => { // We multiply by two here under the assumption that one element and one text node // is generated for each line of output. - const count = 2 * lines; + const count = (2 * lines) + 1; const elements = this.el.contents().slice(-count); return this.remove(elements); @@ -558,7 +558,7 @@ function JobRenderService ($q, $compile, $sce, $window) { } const max = this.state.tail; - const min = max - count; + const min = max - count + 1; let lines = 0; @@ -589,7 +589,7 @@ function JobRenderService ($q, $compile, $sce, $window) { } const min = this.state.head; - const max = min + count; + const max = min + count - 1; let lines = 0; From 312cf137777ea1a960d1394b8d4fad84fc015a99 Mon Sep 17 00:00:00 2001 From: Jake McDermott Date: Fri, 18 Oct 2019 18:44:06 -0400 Subject: [PATCH 20/22] Set omitted runner event line lengths to 0 runner_on_start events have zero-length strings for their stdout fields. We don't want to display these in the ui so we omit them. Although the stdout field is an empty string, it still has a recorded line length of 1 that we must account for. Since we're not rendering the blank line, we must also go back and set the event record's line length to 0 in order to avoid deleting too many lines when we pop or shift events off of the view while scrolling. --- awx/ui/client/features/output/render.service.js | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/awx/ui/client/features/output/render.service.js b/awx/ui/client/features/output/render.service.js index 11d7108cd0..3dad042aa4 100644 --- a/awx/ui/client/features/output/render.service.js +++ b/awx/ui/client/features/output/render.service.js @@ -213,6 +213,18 @@ function JobRenderService ($q, $compile, $sce, $window) { const record = this.createRecord(event, lines); if (lines.length === 1 && lines[0] === '') { + // Some events, mainly runner_on_start events, have an actual line count of 1 + // (stdout = '') and a claimed line count of 0 (end_line - start_line = 0). + // Since a zero-length string has an actual line count of 1, they'll still get + // rendered as blank lines unless we intercept them and add some special + // handling to remove them. + // + // Although we're not going to render the blank line, the actual line count of + // the zero-length stdout string, which is 1, has already been recorded at this + // point so we must also go back and set the event's recorded line length to 0 + // in order to avoid deleting too many lines when we need to pop or shift a + // page that contains this event off of the view. + this.records[record.uuid].lineCount = 0; return { html: '', count: 0 }; } From a8aed53c10541100b7ba8eb4973e70682e3bcf67 Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Mon, 21 Oct 2019 11:01:35 -0400 Subject: [PATCH 21/22] when isolated or container jobs fail to launch, set job status to error a status of error makes more sense, because failed generally points to an issue with the playbook itself, while error is more generally used for reporting issues internal to Tower see: https://github.com/ansible/awx/issues/4909 --- awx/main/isolated/manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/awx/main/isolated/manager.py b/awx/main/isolated/manager.py index d78587cf7b..5a0555ed72 100644 --- a/awx/main/isolated/manager.py +++ b/awx/main/isolated/manager.py @@ -172,6 +172,7 @@ class IsolatedManager(object): if runner_obj.status == 'failed': self.instance.result_traceback = runner_obj.stdout.read() self.instance.save(update_fields=['result_traceback']) + return 'error', runner_obj.rc return runner_obj.status, runner_obj.rc From e64b087e9f1583377d64aed1e7b0294eba36231a Mon Sep 17 00:00:00 2001 From: mabashian Date: Mon, 21 Oct 2019 11:55:12 -0400 Subject: [PATCH 22/22] Revert 6282b5bacbb30f31bc7bbf53df668ac8ed68829f --- awx/ui/client/legacy/styles/lists.less | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/awx/ui/client/legacy/styles/lists.less b/awx/ui/client/legacy/styles/lists.less index 8357a78b78..3e004461af 100644 --- a/awx/ui/client/legacy/styles/lists.less +++ b/awx/ui/client/legacy/styles/lists.less @@ -372,7 +372,9 @@ table, tbody { .List-noItems { margin-top: 52px; - display: inline-block; + display: flex; + align-items: center; + justify-content: center; width: 100%; height: 200px; border-radius: 5px; @@ -381,7 +383,7 @@ table, tbody { color: @list-no-items-txt; text-transform: uppercase; text-align: center; - padding: 80px 10px; + padding: 10px; } .modal-body > .List-noItems {