From daeba1a8980659eb13c75de057f206089d714983 Mon Sep 17 00:00:00 2001 From: Chris Meyers Date: Thu, 10 Dec 2020 14:39:25 -0500 Subject: [PATCH 1/4] output timing data for isolated playbook runs * We batch logging isolated management playbook output. This results in the timestamp of the log being useless when trying to determine when each task in the playbook ran. * To fix this, we enable timestamp logging at the playbook level via ansible `profile_tasks` callback plugin. --- awx/main/isolated/manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/awx/main/isolated/manager.py b/awx/main/isolated/manager.py index 1c0978f432..9a45db6f49 100644 --- a/awx/main/isolated/manager.py +++ b/awx/main/isolated/manager.py @@ -123,6 +123,7 @@ class IsolatedManager(object): dir=private_data_dir ) params = self.runner_params.copy() + params.get('envvars', dict())['ANSIBLE_CALLBACK_WHITELIST'] = 'profile_tasks' params['playbook'] = playbook params['private_data_dir'] = iso_dir if idle_timeout: From d39d4d9a9eaab39a8c3ae22dc2a877e684f82b0d Mon Sep 17 00:00:00 2001 From: Chris Meyers Date: Thu, 10 Dec 2020 14:42:46 -0500 Subject: [PATCH 2/4] add job id to iso management playbook output * It's hard/impossible to know what job a check_isolated.yml playbook runs for by just looking at the logs. * Forward the job id for which an iso management playbook is running for and output that job id so it can be found in the logs. --- awx/main/isolated/manager.py | 8 ++++++-- awx/playbooks/check_isolated.yml | 3 +++ awx/playbooks/run_isolated.yml | 4 ++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/awx/main/isolated/manager.py b/awx/main/isolated/manager.py index 9a45db6f49..4f8f2ce6bb 100644 --- a/awx/main/isolated/manager.py +++ b/awx/main/isolated/manager.py @@ -169,7 +169,8 @@ class IsolatedManager(object): extravars = { 'src': self.private_data_dir, 'dest': settings.AWX_PROOT_BASE_PATH, - 'ident': self.ident + 'ident': self.ident, + 'job_id': self.instance.id, } if playbook: extravars['playbook'] = playbook @@ -205,7 +206,10 @@ class IsolatedManager(object): :param interval: an interval (in seconds) to wait between status polls """ interval = interval if interval is not None else settings.AWX_ISOLATED_CHECK_INTERVAL - extravars = {'src': self.private_data_dir} + extravars = { + 'src': self.private_data_dir, + 'job_id': self.instance.id + } status = 'failed' rc = None last_check = time.time() diff --git a/awx/playbooks/check_isolated.yml b/awx/playbooks/check_isolated.yml index 18b3305846..472b772fbb 100644 --- a/awx/playbooks/check_isolated.yml +++ b/awx/playbooks/check_isolated.yml @@ -9,6 +9,9 @@ - ansible.posix tasks: + - name: "Output job the playbook is running for" + debug: + msg: "Checking on job {{ job_id }}" - name: Determine if daemon process is alive. shell: "ansible-runner is-alive {{src}}" diff --git a/awx/playbooks/run_isolated.yml b/awx/playbooks/run_isolated.yml index 4e3b7b54ee..76ea42d17c 100644 --- a/awx/playbooks/run_isolated.yml +++ b/awx/playbooks/run_isolated.yml @@ -13,6 +13,10 @@ - ansible.posix tasks: + - name: "Output job the playbook is running for" + debug: + msg: "Checking on job {{ job_id }}" + - name: synchronize job environment with isolated host synchronize: copy_links: true From 566913fceccb2ccb87f5811171e5b95f0400396c Mon Sep 17 00:00:00 2001 From: Chris Meyers Date: Thu, 10 Dec 2020 14:44:48 -0500 Subject: [PATCH 3/4] log time it took to run check_isolated.yml * Knowing how long check_isolated.yml ran can be helpful in debuging the isolated execution path. Especially if you suspect the connection speed or reliability of the control node -> execution node --- awx/main/isolated/manager.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/awx/main/isolated/manager.py b/awx/main/isolated/manager.py index 4f8f2ce6bb..de4783e277 100644 --- a/awx/main/isolated/manager.py +++ b/awx/main/isolated/manager.py @@ -7,6 +7,7 @@ import tempfile import time import logging import yaml +import datetime from django.conf import settings import ansible_runner @@ -225,9 +226,13 @@ class IsolatedManager(object): logger.warning('Isolated job {} was manually canceled.'.format(self.instance.id)) logger.debug('Checking on isolated job {} with `check_isolated.yml`.'.format(self.instance.id)) + time_start = datetime.datetime.now() runner_obj = self.run_management_playbook('check_isolated.yml', self.private_data_dir, extravars=extravars) + time_end = datetime.datetime.now() + time_diff = time_end - time_start + logger.debug('Finished checking on isolated job {} with `check_isolated.yml` took {} seconds.'.format(self.instance.id, time_diff.total_seconds())) status, rc = runner_obj.status, runner_obj.rc if self.check_callback is not None and not self.captured_command_artifact: From 32ad6cdea63859cd749f1e33aed2573ede819e65 Mon Sep 17 00:00:00 2001 From: Chris Meyers Date: Thu, 10 Dec 2020 15:02:33 -0500 Subject: [PATCH 4/4] enable iso logger * The namespace for isolated logging was not enabled. Add a handler and logger so that it's enabled. This is particularly useful when the logging level is switched to DEBUG --- awx/settings/defaults.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py index d47277eaed..0ea3722e13 100644 --- a/awx/settings/defaults.py +++ b/awx/settings/defaults.py @@ -932,6 +932,14 @@ LOGGING = { 'backupCount': 5, 'formatter':'simple', }, + 'isolated_manager': { + 'level': 'WARNING', + 'class':'logging.handlers.RotatingFileHandler', + 'filename': os.path.join(LOG_ROOT, 'isolated_manager.log'), + 'maxBytes': 1024 * 1024 * 5, # 5 MB + 'backupCount': 5, + 'formatter':'simple', + }, }, 'loggers': { 'django': { @@ -981,6 +989,11 @@ LOGGING = { 'awx.main.wsbroadcast': { 'handlers': ['wsbroadcast'], }, + 'awx.isolated.manager': { + 'level': 'WARNING', + 'handlers': ['console', 'file', 'isolated_manager'], + 'propagate': True + }, 'awx.isolated.manager.playbooks': { 'handlers': ['management_playbooks'], 'propagate': False