From d5de1f9d114808ef415e799c687efec2ea0f56f0 Mon Sep 17 00:00:00 2001 From: Alan Rominger Date: Mon, 20 Feb 2023 09:56:38 -0500 Subject: [PATCH] Make use of new keepalive messages from ansible-runner Make setting API configurable and process keepalive events when seen in the event callback Use env var in pod spec and make it specific to K8S --- awx/main/conf.py | 10 ++++++++++ awx/main/tasks/callback.py | 2 ++ awx/main/tasks/receptor.py | 4 ++++ awx/settings/defaults.py | 5 +++++ 4 files changed, 21 insertions(+) diff --git a/awx/main/conf.py b/awx/main/conf.py index dab0543a1a..99b995b113 100644 --- a/awx/main/conf.py +++ b/awx/main/conf.py @@ -282,6 +282,16 @@ register( placeholder={'HTTP_PROXY': 'myproxy.local:8080'}, ) +register( + 'AWX_RUNNER_KEEPALIVE_SECONDS', + field_class=fields.IntegerField, + label=_('K8S Ansible Runner Keep-Alive Message Interval'), + help_text=_('Only applies to K8S deployments and container_group jobs. If not 0, send a message every so-many seconds to keep connection open.'), + category=_('Jobs'), + category_slug='jobs', + placeholder=240, # intended to be under common 5 minute idle timeout +) + register( 'GALAXY_TASK_ENV', field_class=fields.KeyValueField, diff --git a/awx/main/tasks/callback.py b/awx/main/tasks/callback.py index 92bfc40368..0046d07d82 100644 --- a/awx/main/tasks/callback.py +++ b/awx/main/tasks/callback.py @@ -85,6 +85,8 @@ class RunnerCallback: # which generate job events from two 'streams': # ansible-inventory and the awx.main.commands.inventory_import # logger + if event_data.get('event') == 'keepalive': + return if event_data.get(self.event_data_key, None): if self.event_data_key != 'job_id': diff --git a/awx/main/tasks/receptor.py b/awx/main/tasks/receptor.py index 006c805943..9cb4d49efe 100644 --- a/awx/main/tasks/receptor.py +++ b/awx/main/tasks/receptor.py @@ -526,6 +526,10 @@ class AWXReceptorJob: pod_spec['spec']['containers'][0]['image'] = ee.image pod_spec['spec']['containers'][0]['args'] = ['ansible-runner', 'worker', '--private-data-dir=/runner'] + if settings.AWX_RUNNER_KEEPALIVE_SECONDS: + pod_spec['spec']['containers'][0].setdefault('env', []) + pod_spec['spec']['containers'][0]['env'].append({'name': 'ANSIBLE_RUNNER_KEEPALIVE_SECONDS', 'value': str(settings.AWX_RUNNER_KEEPALIVE_SECONDS)}) + # Enforce EE Pull Policy pull_options = {"always": "Always", "missing": "IfNotPresent", "never": "Never"} if self.task and self.task.instance.execution_environment: diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py index 4d18540bcd..74a36b3e2d 100644 --- a/awx/settings/defaults.py +++ b/awx/settings/defaults.py @@ -929,6 +929,11 @@ AWX_RUNNER_OMIT_ENV_FILES = True # Allow ansible-runner to save ansible output (may cause performance issues) AWX_RUNNER_SUPPRESS_OUTPUT_FILE = True +# https://github.com/ansible/ansible-runner/pull/1191/files +# Interval in seconds between the last message and keep-alive messages that +# ansible-runner will send +AWX_RUNNER_KEEPALIVE_SECONDS = 0 + # Delete completed work units in receptor RECEPTOR_RELEASE_WORK = True