Refactor canceling to work through messaging and signals, not database

If canceled attempted before, still allow attempting another cancel in this case, attempt to send the sigterm signal again. Keep clicking, you might help! Replace other cancel_callbacks with sigterm watcher adapt special inventory mechanism for this too Get rid of the cancel_watcher method with exception in main thread Handle academic case of sigterm race condition Process cancelation as control signal Fully connect cancel method and run_dispatcher to control Never transition workflows directly to canceled, add logs
2026-02-14 17:50:02 -03:30 · 2021-11-08 16:57:12 -05:00
parent f9428c10b9
commit c59bbdecdb
13 changed files with 137 additions and 91 deletions
--- a/awx/main/tasks/callback.py
+++ b/awx/main/tasks/callback.py
@@ -6,17 +6,16 @@ import os
 import stat

 # Django
-from django.utils.timezone import now
 from django.conf import settings
 from django_guid import get_guid
 from django.utils.functional import cached_property
+from django.db import connections

 # AWX
 from awx.main.redact import UriCleaner
 from awx.main.constants import MINIMAL_EVENTS, ANSIBLE_RUNNER_NEEDS_UPDATE_MESSAGE
 from awx.main.utils.update_model import update_model
 from awx.main.queue import CallbackQueueDispatcher
-from awx.main.tasks.signals import signal_callback

 logger = logging.getLogger('awx.main.tasks.callback')

@@ -175,28 +174,6 @@ class RunnerCallback:

        return False

-    def cancel_callback(self):
-        """
-        Ansible runner callback to tell the job when/if it is canceled
-        """
-        unified_job_id = self.instance.pk
-        if signal_callback():
-            return True
-        try:
-            self.instance = self.update_model(unified_job_id)
-        except Exception:
-            logger.exception(f'Encountered error during cancel check for {unified_job_id}, canceling now')
-            return True
-        if not self.instance:
-            logger.error('unified job {} was deleted while running, canceling'.format(unified_job_id))
-            return True
-        if self.instance.cancel_flag or self.instance.status == 'canceled':
-            cancel_wait = (now() - self.instance.modified).seconds if self.instance.modified else 0
-            if cancel_wait > 5:
-                logger.warning('Request to cancel {} took {} seconds to complete.'.format(self.instance.log_format, cancel_wait))
-            return True
-        return False
-
    def finished_callback(self, runner_obj):
        """
        Ansible runner callback triggered on finished run
@@ -227,6 +204,8 @@ class RunnerCallback:

            with disable_activity_stream():
                self.instance = self.update_model(self.instance.pk, job_args=json.dumps(runner_config.command), job_cwd=runner_config.cwd, job_env=job_env)
+            # We opened a connection just for that save, close it here now
+            connections.close_all()
        elif status_data['status'] == 'failed':
            # For encrypted ssh_key_data, ansible-runner worker will open and write the
            # ssh_key_data to a named pipe. Then, once the podman container starts, ssh-agent will