import logging from django.db.models import Q from django.contrib.contenttypes.models import ContentType from awx.main.models import Instance, UnifiedJob, WorkflowJob logger = logging.getLogger('awx.main.dispatch') def startup_reaping(): """ If this particular instance is starting, then we know that any running jobs are invalid so we will reap those jobs as a special action here """ jobs = UnifiedJob.objects.filter(status='running', controller_node=Instance.objects.my_hostname()) job_ids = [] for j in jobs: job_ids.append(j.id) reap_job( j, 'failed', job_explanation='Task was marked as running at system start up. The system must have not shut down properly, so it has been marked as failed.', ) if job_ids: logger.error(f'Unified jobs {job_ids} were reaped on dispatch startup') def reap_job(j, status, job_explanation=None): j.refresh_from_db(fields=['status', 'job_explanation']) status_before = j.status if status_before not in ('running', 'waiting'): # just in case, don't reap jobs that aren't running return j.status = status j.start_args = '' # blank field to remove encrypted passwords if j.job_explanation: j.job_explanation += ' ' # Separate messages for readability if job_explanation is None: j.job_explanation += 'Task was marked as running but was not present in the job queue, so it has been marked as failed.' else: j.job_explanation += job_explanation j.save(update_fields=['status', 'start_args', 'job_explanation']) if hasattr(j, 'send_notification_templates'): j.send_notification_templates('failed') j.websocket_emit_status(status) logger.error(f'{j.log_format} is no longer {status_before}; reaping') def reap(instance=None, status='failed', job_explanation=None, excluded_uuids=None, ref_time=None): """ Reap all jobs in running for this instance. """ if instance is None: hostname = Instance.objects.my_hostname() else: hostname = instance.hostname workflow_ctype_id = ContentType.objects.get_for_model(WorkflowJob).id base_Q = Q(status='running') & (Q(execution_node=hostname) | Q(controller_node=hostname)) & ~Q(polymorphic_ctype_id=workflow_ctype_id) if ref_time: jobs = UnifiedJob.objects.filter(base_Q & Q(started__lte=ref_time)) else: jobs = UnifiedJob.objects.filter(base_Q) if excluded_uuids: jobs = jobs.exclude(celery_task_id__in=excluded_uuids) for j in jobs: reap_job(j, status, job_explanation=job_explanation)