mirror of
https://github.com/ansible/awx.git
synced 2026-04-14 06:29:25 -02:30
Fix notification timing issue by sending in the latter of 2 events (#12110)
* Track host_status_counts and use that to process notifications
* Remove now unused setting
* Back out changes to callback class not needed after all
* Skirt the need for duck typing by leaning on the cached field
* Delete tests for deleted task
* Revert "Back out changes to callback class not needed after all"
  This reverts commit 3b8ae350d218991d42bffd65ce4baac6f41926b2.
* Directly hardcode stats_event_type for callback class
* Fire notifications if stats event was never sent
* Remove test content for deleted methods
* Add placeholder for when no hosts matched
* Make field default be None, denote events processed with empty dict
* Make UI process null value for host_status_counts
* Fix tracking of EOF dispatch for system jobs
* Reorganize EVENT_MAP into class properties
* Consolidate conditional I missed from EVENT_MAP refactor
* Give up on the null condition, also applies for empty hosts
* Remove cls position argument not being used
* Move wrapup method out of class, add tests
This commit is contained in:
@@ -1607,7 +1607,6 @@ class ProjectUpdateSerializer(UnifiedJobSerializer, ProjectOptionsSerializer):
|
|||||||
|
|
||||||
class ProjectUpdateDetailSerializer(ProjectUpdateSerializer):
|
class ProjectUpdateDetailSerializer(ProjectUpdateSerializer):
|
||||||
|
|
||||||
host_status_counts = serializers.SerializerMethodField(help_text=_('A count of hosts uniquely assigned to each status.'))
|
|
||||||
playbook_counts = serializers.SerializerMethodField(help_text=_('A count of all plays and tasks for the job run.'))
|
playbook_counts = serializers.SerializerMethodField(help_text=_('A count of all plays and tasks for the job run.'))
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
@@ -1622,14 +1621,6 @@ class ProjectUpdateDetailSerializer(ProjectUpdateSerializer):
|
|||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def get_host_status_counts(self, obj):
|
|
||||||
try:
|
|
||||||
counts = obj.project_update_events.only('event_data').get(event='playbook_on_stats').get_host_status_counts()
|
|
||||||
except ProjectUpdateEvent.DoesNotExist:
|
|
||||||
counts = {}
|
|
||||||
|
|
||||||
return counts
|
|
||||||
|
|
||||||
|
|
||||||
class ProjectUpdateListSerializer(ProjectUpdateSerializer, UnifiedJobListSerializer):
|
class ProjectUpdateListSerializer(ProjectUpdateSerializer, UnifiedJobListSerializer):
|
||||||
class Meta:
|
class Meta:
|
||||||
@@ -3107,7 +3098,6 @@ class JobSerializer(UnifiedJobSerializer, JobOptionsSerializer):
|
|||||||
|
|
||||||
class JobDetailSerializer(JobSerializer):
|
class JobDetailSerializer(JobSerializer):
|
||||||
|
|
||||||
host_status_counts = serializers.SerializerMethodField(help_text=_('A count of hosts uniquely assigned to each status.'))
|
|
||||||
playbook_counts = serializers.SerializerMethodField(help_text=_('A count of all plays and tasks for the job run.'))
|
playbook_counts = serializers.SerializerMethodField(help_text=_('A count of all plays and tasks for the job run.'))
|
||||||
custom_virtualenv = serializers.ReadOnlyField()
|
custom_virtualenv = serializers.ReadOnlyField()
|
||||||
|
|
||||||
@@ -3123,14 +3113,6 @@ class JobDetailSerializer(JobSerializer):
|
|||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def get_host_status_counts(self, obj):
|
|
||||||
try:
|
|
||||||
counts = obj.get_event_queryset().only('event_data').get(event='playbook_on_stats').get_host_status_counts()
|
|
||||||
except JobEvent.DoesNotExist:
|
|
||||||
counts = {}
|
|
||||||
|
|
||||||
return counts
|
|
||||||
|
|
||||||
|
|
||||||
class JobCancelSerializer(BaseSerializer):
|
class JobCancelSerializer(BaseSerializer):
|
||||||
|
|
||||||
@@ -3319,21 +3301,10 @@ class AdHocCommandSerializer(UnifiedJobSerializer):
|
|||||||
|
|
||||||
|
|
||||||
class AdHocCommandDetailSerializer(AdHocCommandSerializer):
|
class AdHocCommandDetailSerializer(AdHocCommandSerializer):
|
||||||
|
|
||||||
host_status_counts = serializers.SerializerMethodField(help_text=_('A count of hosts uniquely assigned to each status.'))
|
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = AdHocCommand
|
model = AdHocCommand
|
||||||
fields = ('*', 'host_status_counts')
|
fields = ('*', 'host_status_counts')
|
||||||
|
|
||||||
def get_host_status_counts(self, obj):
|
|
||||||
try:
|
|
||||||
counts = obj.ad_hoc_command_events.only('event_data').get(event='playbook_on_stats').get_host_status_counts()
|
|
||||||
except AdHocCommandEvent.DoesNotExist:
|
|
||||||
counts = {}
|
|
||||||
|
|
||||||
return counts
|
|
||||||
|
|
||||||
|
|
||||||
class AdHocCommandCancelSerializer(AdHocCommandSerializer):
|
class AdHocCommandCancelSerializer(AdHocCommandSerializer):
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import traceback
|
|||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.utils.timezone import now as tz_now
|
from django.utils.timezone import now as tz_now
|
||||||
from django.db import DatabaseError, OperationalError, connection as django_connection
|
from django.db import DatabaseError, OperationalError, transaction, connection as django_connection
|
||||||
from django.db.utils import InterfaceError, InternalError
|
from django.db.utils import InterfaceError, InternalError
|
||||||
from django_guid import set_guid
|
from django_guid import set_guid
|
||||||
|
|
||||||
@@ -16,8 +16,8 @@ import psutil
|
|||||||
import redis
|
import redis
|
||||||
|
|
||||||
from awx.main.consumers import emit_channel_notification
|
from awx.main.consumers import emit_channel_notification
|
||||||
from awx.main.models import JobEvent, AdHocCommandEvent, ProjectUpdateEvent, InventoryUpdateEvent, SystemJobEvent, UnifiedJob, Job
|
from awx.main.models import JobEvent, AdHocCommandEvent, ProjectUpdateEvent, InventoryUpdateEvent, SystemJobEvent, UnifiedJob
|
||||||
from awx.main.tasks.system import handle_success_and_failure_notifications
|
from awx.main.constants import ACTIVE_STATES
|
||||||
from awx.main.models.events import emit_event_detail
|
from awx.main.models.events import emit_event_detail
|
||||||
from awx.main.utils.profiling import AWXProfiler
|
from awx.main.utils.profiling import AWXProfiler
|
||||||
import awx.main.analytics.subsystem_metrics as s_metrics
|
import awx.main.analytics.subsystem_metrics as s_metrics
|
||||||
@@ -26,6 +26,32 @@ from .base import BaseWorker
|
|||||||
logger = logging.getLogger('awx.main.commands.run_callback_receiver')
|
logger = logging.getLogger('awx.main.commands.run_callback_receiver')
|
||||||
|
|
||||||
|
|
||||||
|
def job_stats_wrapup(job_identifier, event=None):
    """Store host_status_counts on the unified job and notify if it already finished.

    :param job_identifier: primary key of the UnifiedJob to update.
    :param event: optional event object; when given, its
        ``get_host_status_counts()`` result is what gets saved. Without it an
        empty dict is saved.

    Any exception raised along the way is logged and swallowed, so this never
    raises to the caller.
    """
    try:
        # An empty dict (as opposed to the field default of None) still signals
        # that events have been processed; this is the case for job types like
        # system jobs, and for jobs where no hosts matched.
        counts = event.get_host_status_counts() if event else {}

        # Write the counts while holding the row lock on the job.
        with transaction.atomic():
            uj = UnifiedJob.objects.select_for_update().get(pk=job_identifier)
            uj.host_status_counts = counts
            uj.save(update_fields=['host_status_counts'])

        uj.log_lifecycle("stats_wrapup_finished")

        # Still active: notifications are sent later, when the status changes.
        if uj.status in ACTIVE_STATES:
            return

        # The job had already reached a finished state before this update, so
        # sending the notifications falls to us.
        outcome = 'succeeded' if uj.status == 'successful' else 'failed'
        uj.send_notification_templates(outcome)
    except Exception:
        logger.exception('Worker failed to save stats or emit notifications: Job {}'.format(job_identifier))
|
||||||
|
|
||||||
|
|
||||||
class CallbackBrokerWorker(BaseWorker):
|
class CallbackBrokerWorker(BaseWorker):
|
||||||
"""
|
"""
|
||||||
A worker implementation that deserializes callback event data and persists
|
A worker implementation that deserializes callback event data and persists
|
||||||
@@ -146,6 +172,8 @@ class CallbackBrokerWorker(BaseWorker):
|
|||||||
if not getattr(e, '_skip_websocket_message', False):
|
if not getattr(e, '_skip_websocket_message', False):
|
||||||
metrics_events_broadcast += 1
|
metrics_events_broadcast += 1
|
||||||
emit_event_detail(e)
|
emit_event_detail(e)
|
||||||
|
if getattr(e, '_notification_trigger_event', False):
|
||||||
|
job_stats_wrapup(getattr(e, e.JOB_REFERENCE), event=e)
|
||||||
self.buff = {}
|
self.buff = {}
|
||||||
self.last_flush = time.time()
|
self.last_flush = time.time()
|
||||||
# only update metrics if we saved events
|
# only update metrics if we saved events
|
||||||
@@ -165,47 +193,32 @@ class CallbackBrokerWorker(BaseWorker):
|
|||||||
if flush:
|
if flush:
|
||||||
self.last_event = ''
|
self.last_event = ''
|
||||||
if not flush:
|
if not flush:
|
||||||
event_map = {
|
|
||||||
'job_id': JobEvent,
|
|
||||||
'ad_hoc_command_id': AdHocCommandEvent,
|
|
||||||
'project_update_id': ProjectUpdateEvent,
|
|
||||||
'inventory_update_id': InventoryUpdateEvent,
|
|
||||||
'system_job_id': SystemJobEvent,
|
|
||||||
}
|
|
||||||
|
|
||||||
job_identifier = 'unknown job'
|
job_identifier = 'unknown job'
|
||||||
for key, cls in event_map.items():
|
for cls in (JobEvent, AdHocCommandEvent, ProjectUpdateEvent, InventoryUpdateEvent, SystemJobEvent):
|
||||||
if key in body:
|
if cls.JOB_REFERENCE in body:
|
||||||
job_identifier = body[key]
|
job_identifier = body[cls.JOB_REFERENCE]
|
||||||
break
|
break
|
||||||
|
|
||||||
self.last_event = f'\n\t- {cls.__name__} for #{job_identifier} ({body.get("event", "")} {body.get("uuid", "")})' # noqa
|
self.last_event = f'\n\t- {cls.__name__} for #{job_identifier} ({body.get("event", "")} {body.get("uuid", "")})' # noqa
|
||||||
|
|
||||||
|
notification_trigger_event = bool(body.get('event') == cls.WRAPUP_EVENT)
|
||||||
|
|
||||||
if body.get('event') == 'EOF':
|
if body.get('event') == 'EOF':
|
||||||
try:
|
try:
|
||||||
if 'guid' in body:
|
if 'guid' in body:
|
||||||
set_guid(body['guid'])
|
set_guid(body['guid'])
|
||||||
final_counter = body.get('final_counter', 0)
|
final_counter = body.get('final_counter', 0)
|
||||||
logger.info('Event processing is finished for Job {}, sending notifications'.format(job_identifier))
|
logger.info('Starting EOF event processing for Job {}'.format(job_identifier))
|
||||||
# EOF events are sent when stdout for the running task is
|
# EOF events are sent when stdout for the running task is
|
||||||
# closed. don't actually persist them to the database; we
|
# closed. don't actually persist them to the database; we
|
||||||
# just use them to report `summary` websocket events as an
|
# just use them to report `summary` websocket events as an
|
||||||
# approximation for when a job is "done"
|
# approximation for when a job is "done"
|
||||||
emit_channel_notification('jobs-summary', dict(group_name='jobs', unified_job_id=job_identifier, final_counter=final_counter))
|
emit_channel_notification('jobs-summary', dict(group_name='jobs', unified_job_id=job_identifier, final_counter=final_counter))
|
||||||
# Additionally, when we've processed all events, we should
|
|
||||||
# have all the data we need to send out success/failure
|
|
||||||
# notification templates
|
|
||||||
uj = UnifiedJob.objects.get(pk=job_identifier)
|
|
||||||
|
|
||||||
if isinstance(uj, Job):
|
if notification_trigger_event:
|
||||||
# *actual playbooks* send their success/failure
|
job_stats_wrapup(job_identifier)
|
||||||
# notifications in response to the playbook_on_stats
|
|
||||||
# event handling code in main.models.events
|
|
||||||
pass
|
|
||||||
elif hasattr(uj, 'send_notification_templates'):
|
|
||||||
handle_success_and_failure_notifications.apply_async([uj.id])
|
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception('Worker failed to emit notifications: Job {}'.format(job_identifier))
|
logger.exception('Worker failed to perform EOF tasks: Job {}'.format(job_identifier))
|
||||||
finally:
|
finally:
|
||||||
self.subsystem_metrics.inc('callback_receiver_events_in_memory', -1)
|
self.subsystem_metrics.inc('callback_receiver_events_in_memory', -1)
|
||||||
set_guid('')
|
set_guid('')
|
||||||
@@ -215,9 +228,12 @@ class CallbackBrokerWorker(BaseWorker):
|
|||||||
|
|
||||||
event = cls.create_from_data(**body)
|
event = cls.create_from_data(**body)
|
||||||
|
|
||||||
if skip_websocket_message:
|
if skip_websocket_message: # if this event sends websocket messages, fire them off on flush
|
||||||
event._skip_websocket_message = True
|
event._skip_websocket_message = True
|
||||||
|
|
||||||
|
if notification_trigger_event: # if this is an Ansible stats event, ensure notifications on flush
|
||||||
|
event._notification_trigger_event = True
|
||||||
|
|
||||||
self.buff.setdefault(cls, []).append(event)
|
self.buff.setdefault(cls, []).append(event)
|
||||||
|
|
||||||
retries = 0
|
retries = 0
|
||||||
|
|||||||
18
awx/main/migrations/0161_unifiedjob_host_status_counts.py
Normal file
18
awx/main/migrations/0161_unifiedjob_host_status_counts.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 3.2.12 on 2022-04-27 02:16
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Add the nullable ``host_status_counts`` JSON field to ``unifiedjob``."""

    dependencies = [('main', '0160_alter_schedule_rrule')]

    operations = [
        migrations.AddField(
            model_name='unifiedjob',
            name='host_status_counts',
            # default=None together with null=True: existing rows start out as NULL
            field=models.JSONField(
                blank=True,
                default=None,
                editable=False,
                help_text='Playbook stats from the Ansible playbook_on_stats event.',
                null=True,
            ),
        ),
    ]
|
||||||
@@ -6,7 +6,7 @@ from collections import defaultdict
|
|||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.exceptions import ObjectDoesNotExist
|
from django.core.exceptions import ObjectDoesNotExist
|
||||||
from django.db import models, DatabaseError, connection
|
from django.db import models, DatabaseError
|
||||||
from django.utils.dateparse import parse_datetime
|
from django.utils.dateparse import parse_datetime
|
||||||
from django.utils.text import Truncator
|
from django.utils.text import Truncator
|
||||||
from django.utils.timezone import utc, now
|
from django.utils.timezone import utc, now
|
||||||
@@ -126,6 +126,7 @@ class BasePlaybookEvent(CreatedModifiedModel):
|
|||||||
'host_name',
|
'host_name',
|
||||||
'verbosity',
|
'verbosity',
|
||||||
]
|
]
|
||||||
|
WRAPUP_EVENT = 'playbook_on_stats'
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
abstract = True
|
abstract = True
|
||||||
@@ -384,14 +385,6 @@ class BasePlaybookEvent(CreatedModifiedModel):
|
|||||||
job.get_event_queryset().filter(uuid__in=changed).update(changed=True)
|
job.get_event_queryset().filter(uuid__in=changed).update(changed=True)
|
||||||
job.get_event_queryset().filter(uuid__in=failed).update(failed=True)
|
job.get_event_queryset().filter(uuid__in=failed).update(failed=True)
|
||||||
|
|
||||||
# send success/failure notifications when we've finished handling the playbook_on_stats event
|
|
||||||
from awx.main.tasks.system import handle_success_and_failure_notifications # circular import
|
|
||||||
|
|
||||||
def _send_notifications():
|
|
||||||
handle_success_and_failure_notifications.apply_async([job.id])
|
|
||||||
|
|
||||||
connection.on_commit(_send_notifications)
|
|
||||||
|
|
||||||
for field in ('playbook', 'play', 'task', 'role'):
|
for field in ('playbook', 'play', 'task', 'role'):
|
||||||
value = force_str(event_data.get(field, '')).strip()
|
value = force_str(event_data.get(field, '')).strip()
|
||||||
if value != getattr(self, field):
|
if value != getattr(self, field):
|
||||||
@@ -470,6 +463,7 @@ class JobEvent(BasePlaybookEvent):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
VALID_KEYS = BasePlaybookEvent.VALID_KEYS + ['job_id', 'workflow_job_id', 'job_created']
|
VALID_KEYS = BasePlaybookEvent.VALID_KEYS + ['job_id', 'workflow_job_id', 'job_created']
|
||||||
|
JOB_REFERENCE = 'job_id'
|
||||||
|
|
||||||
objects = DeferJobCreatedManager()
|
objects = DeferJobCreatedManager()
|
||||||
|
|
||||||
@@ -600,6 +594,7 @@ UnpartitionedJobEvent._meta.db_table = '_unpartitioned_' + JobEvent._meta.db_tab
|
|||||||
class ProjectUpdateEvent(BasePlaybookEvent):
|
class ProjectUpdateEvent(BasePlaybookEvent):
|
||||||
|
|
||||||
VALID_KEYS = BasePlaybookEvent.VALID_KEYS + ['project_update_id', 'workflow_job_id', 'job_created']
|
VALID_KEYS = BasePlaybookEvent.VALID_KEYS + ['project_update_id', 'workflow_job_id', 'job_created']
|
||||||
|
JOB_REFERENCE = 'project_update_id'
|
||||||
|
|
||||||
objects = DeferJobCreatedManager()
|
objects = DeferJobCreatedManager()
|
||||||
|
|
||||||
@@ -641,6 +636,7 @@ class BaseCommandEvent(CreatedModifiedModel):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
VALID_KEYS = ['event_data', 'created', 'counter', 'uuid', 'stdout', 'start_line', 'end_line', 'verbosity']
|
VALID_KEYS = ['event_data', 'created', 'counter', 'uuid', 'stdout', 'start_line', 'end_line', 'verbosity']
|
||||||
|
WRAPUP_EVENT = 'EOF'
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
abstract = True
|
abstract = True
|
||||||
@@ -736,6 +732,8 @@ class BaseCommandEvent(CreatedModifiedModel):
|
|||||||
class AdHocCommandEvent(BaseCommandEvent):
|
class AdHocCommandEvent(BaseCommandEvent):
|
||||||
|
|
||||||
VALID_KEYS = BaseCommandEvent.VALID_KEYS + ['ad_hoc_command_id', 'event', 'host_name', 'host_id', 'workflow_job_id', 'job_created']
|
VALID_KEYS = BaseCommandEvent.VALID_KEYS + ['ad_hoc_command_id', 'event', 'host_name', 'host_id', 'workflow_job_id', 'job_created']
|
||||||
|
WRAPUP_EVENT = 'playbook_on_stats' # exception to BaseCommandEvent
|
||||||
|
JOB_REFERENCE = 'ad_hoc_command_id'
|
||||||
|
|
||||||
objects = DeferJobCreatedManager()
|
objects = DeferJobCreatedManager()
|
||||||
|
|
||||||
@@ -836,6 +834,7 @@ UnpartitionedAdHocCommandEvent._meta.db_table = '_unpartitioned_' + AdHocCommand
|
|||||||
class InventoryUpdateEvent(BaseCommandEvent):
|
class InventoryUpdateEvent(BaseCommandEvent):
|
||||||
|
|
||||||
VALID_KEYS = BaseCommandEvent.VALID_KEYS + ['inventory_update_id', 'workflow_job_id', 'job_created']
|
VALID_KEYS = BaseCommandEvent.VALID_KEYS + ['inventory_update_id', 'workflow_job_id', 'job_created']
|
||||||
|
JOB_REFERENCE = 'inventory_update_id'
|
||||||
|
|
||||||
objects = DeferJobCreatedManager()
|
objects = DeferJobCreatedManager()
|
||||||
|
|
||||||
@@ -881,6 +880,7 @@ UnpartitionedInventoryUpdateEvent._meta.db_table = '_unpartitioned_' + Inventory
|
|||||||
class SystemJobEvent(BaseCommandEvent):
|
class SystemJobEvent(BaseCommandEvent):
|
||||||
|
|
||||||
VALID_KEYS = BaseCommandEvent.VALID_KEYS + ['system_job_id', 'job_created']
|
VALID_KEYS = BaseCommandEvent.VALID_KEYS + ['system_job_id', 'job_created']
|
||||||
|
JOB_REFERENCE = 'system_job_id'
|
||||||
|
|
||||||
objects = DeferJobCreatedManager()
|
objects = DeferJobCreatedManager()
|
||||||
|
|
||||||
|
|||||||
@@ -421,21 +421,8 @@ class JobNotificationMixin(object):
|
|||||||
The context will contain allowed content retrieved from a serialized job object
|
The context will contain allowed content retrieved from a serialized job object
|
||||||
(see JobNotificationMixin.JOB_FIELDS_ALLOWED_LIST the job's friendly name,
|
(see JobNotificationMixin.JOB_FIELDS_ALLOWED_LIST the job's friendly name,
|
||||||
and a url to the job run."""
|
and a url to the job run."""
|
||||||
job_context = {'host_status_counts': {}}
|
|
||||||
summary = None
|
|
||||||
try:
|
|
||||||
has_event_property = any([f for f in self.event_class._meta.fields if f.name == 'event'])
|
|
||||||
except NotImplementedError:
|
|
||||||
has_event_property = False
|
|
||||||
if has_event_property:
|
|
||||||
qs = self.get_event_queryset()
|
|
||||||
if qs:
|
|
||||||
event = qs.only('event_data').filter(event='playbook_on_stats').first()
|
|
||||||
if event:
|
|
||||||
summary = event.get_host_status_counts()
|
|
||||||
job_context['host_status_counts'] = summary
|
|
||||||
context = {
|
context = {
|
||||||
'job': job_context,
|
'job': {'host_status_counts': self.host_status_counts},
|
||||||
'job_friendly_name': self.get_notification_friendly_name(),
|
'job_friendly_name': self.get_notification_friendly_name(),
|
||||||
'url': self.get_ui_url(),
|
'url': self.get_ui_url(),
|
||||||
'job_metadata': json.dumps(self.notification_data(), ensure_ascii=False, indent=4),
|
'job_metadata': json.dumps(self.notification_data(), ensure_ascii=False, indent=4),
|
||||||
|
|||||||
@@ -717,6 +717,13 @@ class UnifiedJob(
|
|||||||
editable=False,
|
editable=False,
|
||||||
help_text=_("The version of Ansible Core installed in the execution environment."),
|
help_text=_("The version of Ansible Core installed in the execution environment."),
|
||||||
)
|
)
|
||||||
|
host_status_counts = models.JSONField(
|
||||||
|
blank=True,
|
||||||
|
null=True,
|
||||||
|
default=None,
|
||||||
|
editable=False,
|
||||||
|
help_text=_("Playbook stats from the Ansible playbook_on_stats event."),
|
||||||
|
)
|
||||||
work_unit_id = models.CharField(
|
work_unit_id = models.CharField(
|
||||||
max_length=255, blank=True, default=None, editable=False, null=True, help_text=_("The Receptor work unit ID associated with this job.")
|
max_length=255, blank=True, default=None, editable=False, null=True, help_text=_("The Receptor work unit ID associated with this job.")
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import stat
|
|||||||
from django.utils.timezone import now
|
from django.utils.timezone import now
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django_guid import get_guid
|
from django_guid import get_guid
|
||||||
|
from django.utils.functional import cached_property
|
||||||
|
|
||||||
# AWX
|
# AWX
|
||||||
from awx.main.redact import UriCleaner
|
from awx.main.redact import UriCleaner
|
||||||
@@ -20,8 +21,6 @@ logger = logging.getLogger('awx.main.tasks.callback')
|
|||||||
|
|
||||||
|
|
||||||
class RunnerCallback:
|
class RunnerCallback:
|
||||||
event_data_key = 'job_id'
|
|
||||||
|
|
||||||
def __init__(self, model=None):
|
def __init__(self, model=None):
|
||||||
self.parent_workflow_job_id = None
|
self.parent_workflow_job_id = None
|
||||||
self.host_map = {}
|
self.host_map = {}
|
||||||
@@ -33,10 +32,19 @@ class RunnerCallback:
|
|||||||
self.event_ct = 0
|
self.event_ct = 0
|
||||||
self.model = model
|
self.model = model
|
||||||
self.update_attempts = int(settings.DISPATCHER_DB_DOWNTOWN_TOLLERANCE / 5)
|
self.update_attempts = int(settings.DISPATCHER_DB_DOWNTOWN_TOLLERANCE / 5)
|
||||||
|
self.wrapup_event_dispatched = False
|
||||||
|
|
||||||
def update_model(self, pk, _attempt=0, **updates):
|
def update_model(self, pk, _attempt=0, **updates):
|
||||||
return update_model(self.model, pk, _attempt=0, _max_attempts=self.update_attempts, **updates)
|
return update_model(self.model, pk, _attempt=0, _max_attempts=self.update_attempts, **updates)
|
||||||
|
|
||||||
|
@cached_property
def wrapup_event_type(self):
    """Return the ``WRAPUP_EVENT`` name declared by this instance's event class."""
    event_cls = self.instance.event_class
    return event_cls.WRAPUP_EVENT
|
||||||
|
|
||||||
|
@cached_property
def event_data_key(self):
    """Return the ``JOB_REFERENCE`` key declared by this instance's event class.

    This is the key under which the job id is stored in dispatched event data.
    """
    event_cls = self.instance.event_class
    return event_cls.JOB_REFERENCE
|
||||||
|
|
||||||
def event_handler(self, event_data):
|
def event_handler(self, event_data):
|
||||||
#
|
#
|
||||||
# ⚠️ D-D-D-DANGER ZONE ⚠️
|
# ⚠️ D-D-D-DANGER ZONE ⚠️
|
||||||
@@ -130,6 +138,9 @@ class RunnerCallback:
|
|||||||
elif self.recent_event_timings.maxlen:
|
elif self.recent_event_timings.maxlen:
|
||||||
self.recent_event_timings.append(time.time())
|
self.recent_event_timings.append(time.time())
|
||||||
|
|
||||||
|
if event_data.get('event', '') == self.wrapup_event_type:
|
||||||
|
self.wrapup_event_dispatched = True
|
||||||
|
|
||||||
event_data.setdefault(self.event_data_key, self.instance.id)
|
event_data.setdefault(self.event_data_key, self.instance.id)
|
||||||
self.dispatcher.dispatch(event_data)
|
self.dispatcher.dispatch(event_data)
|
||||||
self.event_ct += 1
|
self.event_ct += 1
|
||||||
@@ -170,6 +181,8 @@ class RunnerCallback:
|
|||||||
}
|
}
|
||||||
event_data.setdefault(self.event_data_key, self.instance.id)
|
event_data.setdefault(self.event_data_key, self.instance.id)
|
||||||
self.dispatcher.dispatch(event_data)
|
self.dispatcher.dispatch(event_data)
|
||||||
|
if self.wrapup_event_type == 'EOF':
|
||||||
|
self.wrapup_event_dispatched = True
|
||||||
|
|
||||||
def status_handler(self, status_data, runner_config):
|
def status_handler(self, status_data, runner_config):
|
||||||
"""
|
"""
|
||||||
@@ -212,9 +225,6 @@ class RunnerCallback:
|
|||||||
|
|
||||||
|
|
||||||
class RunnerCallbackForProjectUpdate(RunnerCallback):
|
class RunnerCallbackForProjectUpdate(RunnerCallback):
|
||||||
|
|
||||||
event_data_key = 'project_update_id'
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super(RunnerCallbackForProjectUpdate, self).__init__(*args, **kwargs)
|
super(RunnerCallbackForProjectUpdate, self).__init__(*args, **kwargs)
|
||||||
self.playbook_new_revision = None
|
self.playbook_new_revision = None
|
||||||
@@ -231,9 +241,6 @@ class RunnerCallbackForProjectUpdate(RunnerCallback):
|
|||||||
|
|
||||||
|
|
||||||
class RunnerCallbackForInventoryUpdate(RunnerCallback):
|
class RunnerCallbackForInventoryUpdate(RunnerCallback):
|
||||||
|
|
||||||
event_data_key = 'inventory_update_id'
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super(RunnerCallbackForInventoryUpdate, self).__init__(*args, **kwargs)
|
super(RunnerCallbackForInventoryUpdate, self).__init__(*args, **kwargs)
|
||||||
self.end_line = 0
|
self.end_line = 0
|
||||||
@@ -245,9 +252,6 @@ class RunnerCallbackForInventoryUpdate(RunnerCallback):
|
|||||||
|
|
||||||
|
|
||||||
class RunnerCallbackForAdHocCommand(RunnerCallback):
|
class RunnerCallbackForAdHocCommand(RunnerCallback):
|
||||||
|
|
||||||
event_data_key = 'ad_hoc_command_id'
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super(RunnerCallbackForAdHocCommand, self).__init__(*args, **kwargs)
|
super(RunnerCallbackForAdHocCommand, self).__init__(*args, **kwargs)
|
||||||
self.host_map = {}
|
self.host_map = {}
|
||||||
@@ -255,4 +259,4 @@ class RunnerCallbackForAdHocCommand(RunnerCallback):
|
|||||||
|
|
||||||
class RunnerCallbackForSystemJob(RunnerCallback):
|
class RunnerCallbackForSystemJob(RunnerCallback):
|
||||||
|
|
||||||
event_data_key = 'system_job_id'
|
pass
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ from awx.main.utils.common import (
|
|||||||
)
|
)
|
||||||
from awx.conf.license import get_license
|
from awx.conf.license import get_license
|
||||||
from awx.main.utils.handlers import SpecialInventoryHandler
|
from awx.main.utils.handlers import SpecialInventoryHandler
|
||||||
from awx.main.tasks.system import handle_success_and_failure_notifications, update_smart_memberships_for_inventory, update_inventory_computed_fields
|
from awx.main.tasks.system import update_smart_memberships_for_inventory, update_inventory_computed_fields
|
||||||
from awx.main.utils.update_model import update_model
|
from awx.main.utils.update_model import update_model
|
||||||
from rest_framework.exceptions import PermissionDenied
|
from rest_framework.exceptions import PermissionDenied
|
||||||
from django.utils.translation import gettext_lazy as _
|
from django.utils.translation import gettext_lazy as _
|
||||||
@@ -552,8 +552,6 @@ class BaseTask(object):
|
|||||||
status = 'failed'
|
status = 'failed'
|
||||||
|
|
||||||
extra_update_fields['job_explanation'] = self.instance.job_explanation
|
extra_update_fields['job_explanation'] = self.instance.job_explanation
|
||||||
# ensure failure notification sends even if playbook_on_stats event is not triggered
|
|
||||||
handle_success_and_failure_notifications.apply_async([self.instance.id])
|
|
||||||
|
|
||||||
except ReceptorNodeNotFound as exc:
|
except ReceptorNodeNotFound as exc:
|
||||||
extra_update_fields['job_explanation'] = str(exc)
|
extra_update_fields['job_explanation'] = str(exc)
|
||||||
@@ -580,7 +578,12 @@ class BaseTask(object):
|
|||||||
extra_update_fields['result_traceback'] = "{}\n\n{}".format(extra_update_fields['result_traceback'], ANSIBLE_RUNNER_NEEDS_UPDATE_MESSAGE)
|
extra_update_fields['result_traceback'] = "{}\n\n{}".format(extra_update_fields['result_traceback'], ANSIBLE_RUNNER_NEEDS_UPDATE_MESSAGE)
|
||||||
|
|
||||||
self.instance = self.update_model(pk)
|
self.instance = self.update_model(pk)
|
||||||
self.instance = self.update_model(pk, status=status, emitted_events=self.runner_callback.event_ct, **extra_update_fields)
|
self.instance = self.update_model(pk, status=status, emitted_events=self.runner_callback.event_ct, select_for_update=True, **extra_update_fields)
|
||||||
|
|
||||||
|
# Field host_status_counts is used as a metric to check if event processing is finished
|
||||||
|
# we send notifications if it is, if not, callback receiver will send them
|
||||||
|
if (self.instance.host_status_counts is not None) or (not self.runner_callback.wrapup_event_dispatched):
|
||||||
|
self.instance.send_notification_templates('succeeded' if status == 'successful' else 'failed')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.final_run_hook(self.instance, status, private_data_dir, fact_modification_times)
|
self.final_run_hook(self.instance, status, private_data_dir, fact_modification_times)
|
||||||
|
|||||||
@@ -716,25 +716,6 @@ def handle_work_error(task_id, *args, **kwargs):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@task(queue=get_local_queuename)
|
|
||||||
def handle_success_and_failure_notifications(job_id):
|
|
||||||
uj = UnifiedJob.objects.get(pk=job_id)
|
|
||||||
retries = 0
|
|
||||||
while retries < settings.AWX_NOTIFICATION_JOB_FINISH_MAX_RETRY:
|
|
||||||
if uj.finished:
|
|
||||||
uj.send_notification_templates('succeeded' if uj.status == 'successful' else 'failed')
|
|
||||||
return
|
|
||||||
else:
|
|
||||||
# wait a few seconds to avoid a race where the
|
|
||||||
# events are persisted _before_ the UJ.status
|
|
||||||
# changes from running -> successful
|
|
||||||
retries += 1
|
|
||||||
time.sleep(1)
|
|
||||||
uj = UnifiedJob.objects.get(pk=job_id)
|
|
||||||
|
|
||||||
logger.warning(f"Failed to even try to send notifications for job '{uj}' due to job not being in finished state.")
|
|
||||||
|
|
||||||
|
|
||||||
@task(queue=get_local_queuename)
|
@task(queue=get_local_queuename)
|
||||||
def update_inventory_computed_fields(inventory_id):
|
def update_inventory_computed_fields(inventory_id):
|
||||||
"""
|
"""
|
||||||
|
|||||||
26
awx/main/tests/functional/commands/test_callback_receiver.py
Normal file
26
awx/main/tests/functional/commands/test_callback_receiver.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from awx.main.dispatch.worker.callback import job_stats_wrapup
|
||||||
|
from awx.main.models.jobs import Job
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_wrapup_does_not_send_notifications(mocker):
    """A job still in 'running' gets its counts saved but no notification is sent."""
    running_job = Job.objects.create(status='running')
    assert running_job.host_status_counts is None

    send_patch = mocker.patch('awx.main.models.notifications.JobNotificationMixin.send_notification_templates')
    job_stats_wrapup(running_job.id)

    running_job.refresh_from_db()
    # wrapup without an event stores an empty dict rather than leaving None
    assert running_job.host_status_counts == {}
    send_patch.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
def test_wrapup_does_send_notifications(mocker):
|
||||||
|
job = Job.objects.create(status='successful')
|
||||||
|
assert job.host_status_counts is None
|
||||||
|
mock = mocker.patch('awx.main.models.notifications.JobNotificationMixin.send_notification_templates')
|
||||||
|
job_stats_wrapup(job.id)
|
||||||
|
job.refresh_from_db()
|
||||||
|
assert job.host_status_counts == {}
|
||||||
|
mock.assert_called_once_with('succeeded')
|
||||||
@@ -1,16 +1,10 @@
|
|||||||
# Python
|
# Python
|
||||||
from collections import namedtuple
|
|
||||||
import pytest
|
import pytest
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
import json
|
import json
|
||||||
|
|
||||||
# AWX
|
# AWX
|
||||||
from awx.api.serializers import (
|
from awx.api.serializers import JobSerializer, JobOptionsSerializer
|
||||||
JobDetailSerializer,
|
|
||||||
JobSerializer,
|
|
||||||
JobOptionsSerializer,
|
|
||||||
ProjectUpdateDetailSerializer,
|
|
||||||
)
|
|
||||||
|
|
||||||
from awx.main.models import (
|
from awx.main.models import (
|
||||||
Label,
|
Label,
|
||||||
@@ -108,7 +102,7 @@ class TestJobOptionsSerializerGetSummaryFields:
|
|||||||
|
|
||||||
|
|
||||||
class TestJobDetailSerializerGetHostStatusCountFields(object):
|
class TestJobDetailSerializerGetHostStatusCountFields(object):
|
||||||
def test_hosts_are_counted_once(self, job, mocker):
|
def test_hosts_are_counted_once(self):
|
||||||
mock_event = JobEvent(
|
mock_event = JobEvent(
|
||||||
**{
|
**{
|
||||||
'event': 'playbook_on_stats',
|
'event': 'playbook_on_stats',
|
||||||
@@ -133,26 +127,11 @@ class TestJobDetailSerializerGetHostStatusCountFields(object):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
mock_qs = namedtuple('mock_qs', ['get'])(mocker.MagicMock(return_value=mock_event))
|
assert mock_event.get_host_status_counts() == {'ok': 1, 'changed': 1, 'dark': 2}
|
||||||
only = mocker.MagicMock(return_value=mock_qs)
|
|
||||||
job.get_event_queryset = lambda *args, **kwargs: mocker.MagicMock(only=only)
|
|
||||||
|
|
||||||
serializer = JobDetailSerializer()
|
|
||||||
host_status_counts = serializer.get_host_status_counts(job)
|
|
||||||
|
|
||||||
assert host_status_counts == {'ok': 1, 'changed': 1, 'dark': 2}
|
|
||||||
|
|
||||||
def test_host_status_counts_is_empty_dict_without_stats_event(self, job):
|
|
||||||
job.get_event_queryset = lambda *args, **kwargs: JobEvent.objects.none()
|
|
||||||
|
|
||||||
serializer = JobDetailSerializer()
|
|
||||||
host_status_counts = serializer.get_host_status_counts(job)
|
|
||||||
|
|
||||||
assert host_status_counts == {}
|
|
||||||
|
|
||||||
|
|
||||||
class TestProjectUpdateDetailSerializerGetHostStatusCountFields(object):
|
class TestProjectUpdateDetailSerializerGetHostStatusCountFields(object):
|
||||||
def test_hosts_are_counted_once(self, project_update, mocker):
|
def test_hosts_are_counted_once(self):
|
||||||
mock_event = ProjectUpdateEvent(
|
mock_event = ProjectUpdateEvent(
|
||||||
**{
|
**{
|
||||||
'event': 'playbook_on_stats',
|
'event': 'playbook_on_stats',
|
||||||
@@ -177,18 +156,4 @@ class TestProjectUpdateDetailSerializerGetHostStatusCountFields(object):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
mock_qs = namedtuple('mock_qs', ['get'])(mocker.MagicMock(return_value=mock_event))
|
assert mock_event.get_host_status_counts() == {'ok': 1, 'changed': 1, 'dark': 2}
|
||||||
project_update.project_update_events.only = mocker.MagicMock(return_value=mock_qs)
|
|
||||||
|
|
||||||
serializer = ProjectUpdateDetailSerializer()
|
|
||||||
host_status_counts = serializer.get_host_status_counts(project_update)
|
|
||||||
|
|
||||||
assert host_status_counts == {'ok': 1, 'changed': 1, 'dark': 2}
|
|
||||||
|
|
||||||
def test_host_status_counts_is_empty_dict_without_stats_event(self, project_update):
|
|
||||||
project_update.project_update_events = ProjectUpdateEvent.objects.none()
|
|
||||||
|
|
||||||
serializer = ProjectUpdateDetailSerializer()
|
|
||||||
host_status_counts = serializer.get_host_status_counts(project_update)
|
|
||||||
|
|
||||||
assert host_status_counts == {}
|
|
||||||
|
|||||||
@@ -1919,26 +1919,6 @@ def test_managed_injector_redaction(injector_cls):
|
|||||||
assert 'very_secret_value' not in str(build_safe_env(env))
|
assert 'very_secret_value' not in str(build_safe_env(env))
|
||||||
|
|
||||||
|
|
||||||
@mock.patch('logging.getLogger')
|
|
||||||
def test_notification_job_not_finished(logging_getLogger, mocker):
|
|
||||||
uj = mocker.MagicMock()
|
|
||||||
uj.finished = False
|
|
||||||
logger = mocker.Mock()
|
|
||||||
logging_getLogger.return_value = logger
|
|
||||||
|
|
||||||
with mocker.patch('awx.main.models.UnifiedJob.objects.get', uj):
|
|
||||||
system.handle_success_and_failure_notifications(1)
|
|
||||||
assert logger.warning.called_with(f"Failed to even try to send notifications for job '{uj}' due to job not being in finished state.")
|
|
||||||
|
|
||||||
|
|
||||||
def test_notification_job_finished(mocker):
|
|
||||||
uj = mocker.MagicMock(send_notification_templates=mocker.MagicMock(), finished=True)
|
|
||||||
|
|
||||||
with mocker.patch('awx.main.models.UnifiedJob.objects.get', mocker.MagicMock(return_value=uj)):
|
|
||||||
system.handle_success_and_failure_notifications(1)
|
|
||||||
uj.send_notification_templates.assert_called()
|
|
||||||
|
|
||||||
|
|
||||||
def test_job_run_no_ee(mock_me):
|
def test_job_run_no_ee(mock_me):
|
||||||
org = Organization(pk=1)
|
org = Organization(pk=1)
|
||||||
proj = Project(pk=1, organization=org)
|
proj = Project(pk=1, organization=org)
|
||||||
|
|||||||
@@ -7,14 +7,17 @@ import time
|
|||||||
logger = logging.getLogger('awx.main.tasks.utils')
|
logger = logging.getLogger('awx.main.tasks.utils')
|
||||||
|
|
||||||
|
|
||||||
def update_model(model, pk, _attempt=0, _max_attempts=5, **updates):
|
def update_model(model, pk, _attempt=0, _max_attempts=5, select_for_update=False, **updates):
|
||||||
"""Reload the model instance from the database and update the
|
"""Reload the model instance from the database and update the
|
||||||
given fields.
|
given fields.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
# Retrieve the model instance.
|
# Retrieve the model instance.
|
||||||
instance = model.objects.get(pk=pk)
|
if select_for_update:
|
||||||
|
instance = model.objects.select_for_update().get(pk=pk)
|
||||||
|
else:
|
||||||
|
instance = model.objects.get(pk=pk)
|
||||||
|
|
||||||
# Update the appropriate fields and save the model
|
# Update the appropriate fields and save the model
|
||||||
# instance, then return the new instance.
|
# instance, then return the new instance.
|
||||||
|
|||||||
@@ -997,9 +997,6 @@ BROADCAST_WEBSOCKET_NEW_INSTANCE_POLL_RATE_SECONDS = 10
|
|||||||
# How often websocket process will generate stats
|
# How often websocket process will generate stats
|
||||||
BROADCAST_WEBSOCKET_STATS_POLL_RATE_SECONDS = 5
|
BROADCAST_WEBSOCKET_STATS_POLL_RATE_SECONDS = 5
|
||||||
|
|
||||||
# Number of times to retry sending a notification when waiting on a job to finish.
|
|
||||||
AWX_NOTIFICATION_JOB_FINISH_MAX_RETRY = 5
|
|
||||||
|
|
||||||
DJANGO_GUID = {'GUID_HEADER_NAME': 'X-API-Request-Id'}
|
DJANGO_GUID = {'GUID_HEADER_NAME': 'X-API-Request-Id'}
|
||||||
|
|
||||||
# Name of the default task queue
|
# Name of the default task queue
|
||||||
|
|||||||
@@ -652,7 +652,7 @@ function JobOutput({ job, eventRelatedSearchableKeys, eventSearchableKeys }) {
|
|||||||
isDeleteDisabled={isDeleting}
|
isDeleteDisabled={isDeleting}
|
||||||
/>
|
/>
|
||||||
</OutputHeader>
|
</OutputHeader>
|
||||||
<HostStatusBar counts={job.host_status_counts} />
|
<HostStatusBar counts={job.host_status_counts || {}} />
|
||||||
<JobOutputSearch
|
<JobOutputSearch
|
||||||
qsConfig={QS_CONFIG}
|
qsConfig={QS_CONFIG}
|
||||||
job={job}
|
job={job}
|
||||||
|
|||||||
Reference in New Issue
Block a user