Bulk save facts, and move to before status change (#12998)

* Facts scaling fixes for large inventory, timing issue

Move save of Ansible facts to before the job status changes
  this is considered an acceptable delay with the other
  performance fixes here

Remove completely unrelated unused facts method

Scale related changes to facts saving:
  Use .iterator() on queryset when looping
  Change save to bulk_update
  Apply bulk_update in batches of 100, to reduce memory
  Only save a single file modtime, avoiding large dict

Use decorator for long func time logging
  update decorator to fill in format statement
This commit is contained in:
Alan Rominger
2022-11-15 15:18:06 -05:00
committed by GitHub
parent 0933a96d60
commit 2fdce43f9e
5 changed files with 89 additions and 65 deletions

View File

@@ -44,7 +44,7 @@ from awx.main.models.notifications import (
NotificationTemplate,
JobNotificationMixin,
)
from awx.main.utils import parse_yaml_or_json, getattr_dne, NullablePromptPseudoField, polymorphic
from awx.main.utils import parse_yaml_or_json, getattr_dne, NullablePromptPseudoField, polymorphic, log_excess_runtime
from awx.main.fields import ImplicitRoleField, AskForField, JSONBlob, OrderedManyToManyField
from awx.main.models.mixins import (
ResourceMixin,
@@ -857,8 +857,11 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
return host_queryset.iterator()
return host_queryset
def start_job_fact_cache(self, destination, modification_times, timeout=None):
@log_excess_runtime(logger, debug_cutoff=0.01, msg='Job {job_id} host facts prepared for {written_ct} hosts, took {delta:.3f} s', add_log_data=True)
def start_job_fact_cache(self, destination, log_data, timeout=None):
self.log_lifecycle("start_job_fact_cache")
log_data['job_id'] = self.id
log_data['written_ct'] = 0
os.makedirs(destination, mode=0o700)
if timeout is None:
@@ -869,6 +872,8 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
hosts = self._get_inventory_hosts(ansible_facts_modified__gte=timeout)
else:
hosts = self._get_inventory_hosts()
last_filepath_written = None
for host in hosts:
filepath = os.sep.join(map(str, [destination, host.name]))
if not os.path.realpath(filepath).startswith(destination):
@@ -878,23 +883,38 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
with codecs.open(filepath, 'w', encoding='utf-8') as f:
os.chmod(f.name, 0o600)
json.dump(host.ansible_facts, f)
log_data['written_ct'] += 1
last_filepath_written = filepath
except IOError:
system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name)))
continue
# make note of the time we wrote the file so we can check if it changed later
modification_times[filepath] = os.path.getmtime(filepath)
# make note of the time we wrote the last file so we can check if any file changed later
if last_filepath_written:
return os.path.getmtime(last_filepath_written)
return None
def finish_job_fact_cache(self, destination, modification_times):
@log_excess_runtime(
logger,
debug_cutoff=0.01,
msg='Job {job_id} host facts: updated {updated_ct}, cleared {cleared_ct}, unchanged {unmodified_ct}, took {delta:.3f} s',
add_log_data=True,
)
def finish_job_fact_cache(self, destination, facts_write_time, log_data):
self.log_lifecycle("finish_job_fact_cache")
log_data['job_id'] = self.id
log_data['updated_ct'] = 0
log_data['unmodified_ct'] = 0
log_data['cleared_ct'] = 0
hosts_to_update = []
for host in self._get_inventory_hosts():
filepath = os.sep.join(map(str, [destination, host.name]))
if not os.path.realpath(filepath).startswith(destination):
system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name)))
continue
if os.path.exists(filepath):
# If the file changed since we wrote it pre-playbook run...
# If the file changed since we wrote the last facts file, pre-playbook run...
modified = os.path.getmtime(filepath)
if modified > modification_times.get(filepath, 0):
if (not facts_write_time) or modified > facts_write_time:
with codecs.open(filepath, 'r', encoding='utf-8') as f:
try:
ansible_facts = json.load(f)
@@ -902,7 +922,7 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
continue
host.ansible_facts = ansible_facts
host.ansible_facts_modified = now()
host.save(update_fields=['ansible_facts', 'ansible_facts_modified'])
hosts_to_update.append(host)
system_tracking_logger.info(
'New fact for inventory {} host {}'.format(smart_str(host.inventory.name), smart_str(host.name)),
extra=dict(
@@ -913,12 +933,21 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
job_id=self.id,
),
)
log_data['updated_ct'] += 1
else:
log_data['unmodified_ct'] += 1
else:
# if the file goes missing, ansible removed it (likely via clear_facts)
host.ansible_facts = {}
host.ansible_facts_modified = now()
hosts_to_update.append(host)
system_tracking_logger.info('Facts cleared for inventory {} host {}'.format(smart_str(host.inventory.name), smart_str(host.name)))
host.save()
log_data['cleared_ct'] += 1
if len(hosts_to_update) > 100:
self.inventory.hosts.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified'])
hosts_to_update = []
if hosts_to_update:
self.inventory.hosts.bulk_update(hosts_to_update, ['ansible_facts', 'ansible_facts_modified'])
class LaunchTimeConfigBase(BaseModel):