From f2ae68f30207da413655412d1146e6f7328c2d9d Mon Sep 17 00:00:00 2001
From: Alan Rominger
Date: Tue, 10 Dec 2024 15:23:54 -0500
Subject: [PATCH] Fix project cache identifiers for new updates (#6762)

Finish test and discover viable solution

Add comment on related task code
---
 awx/main/models/projects.py                   | 26 ++++++++-
 awx/main/tasks/jobs.py                        |  1 +
 .../live/tests/projects/test_requirements.py  | 63 +++++++++++++++++++
 3 files changed, 88 insertions(+), 2 deletions(-)
 create mode 100644 awx/main/tests/live/tests/projects/test_requirements.py

diff --git a/awx/main/models/projects.py b/awx/main/models/projects.py
index 8c8fcd52ba..d0b22830ee 100644
--- a/awx/main/models/projects.py
+++ b/awx/main/models/projects.py
@@ -5,6 +5,8 @@
 import datetime
 import os
 import urllib.parse as urlparse
+from uuid import uuid4
+import logging
 
 # Django
 from django.conf import settings
@@ -39,6 +41,8 @@ from awx.main.models.rbac import (
     ROLE_SINGLETON_SYSTEM_AUDITOR,
 )
 
+logger = logging.getLogger('awx.main.models.projects')
+
 __all__ = ['Project', 'ProjectUpdate']
 
 
@@ -447,7 +451,25 @@ class Project(UnifiedJobTemplate, ProjectOptions, ResourceMixin, CustomVirtualEn
 
     @property
     def cache_id(self):
-        return str(self.last_job_id)
+        """Return the name of the cache folder where this project's collections and roles are saved, so they are not re-downloaded.
+
+        Normally we want this to track the last update, because every update should pull new content.
+        This does not count sync jobs, but sync jobs do not update last_job or current_job anyway.
+        If cleanup_jobs deletes the last jobs, then we fall back to other values tied to the last job run:
+        the last_job_run timestamp, then the modified timestamp, and finally a random UUID.
+        """
+        if self.current_job_id:
+            return str(self.current_job_id)
+        elif self.last_job_id:
+            return str(self.last_job_id)
+        elif self.last_job_run:
+            return self.last_job_run.isoformat()
+        else:
+            logger.warning(f'No info about last update for project {self.id}, content cache may misbehave')
+            if self.modified:
+                return self.modified.isoformat()
+            else:
+                return str(uuid4())
 
     @property
     def notification_templates(self):
@@ -618,7 +640,7 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage
     @property
     def cache_id(self):
         if self.branch_override or self.job_type == 'check' or (not self.project):
-            return str(self.id)
+            return str(self.id)  # causes it to not use the cache, basically
         return self.project.cache_id
 
     def result_stdout_raw_limited(self, start_line=0, end_line=None, redact_sensitive=True):
diff --git a/awx/main/tasks/jobs.py b/awx/main/tasks/jobs.py
index c6cfc6a180..509ebc7e98 100644
--- a/awx/main/tasks/jobs.py
+++ b/awx/main/tasks/jobs.py
@@ -698,6 +698,7 @@ class SourceControlMixin(BaseTask):
             logger.debug(f'Project not available locally, {self.instance.id} will sync with remote')
             sync_needs.append(source_update_tag)
 
+        # Determine whether or not this project sync needs to populate the cache for Ansible content, roles and collections
         has_cache = os.path.exists(os.path.join(project.get_cache_path(), project.cache_id))
         # Galaxy requirements are not supported for manual projects
         if project.scm_type and ((not has_cache) or branch_override):
diff --git a/awx/main/tests/live/tests/projects/test_requirements.py b/awx/main/tests/live/tests/projects/test_requirements.py
new file mode 100644
index 0000000000..c82ccbec80
--- /dev/null
+++ b/awx/main/tests/live/tests/projects/test_requirements.py
@@ -0,0 +1,63 @@
+import os
+import time
+
+import pytest
+
+from django.conf import settings
+
+from awx.main.tests.live.tests.conftest import wait_for_job
+
+from awx.main.models import Project, SystemJobTemplate
+
+
+@pytest.fixture(scope='session')
+def project_with_requirements(default_org):
+    """Get or create the 'project-with-requirements' project, then wait on its current or last update if there is one."""
+    project, _ = Project.objects.get_or_create(
+        name='project-with-requirements',
+        scm_url='https://github.com/ansible/test-playbooks.git',
+        scm_branch="with_requirements",
+        scm_type='git',
+        organization=default_org,
+    )
+    start = time.time()
+    while time.time() - start < 3.0:
+        # Reload from the database on every pass: field values on this in-memory instance do not
+        # change on their own, so without the reload the loop could only spin until the timeout.
+        project.refresh_from_db()
+        if project.current_job or project.last_job or project.last_job_run:
+            break
+        time.sleep(0.1)  # poll instead of busy-waiting
+    assert project.current_job or project.last_job or project.last_job_run, f'Project never updated id={project.id}'
+    update = project.current_job or project.last_job
+    if update:
+        wait_for_job(update)
+    return project
+
+
+def project_cache_is_populated(project):
+    """Return whether the folder named by project.cache_id exists under project.get_cache_path()."""
+    proj_cache = os.path.join(project.get_cache_path(), project.cache_id)
+    return os.path.exists(proj_cache)
+
+
+def test_cache_is_populated_after_cleanup_job(project_with_requirements):
+    """After the 'Cleanup Job Details' system job runs with days=0, a new project update still leaves a populated cache."""
+    assert project_with_requirements.cache_id is not None  # already updated, should be something
+    cache_path = os.path.join(settings.PROJECTS_ROOT, '.__awx_cache')
+    assert os.path.exists(cache_path)
+
+    assert project_cache_is_populated(project_with_requirements)
+
+    cleanup_sjt = SystemJobTemplate.objects.get(name='Cleanup Job Details')
+    cleanup_job = cleanup_sjt.create_unified_job(extra_vars={'days': 0})
+    cleanup_job.signal_start()
+    wait_for_job(cleanup_job)
+
+    project_with_requirements.refresh_from_db()
+    assert project_with_requirements.cache_id is not None
+    update = project_with_requirements.update()
+    wait_for_job(update)
+
+    # Now, we still have a populated cache
+    assert project_cache_is_populated(project_with_requirements)