From fefab898150609b5225e93151f044c8769598f3c Mon Sep 17 00:00:00 2001 From: AlanCoding Date: Mon, 15 Jun 2020 10:33:27 -0400 Subject: [PATCH] Integrate content caching with existing task logic Revert the --force flags use the update id as metric for role caching Shift the movement of cache to job folder from rsync task to python Only install roles and collections if needed Deal with roles and collections for jobs without sync Skip local copy if roles or collections turned off update docs for content caching Design pivot - use empty cache dir to indicate lack of content Do not cache content if we did not install content Test changes to allay concerns about reliability of local_path Do not blow away cache for SCM inventory updates --- awx/main/models/projects.py | 21 ++- awx/main/tasks.py | 154 +++++++++++------- awx/main/tests/functional/api/test_project.py | 8 +- awx/main/tests/functional/conftest.py | 1 - .../tests/functional/models/test_inventory.py | 3 +- .../tests/functional/models/test_project.py | 12 ++ awx/main/tests/functional/test_projects.py | 4 +- awx/main/tests/functional/test_tasks.py | 4 +- awx/main/tests/unit/test_tasks.py | 9 +- awx/playbooks/project_update.yml | 48 ++---- docs/collections.md | 42 ++++- 11 files changed, 196 insertions(+), 110 deletions(-) diff --git a/awx/main/models/projects.py b/awx/main/models/projects.py index fbfea3b61b..5d8cfd5290 100644 --- a/awx/main/models/projects.py +++ b/awx/main/models/projects.py @@ -423,6 +423,10 @@ class Project(UnifiedJobTemplate, ProjectOptions, ResourceMixin, CustomVirtualEn return True return False + @property + def cache_id(self): + return str(self.last_job_id) + @property def notification_templates(self): base_notification_templates = NotificationTemplate.objects @@ -560,6 +564,19 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage def result_stdout_raw(self): return self._result_stdout_raw(redact_sensitive=True) + @property + def branch_override(self): + """Whether a branch other than the project default is used.""" + if not self.project: + return True + return bool(self.scm_branch and self.scm_branch != self.project.scm_branch) + + @property + def cache_id(self): + if self.branch_override or self.job_type == 'check' or (not self.project): + return str(self.id) + return self.project.cache_id + def result_stdout_raw_limited(self, start_line=0, end_line=None, redact_sensitive=True): return self._result_stdout_raw_limited(start_line, end_line, redact_sensitive=redact_sensitive) @@ -603,9 +620,7 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage def save(self, *args, **kwargs): added_update_fields = [] if not self.job_tags: - job_tags = ['update_{}'.format(self.scm_type)] - job_tags.append('install_roles') - job_tags.append('install_collections') + job_tags = ['update_{}'.format(self.scm_type), 'install_roles', 'install_collections'] self.job_tags = ','.join(job_tags) added_update_fields.append('job_tags') if self.scm_delete_on_update and 'delete' not in self.job_tags and self.job_type == 'check': diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 49e9aea150..7ff21fa85f 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -1865,44 +1865,32 @@ class RunJob(BaseTask): project_path = job.project.get_project_path(check_if_exists=False) job_revision = job.project.scm_revision sync_needs = [] - all_sync_needs = ['update_{}'.format(job.project.scm_type), 'install_roles', 'install_collections'] + source_update_tag = 'update_{}'.format(job.project.scm_type) + branch_override = bool(job.scm_branch and job.scm_branch != job.project.scm_branch) if not job.project.scm_type: pass # manual projects are not synced, user has responsibility for that elif not os.path.exists(project_path): logger.debug('Performing fresh clone of {} on this instance.'.format(job.project)) - sync_needs = all_sync_needs - elif not job.project.scm_revision: - logger.debug('Revision not known for {}, will sync with remote'.format(job.project)) - sync_needs = all_sync_needs - elif job.project.scm_type == 'git': + sync_needs.append(source_update_tag) + elif job.project.scm_type == 'git' and job.project.scm_revision and (not branch_override): git_repo = git.Repo(project_path) try: - desired_revision = job.project.scm_revision - if job.scm_branch and job.scm_branch != job.project.scm_branch: - desired_revision = job.scm_branch # could be commit or not, but will try as commit - current_revision = git_repo.head.commit.hexsha - if desired_revision == current_revision: - job_revision = desired_revision + if job_revision == git_repo.head.commit.hexsha: logger.debug('Skipping project sync for {} because commit is locally available'.format(job.log_format)) else: - sync_needs = all_sync_needs + sync_needs.append(source_update_tag) except (ValueError, BadGitName): logger.debug('Needed commit for {} not in local source tree, will sync with remote'.format(job.log_format)) - sync_needs = all_sync_needs + sync_needs.append(source_update_tag) else: - sync_needs = all_sync_needs - # Galaxy requirements are not supported for manual projects - if not sync_needs and job.project.scm_type: - # see if we need a sync because of presence of roles - galaxy_req_path = os.path.join(project_path, 'roles', 'requirements.yml') - if os.path.exists(galaxy_req_path): - logger.debug('Running project sync for {} because of galaxy role requirements.'.format(job.log_format)) - sync_needs.append('install_roles') + logger.debug('Project not available locally {}, will sync with remote'.format(job.project)) + sync_needs.append(source_update_tag) - galaxy_collections_req_path = os.path.join(project_path, 'collections', 'requirements.yml') - if os.path.exists(galaxy_collections_req_path): - logger.debug('Running project sync for {} because of galaxy collections requirements.'.format(job.log_format)) - sync_needs.append('install_collections') + cache_id = str(job.project.last_job_id) # content cache - for roles and collections + has_cache = os.path.exists(os.path.join(job.project.get_cache_path(), cache_id)) + # Galaxy requirements are not supported for manual projects + if job.project.scm_type and ((not has_cache) or branch_override): + sync_needs.extend(['install_roles', 'install_collections']) if sync_needs: pu_ig = job.instance_group @@ -1920,7 +1908,7 @@ class RunJob(BaseTask): execution_node=pu_en, celery_task_id=job.celery_task_id ) - if job.scm_branch and job.scm_branch != job.project.scm_branch: + if branch_override: sync_metafields['scm_branch'] = job.scm_branch if 'update_' not in sync_metafields['job_tags']: sync_metafields['scm_revision'] = job_revision @@ -1952,10 +1940,7 @@ class RunJob(BaseTask): if job_revision: job = self.update_model(job.pk, scm_revision=job_revision) # Project update does not copy the folder, so copy here - RunProjectUpdate.make_local_copy( - project_path, os.path.join(private_data_dir, 'project'), - job.project.scm_type, job_revision - ) + RunProjectUpdate.make_local_copy(job.project, private_data_dir, scm_revision=job_revision) if job.inventory.kind == 'smart': # cache smart inventory memberships so that the host_filter query is not @@ -1995,10 +1980,7 @@ class RunProjectUpdate(BaseTask): @property def proot_show_paths(self): - show_paths = [settings.PROJECTS_ROOT] - if self.job_private_data_dir: - show_paths.append(self.job_private_data_dir) - return show_paths + return [settings.PROJECTS_ROOT] def __init__(self, *args, job_private_data_dir=None, **kwargs): super(RunProjectUpdate, self).__init__(*args, **kwargs) @@ -2165,8 +2147,7 @@ class RunProjectUpdate(BaseTask): extra_vars.update(extra_vars_new) scm_branch = project_update.scm_branch - branch_override = bool(scm_branch and project_update.scm_branch != project_update.project.scm_branch) - if project_update.job_type == 'run' and (not branch_override): + if project_update.job_type == 'run' and (not project_update.branch_override): if project_update.project.scm_revision: scm_branch = project_update.project.scm_revision elif not scm_branch: @@ -2174,14 +2155,15 @@ class RunProjectUpdate(BaseTask): elif not scm_branch: scm_branch = {'hg': 'tip'}.get(project_update.scm_type, 'HEAD') extra_vars.update({ - 'project_path': project_update.get_project_path(check_if_exists=False), + 'projects_root': settings.PROJECTS_ROOT.rstrip('/'), + 'local_path': os.path.basename(project_update.project.local_path), + 'project_path': project_update.get_project_path(check_if_exists=False), # deprecated 'insights_url': settings.INSIGHTS_URL_BASE, 'awx_license_type': get_license(show_key=False).get('license_type', 'UNLICENSED'), 'awx_version': get_awx_version(), 'scm_url': scm_url, 'scm_branch': scm_branch, 'scm_clean': project_update.scm_clean, - 'project_cache': project_update.get_cache_path(), 'roles_enabled': settings.AWX_ROLES_ENABLED, 'collections_enabled': settings.AWX_COLLECTIONS_ENABLED, }) @@ -2323,8 +2305,7 @@ class RunProjectUpdate(BaseTask): os.mkdir(settings.PROJECTS_ROOT) self.acquire_lock(instance) self.original_branch = None - if (instance.scm_type == 'git' and instance.job_type == 'run' and instance.project and - instance.scm_branch != instance.project.scm_branch): + if instance.scm_type == 'git' and instance.branch_override: project_path = instance.project.get_project_path(check_if_exists=False) if os.path.exists(project_path): git_repo = git.Repo(project_path) @@ -2332,13 +2313,20 @@ class RunProjectUpdate(BaseTask): self.original_branch = git_repo.head.commit else: self.original_branch = git_repo.active_branch + stage_path = os.path.join(instance.get_cache_path(), 'stage') + if os.path.exists(stage_path): + logger.warning('{0} cache staging area unexpectedly existed before update.') + shutil.rmtree(stage_path) + # Important - the presence of an empty cache will indicate that a given + # project revision did not have any roles or collections + os.makedirs(stage_path) - def clear_project_cache(self, instance, revision): - cache_dir = instance.get_cache_path() + @staticmethod + def clear_project_cache(cache_dir, keep_value): if os.path.isdir(cache_dir): for entry in os.listdir(cache_dir): old_path = os.path.join(cache_dir, entry) - if entry != revision: + if entry not in (keep_value, 'stage'): # invalidate, then delete new_path = os.path.join(cache_dir,'.~~delete~~' + entry) try: @@ -2348,16 +2336,27 @@ class RunProjectUpdate(BaseTask): logger.warning(f"Could not remove cache directory {old_path}") @staticmethod - def make_local_copy(project_path, destination_folder, scm_type, scm_revision): - if scm_type == 'git': + def make_local_copy(p, job_private_data_dir, scm_revision=None): + """Copy project content (roles and collections) to a job private_data_dir + + :param object p: Either a project or a project update + :param str job_private_data_dir: The root of the target ansible-runner folder + :param str scm_revision: For branch_override cases, the git revision to copy + """ + project_path = p.get_project_path(check_if_exists=False) + destination_folder = os.path.join(job_private_data_dir, 'project') + if not scm_revision: + scm_revision = p.scm_revision + + if p.scm_type == 'git': git_repo = git.Repo(project_path) if not os.path.exists(destination_folder): os.mkdir(destination_folder, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC) tmp_branch_name = 'awx_internal/{}'.format(uuid4()) # always clone based on specific job revision - if not scm_revision: + if not p.scm_revision: raise RuntimeError('Unexpectedly could not determine a revision to run from project.') - source_branch = git_repo.create_head(tmp_branch_name, scm_revision) + source_branch = git_repo.create_head(tmp_branch_name, p.scm_revision) # git clone must take file:// syntax for source repo or else options like depth will be ignored source_as_uri = Path(project_path).as_uri() git.Repo.clone_from( @@ -2376,20 +2375,48 @@ class RunProjectUpdate(BaseTask): else: copy_tree(project_path, destination_folder, preserve_symlinks=1) + # copy over the roles and collection cache to job folder + cache_path = os.path.join(p.get_cache_path(), p.cache_id) + subfolders = [] + if settings.AWX_COLLECTIONS_ENABLED: + subfolders.append('requirements_collections') + if settings.AWX_ROLES_ENABLED: + subfolders.append('requirements_roles') + for subfolder in subfolders: + cache_subpath = os.path.join(cache_path, subfolder) + if os.path.exists(cache_subpath): + dest_subpath = os.path.join(job_private_data_dir, subfolder) + copy_tree(cache_subpath, dest_subpath, preserve_symlinks=1) + logger.debug('{0} {1} prepared {2} from cache'.format(type(p).__name__, p.pk, dest_subpath)) + def post_run_hook(self, instance, status): # To avoid hangs, very important to release lock even if errors happen here try: if self.playbook_new_revision: - self.clear_project_cache(instance, self.playbook_new_revision) instance.scm_revision = self.playbook_new_revision instance.save(update_fields=['scm_revision']) + + # Roles and collection folders copy to durable cache + base_path = instance.get_cache_path() + stage_path = os.path.join(base_path, 'stage') + if status == 'successful' and 'install_' in instance.job_tags: + # Clear other caches before saving this one, and if branch is overridden + # do not clear cache for main branch, but do clear it for other branches + self.clear_project_cache(base_path, keep_value=str(instance.project.last_job_id)) + cache_path = os.path.join(base_path, instance.cache_id) + if os.path.exists(stage_path): + if os.path.exists(cache_path): + logger.warning('Rewriting cache at {0}, performance may suffer'.format(cache_path)) + shutil.rmtree(cache_path) + os.rename(stage_path, cache_path) + logger.debug('{0} wrote to cache at {1}'.format(instance.log_format, cache_path)) + elif os.path.exists(stage_path): + shutil.rmtree(stage_path) # cannot trust content update produced + if self.job_private_data_dir: # copy project folder before resetting to default branch # because some git-tree-specific resources (like submodules) might matter - self.make_local_copy( - instance.get_project_path(check_if_exists=False), os.path.join(self.job_private_data_dir, 'project'), - instance.scm_type, instance.scm_revision - ) + self.make_local_copy(instance, self.job_private_data_dir) if self.original_branch: # for git project syncs, non-default branches can be problems # restore to branch the repo was on before this run @@ -2642,13 +2669,22 @@ class RunInventoryUpdate(BaseTask): source_project = None if inventory_update.inventory_source: source_project = inventory_update.inventory_source.source_project - if (inventory_update.source=='scm' and inventory_update.launch_type!='scm' and source_project): - # In project sync, pulling galaxy roles is not needed + if (inventory_update.source=='scm' and inventory_update.launch_type!='scm' and + source_project and source_project.scm_type): # never ever update manual projects + + # Check if the content cache exists, so that we do not unnecessarily re-download roles + sync_needs = ['update_{}'.format(source_project.scm_type)] + cache_id = str(source_project.last_job_id) # content cache id for roles and collections + has_cache = os.path.exists(os.path.join(source_project.get_cache_path(), cache_id)) + # Galaxy requirements are not supported for manual projects + if not has_cache: + sync_needs.extend(['install_roles', 'install_collections']) + local_project_sync = source_project.create_project_update( _eager_fields=dict( launch_type="sync", job_type='run', - job_tags='update_{},install_collections'.format(source_project.scm_type), # roles are never valid for inventory + job_tags=','.join(sync_needs), status='running', execution_node=inventory_update.execution_node, instance_group = inventory_update.instance_group, @@ -2672,11 +2708,7 @@ class RunInventoryUpdate(BaseTask): raise elif inventory_update.source == 'scm' and inventory_update.launch_type == 'scm' and source_project: # This follows update, not sync, so make copy here - project_path = source_project.get_project_path(check_if_exists=False) - RunProjectUpdate.make_local_copy( - project_path, os.path.join(private_data_dir, 'project'), - source_project.scm_type, source_project.scm_revision - ) + RunProjectUpdate.make_local_copy(source_project, private_data_dir) @task(queue=get_local_queuename) diff --git a/awx/main/tests/functional/api/test_project.py b/awx/main/tests/functional/api/test_project.py index af46363557..09fed17c67 100644 --- a/awx/main/tests/functional/api/test_project.py +++ b/awx/main/tests/functional/api/test_project.py @@ -54,7 +54,9 @@ def test_no_changing_overwrite_behavior_if_used(post, patch, organization, admin data={ 'name': 'fooo', 'organization': organization.id, - 'allow_override': True + 'allow_override': True, + 'scm_type': 'git', + 'scm_url': 'https://github.com/ansible/test-playbooks.git' }, user=admin_user, expect=201 @@ -83,7 +85,9 @@ def test_changing_overwrite_behavior_okay_if_not_used(post, patch, organization, data={ 'name': 'fooo', 'organization': organization.id, - 'allow_override': True + 'allow_override': True, + 'scm_type': 'git', + 'scm_url': 'https://github.com/ansible/test-playbooks.git' }, user=admin_user, expect=201 diff --git a/awx/main/tests/functional/conftest.py b/awx/main/tests/functional/conftest.py index f6accff877..7111950003 100644 --- a/awx/main/tests/functional/conftest.py +++ b/awx/main/tests/functional/conftest.py @@ -145,7 +145,6 @@ def project(instance, organization): description="test-proj-desc", organization=organization, playbook_files=['helloworld.yml', 'alt-helloworld.yml'], - local_path='_92__test_proj', scm_revision='1234567890123456789012345678901234567890', scm_url='localhost', scm_type='git' diff --git a/awx/main/tests/functional/models/test_inventory.py b/awx/main/tests/functional/models/test_inventory.py index adc7aa245c..6765f0e73b 100644 --- a/awx/main/tests/functional/models/test_inventory.py +++ b/awx/main/tests/functional/models/test_inventory.py @@ -169,7 +169,8 @@ class TestSCMUpdateFeatures: inventory_update = InventoryUpdate( inventory_source=scm_inventory_source, source_path=scm_inventory_source.source_path) - assert inventory_update.get_actual_source_path().endswith('_92__test_proj/inventory_file') + p = scm_inventory_source.source_project + assert inventory_update.get_actual_source_path().endswith(f'_{p.id}__test_proj/inventory_file') def test_no_unwanted_updates(self, scm_inventory_source): # Changing the non-sensitive fields should not trigger update diff --git a/awx/main/tests/functional/models/test_project.py b/awx/main/tests/functional/models/test_project.py index 3f57691ac3..2cf43c5690 100644 --- a/awx/main/tests/functional/models/test_project.py +++ b/awx/main/tests/functional/models/test_project.py @@ -34,6 +34,18 @@ def test_sensitive_change_triggers_update(project): mock_update.assert_called_once_with() +@pytest.mark.django_db +def test_local_path_autoset(organization): + with mock.patch.object(Project, "update"): + p = Project.objects.create( + name="test-proj", + organization=organization, + scm_url='localhost', + scm_type='git' + ) + assert p.local_path == f'_{p.id}__test_proj' + + @pytest.mark.django_db def test_foreign_key_change_changes_modified_by(project, organization): assert project._get_fields_snapshot()['organization_id'] == organization.id diff --git a/awx/main/tests/functional/test_projects.py b/awx/main/tests/functional/test_projects.py index ef4b59630d..ccfbd06627 100644 --- a/awx/main/tests/functional/test_projects.py +++ b/awx/main/tests/functional/test_projects.py @@ -29,8 +29,8 @@ def team_project_list(organization_factory): @pytest.mark.django_db def test_get_project_path(project): # Test combining projects root with project local path - with mock.patch('awx.main.models.projects.settings.PROJECTS_ROOT', '/var/lib/awx'): - assert project.get_project_path(check_if_exists=False) == '/var/lib/awx/_92__test_proj' + with mock.patch('awx.main.models.projects.settings.PROJECTS_ROOT', '/var/lib/foo'): + assert project.get_project_path(check_if_exists=False) == f'/var/lib/foo/_{project.id}__test_proj' @pytest.mark.django_db diff --git a/awx/main/tests/functional/test_tasks.py b/awx/main/tests/functional/test_tasks.py index f1cf382a7c..c7bc50c8d2 100644 --- a/awx/main/tests/functional/test_tasks.py +++ b/awx/main/tests/functional/test_tasks.py @@ -30,7 +30,7 @@ class TestDependentInventoryUpdate: def test_dependent_inventory_updates_is_called(self, scm_inventory_source, scm_revision_file): task = RunProjectUpdate() task.revision_path = scm_revision_file - proj_update = ProjectUpdate.objects.create(project=scm_inventory_source.source_project) + proj_update = scm_inventory_source.source_project.create_project_update() with mock.patch.object(RunProjectUpdate, '_update_dependent_inventories') as inv_update_mck: with mock.patch.object(RunProjectUpdate, 'release_lock'): task.post_run_hook(proj_update, 'successful') @@ -39,7 +39,7 @@ class TestDependentInventoryUpdate: def test_no_unwanted_dependent_inventory_updates(self, project, scm_revision_file): task = RunProjectUpdate() task.revision_path = scm_revision_file - proj_update = ProjectUpdate.objects.create(project=project) + proj_update = project.create_project_update() with mock.patch.object(RunProjectUpdate, '_update_dependent_inventories') as inv_update_mck: with mock.patch.object(RunProjectUpdate, 'release_lock'): task.post_run_hook(proj_update, 'successful') diff --git a/awx/main/tests/unit/test_tasks.py b/awx/main/tests/unit/test_tasks.py index 354b5a2db5..71bcd8d03c 100644 --- a/awx/main/tests/unit/test_tasks.py +++ b/awx/main/tests/unit/test_tasks.py @@ -61,7 +61,10 @@ def patch_Job(): @pytest.fixture def job(): - return Job(pk=1, id=1, project=Project(), inventory=Inventory(), job_template=JobTemplate(id=1, name='foo')) + return Job( + pk=1, id=1, + project=Project(local_path='/projects/_23_foo'), + inventory=Inventory(), job_template=JobTemplate(id=1, name='foo')) @pytest.fixture @@ -406,7 +409,9 @@ class TestExtraVarSanitation(TestJobExecution): class TestGenericRun(): def test_generic_failure(self, patch_Job): - job = Job(status='running', inventory=Inventory(), project=Project()) + job = Job( + status='running', inventory=Inventory(), + project=Project(local_path='/projects/_23_foo')) job.websocket_emit_status = mock.Mock() task = tasks.RunJob() diff --git a/awx/playbooks/project_update.yml b/awx/playbooks/project_update.yml index c84bd0ba25..419f0531eb 100644 --- a/awx/playbooks/project_update.yml +++ b/awx/playbooks/project_update.yml @@ -1,7 +1,9 @@ --- # The following variables will be set by the runner of this playbook: -# project_path: PROJECTS_DIR/_local_path_ -# project_cache: CACHE_DIR/.__awx_cache/_local_path_ +# projects_root: Global location for caching project checkouts and roles and collections +# should not have trailing slash on end +# local_path: Path within projects_root to use for this project +# project_path: projects_root/local_path # scm_url: https://server/repo # insights_url: Insights service URL (from configuration) # scm_branch: branch/tag/revision (HEAD if unset) @@ -30,13 +32,6 @@ tags: - delete - - name: delete project cache directory before update - file: - path: "{{project_cache|quote}}" - state: absent - tags: - - delete - - block: - name: update project using git git: @@ -117,12 +112,11 @@ - update_svn - update_insights - - name: Set content cache location - set_fact: - cache_dir: "{{ project_cache }}/{{ scm_version|default(scm_branch) }}" - tags: - - install_collections - - install_roles +- hosts: localhost + gather_facts: false + connection: local + name: Install content with ansible-galaxy command if necessary + tasks: - block: - name: detect requirements.yml @@ -131,7 +125,10 @@ register: doesRequirementsExist - name: fetch galaxy roles from requirements.yml - command: ansible-galaxy role install {{ '--force' if roles_destination is not defined else '' }} -r roles/requirements.yml -p {{ cache_dir }}/requirements_roles {{ ' -' + 'v' * ansible_verbosity if ansible_verbosity else '' }} + command: > + ansible-galaxy role install -r roles/requirements.yml + --roles-path {{projects_root}}/.__awx_cache/{{local_path}}/stage/requirements_roles + {{ ' -' + 'v' * ansible_verbosity if ansible_verbosity else '' }} args: chdir: "{{project_path|quote}}" register: galaxy_result @@ -141,12 +138,6 @@ ANSIBLE_FORCE_COLOR: false GIT_SSH_COMMAND: "ssh -o StrictHostKeyChecking=no" - - name: populate job directory with needed roles - synchronize: - src: "{{ cache_dir }}/requirements_roles/" - dest: "{{ roles_destination }}" - when: roles_destination is defined and doesRequirementsExist.stat.exists - when: roles_enabled|bool tags: - install_roles @@ -158,7 +149,10 @@ register: doesCollectionRequirementsExist - name: fetch galaxy collections from collections/requirements.yml - command: ansible-galaxy collection install {{ '--force' if collections_destination is not defined else '' }} -r collections/requirements.yml -p {{ cache_dir }}/requirements_collections {{ ' -' + 'v' * ansible_verbosity if ansible_verbosity else '' }} + command: > + ansible-galaxy collection install -r collections/requirements.yml + --collections-path {{projects_root}}/.__awx_cache/{{local_path}}/stage/requirements_collections + {{ ' -' + 'v' * ansible_verbosity if ansible_verbosity else '' }} args: chdir: "{{project_path|quote}}" register: galaxy_collection_result @@ -166,15 +160,9 @@ changed_when: "'Installing ' in galaxy_collection_result.stdout" environment: ANSIBLE_FORCE_COLOR: false - ANSIBLE_COLLECTIONS_PATHS: "{{ cache_dir }}/requirements_collections" + ANSIBLE_COLLECTIONS_PATHS: "{{projects_root}}/.__awx_cache/{{local_path}}/stage/requirements_collections" GIT_SSH_COMMAND: "ssh -o StrictHostKeyChecking=no" - - name: populate job directory with needed collections - synchronize: - src: "{{ cache_dir }}/requirements_collections/" - dest: "{{ collections_destination }}" - when: collections_destination is defined and doesCollectionRequirementsExist.stat.exists - when: - "ansible_version.full is version_compare('2.8', '>=')" - collections_enabled|bool diff --git a/docs/collections.md b/docs/collections.md index 5cbf3eab7a..68c11950f8 100644 --- a/docs/collections.md +++ b/docs/collections.md @@ -4,15 +4,18 @@ AWX supports the use of Ansible Collections. This section will give ways to use ### Project Collections Requirements -If you specify a Collections requirements file in SCM at `collections/requirements.yml`, -then AWX will install Collections from that file in the implicit project sync -before a job run. The invocation looks like: +If you specify a collections requirements file in SCM at `collections/requirements.yml`, +then AWX will install collections from that file to a special cache folder in project updates. +Before a job runs, the roles and/or collections will be copied from the special +cache folder to the job temporary folder. + +The invocation looks like: ``` -ansible-galaxy collection install -r requirements.yml -p /requirements_collections +ansible-galaxy collection install -r requirements.yml -p /requirements_collections ``` -Example of the resultant `tmp` directory where job is running: +Example of the resultant job `tmp` directory where job is running: ``` ├── project @@ -20,7 +23,7 @@ Example of the resultant `tmp` directory where job is running: │   └── debug.yml ├── requirements_collections │   └── ansible_collections -│   └── username +│   └── collection_namespace │   └── collection_name │   ├── FILES.json │   ├── MANIFEST.json @@ -53,6 +56,33 @@ Example of the resultant `tmp` directory where job is running: ``` +### Cache Folder Mechanics + +Every time a project is updated as a "check" job +(via `/api/v2/projects/N/update/` or by a schedule, workflow, etc.), +the roles and collections are downloaded and saved to the project's content cache. +In other words, the cache is invalidated every time a project is updated. +That means that the `ansible-galaxy` commands are ran to download content +even if the project revision does not change in the course of the update. + +Project updates all initially target a staging directory at a path like: + +``` +/var/lib/awx/projects/.__awx_cache/_42__project_name/stage +``` + +After the update finishes, the task logic will decide what id to associate +with the content downloaded. +Then the folder will be renamed from "stage" to the cache id. +For instance, if the cache id is determined to be 63: + +``` +/var/lib/awx/projects/.__awx_cache/_42__project_name/63 +``` + +The cache may be updated by project syncs (the "run" type) which happen before +job runs. It will populate the cache id set by the last "check" type update. + ### Galaxy Server Selection Ansible core default settings will download collections from the public