Integrate content caching with existing task logic

Revert the --force flags

use the update id as metric for role caching

Shift the movement of cache to job folder from rsync task to python

Only install roles and collections if needed

Deal with roles and collections for jobs without sync
Skip local copy if roles or collections turned off

update docs for content caching

Design pivot - use empty cache dir to indicate lack of content

Do not cache content if we did not install content

Test changes to allay concerns about reliability of local_path

Do not blow away cache for SCM inventory updates
This commit is contained in:
AlanCoding
2020-06-15 10:33:27 -04:00
parent 72848fa097
commit fefab89815
11 changed files with 196 additions and 110 deletions

View File

@@ -423,6 +423,10 @@ class Project(UnifiedJobTemplate, ProjectOptions, ResourceMixin, CustomVirtualEn
return True
return False
@property
def cache_id(self):
return str(self.last_job_id)
@property
def notification_templates(self):
base_notification_templates = NotificationTemplate.objects
@@ -560,6 +564,19 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage
def result_stdout_raw(self):
return self._result_stdout_raw(redact_sensitive=True)
@property
def branch_override(self):
"""Whether a branch other than the project default is used."""
if not self.project:
return True
return bool(self.scm_branch and self.scm_branch != self.project.scm_branch)
@property
def cache_id(self):
if self.branch_override or self.job_type == 'check' or (not self.project):
return str(self.id)
return self.project.cache_id
def result_stdout_raw_limited(self, start_line=0, end_line=None, redact_sensitive=True):
return self._result_stdout_raw_limited(start_line, end_line, redact_sensitive=redact_sensitive)
@@ -603,9 +620,7 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage
def save(self, *args, **kwargs):
added_update_fields = []
if not self.job_tags:
job_tags = ['update_{}'.format(self.scm_type)]
job_tags.append('install_roles')
job_tags.append('install_collections')
job_tags = ['update_{}'.format(self.scm_type), 'install_roles', 'install_collections']
self.job_tags = ','.join(job_tags)
added_update_fields.append('job_tags')
if self.scm_delete_on_update and 'delete' not in self.job_tags and self.job_type == 'check':

View File

@@ -1865,44 +1865,32 @@ class RunJob(BaseTask):
project_path = job.project.get_project_path(check_if_exists=False)
job_revision = job.project.scm_revision
sync_needs = []
all_sync_needs = ['update_{}'.format(job.project.scm_type), 'install_roles', 'install_collections']
source_update_tag = 'update_{}'.format(job.project.scm_type)
branch_override = bool(job.scm_branch and job.scm_branch != job.project.scm_branch)
if not job.project.scm_type:
pass # manual projects are not synced, user has responsibility for that
elif not os.path.exists(project_path):
logger.debug('Performing fresh clone of {} on this instance.'.format(job.project))
sync_needs = all_sync_needs
elif not job.project.scm_revision:
logger.debug('Revision not known for {}, will sync with remote'.format(job.project))
sync_needs = all_sync_needs
elif job.project.scm_type == 'git':
sync_needs.append(source_update_tag)
elif job.project.scm_type == 'git' and job.project.scm_revision and (not branch_override):
git_repo = git.Repo(project_path)
try:
desired_revision = job.project.scm_revision
if job.scm_branch and job.scm_branch != job.project.scm_branch:
desired_revision = job.scm_branch # could be commit or not, but will try as commit
current_revision = git_repo.head.commit.hexsha
if desired_revision == current_revision:
job_revision = desired_revision
if job_revision == git_repo.head.commit.hexsha:
logger.debug('Skipping project sync for {} because commit is locally available'.format(job.log_format))
else:
sync_needs = all_sync_needs
sync_needs.append(source_update_tag)
except (ValueError, BadGitName):
logger.debug('Needed commit for {} not in local source tree, will sync with remote'.format(job.log_format))
sync_needs = all_sync_needs
sync_needs.append(source_update_tag)
else:
sync_needs = all_sync_needs
# Galaxy requirements are not supported for manual projects
if not sync_needs and job.project.scm_type:
# see if we need a sync because of presence of roles
galaxy_req_path = os.path.join(project_path, 'roles', 'requirements.yml')
if os.path.exists(galaxy_req_path):
logger.debug('Running project sync for {} because of galaxy role requirements.'.format(job.log_format))
sync_needs.append('install_roles')
logger.debug('Project not available locally {}, will sync with remote'.format(job.project))
sync_needs.append(source_update_tag)
galaxy_collections_req_path = os.path.join(project_path, 'collections', 'requirements.yml')
if os.path.exists(galaxy_collections_req_path):
logger.debug('Running project sync for {} because of galaxy collections requirements.'.format(job.log_format))
sync_needs.append('install_collections')
cache_id = str(job.project.last_job_id) # content cache - for roles and collections
has_cache = os.path.exists(os.path.join(job.project.get_cache_path(), cache_id))
# Galaxy requirements are not supported for manual projects
if job.project.scm_type and ((not has_cache) or branch_override):
sync_needs.extend(['install_roles', 'install_collections'])
if sync_needs:
pu_ig = job.instance_group
@@ -1920,7 +1908,7 @@ class RunJob(BaseTask):
execution_node=pu_en,
celery_task_id=job.celery_task_id
)
if job.scm_branch and job.scm_branch != job.project.scm_branch:
if branch_override:
sync_metafields['scm_branch'] = job.scm_branch
if 'update_' not in sync_metafields['job_tags']:
sync_metafields['scm_revision'] = job_revision
@@ -1952,10 +1940,7 @@ class RunJob(BaseTask):
if job_revision:
job = self.update_model(job.pk, scm_revision=job_revision)
# Project update does not copy the folder, so copy here
RunProjectUpdate.make_local_copy(
project_path, os.path.join(private_data_dir, 'project'),
job.project.scm_type, job_revision
)
RunProjectUpdate.make_local_copy(job.project, private_data_dir, scm_revision=job_revision)
if job.inventory.kind == 'smart':
# cache smart inventory memberships so that the host_filter query is not
@@ -1995,10 +1980,7 @@ class RunProjectUpdate(BaseTask):
@property
def proot_show_paths(self):
show_paths = [settings.PROJECTS_ROOT]
if self.job_private_data_dir:
show_paths.append(self.job_private_data_dir)
return show_paths
return [settings.PROJECTS_ROOT]
def __init__(self, *args, job_private_data_dir=None, **kwargs):
super(RunProjectUpdate, self).__init__(*args, **kwargs)
@@ -2165,8 +2147,7 @@ class RunProjectUpdate(BaseTask):
extra_vars.update(extra_vars_new)
scm_branch = project_update.scm_branch
branch_override = bool(scm_branch and project_update.scm_branch != project_update.project.scm_branch)
if project_update.job_type == 'run' and (not branch_override):
if project_update.job_type == 'run' and (not project_update.branch_override):
if project_update.project.scm_revision:
scm_branch = project_update.project.scm_revision
elif not scm_branch:
@@ -2174,14 +2155,15 @@ class RunProjectUpdate(BaseTask):
elif not scm_branch:
scm_branch = {'hg': 'tip'}.get(project_update.scm_type, 'HEAD')
extra_vars.update({
'project_path': project_update.get_project_path(check_if_exists=False),
'projects_root': settings.PROJECTS_ROOT.rstrip('/'),
'local_path': os.path.basename(project_update.project.local_path),
'project_path': project_update.get_project_path(check_if_exists=False), # deprecated
'insights_url': settings.INSIGHTS_URL_BASE,
'awx_license_type': get_license(show_key=False).get('license_type', 'UNLICENSED'),
'awx_version': get_awx_version(),
'scm_url': scm_url,
'scm_branch': scm_branch,
'scm_clean': project_update.scm_clean,
'project_cache': project_update.get_cache_path(),
'roles_enabled': settings.AWX_ROLES_ENABLED,
'collections_enabled': settings.AWX_COLLECTIONS_ENABLED,
})
@@ -2323,8 +2305,7 @@ class RunProjectUpdate(BaseTask):
os.mkdir(settings.PROJECTS_ROOT)
self.acquire_lock(instance)
self.original_branch = None
if (instance.scm_type == 'git' and instance.job_type == 'run' and instance.project and
instance.scm_branch != instance.project.scm_branch):
if instance.scm_type == 'git' and instance.branch_override:
project_path = instance.project.get_project_path(check_if_exists=False)
if os.path.exists(project_path):
git_repo = git.Repo(project_path)
@@ -2332,13 +2313,20 @@ class RunProjectUpdate(BaseTask):
self.original_branch = git_repo.head.commit
else:
self.original_branch = git_repo.active_branch
stage_path = os.path.join(instance.get_cache_path(), 'stage')
if os.path.exists(stage_path):
logger.warning('{0} cache staging area unexpectedly existed before update.')
shutil.rmtree(stage_path)
# Important - the presence of an empty cache will indicate that a given
# project revision did not have any roles or collections
os.makedirs(stage_path)
def clear_project_cache(self, instance, revision):
cache_dir = instance.get_cache_path()
@staticmethod
def clear_project_cache(cache_dir, keep_value):
if os.path.isdir(cache_dir):
for entry in os.listdir(cache_dir):
old_path = os.path.join(cache_dir, entry)
if entry != revision:
if entry not in (keep_value, 'stage'):
# invalidate, then delete
new_path = os.path.join(cache_dir,'.~~delete~~' + entry)
try:
@@ -2348,16 +2336,27 @@ class RunProjectUpdate(BaseTask):
logger.warning(f"Could not remove cache directory {old_path}")
@staticmethod
def make_local_copy(project_path, destination_folder, scm_type, scm_revision):
if scm_type == 'git':
def make_local_copy(p, job_private_data_dir, scm_revision=None):
"""Copy project content (roles and collections) to a job private_data_dir
:param object p: Either a project or a project update
:param str job_private_data_dir: The root of the target ansible-runner folder
:param str scm_revision: For branch_override cases, the git revision to copy
"""
project_path = p.get_project_path(check_if_exists=False)
destination_folder = os.path.join(job_private_data_dir, 'project')
if not scm_revision:
scm_revision = p.scm_revision
if p.scm_type == 'git':
git_repo = git.Repo(project_path)
if not os.path.exists(destination_folder):
os.mkdir(destination_folder, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC)
tmp_branch_name = 'awx_internal/{}'.format(uuid4())
# always clone based on specific job revision
if not scm_revision:
if not p.scm_revision:
raise RuntimeError('Unexpectedly could not determine a revision to run from project.')
source_branch = git_repo.create_head(tmp_branch_name, scm_revision)
source_branch = git_repo.create_head(tmp_branch_name, p.scm_revision)
# git clone must take file:// syntax for source repo or else options like depth will be ignored
source_as_uri = Path(project_path).as_uri()
git.Repo.clone_from(
@@ -2376,20 +2375,48 @@ class RunProjectUpdate(BaseTask):
else:
copy_tree(project_path, destination_folder, preserve_symlinks=1)
# copy over the roles and collection cache to job folder
cache_path = os.path.join(p.get_cache_path(), p.cache_id)
subfolders = []
if settings.AWX_COLLECTIONS_ENABLED:
subfolders.append('requirements_collections')
if settings.AWX_ROLES_ENABLED:
subfolders.append('requirements_roles')
for subfolder in subfolders:
cache_subpath = os.path.join(cache_path, subfolder)
if os.path.exists(cache_subpath):
dest_subpath = os.path.join(job_private_data_dir, subfolder)
copy_tree(cache_subpath, dest_subpath, preserve_symlinks=1)
logger.debug('{0} {1} prepared {2} from cache'.format(type(p).__name__, p.pk, dest_subpath))
def post_run_hook(self, instance, status):
# To avoid hangs, very important to release lock even if errors happen here
try:
if self.playbook_new_revision:
self.clear_project_cache(instance, self.playbook_new_revision)
instance.scm_revision = self.playbook_new_revision
instance.save(update_fields=['scm_revision'])
# Roles and collection folders copy to durable cache
base_path = instance.get_cache_path()
stage_path = os.path.join(base_path, 'stage')
if status == 'successful' and 'install_' in instance.job_tags:
# Clear other caches before saving this one, and if branch is overridden
# do not clear cache for main branch, but do clear it for other branches
self.clear_project_cache(base_path, keep_value=str(instance.project.last_job_id))
cache_path = os.path.join(base_path, instance.cache_id)
if os.path.exists(stage_path):
if os.path.exists(cache_path):
logger.warning('Rewriting cache at {0}, performance may suffer'.format(cache_path))
shutil.rmtree(cache_path)
os.rename(stage_path, cache_path)
logger.debug('{0} wrote to cache at {1}'.format(instance.log_format, cache_path))
elif os.path.exists(stage_path):
shutil.rmtree(stage_path) # cannot trust content update produced
if self.job_private_data_dir:
# copy project folder before resetting to default branch
# because some git-tree-specific resources (like submodules) might matter
self.make_local_copy(
instance.get_project_path(check_if_exists=False), os.path.join(self.job_private_data_dir, 'project'),
instance.scm_type, instance.scm_revision
)
self.make_local_copy(instance, self.job_private_data_dir)
if self.original_branch:
# for git project syncs, non-default branches can be problems
# restore to branch the repo was on before this run
@@ -2642,13 +2669,22 @@ class RunInventoryUpdate(BaseTask):
source_project = None
if inventory_update.inventory_source:
source_project = inventory_update.inventory_source.source_project
if (inventory_update.source=='scm' and inventory_update.launch_type!='scm' and source_project):
# In project sync, pulling galaxy roles is not needed
if (inventory_update.source=='scm' and inventory_update.launch_type!='scm' and
source_project and source_project.scm_type): # never ever update manual projects
# Check if the content cache exists, so that we do not unnecessarily re-download roles
sync_needs = ['update_{}'.format(source_project.scm_type)]
cache_id = str(source_project.last_job_id) # content cache id for roles and collections
has_cache = os.path.exists(os.path.join(source_project.get_cache_path(), cache_id))
# Galaxy requirements are not supported for manual projects
if not has_cache:
sync_needs.extend(['install_roles', 'install_collections'])
local_project_sync = source_project.create_project_update(
_eager_fields=dict(
launch_type="sync",
job_type='run',
job_tags='update_{},install_collections'.format(source_project.scm_type), # roles are never valid for inventory
job_tags=','.join(sync_needs),
status='running',
execution_node=inventory_update.execution_node,
instance_group = inventory_update.instance_group,
@@ -2672,11 +2708,7 @@ class RunInventoryUpdate(BaseTask):
raise
elif inventory_update.source == 'scm' and inventory_update.launch_type == 'scm' and source_project:
# This follows update, not sync, so make copy here
project_path = source_project.get_project_path(check_if_exists=False)
RunProjectUpdate.make_local_copy(
project_path, os.path.join(private_data_dir, 'project'),
source_project.scm_type, source_project.scm_revision
)
RunProjectUpdate.make_local_copy(source_project, private_data_dir)
@task(queue=get_local_queuename)

View File

@@ -54,7 +54,9 @@ def test_no_changing_overwrite_behavior_if_used(post, patch, organization, admin
data={
'name': 'fooo',
'organization': organization.id,
'allow_override': True
'allow_override': True,
'scm_type': 'git',
'scm_url': 'https://github.com/ansible/test-playbooks.git'
},
user=admin_user,
expect=201
@@ -83,7 +85,9 @@ def test_changing_overwrite_behavior_okay_if_not_used(post, patch, organization,
data={
'name': 'fooo',
'organization': organization.id,
'allow_override': True
'allow_override': True,
'scm_type': 'git',
'scm_url': 'https://github.com/ansible/test-playbooks.git'
},
user=admin_user,
expect=201

View File

@@ -145,7 +145,6 @@ def project(instance, organization):
description="test-proj-desc",
organization=organization,
playbook_files=['helloworld.yml', 'alt-helloworld.yml'],
local_path='_92__test_proj',
scm_revision='1234567890123456789012345678901234567890',
scm_url='localhost',
scm_type='git'

View File

@@ -169,7 +169,8 @@ class TestSCMUpdateFeatures:
inventory_update = InventoryUpdate(
inventory_source=scm_inventory_source,
source_path=scm_inventory_source.source_path)
assert inventory_update.get_actual_source_path().endswith('_92__test_proj/inventory_file')
p = scm_inventory_source.source_project
assert inventory_update.get_actual_source_path().endswith(f'_{p.id}__test_proj/inventory_file')
def test_no_unwanted_updates(self, scm_inventory_source):
# Changing the non-sensitive fields should not trigger update

View File

@@ -34,6 +34,18 @@ def test_sensitive_change_triggers_update(project):
mock_update.assert_called_once_with()
@pytest.mark.django_db
def test_local_path_autoset(organization):
with mock.patch.object(Project, "update"):
p = Project.objects.create(
name="test-proj",
organization=organization,
scm_url='localhost',
scm_type='git'
)
assert p.local_path == f'_{p.id}__test_proj'
@pytest.mark.django_db
def test_foreign_key_change_changes_modified_by(project, organization):
assert project._get_fields_snapshot()['organization_id'] == organization.id

View File

@@ -29,8 +29,8 @@ def team_project_list(organization_factory):
@pytest.mark.django_db
def test_get_project_path(project):
# Test combining projects root with project local path
with mock.patch('awx.main.models.projects.settings.PROJECTS_ROOT', '/var/lib/awx'):
assert project.get_project_path(check_if_exists=False) == '/var/lib/awx/_92__test_proj'
with mock.patch('awx.main.models.projects.settings.PROJECTS_ROOT', '/var/lib/foo'):
assert project.get_project_path(check_if_exists=False) == f'/var/lib/foo/_{project.id}__test_proj'
@pytest.mark.django_db

View File

@@ -30,7 +30,7 @@ class TestDependentInventoryUpdate:
def test_dependent_inventory_updates_is_called(self, scm_inventory_source, scm_revision_file):
task = RunProjectUpdate()
task.revision_path = scm_revision_file
proj_update = ProjectUpdate.objects.create(project=scm_inventory_source.source_project)
proj_update = scm_inventory_source.source_project.create_project_update()
with mock.patch.object(RunProjectUpdate, '_update_dependent_inventories') as inv_update_mck:
with mock.patch.object(RunProjectUpdate, 'release_lock'):
task.post_run_hook(proj_update, 'successful')
@@ -39,7 +39,7 @@ class TestDependentInventoryUpdate:
def test_no_unwanted_dependent_inventory_updates(self, project, scm_revision_file):
task = RunProjectUpdate()
task.revision_path = scm_revision_file
proj_update = ProjectUpdate.objects.create(project=project)
proj_update = project.create_project_update()
with mock.patch.object(RunProjectUpdate, '_update_dependent_inventories') as inv_update_mck:
with mock.patch.object(RunProjectUpdate, 'release_lock'):
task.post_run_hook(proj_update, 'successful')

View File

@@ -61,7 +61,10 @@ def patch_Job():
@pytest.fixture
def job():
return Job(pk=1, id=1, project=Project(), inventory=Inventory(), job_template=JobTemplate(id=1, name='foo'))
return Job(
pk=1, id=1,
project=Project(local_path='/projects/_23_foo'),
inventory=Inventory(), job_template=JobTemplate(id=1, name='foo'))
@pytest.fixture
@@ -406,7 +409,9 @@ class TestExtraVarSanitation(TestJobExecution):
class TestGenericRun():
def test_generic_failure(self, patch_Job):
job = Job(status='running', inventory=Inventory(), project=Project())
job = Job(
status='running', inventory=Inventory(),
project=Project(local_path='/projects/_23_foo'))
job.websocket_emit_status = mock.Mock()
task = tasks.RunJob()