From 2432a3d3c3264e6086fa175781a8e96b67313182 Mon Sep 17 00:00:00 2001 From: Chris Meyers Date: Mon, 13 Feb 2017 16:16:43 -0500 Subject: [PATCH 1/2] Revert "remove partial dependency job id logic" This reverts commit 4bb3a4909e616209fc291b2b3cee46469bc58f9e. --- awx/main/scheduler/partial.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/awx/main/scheduler/partial.py b/awx/main/scheduler/partial.py index 6cba9c35b8..6cb87add15 100644 --- a/awx/main/scheduler/partial.py +++ b/awx/main/scheduler/partial.py @@ -1,6 +1,7 @@ # Python import json +import itertools # AWX from awx.main.utils import decrypt_field_value @@ -61,13 +62,35 @@ class PartialModelDict(object): def task_impact(self): raise RuntimeError("Inherit and implement me") + @classmethod + def merge_values(cls, values): + grouped_results = itertools.groupby(values, key=lambda value: value['id']) + + merged_values = [] + for k, g in grouped_results: + groups = list(g) + merged_value = {} + for group in groups: + for key, val in group.iteritems(): + if not merged_value.get(key): + merged_value[key] = val + elif val != merged_value[key]: + if isinstance(merged_value[key], list): + if val not in merged_value[key]: + merged_value[key].append(val) + else: + old_val = merged_value[key] + merged_value[key] = [old_val, val] + merged_values.append(merged_value) + return merged_values + class JobDict(PartialModelDict): FIELDS = ( 'id', 'status', 'job_template_id', 'inventory_id', 'project_id', 'launch_type', 'limit', 'allow_simultaneous', 'created', 'job_type', 'celery_task_id', 'project__scm_update_on_launch', - 'forks', 'start_args', + 'forks', 'start_args', 'dependent_jobs__id', ) model = Job @@ -90,7 +113,8 @@ class JobDict(PartialModelDict): kv = { 'status__in': status } - return [cls(o) for o in cls.model.objects.filter(**kv).values(*cls.get_db_values())] + merged = PartialModelDict.merge_values(cls.model.objects.filter(**kv).values(*cls.get_db_values())) + return [cls(o) for o in merged] class ProjectUpdateDict(PartialModelDict): From 6e9488a59bad5b97acb1c80c56c1ca8a907f33b4 Mon Sep 17 00:00:00 2001 From: Chris Meyers Date: Tue, 14 Feb 2017 14:36:09 -0500 Subject: [PATCH 2/2] ensure job deps are created only once --- awx/main/scheduler/__init__.py | 23 ++++++++++++++++++++++- awx/main/scheduler/dependency_graph.py | 16 +++++----------- awx/main/tests/unit/scheduler/conftest.py | 3 ++- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/awx/main/scheduler/__init__.py b/awx/main/scheduler/__init__.py index 50a80b7116..8d29f0010c 100644 --- a/awx/main/scheduler/__init__.py +++ b/awx/main/scheduler/__init__.py @@ -251,6 +251,18 @@ class TaskManager(): dep.save() inventory_task = InventoryUpdateDict.get_partial(dep.id) + + ''' + Update internal datastructures with the newly created inventory update + ''' + # Should be only 1 inventory update. The one for the job (task) + latest_inventory_updates = self.get_latest_inventory_update_tasks([task]) + self.process_latest_inventory_updates(latest_inventory_updates) + + inventory_sources = self.get_inventory_source_tasks([task]) + self.process_inventory_sources(inventory_sources) + + self.graph.add_job(inventory_task) return inventory_task @@ -271,9 +283,15 @@ class TaskManager(): def capture_chain_failure_dependencies(self, task, dependencies): for dep in dependencies: - dep_obj = task.get_full() + dep_obj = dep.get_full() dep_obj.dependent_jobs.add(task['id']) dep_obj.save() + ''' + if not 'dependent_jobs__id' in task.data: + task.data['dependent_jobs__id'] = [dep_obj.data['id']] + else: + task.data['dependent_jobs__id'].append(dep_obj.data['id']) + ''' def generate_dependencies(self, task): dependencies = [] @@ -291,6 +309,9 @@ class TaskManager(): ''' inventory_sources_already_updated = task.get_inventory_sources_already_updated() + ''' + get_inventory_sources() only return update on launch sources + ''' for inventory_source_task in self.graph.get_inventory_sources(task['inventory_id']): if inventory_source_task['id'] in inventory_sources_already_updated: continue diff --git a/awx/main/scheduler/dependency_graph.py b/awx/main/scheduler/dependency_graph.py index 846a194b27..61f08c4241 100644 --- a/awx/main/scheduler/dependency_graph.py +++ b/awx/main/scheduler/dependency_graph.py @@ -113,21 +113,15 @@ class DependencyGraph(object): def should_update_related_inventory_source(self, job, inventory_source_id): now = self.get_now() + + # Already processed dependencies for this job + if job.data['dependent_jobs__id'] is not None: + return False + latest_inventory_update = self.data[self.LATEST_INVENTORY_UPDATES].get(inventory_source_id, None) if not latest_inventory_update: return True - ''' - This is a bit of fuzzy logic. - If the latest inventory update has a created time == job_created_time-2 - then consider the inventory update found. This is so we don't enter an infinite loop - of updating the project when cache timeout is 0. - ''' - if latest_inventory_update['inventory_source__update_cache_timeout'] == 0 and \ - latest_inventory_update['launch_type'] == 'dependency' and \ - latest_inventory_update['created'] == job['created'] - timedelta(seconds=2): - return False - ''' Normal, expected, cache timeout logic ''' diff --git a/awx/main/tests/unit/scheduler/conftest.py b/awx/main/tests/unit/scheduler/conftest.py index 40e221d0cc..8f3c5f913e 100644 --- a/awx/main/tests/unit/scheduler/conftest.py +++ b/awx/main/tests/unit/scheduler/conftest.py @@ -223,7 +223,8 @@ def job_factory(epoch): 'celery_task_id': '', 'project__scm_update_on_launch': project__scm_update_on_launch, 'inventory__inventory_sources': inventory__inventory_sources, - 'forks': 5 + 'forks': 5, + 'dependent_jobs__id': None, }) return fn