diff --git a/awx/main/scheduler/dag_workflow.py b/awx/main/scheduler/dag_workflow.py index 67834d6149..0b7aeb7140 100644 --- a/awx/main/scheduler/dag_workflow.py +++ b/awx/main/scheduler/dag_workflow.py @@ -129,15 +129,15 @@ class WorkflowDAG(SimpleDAG): obj = node['node_object'] if obj.unified_job_template is None: - return True + return True, "Workflow job node {} related unified job template missing".format(obj.id) if obj.job and obj.job.status in ['failed', 'canceled', 'error']: failed_nodes.append(node) for node in failed_nodes: obj = node['node_object'] if (len(self.get_dependencies(obj, 'failure_nodes')) + len(self.get_dependencies(obj, 'always_nodes'))) == 0: - return True - return False + return True, "Workflow job node {} has a status of '{}' without an error handler path".format(obj.id, obj.job.status) + return False, None r''' Determine if all nodes have been decided on being marked do_not_run. diff --git a/awx/main/scheduler/task_manager.py b/awx/main/scheduler/task_manager.py index a60c7f342c..ccc953cada 100644 --- a/awx/main/scheduler/task_manager.py +++ b/awx/main/scheduler/task_manager.py @@ -178,14 +178,18 @@ class TaskManager(): is_done = dag.is_workflow_done() if not is_done: continue - has_failed = dag.has_workflow_failed() + has_failed, reason = dag.has_workflow_failed() logger.info('Marking %s as %s.', workflow_job.log_format, 'failed' if has_failed else 'successful') result.append(workflow_job.id) new_status = 'failed' if has_failed else 'successful' logger.debug(six.text_type("Transitioning {} to {} status.").format(workflow_job.log_format, new_status)) + update_fields = ['status', 'start_args'] workflow_job.status = new_status + if reason: + workflow_job.job_explanation = reason + update_fields.append('job_explanation') workflow_job.start_args = '' # blank field to remove encrypted passwords - workflow_job.save(update_fields=['status', 'start_args']) + workflow_job.save(update_fields=update_fields) status_changed = True if status_changed: 
workflow_job.websocket_emit_status(workflow_job.status) diff --git a/awx/main/tests/functional/models/test_workflow.py b/awx/main/tests/functional/models/test_workflow.py index 17fa705c47..41c8be70e3 100644 --- a/awx/main/tests/functional/models/test_workflow.py +++ b/awx/main/tests/functional/models/test_workflow.py @@ -66,9 +66,10 @@ class TestWorkflowDAGFunctional(TransactionTestCase): dag = WorkflowDAG(workflow_job=wfj) assert 3 == len(dag.mark_dnr_nodes()) is_done = dag.is_workflow_done() - has_failed = dag.has_workflow_failed() + has_failed, reason = dag.has_workflow_failed() self.assertTrue(is_done) self.assertFalse(has_failed) + assert reason is None # verify that relaunched WFJ fails if a JT leaf is deleted for jt in JobTemplate.objects.all(): @@ -77,9 +78,10 @@ class TestWorkflowDAGFunctional(TransactionTestCase): dag = WorkflowDAG(workflow_job=relaunched) dag.mark_dnr_nodes() is_done = dag.is_workflow_done() - has_failed = dag.has_workflow_failed() + has_failed, reason = dag.has_workflow_failed() self.assertTrue(is_done) self.assertTrue(has_failed) + assert "related unified job template missing" in reason def test_workflow_fails_for_no_error_handler(self): wfj = self.workflow_job(states=['successful', 'failed', None, None, None]) @@ -104,9 +106,10 @@ class TestWorkflowDAGFunctional(TransactionTestCase): dag = WorkflowDAG(workflow_job=wfj) dag.mark_dnr_nodes() is_done = dag.is_workflow_done() - has_failed = dag.has_workflow_failed() + has_failed, reason = dag.has_workflow_failed() self.assertFalse(is_done) self.assertFalse(has_failed) + assert reason is None @pytest.mark.django_db diff --git a/awx/main/tests/unit/scheduler/test_dag_workflow.py b/awx/main/tests/unit/scheduler/test_dag_workflow.py index e126456d4e..03a02d748c 100644 --- a/awx/main/tests/unit/scheduler/test_dag_workflow.py +++ b/awx/main/tests/unit/scheduler/test_dag_workflow.py @@ -27,7 +27,7 @@ def wf_node_generator(mocker): @pytest.fixture
def workflow_dag_1(wf_node_generator): g = WorkflowDAG() - nodes = [wf_node_generator() for i in range(4)] + nodes = [wf_node_generator(unified_job_template=object()) for i in range(4)] map(lambda n: g.add_node(n), nodes) r''' @@ -183,17 +183,17 @@ class TestIsWorkflowDone(): assert g.is_workflow_done() is False def test_is_workflow_done_failed(self, workflow_dag_failed): - g = workflow_dag_failed[0] + (g, nodes) = workflow_dag_failed assert g.is_workflow_done() is True - assert g.has_workflow_failed() is True + assert g.has_workflow_failed() == (True, "Workflow job node {} has a status of 'failed' without an error handler path".format(nodes[2].id)) class TestHasWorkflowFailed(): @pytest.fixture def workflow_dag_canceled(self, wf_node_generator): g = WorkflowDAG() - nodes = [wf_node_generator() for i in range(1)] + nodes = [wf_node_generator(unified_job_template=object()) for i in range(1)] map(lambda n: g.add_node(n), nodes) r''' F0 @@ -210,12 +210,12 @@ class TestHasWorkflowFailed(): def test_canceled_should_fail(self, workflow_dag_canceled): (g, nodes) = workflow_dag_canceled - assert g.has_workflow_failed() is True + assert g.has_workflow_failed() == (True, "Workflow job node {} has a status of 'canceled' without an error handler path".format(nodes[0].id)) def test_failure_should_fail(self, workflow_dag_failure): (g, nodes) = workflow_dag_failure - assert g.has_workflow_failed() is True + assert g.has_workflow_failed() == (True, "Workflow job node {} has a status of 'failed' without an error handler path".format(nodes[0].id)) class TestBFSNodesToRun():