awx/awx/main/scheduler/__init__.py
Commit 930b3651a0 by Chris Meyers, 2016-11-08 15:45:38 -05:00
Merge pull request #3917 from chrismeyersfsu/task-manager_prov_callback_inv_sync_dedup: on host callback, update dynamic inv only once


# Copyright (c) 2015 Ansible, Inc.
# All Rights Reserved

# Python
from datetime import timedelta
import logging
from sets import Set

# Django
from django.conf import settings
from django.db import transaction, connection
from django.db.utils import DatabaseError

# AWX
from awx.main.models import *  # noqa
# from awx.main.scheduler.dag_simple import SimpleDAG
from awx.main.scheduler.dag_workflow import WorkflowDAG
from awx.main.scheduler.dependency_graph import DependencyGraph
from awx.main.scheduler.partial import (
    JobDict,
    ProjectUpdateDict,
    ProjectUpdateLatestDict,
    InventoryUpdateDict,
    InventoryUpdateLatestDict,
    InventorySourceDict,
    SystemJobDict,
    AdHocCommandDict,
    WorkflowJobDict,
)

# Celery
from celery.task.control import inspect

logger = logging.getLogger('awx.main.scheduler')
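

# TaskManager drives a single scheduling pass: it loads all pending, waiting,
# and running unified jobs into a DependencyGraph, spawns any project or
# inventory update dependencies, and starts tasks that fit within the
# cluster's total capacity (tracked by capacity_total / capacity_used).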
class TaskManager():

    def __init__(self):
        self.graph = DependencyGraph()
        self.capacity_total = Instance.objects.total_capacity()
        self.capacity_used = 0

    def get_tasks(self):
        status_list = ('pending', 'waiting', 'running')
        jobs = JobDict.filter_partial(status=status_list)
        inventory_updates = InventoryUpdateDict.filter_partial(status=status_list)
        project_updates = ProjectUpdateDict.filter_partial(status=status_list)
        system_jobs = SystemJobDict.filter_partial(status=status_list)
        ad_hoc_commands = AdHocCommandDict.filter_partial(status=status_list)
        workflow_jobs = WorkflowJobDict.filter_partial(status=status_list)

        all_actions = sorted(jobs + project_updates + inventory_updates + system_jobs + ad_hoc_commands + workflow_jobs,
                             key=lambda task: task['created'])
        return all_actions

    '''
    Tasks that are running and SHOULD have a celery task.
    '''
    def get_running_tasks(self):
        status_list = ('running',)
        jobs = JobDict.filter_partial(status=status_list)
        inventory_updates = InventoryUpdateDict.filter_partial(status=status_list)
        project_updates = ProjectUpdateDict.filter_partial(status=status_list)
        system_jobs = SystemJobDict.filter_partial(status=status_list)
        ad_hoc_commands = AdHocCommandDict.filter_partial(status=status_list)

        all_actions = sorted(jobs + project_updates + inventory_updates + system_jobs + ad_hoc_commands,
                             key=lambda task: task['created'])
        return all_actions

    # TODO: Consider a database query for this logic
    def get_latest_project_update_tasks(self, all_sorted_tasks):
        project_ids = Set()
        for task in all_sorted_tasks:
            if type(task) == JobDict:
                project_ids.add(task['project_id'])
        return ProjectUpdateLatestDict.filter_partial(list(project_ids))

    # TODO: Consider a database query for this logic
    def get_latest_inventory_update_tasks(self, all_sorted_tasks):
        inventory_ids = Set()
        for task in all_sorted_tasks:
            if type(task) == JobDict:
                inventory_ids.add(task['inventory_id'])
        return InventoryUpdateLatestDict.filter_partial(list(inventory_ids))

    def get_running_workflow_jobs(self):
        return list(WorkflowJob.objects.filter(status='running'))

    # TODO: Consider a database query for this logic
    def get_inventory_source_tasks(self, all_sorted_tasks):
        inventory_ids = Set()
        results = []
        for task in all_sorted_tasks:
            if type(task) is JobDict:
                inventory_ids.add(task['inventory_id'])
        for inventory_id in inventory_ids:
            results.append((inventory_id, InventorySourceDict.filter_partial(inventory_id)))
        return results
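
    # Walk each running workflow's DAG breadth-first, creating and starting a
    # real unified job for every node that is ready to run; nodes whose jobs
    # cannot signal a start are marked failed.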
    def spawn_workflow_graph_jobs(self, workflow_jobs):
        for workflow_job in workflow_jobs:
            dag = WorkflowDAG(workflow_job)
            spawn_nodes = dag.bfs_nodes_to_run()
            for spawn_node in spawn_nodes:
                kv = spawn_node.get_job_kwargs()
                job = spawn_node.unified_job_template.create_unified_job(**kv)
                spawn_node.job = job
                spawn_node.save()
                can_start = job.signal_start(**kv)
                if not can_start:
                    job.status = 'failed'
                    job.job_explanation = "Workflow job could not start because it was not in the right state or required manual credentials"
                    job.save(update_fields=['status', 'job_explanation'])
                    # Bind job as a default argument so each callback emits for
                    # its own job rather than the loop's last one.
                    connection.on_commit(lambda job=job: job.websocket_emit_status('failed'))
                # TODO: should we emit a status on the socket here similar to tasks.py tower_periodic_scheduler() ?
                #emit_websocket_notification('/socket.io/jobs', '', dict(id=))
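
    # Finish off running workflows: a canceled workflow has its node jobs
    # canceled; otherwise the workflow succeeds or fails once its DAG reports
    # that it is done.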
    # See comment in tasks.py::RunWorkflowJob::run()
    def process_finished_workflow_jobs(self, workflow_jobs):
        for workflow_job in workflow_jobs:
            dag = WorkflowDAG(workflow_job)
            if workflow_job.cancel_flag:
                workflow_job.status = 'canceled'
                workflow_job.save()
                dag.cancel_node_jobs()
                # Default argument pins the loop variable for the callback.
                connection.on_commit(lambda wj=workflow_job: wj.websocket_emit_status(wj.status))
            elif dag.is_workflow_done():
                if workflow_job._has_failed():
                    workflow_job.status = 'failed'
                else:
                    workflow_job.status = 'successful'
                workflow_job.save()
                connection.on_commit(lambda wj=workflow_job: wj.websocket_emit_status(wj.status))

    def get_active_tasks(self):
        inspector = inspect()
        if not hasattr(settings, 'IGNORE_CELERY_INSPECTOR'):
            active_task_queues = inspector.active()
        else:
            logger.warn("Ignoring celery task inspector")
            active_task_queues = None

        active_tasks = set()
        if active_task_queues is not None:
            for queue in active_task_queues:
                map(lambda at: active_tasks.add(at['id']), active_task_queues[queue])
        else:
            if not hasattr(settings, 'CELERY_UNIT_TEST'):
                return (None, None)

        return (active_task_queues, active_tasks)
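
    # Move a task to 'waiting' and hand it to celery, chaining
    # handle_work_error / handle_work_success callbacks that carry this task
    # and its dependencies, so they are failed or notified along with it.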
    def start_task(self, task, dependent_tasks=None):
        from awx.main.tasks import handle_work_error, handle_work_success

        # Avoid a mutable default argument.
        dependent_tasks = dependent_tasks or []
        task_actual = {
            'type': task.get_job_type_str(),
            'id': task['id'],
        }
        dependencies = [{'type': t.get_job_type_str(), 'id': t['id']} for t in dependent_tasks]

        error_handler = handle_work_error.s(subtasks=[task_actual] + dependencies)
        success_handler = handle_work_success.s(task_actual=task_actual)

        job_obj = task.get_full()
        job_obj.status = 'waiting'

        (start_status, opts) = job_obj.pre_start()
        if not start_status:
            job_obj.status = 'failed'
            if job_obj.job_explanation:
                job_obj.job_explanation += ' '
            job_obj.job_explanation += 'Task failed pre-start check.'
            job_obj.save()
            # TODO: run error handler to fail sub-tasks and send notifications
        else:
            if type(job_obj) is WorkflowJob:
                job_obj.status = 'running'
            job_obj.save()

            self.consume_capacity(task)

        def post_commit():
            job_obj.websocket_emit_status(job_obj.status)
            if job_obj.status != 'failed':
                job_obj.start_celery_task(opts, error_callback=error_handler, success_callback=success_handler)

        connection.on_commit(post_commit)
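
    # Record already waiting/running tasks in the dependency graph so that
    # pending tasks processed later can be checked for blockers against them.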
    def process_runnable_tasks(self, runnable_tasks):
        map(lambda task: self.graph.add_job(task), runnable_tasks)

    def create_project_update(self, task):
        dep = Project.objects.get(id=task['project_id']).create_project_update(launch_type='dependency')

        # Project update is created 1 second behind the task that spawned it
        dep.created = task['created'] - timedelta(seconds=1)
        dep.status = 'pending'
        dep.save()

        project_task = ProjectUpdateDict.get_partial(dep.id)
        return project_task

    def create_inventory_update(self, task, inventory_source_task):
        dep = InventorySource.objects.get(id=inventory_source_task['id']).create_inventory_update(launch_type='dependency')

        # Inventory update is created 2 seconds behind the task that spawned it
        dep.created = task['created'] - timedelta(seconds=2)
        dep.status = 'pending'
        dep.save()

        inventory_task = InventoryUpdateDict.get_partial(dep.id)
        return inventory_task
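
    # Build a job's dependency list: an SCM update when the project is set to
    # update on launch, plus an inventory update for each inventory source
    # that still needs one. Sources already updated (e.g. deduplicated during
    # a provisioning callback; see the merge commit above) are skipped.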
    def generate_dependencies(self, task):
        dependencies = []
        # TODO: What if the project is null ?
        if type(task) is JobDict:
            if task['project__scm_update_on_launch'] is True and \
                    self.graph.should_update_related_project(task):
                project_task = self.create_project_update(task)
                dependencies.append(project_task)

            inventory_sources_already_updated = task.get_inventory_sources_already_updated()

            for inventory_source_task in self.graph.get_inventory_sources(task['inventory_id']):
                if inventory_source_task['id'] in inventory_sources_already_updated:
                    logger.debug("Inventory source %s already updated" % inventory_source_task['id'])
                    continue
                if self.graph.should_update_related_inventory_source(task, inventory_source_task['id']):
                    inventory_task = self.create_inventory_update(task, inventory_source_task)
                    dependencies.append(inventory_task)
        return dependencies
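
    # Seed the graph with the most recent project/inventory updates; the
    # graph's should_update_related_* checks consult these when deciding
    # whether a new dependency update is required.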
    def process_latest_project_updates(self, latest_project_updates):
        map(lambda task: self.graph.add_latest_project_update(task), latest_project_updates)

    def process_latest_inventory_updates(self, latest_inventory_updates):
        map(lambda task: self.graph.add_latest_inventory_update(task), latest_inventory_updates)

    def process_inventory_sources(self, inventory_id_sources):
        map(lambda (inventory_id, inventory_sources): self.graph.add_inventory_sources(inventory_id, inventory_sources), inventory_id_sources)

    def process_dependencies(self, dependent_task, dependency_tasks):
        for task in dependency_tasks:
            # ProjectUpdate or InventoryUpdate may be blocked by another of
            # the same type.
            if not self.graph.is_job_blocked(task):
                self.graph.add_job(task)
                if not self.would_exceed_capacity(task):
                    self.start_task(task, [dependent_task])
            else:
                self.graph.add_job(task)
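
    # Consider pending tasks oldest-first: spawn each task's dependencies,
    # then start the task itself if nothing blocks it, bailing out entirely
    # once the cluster is out of capacity.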
    def process_pending_tasks(self, pending_tasks):
        for task in pending_tasks:
            # Stop processing tasks if we know we are out of capacity
            if self.get_remaining_capacity() <= 0:
                return

            if not self.graph.is_job_blocked(task):
                dependencies = self.generate_dependencies(task)
                self.process_dependencies(task, dependencies)

                # Spawning deps might have blocked us
                if not self.graph.is_job_blocked(task):
                    self.graph.add_job(task)
                    if not self.would_exceed_capacity(task):
                        self.start_task(task)
            else:
                self.graph.add_job(task)

    def process_celery_tasks(self, active_tasks, all_running_sorted_tasks):
        '''
        Rectify the database's and celery's inconsistent views of job state:
        any job the database believes is running but celery does not know
        about is marked failed.
        '''
        for task in all_running_sorted_tasks:
            if (task['celery_task_id'] not in active_tasks and not hasattr(settings, 'IGNORE_CELERY_INSPECTOR')):
                # NOTE: Pull status again and make sure it didn't finish in
                # the meantime?
                # TODO: try/except around fetching the job; it COULD have been deleted
                task_obj = task.get_full()
                task_obj.status = 'failed'
                task_obj.job_explanation += ' '.join((
                    'Task was marked as running in Tower but was not present in',
                    'Celery, so it has been marked as failed.',
                ))
                task_obj.save()
                # Default argument pins task_obj for the callback.
                connection.on_commit(lambda obj=task_obj: obj.websocket_emit_status('failed'))
                logger.error("Task %s appears orphaned... marking as failed" % task)
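
    # Capacity accounting: capacity_used is rebuilt from the running tasks at
    # the start of each pass, then grows as this pass starts new tasks.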
    def calculate_capacity_used(self, tasks):
        self.capacity_used = 0
        for t in tasks:
            self.capacity_used += t.task_impact()

    def would_exceed_capacity(self, task):
        return (task.task_impact() + self.capacity_used > self.capacity_total)

    def consume_capacity(self, task):
        self.capacity_used += task.task_impact()

    def get_remaining_capacity(self):
        return (self.capacity_total - self.capacity_used)
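
    # One pass over every active task: account for running capacity, seed the
    # graph with waiting/running tasks, then try to start the pending ones.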
    def process_tasks(self, all_sorted_tasks):
        running_tasks = filter(lambda t: t['status'] == 'running', all_sorted_tasks)
        runnable_tasks = filter(lambda t: t['status'] in ['waiting', 'running'], all_sorted_tasks)

        self.calculate_capacity_used(running_tasks)
        self.process_runnable_tasks(runnable_tasks)

        pending_tasks = filter(lambda t: t['status'] == 'pending', all_sorted_tasks)
        self.process_pending_tasks(pending_tasks)

    def _schedule(self):
        all_sorted_tasks = self.get_tasks()
        if len(all_sorted_tasks) > 0:
            latest_project_updates = self.get_latest_project_update_tasks(all_sorted_tasks)
            self.process_latest_project_updates(latest_project_updates)

            latest_inventory_updates = self.get_latest_inventory_update_tasks(all_sorted_tasks)
            self.process_latest_inventory_updates(latest_inventory_updates)

            inventory_id_sources = self.get_inventory_source_tasks(all_sorted_tasks)
            self.process_inventory_sources(inventory_id_sources)

            running_workflow_tasks = self.get_running_workflow_jobs()
            self.process_finished_workflow_jobs(running_workflow_tasks)
            self.spawn_workflow_graph_jobs(running_workflow_tasks)

            self.process_tasks(all_sorted_tasks)

    def schedule(self):
        with transaction.atomic():
            # Lock the Instance table; if another scheduler run already holds
            # the lock, bail out and let it finish.
            try:
                Instance.objects.select_for_update(nowait=True).all()[0]
            except DatabaseError:
                return

            self._schedule()
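

# A minimal usage sketch (an assumption; not part of this module). In AWX the
# scheduler is driven periodically, e.g. by tower_periodic_scheduler in
# awx/main/tasks.py (referenced in the comments above):
#
#   from awx.main.scheduler import TaskManager
#
#   def run_task_manager():
#       # schedule() takes a row lock on Instance, so concurrent callers
#       # return immediately instead of doing duplicate work.
#       TaskManager().schedule()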