add events to job lifecycle

* Note in the job lifecycle when the controller_node and execution_node
  are chosen. This event occurs most commonly in the task manager, with a
  couple of exceptions that happen when we dynamically create dependent
  jobs on the fly in tasks.py.
This commit is contained in:
chris meyers
2021-10-15 04:49:28 -04:00
committed by Shane McDonald
parent 3a3fffb2dd
commit 9f8250bd47
3 changed files with 19 additions and 0 deletions

View File

@@ -291,6 +291,7 @@ class TaskManager:
# act as the controller for k8s API interaction
try:
task.controller_node = Instance.choose_online_control_plane_node()
task.log_lifecycle("controller_node_chosen")
except IndexError:
logger.warning("No control plane nodes available to run containerized job {}".format(task.log_format))
return
@@ -298,19 +299,23 @@ class TaskManager:
# project updates and system jobs don't *actually* run in pods, so
# just pick *any* non-containerized host and use it as the execution node
task.execution_node = Instance.choose_online_control_plane_node()
task.log_lifecycle("execution_node_chosen")
logger.debug('Submitting containerized {} to queue {}.'.format(task.log_format, task.execution_node))
else:
task.instance_group = rampart_group
task.execution_node = instance.hostname
task.log_lifecycle("execution_node_chosen")
if instance.node_type == 'execution':
try:
task.controller_node = Instance.choose_online_control_plane_node()
task.log_lifecycle("controller_node_chosen")
except IndexError:
logger.warning("No control plane nodes available to manage {}".format(task.log_format))
return
else:
# control plane nodes will manage jobs locally for performance and resilience
task.controller_node = task.execution_node
task.log_lifecycle("controller_node_chosen")
logger.debug('Submitting job {} to queue {} controlled by {}.'.format(task.log_format, task.execution_node, task.controller_node))
with disable_activity_stream():
task.celery_task_id = str(uuid.uuid4())