Re-register nodes when they come back online

* Nodes are marked offline and, given enough time, deleted. Nodes can
come back for various reasons (e.g. a netsplit). When they come back,
have them recreate the node Instance if AWX_AUTO_DEPROVISION_INSTANCES
is True. Otherwise, do nothing. The do-nothing case will show up in the
logs as celery job tracebacks, because they fail to be self-aware.
This commit is contained in:
chris meyers
2018-03-27 13:51:35 -04:00
parent 3a3c883504
commit 7ce8907b7b
3 changed files with 34 additions and 13 deletions

View File

@@ -204,7 +204,9 @@ def handle_setting_changes(self, setting_keys):
@shared_task(bind=True, queue='tower_broadcast_all', base=LogErrorsTask)
def handle_ha_toplogy_changes(self):
instance = Instance.objects.me()
(changed, instance) = Instance.objects.get_or_register()
if changed:
logger.info(six.text_type("Registered tower node '{}'").format(instance.hostname))
logger.debug(six.text_type("Reconfigure celeryd queues task on host {}").format(self.request.hostname))
awx_app = Celery('awx')
awx_app.config_from_object('django.conf:settings')
@@ -234,7 +236,9 @@ def handle_ha_toplogy_worker_ready(sender, **kwargs):
def handle_update_celery_routes(sender=None, conf=None, **kwargs):
conf = conf if conf else sender.app.conf
logger.debug(six.text_type("Registering celery routes for {}").format(sender))
instance = Instance.objects.me()
(changed, instance) = Instance.objects.get_or_register()
if changed:
logger.info(six.text_type("Registered tower node '{}'").format(instance.hostname))
added_routes = update_celery_worker_routes(instance, conf)
logger.info(six.text_type("Workers on tower node '{}' added routes {} all routes are now {}")
.format(instance.hostname, added_routes, conf.CELERY_ROUTES))
@@ -242,7 +246,9 @@ def handle_update_celery_routes(sender=None, conf=None, **kwargs):
@celeryd_after_setup.connect
def handle_update_celery_hostname(sender, instance, **kwargs):
tower_instance = Instance.objects.me()
(changed, tower_instance) = Instance.objects.get_or_register()
if changed:
logger.info(six.text_type("Registered tower node '{}'").format(tower_instance.hostname))
instance.hostname = 'celery@{}'.format(tower_instance.hostname)
logger.warn(six.text_type("Set hostname to {}").format(instance.hostname))
@@ -310,6 +316,10 @@ def cluster_node_heartbeat(self):
this_inst = None
lost_instances = []
(changed, instance) = Instance.objects.get_or_register()
if changed:
logger.info(six.text_type("Registered tower node '{}'").format(instance.hostname))
for inst in list(instance_list):
if inst.hostname == settings.CLUSTER_HOST_ID:
this_inst = inst