mirror of
https://github.com/ansible/awx.git
synced 2026-01-19 05:31:22 -03:30
Merge pull request #12494 from AlanCoding/revival
Register system again if deleted by another pod
This commit is contained in:
commit
9e8ba6ca09
@ -166,7 +166,11 @@ class Metrics:
|
||||
elif settings.IS_TESTING():
|
||||
self.instance_name = "awx_testing"
|
||||
else:
|
||||
self.instance_name = Instance.objects.me().hostname
|
||||
try:
|
||||
self.instance_name = Instance.objects.me().hostname
|
||||
except Exception as e:
|
||||
self.instance_name = settings.CLUSTER_HOST_ID
|
||||
logger.info(f'Instance {self.instance_name} seems to be unregistered, error: {e}')
|
||||
|
||||
# metric name, help_text
|
||||
METRICSLIST = [
|
||||
|
||||
@ -15,7 +15,11 @@ def startup_reaping():
|
||||
If this particular instance is starting, then we know that any running jobs are invalid
|
||||
so we will reap those jobs as a special action here
|
||||
"""
|
||||
me = Instance.objects.me()
|
||||
try:
|
||||
me = Instance.objects.me()
|
||||
except RuntimeError as e:
|
||||
logger.warning(f'Local instance is not registered, not running startup reaper: {e}')
|
||||
return
|
||||
jobs = UnifiedJob.objects.filter(status='running', controller_node=me.hostname)
|
||||
job_ids = []
|
||||
for j in jobs:
|
||||
|
||||
@ -7,7 +7,7 @@ from django.core.cache import cache as django_cache
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db import connection as django_connection
|
||||
|
||||
from awx.main.dispatch import get_local_queuename, reaper
|
||||
from awx.main.dispatch import get_local_queuename
|
||||
from awx.main.dispatch.control import Control
|
||||
from awx.main.dispatch.pool import AutoscalePool
|
||||
from awx.main.dispatch.worker import AWXConsumerPG, TaskWorker
|
||||
@ -53,7 +53,6 @@ class Command(BaseCommand):
|
||||
# (like the node heartbeat)
|
||||
periodic.run_continuously()
|
||||
|
||||
reaper.startup_reaping()
|
||||
consumer = None
|
||||
|
||||
try:
|
||||
|
||||
@ -104,6 +104,7 @@ def dispatch_startup():
|
||||
#
|
||||
apply_cluster_membership_policies()
|
||||
cluster_node_heartbeat()
|
||||
reaper.startup_reaping()
|
||||
m = Metrics()
|
||||
m.reset_values()
|
||||
|
||||
@ -505,7 +506,13 @@ def cluster_node_heartbeat():
|
||||
logger.warning('Rejoining the cluster as instance {}.'.format(this_inst.hostname))
|
||||
return
|
||||
else:
|
||||
raise RuntimeError("Cluster Host Not Found: {}".format(settings.CLUSTER_HOST_ID))
|
||||
if settings.AWX_AUTO_DEPROVISION_INSTANCES:
|
||||
(changed, this_inst) = Instance.objects.register(ip_address=os.environ.get('MY_POD_IP'), node_type='control', uuid=settings.SYSTEM_UUID)
|
||||
if changed:
|
||||
logger.warning(f'Recreated instance record {this_inst.hostname} after unexpected removal')
|
||||
this_inst.local_health_check()
|
||||
else:
|
||||
raise RuntimeError("Cluster Host Not Found: {}".format(settings.CLUSTER_HOST_ID))
|
||||
# IFF any node has a greater version than we do, then we'll shut down services
|
||||
for other_inst in instance_list:
|
||||
if other_inst.node_type in ('execution', 'hop'):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user