mirror of
https://github.com/ansible/awx.git
synced 2026-01-19 05:31:22 -03:30
Merge pull request #5987 from ansible/version_protection_check
Protect cluster nodes after an upgrade
This commit is contained in:
commit
518e7cc896
@ -222,7 +222,8 @@ class ApiV1PingView(APIView):
|
||||
|
||||
response['instances'] = []
|
||||
for instance in Instance.objects.all():
|
||||
response['instances'].append(dict(node=instance.hostname, heartbeat=instance.modified, capacity=instance.capacity))
|
||||
response['instances'].append(dict(node=instance.hostname, heartbeat=instance.modified,
|
||||
capacity=instance.capacity, version=instance.version))
|
||||
response['instances'].sort()
|
||||
return Response(response)
|
||||
|
||||
|
||||
19
awx/main/migrations/0037_v313_instance_version.py
Normal file
19
awx/main/migrations/0037_v313_instance_version.py
Normal file
@ -0,0 +1,19 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('main', '0036_v311_insights'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='instance',
|
||||
name='version',
|
||||
field=models.CharField(max_length=24, blank=True),
|
||||
),
|
||||
]
|
||||
@ -26,6 +26,7 @@ class Instance(models.Model):
|
||||
hostname = models.CharField(max_length=250, unique=True)
|
||||
created = models.DateTimeField(auto_now_add=True)
|
||||
modified = models.DateTimeField(auto_now=True)
|
||||
version = models.CharField(max_length=24, blank=True)
|
||||
capacity = models.PositiveIntegerField(
|
||||
default=100,
|
||||
editable=False,
|
||||
|
||||
@ -21,6 +21,7 @@ import traceback
|
||||
import urlparse
|
||||
import uuid
|
||||
from distutils.version import LooseVersion as Version
|
||||
from datetime import timedelta
|
||||
import yaml
|
||||
try:
|
||||
import psutil
|
||||
@ -45,6 +46,7 @@ from django.utils.translation import ugettext_lazy as _
|
||||
from django.core.cache import cache
|
||||
|
||||
# AWX
|
||||
from awx import __version__ as tower_application_version
|
||||
from awx.main.constants import CLOUD_PROVIDERS
|
||||
from awx.main.models import * # noqa
|
||||
from awx.main.models import UnifiedJob
|
||||
@ -53,7 +55,7 @@ from awx.main.task_engine import TaskEnhancer
|
||||
from awx.main.utils import (get_ansible_version, get_ssh_version, decrypt_field, update_scm_url,
|
||||
check_proot_installed, build_proot_temp_dir, wrap_args_with_proot,
|
||||
get_system_task_capacity, OutputEventFilter, parse_yaml_or_json)
|
||||
from awx.main.utils.reload import restart_local_services
|
||||
from awx.main.utils.reload import restart_local_services, stop_local_services
|
||||
from awx.main.utils.handlers import configure_external_logger
|
||||
from awx.main.consumers import emit_channel_notification
|
||||
|
||||
@ -174,13 +176,27 @@ def purge_old_stdout_files(self):
|
||||
@task(bind=True)
|
||||
def cluster_node_heartbeat(self):
|
||||
logger.debug("Cluster node heartbeat task.")
|
||||
nowtime = now()
|
||||
inst = Instance.objects.filter(hostname=settings.CLUSTER_HOST_ID)
|
||||
if inst.exists():
|
||||
inst = inst[0]
|
||||
inst.capacity = get_system_task_capacity()
|
||||
inst.version = tower_application_version
|
||||
inst.save()
|
||||
return
|
||||
raise RuntimeError("Cluster Host Not Found: {}".format(settings.CLUSTER_HOST_ID))
|
||||
else:
|
||||
raise RuntimeError("Cluster Host Not Found: {}".format(settings.CLUSTER_HOST_ID))
|
||||
recent_inst = Instance.objects.filter(modified__gt=nowtime - timedelta(seconds=70)).exclude(hostname=settings.CLUSTER_HOST_ID)
|
||||
# IFF any node has a greater version than we do, then we'll shutdown services
|
||||
for other_inst in recent_inst:
|
||||
if other_inst.version == "":
|
||||
continue
|
||||
if Version(other_inst.version) > Version(tower_application_version):
|
||||
logger.error("Host {} reports Tower version {}, but this node {} is at {}, shutting down".format(other_inst.hostname,
|
||||
other_inst.version,
|
||||
inst.hostname,
|
||||
inst.version))
|
||||
stop_local_services(['uwsgi', 'celery', 'beat', 'callback', 'fact'])
|
||||
|
||||
|
||||
|
||||
@task(bind=True, queue='default')
|
||||
|
||||
@ -4,7 +4,7 @@ from awx.main.utils import reload
|
||||
|
||||
def test_produce_supervisor_command(mocker):
|
||||
with mocker.patch.object(reload.subprocess, 'Popen'):
|
||||
reload._supervisor_service_restart(['beat', 'callback', 'fact'])
|
||||
reload._supervisor_service_command(['beat', 'callback', 'fact'], "restart")
|
||||
reload.subprocess.Popen.assert_called_once_with(
|
||||
['supervisorctl', 'restart', 'tower-processes:receiver', 'tower-processes:factcacher'])
|
||||
|
||||
@ -14,13 +14,13 @@ def test_routing_of_service_restarts_works(mocker):
|
||||
This tests that the parent restart method will call the appropriate
|
||||
service restart methods, depending on which services are given in args
|
||||
'''
|
||||
with mocker.patch.object(reload, '_uwsgi_reload'),\
|
||||
with mocker.patch.object(reload, '_uwsgi_fifo_command'),\
|
||||
mocker.patch.object(reload, '_reset_celery_thread_pool'),\
|
||||
mocker.patch.object(reload, '_supervisor_service_restart'):
|
||||
mocker.patch.object(reload, '_supervisor_service_command'):
|
||||
reload.restart_local_services(['uwsgi', 'celery', 'flower', 'daphne'])
|
||||
reload._uwsgi_reload.assert_called_once_with()
|
||||
reload._uwsgi_fifo_command.assert_called_once_with(uwsgi_command="c")
|
||||
reload._reset_celery_thread_pool.assert_called_once_with()
|
||||
reload._supervisor_service_restart.assert_called_once_with(['flower', 'daphne'])
|
||||
reload._supervisor_service_command.assert_called_once_with(['flower', 'daphne'], command="restart")
|
||||
|
||||
|
||||
|
||||
@ -28,11 +28,11 @@ def test_routing_of_service_restarts_diables(mocker):
|
||||
'''
|
||||
Test that methods are not called if not in the args
|
||||
'''
|
||||
with mocker.patch.object(reload, '_uwsgi_reload'),\
|
||||
with mocker.patch.object(reload, '_uwsgi_fifo_command'),\
|
||||
mocker.patch.object(reload, '_reset_celery_thread_pool'),\
|
||||
mocker.patch.object(reload, '_supervisor_service_restart'):
|
||||
mocker.patch.object(reload, '_supervisor_service_command'):
|
||||
reload.restart_local_services(['flower'])
|
||||
reload._uwsgi_reload.assert_not_called()
|
||||
reload._uwsgi_fifo_command.assert_not_called()
|
||||
reload._reset_celery_thread_pool.assert_not_called()
|
||||
reload._supervisor_service_restart.assert_called_once_with(['flower'])
|
||||
reload._supervisor_service_command.assert_called_once_with(['flower'], command="restart")
|
||||
|
||||
|
||||
@ -14,12 +14,12 @@ from celery import current_app
|
||||
logger = logging.getLogger('awx.main.utils.reload')
|
||||
|
||||
|
||||
def _uwsgi_reload():
|
||||
def _uwsgi_fifo_command(uwsgi_command):
|
||||
# http://uwsgi-docs.readthedocs.io/en/latest/MasterFIFO.html#available-commands
|
||||
logger.warn('Initiating uWSGI chain reload of server')
|
||||
TRIGGER_CHAIN_RELOAD = 'c'
|
||||
TRIGGER_COMMAND = uwsgi_command
|
||||
with open(settings.UWSGI_FIFO_LOCATION, 'w') as awxfifo:
|
||||
awxfifo.write(TRIGGER_CHAIN_RELOAD)
|
||||
awxfifo.write(TRIGGER_COMMAND)
|
||||
|
||||
|
||||
def _reset_celery_thread_pool():
|
||||
@ -29,7 +29,7 @@ def _reset_celery_thread_pool():
|
||||
destination=['celery@{}'.format(settings.CLUSTER_HOST_ID)], reply=False)
|
||||
|
||||
|
||||
def _supervisor_service_restart(service_internal_names):
|
||||
def _supervisor_service_command(service_internal_names, command):
|
||||
'''
|
||||
Service internal name options:
|
||||
- beat - celery - callback - channels - uwsgi - daphne
|
||||
@ -46,7 +46,7 @@ def _supervisor_service_restart(service_internal_names):
|
||||
for n in service_internal_names:
|
||||
if n in name_translation_dict:
|
||||
programs.append('{}:{}'.format(group_name, name_translation_dict[n]))
|
||||
args.extend(['restart'])
|
||||
args.extend([command])
|
||||
args.extend(programs)
|
||||
logger.debug('Issuing command to restart services, args={}'.format(args))
|
||||
subprocess.Popen(args)
|
||||
@ -55,14 +55,18 @@ def _supervisor_service_restart(service_internal_names):
|
||||
def restart_local_services(service_internal_names):
|
||||
logger.warn('Restarting services {} on this node in response to user action'.format(service_internal_names))
|
||||
if 'uwsgi' in service_internal_names:
|
||||
_uwsgi_reload()
|
||||
_uwsgi_fifo_command(uwsgi_command='c')
|
||||
service_internal_names.remove('uwsgi')
|
||||
restart_celery = False
|
||||
if 'celery' in service_internal_names:
|
||||
restart_celery = True
|
||||
service_internal_names.remove('celery')
|
||||
_supervisor_service_restart(service_internal_names)
|
||||
_supervisor_service_command(service_internal_names, command='restart')
|
||||
if restart_celery:
|
||||
# Celery restarted last because this probably includes current process
|
||||
_reset_celery_thread_pool()
|
||||
|
||||
|
||||
def stop_local_services(service_internal_names):
|
||||
logger.warn('Stopping services {} on this node in response to user action'.format(service_internal_names))
|
||||
_supervisor_service_command(service_internal_names, command='stop')
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user