Refactor Tower HA Instance logic and models

* Gut the HA middleware
* Purge concept of primary and secondary.
* UUID is no longer the primary host identifier; it is now based mostly on
  the hostname.  Some work probably still left to do to make sure this is
  legit.  Also removed unique constraint from the uuid field.  This
  might become the cluster ident now... or it may just be deprecated
* No more secondary -> primary redirection
* Initial revision of /api/v1/ping
* Revise and gut tower-manage register_instance
* Rename awx/main/socket.py to awx/main/socket_queue.py to prevent
  conflict with the "socket" module from python base
* Revisit/gut the Instance manager... not sure if this manager is really
  needed anymore
This commit is contained in:
Matthew Jones 2016-09-08 13:37:53 -04:00
parent eafb6c92b5
commit 0c1e1fa2fb
14 changed files with 56 additions and 210 deletions

View File

@ -15,6 +15,8 @@ COMPOSE_TAG ?= devel
# NOTE: This defaults the container image version to the branch that's active
# COMPOSE_TAG ?= $(GIT_BRANCH)
COMPOSE_HOST ?= $(shell hostname)
VENV_BASE ?= /venv
SCL_PREFIX ?=
CELERY_SCHEDULE_FILE ?= /celerybeat-schedule
@ -325,7 +327,7 @@ init:
if [ "$(VENV_BASE)" ]; then \
. $(VENV_BASE)/tower/bin/activate; \
fi; \
tower-manage register_instance --primary --hostname=127.0.0.1; \
tower-manage register_instance --hostname=$(COMPOSE_HOST); \
# Refresh development environment after pulling new code.
refresh: clean requirements_dev version_file develop migrate

View File

@ -166,28 +166,13 @@ class ApiV1PingView(APIView):
# Most of this response is canned; just build the dictionary.
response = {
'ha': is_ha_environment(),
'role': Instance.objects.my_role(),
'version': get_awx_version(),
}
# If this is an HA environment, we also include the IP address of
# all of the instances.
#
# Set up a default structure.
response['instances'] = {
'primary': None,
'secondaries': [],
}
# Add all of the instances into the structure.
response['instances'] = []
for instance in Instance.objects.all():
if instance.primary:
response['instances']['primary'] = instance.hostname
else:
response['instances']['secondaries'].append(instance.hostname)
response['instances']['secondaries'].sort()
# Done; return the response.
response['instances'].append(instance.hostname)
response['instances'].sort()
return Response(response)

View File

@ -1,6 +1,7 @@
# Copyright (c) 2015 Ansible, Inc.
# All Rights Reserved.
import socket
from optparse import make_option
from django.core.management.base import BaseCommand, CommandError
@ -21,13 +22,9 @@ class BaseCommandInstance(BaseCommand):
def __init__(self):
super(BaseCommandInstance, self).__init__()
self.enforce_primary_role = False
self.enforce_roles = False
self.enforce_hostname_set = False
self.enforce_unique_find = False
self.option_primary = False
self.option_secondary = False
self.option_hostname = None
self.option_uuid = None
@ -38,48 +35,24 @@ class BaseCommandInstance(BaseCommand):
def generate_option_hostname():
return make_option('--hostname',
dest='hostname',
default='',
default=socket.gethostname(),
help='Find instance by specified hostname.')
@staticmethod
def generate_option_hostname_set():
return make_option('--hostname',
dest='hostname',
default='',
default=socket.gethostname(),
help='Hostname to assign to the new instance.')
@staticmethod
def generate_option_primary():
return make_option('--primary',
action='store_true',
default=False,
dest='primary',
help='Register instance as primary.')
@staticmethod
def generate_option_secondary():
return make_option('--secondary',
action='store_true',
default=False,
dest='secondary',
help='Register instance as secondary.')
@staticmethod
def generate_option_uuid():
#TODO: Likely deprecated, maybe uuid becomes the cluster ident?
return make_option('--uuid',
dest='uuid',
default='',
help='Find instance by specified uuid.')
def include_option_primary_role(self):
BaseCommand.option_list += ( BaseCommandInstance.generate_option_primary(), )
self.enforce_primary_role = True
def include_options_roles(self):
self.include_option_primary_role()
BaseCommand.option_list += ( BaseCommandInstance.generate_option_secondary(), )
self.enforce_roles = True
def include_option_hostname_set(self):
BaseCommand.option_list += ( BaseCommandInstance.generate_option_hostname_set(), )
self.enforce_hostname_set = True
@ -94,12 +67,6 @@ class BaseCommandInstance(BaseCommand):
def get_option_uuid(self):
return self.option_uuid
def is_option_primary(self):
return self.option_primary
def is_option_secondary(self):
return self.option_secondary
def get_UUID(self):
return self.UUID
@ -109,31 +76,13 @@ class BaseCommandInstance(BaseCommand):
@property
def usage_error(self):
if self.enforce_roles and self.enforce_hostname_set:
return CommandError('--hostname and one of --primary or --secondary is required.')
elif self.enforce_hostname_set:
if self.enforce_hostname_set:
return CommandError('--hostname is required.')
elif self.enforce_primary_role:
return CommandError('--primary is required.')
elif self.enforce_roles:
return CommandError('One of --primary or --secondary is required.')
def handle(self, *args, **options):
if self.enforce_hostname_set and self.enforce_unique_find:
raise OptionEnforceError('Can not enforce --hostname as a setter and --hostname as a getter')
if self.enforce_roles:
self.option_primary = options['primary']
self.option_secondary = options['secondary']
if self.is_option_primary() and self.is_option_secondary() or not (self.is_option_primary() or self.is_option_secondary()):
raise self.usage_error
elif self.enforce_primary_role:
if options['primary']:
self.option_primary = options['primary']
else:
raise self.usage_error
if self.enforce_hostname_set:
if options['hostname']:
self.option_hostname = options['hostname']
@ -162,11 +111,4 @@ class BaseCommandInstance(BaseCommand):
@staticmethod
def instance_str(instance):
return BaseCommandInstance.__instance_str(instance, ('uuid', 'hostname', 'role'))
def update_projects(self, instance):
"""Update all projects, ensuring the job runs against this instance,
which is the primary instance.
"""
for project in Project.objects.all():
project.update()
return BaseCommandInstance.__instance_str(instance, ('uuid', 'hostname'))

View File

@ -9,22 +9,14 @@ from awx.main.models import Instance
instance_str = BaseCommandInstance.instance_str
class Command(BaseCommandInstance):
"""Internal tower command.
"""
Internal tower command.
Register this instance with the database for HA tracking.
This command is idempotent.
This command will error out in the following conditions:
* Attempting to register a secondary machine with no primary machines.
* Attempting to register a primary instance when a different primary
instance exists.
* Attempting to re-register an instance with changed values.
"""
def __init__(self):
super(Command, self).__init__()
self.include_options_roles()
self.include_option_hostname_set()
def handle(self, *args, **options):
@ -32,32 +24,10 @@ class Command(BaseCommandInstance):
uuid = self.get_UUID()
# Is there an existing record for this machine? If so, retrieve that record and look for issues.
try:
instance = Instance.objects.get(uuid=uuid)
if instance.hostname != self.get_option_hostname():
raise CommandError('Instance already registered with a different hostname %s.' % instance_str(instance))
print("Instance already registered %s" % instance_str(instance))
except Instance.DoesNotExist:
# Get a status on primary machines (excluding this one, regardless of its status).
other_instances = Instance.objects.exclude(uuid=uuid)
primaries = other_instances.filter(primary=True).count()
# If this instance is being set to primary and a *different* primary machine alreadyexists, error out.
if self.is_option_primary() and primaries:
raise CommandError('Another instance is already registered as primary.')
# Lastly, if there are no primary machines at all, then don't allow this to be registered as a secondary machine.
if self.is_option_secondary() and not primaries:
raise CommandError('Unable to register a secondary machine until another primary machine has been registered.')
# Okay, we've checked for appropriate errata; perform the registration.
instance = Instance(uuid=uuid, primary=self.is_option_primary(), hostname=self.get_option_hostname())
instance.save()
# If this is a primary instance, update projects.
if instance.primary:
self.update_projects(instance)
# Done!
print('Successfully registered instance %s.' % instance_str(instance))
instance = Instance.objects.filter(hostname=self.get_option_hostname())
if instance.exists():
print("Instance already registered %s" % instance_str(instance[0]))
return
instance = Instance(uuid=uuid, hostname=self.get_option_hostname())
instance.save()
print('Successfully registered instance %s.' % instance_str(instance))

View File

@ -21,7 +21,7 @@ from django.db import connection
# AWX
from awx.main.models import * # noqa
from awx.main.socket import Socket
from awx.main.socket_queue import Socket
logger = logging.getLogger('awx.main.commands.run_callback_receiver')

View File

@ -14,7 +14,7 @@ from django.utils import timezone
# AWX
from awx.main.models.fact import Fact
from awx.main.models.inventory import Host
from awx.main.socket import Socket
from awx.main.socket_queue import Socket
logger = logging.getLogger('awx.main.commands.run_fact_cache_receiver')

View File

@ -16,7 +16,7 @@ from django.core.management.base import NoArgsCommand
# AWX
import awx
from awx.main.models import * # noqa
from awx.main.socket import Socket
from awx.main.socket_queue import Socket
# socketio
from socketio import socketio_manage

View File

@ -2,6 +2,7 @@
# All Rights Reserved.
import sys
import socket
from django.conf import settings
from django.db import models
@ -28,31 +29,12 @@ class InstanceManager(models.Manager):
# If we are running unit tests, return a stub record.
if len(sys.argv) >= 2 and sys.argv[1] == 'test':
return self.model(id=1, primary=True,
hostname='localhost',
uuid='00000000-0000-0000-0000-000000000000')
# Return the appropriate record from the database.
return self.get(uuid=settings.SYSTEM_UUID)
return self.get(hostname=socket.gethostname())
def my_role(self):
"""Return the role of the currently active instance, as a string
('primary' or 'secondary').
"""
# If we are running unit tests, we are primary, because reasons.
if len(sys.argv) >= 2 and sys.argv[1] == 'test':
return 'primary'
# Check if this instance is primary; if so, return "primary", otherwise
# "secondary".
if self.me().primary:
return 'primary'
return 'secondary'
def primary(self):
"""Return the primary instance."""
# If we are running unit tests, return a stub record.
if len(sys.argv) >= 2 and sys.argv[1] == 'test':
return self.model(id=1, primary=True,
uuid='00000000-0000-0000-0000-000000000000')
# Return the appropriate record from the database.
return self.get(primary=True)
# NOTE: TODO: Likely to repurpose this once standalone ramparts are a thing
return "tower"

View File

@ -71,41 +71,6 @@ class ActivityStreamMiddleware(threading.local):
if instance.id not in self.instance_ids:
self.instance_ids.append(instance.id)
class HAMiddleware(object):
"""A middleware class that checks to see whether the request is being
served on a secondary instance, and redirects the request back to the
primary instance if so.
"""
def process_request(self, request):
"""Process the request, and redirect if this is a request on a
secondary node.
"""
# Is this the primary node? If so, we can just return None and be done;
# we just want normal behavior in this case.
if Instance.objects.my_role() == 'primary':
return None
# Always allow the /ping/ endpoint.
if request.path.startswith('/api/v1/ping'):
return None
# Get the primary instance.
primary = Instance.objects.primary()
# If this is a request to /, then we return a special landing page that
# informs the user that they are on the secondary instance and will
# be redirected.
if request.path == '/':
return TemplateResponse(request, 'ha/redirect.html', {
'primary': primary,
'redirect_seconds': 30,
'version': version,
})
# Redirect to the base page of the primary instance.
return HttpResponseRedirect('http://%s%s' % (primary.hostname, request.path))
class AuthTokenTimeoutMiddleware(object):
"""Presume that when the user includes the auth header, they go through the
authentication mechanism. Further, that mechanism is presumed to extend

View File

@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0032_v302_credential_permissions_update'),
]
operations = [
migrations.RemoveField(
model_name='instance',
name='primary',
),
migrations.AlterField(
model_name='instance',
name='uuid',
field=models.CharField(max_length=40),
),
]

View File

@ -22,9 +22,8 @@ class Instance(models.Model):
"""
objects = InstanceManager()
uuid = models.CharField(max_length=40, unique=True)
uuid = models.CharField(max_length=40)
hostname = models.CharField(max_length=250, unique=True)
primary = models.BooleanField(default=False)
created = models.DateTimeField(auto_now_add=True)
modified = models.DateTimeField(auto_now=True)
@ -33,29 +32,8 @@ class Instance(models.Model):
@property
def role(self):
"""Return the role of this instance, as a string."""
if self.primary:
return 'primary'
return 'secondary'
@functools.wraps(models.Model.save)
def save(self, *args, **kwargs):
"""Save the instance. If this is a secondary instance, then ensure
that any currently-running jobs that this instance started are
canceled.
"""
# Perform the normal save.
result = super(Instance, self).save(*args, **kwargs)
# If this is not a primary instance, then kill any jobs that this
# instance was responsible for starting.
if not self.primary:
for job in UnifiedJob.objects.filter(job_origin__instance=self,
status__in=CAN_CANCEL):
job.cancel()
# Return back the original result.
return result
# NOTE: TODO: Likely to repurpose this once standalone ramparts are a thing
return "tower"
class JobOrigin(models.Model):

View File

@ -425,7 +425,7 @@ def get_system_task_capacity():
def emit_websocket_notification(endpoint, event, payload, token_key=None):
from awx.main.socket import Socket
from awx.main.socket_queue import Socket
try:
with Socket('websocket', 'w', nowait=True, logger=logger) as websocket:

View File

@ -152,7 +152,6 @@ MIDDLEWARE_CLASSES = ( # NOQA
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'awx.main.middleware.HAMiddleware',
'awx.main.middleware.ActivityStreamMiddleware',
'awx.sso.middleware.SocialAuthMiddleware',
'crum.CurrentRequestUserMiddleware',