From d734145c38d8129c2864c311db0e01e263ab6007 Mon Sep 17 00:00:00 2001 From: AlanCoding Date: Mon, 3 Oct 2016 11:41:42 -0400 Subject: [PATCH 1/4] start capability to return False for manual projects --- awx/main/access.py | 4 ++++ awx/main/tests/functional/api/test_rbac_displays.py | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/awx/main/access.py b/awx/main/access.py index 2be67f0849..4b67473205 100644 --- a/awx/main/access.py +++ b/awx/main/access.py @@ -264,6 +264,10 @@ class BaseAccess(object): elif display_method == 'copy' and isinstance(obj, (Group, Host)): user_capabilities['copy'] = user_capabilities['edit'] continue + elif display_method == 'start' and isinstance(obj, (Project)) and obj.scm_type == '': + # Special case to return False for a manual project + user_capabilities['start'] = False + continue # Preprocessing before the access method is called data = {} diff --git a/awx/main/tests/functional/api/test_rbac_displays.py b/awx/main/tests/functional/api/test_rbac_displays.py index 45b4a8f832..30ad16fe5f 100644 --- a/awx/main/tests/functional/api/test_rbac_displays.py +++ b/awx/main/tests/functional/api/test_rbac_displays.py @@ -309,6 +309,11 @@ def test_prefetch_jt_copy_capability(job_template, project, inventory, machine_c ]}], JobTemplate, rando) assert qs[0].capabilities_cache == {'copy': True} +@pytest.mark.django_db +def test_manual_projects_no_update(project, get, admin_user): + response = get(reverse('api:project_detail', args=[project.pk]), admin_user, expect=200) + assert not response.data['summary_fields']['user_capabilities']['start'] + @pytest.mark.django_db def test_group_update_capabilities_possible(group, inventory_source, admin_user): group.inventory_source = inventory_source From 89ae7ebe76eb734f90a4ce506093d22dd7fc5083 Mon Sep 17 00:00:00 2001 From: Jim Ladd Date: Wed, 5 Oct 2016 10:31:45 -0400 Subject: [PATCH 2/4] Clean docker images by name --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 01219e9c21..62aa4dd633 100644 --- a/Makefile +++ b/Makefile @@ -771,7 +771,7 @@ MACHINE?=default docker-clean: eval $$(docker-machine env $(MACHINE)) $(foreach container_id,$(shell docker ps -f name=tools_tower -aq),docker stop $(container_id); docker rm -f $(container_id);) - -docker images | grep "tower_devel" | awk '{print $$3}' | xargs docker rmi + -docker images | grep "tower_devel" | awk '{print $$1 ":" $$2}' | xargs docker rmi docker-refresh: docker-clean docker-compose From 180209236ece19106a5ab444ab0012c4d4401d31 Mon Sep 17 00:00:00 2001 From: Graham Mainwaring Date: Wed, 5 Oct 2016 16:43:19 -0400 Subject: [PATCH 3/4] Change names of Windows scan modules to avoid ambiguity --- awx/playbooks/scan_facts.yml | 28 ++++++++++++++++--- .../{scan_files.ps1 => win_scan_files.ps1} | 0 ...can_packages.ps1 => win_scan_packages.ps1} | 0 ...can_services.ps1 => win_scan_services.ps1} | 0 4 files changed, 24 insertions(+), 4 deletions(-) rename awx/plugins/library/{scan_files.ps1 => win_scan_files.ps1} (100%) rename awx/plugins/library/{scan_packages.ps1 => win_scan_packages.ps1} (100%) rename awx/plugins/library/{scan_services.ps1 => win_scan_services.ps1} (100%) diff --git a/awx/playbooks/scan_facts.yml b/awx/playbooks/scan_facts.yml index 1b90380c62..d24d07d6fa 100644 --- a/awx/playbooks/scan_facts.yml +++ b/awx/playbooks/scan_facts.yml @@ -3,11 +3,31 @@ scan_use_checksum: false scan_use_recursive: false tasks: - - scan_packages: + + - name: "Scan packages (Unix/Linux)" + scan_packages: os_family: '{{ ansible_os_family }}' - - scan_services: - - scan_files: + when: ansible_os_family != "Windows" + - name: "Scan services (Unix/Linux)" + scan_services: + when: ansible_os_family != "Windows" + - name: "Scan files (Unix/Linux)" + scan_files: paths: '{{ scan_file_paths }}' get_checksum: '{{ scan_use_checksum }}' recursive: '{{ scan_use_recursive }}' - when: scan_file_paths is defined \ No newline at end of file + when: scan_file_paths is defined and ansible_os_family != "Windows" + + - name: "Scan packages (Windows)" + win_scan_packages: + when: ansible_os_family == "Windows" + - name: "Scan services (Windows)" + win_scan_services: + when: ansible_os_family == "Windows" + - name: "Scan files (Windows)" + win_scan_files: + paths: '{{ scan_file_paths }}' + get_checksum: '{{ scan_use_checksum }}' + recursive: '{{ scan_use_recursive }}' + when: scan_file_paths is defined and ansible_os_family == "Windows" + diff --git a/awx/plugins/library/scan_files.ps1 b/awx/plugins/library/win_scan_files.ps1 similarity index 100% rename from awx/plugins/library/scan_files.ps1 rename to awx/plugins/library/win_scan_files.ps1 diff --git a/awx/plugins/library/scan_packages.ps1 b/awx/plugins/library/win_scan_packages.ps1 similarity index 100% rename from awx/plugins/library/scan_packages.ps1 rename to awx/plugins/library/win_scan_packages.ps1 diff --git a/awx/plugins/library/scan_services.ps1 b/awx/plugins/library/win_scan_services.ps1 similarity index 100% rename from awx/plugins/library/scan_services.ps1 rename to awx/plugins/library/win_scan_services.ps1 From babe29ebfa955107988e9c301bdaa1c10ac5b9aa Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Thu, 6 Oct 2016 16:05:39 -0400 Subject: [PATCH 4/4] Implement cluster health checks * Add a local node queue to execute targeted jobs * Add a setting for active cluster node id (per-node) * Base the heartbeat time on the `modified` time on the Instance table * Add periodic task that calls save() on the instance to update the heartbeat time if services are up * Purge/update any ha/instance management commands * Fix up CELERY_ROUTES settings data structure --- Makefile | 2 +- awx/api/views.py | 3 +- .../management/commands/_base_instance.py | 112 ------------------ .../management/commands/list_instances.py | 9 +- .../management/commands/register_instance.py | 25 ++-- .../management/commands/remove_instance.py | 43 ------- .../management/commands/update_instance.py | 65 ---------- awx/main/tasks.py | 10 ++ awx/settings/defaults.py | 11 +- awx/settings/development.py | 5 + tools/docker-compose/start_development.sh | 1 + 11 files changed, 43 insertions(+), 243 deletions(-) delete mode 100644 awx/main/management/commands/_base_instance.py delete mode 100644 awx/main/management/commands/remove_instance.py delete mode 100644 awx/main/management/commands/update_instance.py diff --git a/Makefile b/Makefile index 62aa4dd633..dc71fb01e3 100644 --- a/Makefile +++ b/Makefile @@ -400,7 +400,7 @@ celeryd: @if [ "$(VENV_BASE)" ]; then \ . $(VENV_BASE)/tower/bin/activate; \ fi; \ - $(PYTHON) manage.py celeryd -l DEBUG -B --autoscale=20,3 --schedule=$(CELERY_SCHEDULE_FILE) -Q projects,jobs,default,scheduler + $(PYTHON) manage.py celeryd -l DEBUG -B --autoscale=20,3 --schedule=$(CELERY_SCHEDULE_FILE) -Q projects,jobs,default,scheduler,$(COMPOSE_HOST) #$(PYTHON) manage.py celery multi show projects jobs default -l DEBUG -Q:projects projects -Q:jobs jobs -Q:default default -c:projects 1 -c:jobs 3 -c:default 3 -Ofair -B --schedule=$(CELERY_SCHEDULE_FILE) # Run to start the zeromq callback receiver diff --git a/awx/api/views.py b/awx/api/views.py index 5588f1c4bd..b6423776a2 100644 --- a/awx/api/views.py +++ b/awx/api/views.py @@ -169,11 +169,12 @@ class ApiV1PingView(APIView): response = { 'ha': is_ha_environment(), 'version': get_awx_version(), + 'active_node': settings.CLUSTER_HOST_ID, } response['instances'] = [] for instance in Instance.objects.all(): - response['instances'].append(instance.hostname) + response['instances'].append(dict(node=instance.hostname, heartbeat=instance.modified)) response['instances'].sort() return Response(response) diff --git a/awx/main/management/commands/_base_instance.py b/awx/main/management/commands/_base_instance.py deleted file mode 100644 index ac42eced2a..0000000000 --- a/awx/main/management/commands/_base_instance.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2015 Ansible, Inc. -# All Rights Reserved. - -import socket -from optparse import make_option - -from django.core.management.base import BaseCommand, CommandError -from django.conf import settings - - -class OptionEnforceError(Exception): - def __init__(self, value): - self.value = value - - def __str__(self): - return repr(self.value) - -class BaseCommandInstance(BaseCommand): - #option_list = BaseCommand.option_list - - def __init__(self): - super(BaseCommandInstance, self).__init__() - self.enforce_hostname_set = False - self.enforce_unique_find = False - - self.option_hostname = None - self.option_uuid = None - - self.UUID = settings.SYSTEM_UUID - self.unique_fields = {} - - @staticmethod - def generate_option_hostname(): - return make_option('--hostname', - dest='hostname', - default=socket.gethostname(), - help='Find instance by specified hostname.') - - @staticmethod - def generate_option_hostname_set(): - return make_option('--hostname', - dest='hostname', - default=socket.gethostname(), - help='Hostname to assign to the new instance.') - - @staticmethod - def generate_option_uuid(): - #TODO: Likely deprecated, maybe uuid becomes the cluster ident? - return make_option('--uuid', - dest='uuid', - default='', - help='Find instance by specified uuid.') - - def include_option_hostname_set(self): - BaseCommand.option_list += ( BaseCommandInstance.generate_option_hostname_set(), ) - self.enforce_hostname_set = True - - def include_option_hostname_uuid_find(self): - BaseCommand.option_list += ( BaseCommandInstance.generate_option_hostname(), BaseCommandInstance.generate_option_uuid(), ) - self.enforce_unique_find = True - - def get_option_hostname(self): - return self.option_hostname - - def get_option_uuid(self): - return self.option_uuid - - def get_UUID(self): - return self.UUID - - # for the enforce_unique_find policy - def get_unique_fields(self): - return self.unique_fields - - @property - def usage_error(self): - if self.enforce_hostname_set: - return CommandError('--hostname is required.') - - def handle(self, *args, **options): - if self.enforce_hostname_set and self.enforce_unique_find: - raise OptionEnforceError('Can not enforce --hostname as a setter and --hostname as a getter') - - if self.enforce_hostname_set: - if options['hostname']: - self.option_hostname = options['hostname'] - else: - raise self.usage_error - - if self.enforce_unique_find: - if options['hostname']: - self.unique_fields['hostname'] = self.option_hostname = options['hostname'] - - if options['uuid']: - self.unique_fields['uuid'] = self.option_uuid = options['uuid'] - - if len(self.unique_fields) == 0: - self.unique_fields['uuid'] = self.get_UUID() - - @staticmethod - def __instance_str(instance, fields): - string = '(' - for field in fields: - string += '%s="%s",' % (field, getattr(instance, field)) - if len(fields) > 0: - string = string[:-1] - string += ')' - return string - - @staticmethod - def instance_str(instance): - return BaseCommandInstance.__instance_str(instance, ('uuid', 'hostname')) diff --git a/awx/main/management/commands/list_instances.py b/awx/main/management/commands/list_instances.py index 08ccc928ca..cbe767984a 100644 --- a/awx/main/management/commands/list_instances.py +++ b/awx/main/management/commands/list_instances.py @@ -1,12 +1,10 @@ # Copyright (c) 2015 Ansible, Inc. # All Rights Reserved -from awx.main.management.commands._base_instance import BaseCommandInstance from awx.main.models import Instance +from django.core.management.base import NoArgsCommand -instance_str = BaseCommandInstance.instance_str - -class Command(BaseCommandInstance): +class Command(NoArgsCommand): """List instances from the Tower database """ @@ -14,5 +12,4 @@ class Command(BaseCommandInstance): super(Command, self).__init__() for instance in Instance.objects.all(): - print("uuid: %s; hostname: %s; primary: %s; created: %s; modified: %s" % - (instance.uuid, instance.hostname, instance.primary, instance.created, instance.modified)) + print("hostname: {}; created: {}; heartbeat: {}".format(instance.hostname, instance.created, instance.modified)) diff --git a/awx/main/management/commands/register_instance.py b/awx/main/management/commands/register_instance.py index e8ba1160f2..3355dcf983 100644 --- a/awx/main/management/commands/register_instance.py +++ b/awx/main/management/commands/register_instance.py @@ -1,31 +1,30 @@ # Copyright (c) 2015 Ansible, Inc. # All Rights Reserved -from awx.main.management.commands._base_instance import BaseCommandInstance from awx.main.models import Instance +from django.conf import settings -instance_str = BaseCommandInstance.instance_str +from django.core.management.base import CommandError, NoArgsCommand -class Command(BaseCommandInstance): +class Command(NoArgsCommand): """ Internal tower command. Regsiter this instance with the database for HA tracking. - - This command is idempotent. """ - def __init__(self): - super(Command, self).__init__() - self.include_option_hostname_set() + + option_list = NoArgsCommand.option_list + ( + make_option('--hostname', dest='hostname', type='string', + help='Hostname used during provisioning') + ) def handle(self, *args, **options): - super(Command, self).handle(*args, **options) + super(Command, self).handle(**options) + uuid = settings.SYSTEM_UUID - uuid = self.get_UUID() - - instance = Instance.objects.filter(hostname=self.get_option_hostname()) + instance = Instance.objects.filter(hostname=options.get('hostname')) if instance.exists(): print("Instance already registered %s" % instance_str(instance[0])) return - instance = Instance(uuid=uuid, hostname=self.get_option_hostname()) + instance = Instance(uuid=uuid, hostname=options.get('hostname')) instance.save() print('Successfully registered instance %s.' % instance_str(instance)) diff --git a/awx/main/management/commands/remove_instance.py b/awx/main/management/commands/remove_instance.py deleted file mode 100644 index d8712137be..0000000000 --- a/awx/main/management/commands/remove_instance.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2015 Ansible, Inc. -# All Rights Reserved - -from django.core.management.base import CommandError -from awx.main.management.commands._base_instance import BaseCommandInstance - -from awx.main.models import Instance - -instance_str = BaseCommandInstance.instance_str - -class Command(BaseCommandInstance): - """Internal tower command. - Remove an existing instance from the HA instance table. - - This command is idempotent. - - This command will error out in the following conditions: - - * Attempting to remove a primary instance. - """ - def __init__(self): - super(Command, self).__init__() - - self.include_option_hostname_uuid_find() - - def handle(self, *args, **options): - super(Command, self).handle(*args, **options) - - # Is there an existing record for this machine? If so, retrieve that record and look for issues. - try: - # Get the instance. - instance = Instance.objects.get(**self.get_unique_fields()) - - # Sanity check: Do not remove the primary instance. - if instance.primary: - raise CommandError('Cannot remove primary instance %s. Another instance must be promoted to primary first.' % instance_str(instance)) - - # Remove the instance. - instance.delete() - print('Successfully removed instance %s.' % instance_str(instance)) - except Instance.DoesNotExist: - print('No matching instance found to remove.') - diff --git a/awx/main/management/commands/update_instance.py b/awx/main/management/commands/update_instance.py deleted file mode 100644 index 346d5b728f..0000000000 --- a/awx/main/management/commands/update_instance.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2015 Ansible, Inc. -# All Rights Reserved - -from django.core.management.base import CommandError -from django.db import transaction - -from awx.main.management.commands._base_instance import BaseCommandInstance -from awx.conf.license import feature_enabled -from awx.main.models import Instance - -instance_str = BaseCommandInstance.instance_str - -class Command(BaseCommandInstance): - """Set an already registered instance to primary or secondary for HA - tracking. - - This command is idempotent. Settings a new primary instance when a - primary instance already exists will result in the existing primary - instance set to secondary and the new primary set to primary. - - This command will error out under the following circumstances: - - * Attempting to update a secondary instance with no primary instances. - * When a matching instance is not found. - """ - def __init__(self): - super(Command, self).__init__() - - self.include_option_primary_role() - self.include_option_hostname_uuid_find() - - @transaction.atomic - def handle(self, *args, **options): - super(Command, self).handle(*args, **options) - - # You can only promote/demote if your license allows HA - if not feature_enabled('ha'): - raise CommandError('Your Tower license does not permit promoting a secondary instance') - - # Is there an existing record for this machine? If so, retrieve that record and look for issues. - try: - instance = Instance.objects.get(**self.get_unique_fields()) - except Instance.DoesNotExist: - raise CommandError('No matching instance found to update.') - - # Get a status on primary machines (excluding this one, regardless of its status). - other_instances = Instance.objects.exclude(**self.get_unique_fields()) - primaries = other_instances.filter(primary=True).count() - - # If this is a primary machine and there is another primary machine, it must be de-primary-ified. - if self.is_option_primary() and primaries: - for old_primary in other_instances.filter(primary=True): - old_primary.primary = False - old_primary.save() - - # Okay, we've checked for appropriate errata; perform the registration. - instance.primary = self.is_option_primary() - instance.save() - - # If this is a primary instance, update projects. - if self.is_option_primary(): - self.update_projects(instance) - - # Done! - print('Successfully updated instance role %s' % instance_str(instance)) diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 6dbac70108..932ba3262c 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -125,6 +125,15 @@ def run_administrative_checks(self): def cleanup_authtokens(self): AuthToken.objects.filter(expires__lt=now()).delete() +@task(bind=True) +def cluster_node_heartbeat(self): + inst = Instance.objects.filter(hostname=settings.CLUSTER_HOST_ID) + if inst.exists(): + inst = inst[0] + inst.save() + return + raise RuntimeError("Cluster Host Not Found: {}".format(settings.CLUSTER_HOST_ID)) + @task(bind=True, queue='default') def tower_periodic_scheduler(self): def get_last_run(): @@ -154,6 +163,7 @@ def tower_periodic_scheduler(self): # Sanity check: If this is a secondary machine, there is nothing # on the schedule. + # TODO: Fix for clustering/ha if Instance.objects.my_role() == 'secondary': return diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py index f24ee87838..71467e3bab 100644 --- a/awx/settings/defaults.py +++ b/awx/settings/defaults.py @@ -359,7 +359,7 @@ CELERY_QUEUES = ( # Projects use a fanout queue, this isn't super well supported Broadcast('projects'), ) -CELERY_ROUTES = ({'awx.main.tasks.run_job': {'queue': 'jobs', +CELERY_ROUTES = {'awx.main.tasks.run_job': {'queue': 'jobs', 'routing_key': 'jobs'}, 'awx.main.tasks.run_project_update': {'queue': 'projects'}, 'awx.main.tasks.run_inventory_update': {'queue': 'jobs', @@ -371,7 +371,10 @@ CELERY_ROUTES = ({'awx.main.tasks.run_job': {'queue': 'jobs', 'awx.main.scheduler.tasks.run_job_launch': {'queue': 'scheduler', 'routing_key': 'scheduler.job.launch'}, 'awx.main.scheduler.tasks.run_job_complete': {'queue': 'scheduler', - 'routing_key': 'scheduler.job.complete'},}) + 'routing_key': 'scheduler.job.complete'}, + 'awx.main.tasks.cluster_node_heartbeat': {'queue': 'default', + 'routing_key': 'cluster.heartbeat'}, +} CELERYBEAT_SCHEDULE = { 'tower_scheduler': { @@ -386,6 +389,10 @@ CELERYBEAT_SCHEDULE = { 'task': 'awx.main.tasks.cleanup_authtokens', 'schedule': timedelta(days=30) }, + 'cluster_heartbeat': { + 'task': 'awx.main.tasks.cluster_node_heartbeat', + 'schedule': timedelta(seconds=60) + }, } # Django Caching Configuration diff --git a/awx/settings/development.py b/awx/settings/development.py index c19afa34e8..ec33a29876 100644 --- a/awx/settings/development.py +++ b/awx/settings/development.py @@ -4,6 +4,7 @@ # Development settings for AWX project. # Python +import socket import copy import sys import traceback @@ -106,3 +107,7 @@ try: except ImportError: traceback.print_exc() sys.exit(1) + +CLUSTER_HOST_ID = socket.gethostname() +CELERY_ROUTES['awx.main.tasks.cluster_node_heartbeat'] = {'queue': CLUSTER_HOST_ID, 'routing_key': CLUSTER_HOST_ID} + diff --git a/tools/docker-compose/start_development.sh b/tools/docker-compose/start_development.sh index d0191dc2f4..e2db66c3e4 100755 --- a/tools/docker-compose/start_development.sh +++ b/tools/docker-compose/start_development.sh @@ -10,6 +10,7 @@ ansible -i "127.0.0.1," -c local -v -m wait_for -a "host=${RABBITMQ_HOST} port=5 # TODO: FIX #/etc/init.d/ssh start + ansible -i "127.0.0.1," -c local -v -m postgresql_user -U postgres -a "name=awx-dev password=AWXsome1 login_user=postgres login_host=postgres" all ansible -i "127.0.0.1," -c local -v -m postgresql_db -U postgres -a "name=awx-dev owner=awx-dev login_user=postgres login_host=postgres" all