mirror of https://github.com/ansible/awx.git
Apply capacity algorithm changes

* This also adds fields to the instance view for tracking CPU and memory usage, as well as information on what the capacity ranges are
* Also adds a flag for enabling/disabling instances, which removes them from all queues and has them stop processing new work
* The capacity is now based almost exclusively on some value relative to forks
* capacity_adjustment allows you to commit an instance to a certain amount of forks, CPU focused or memory focused
* Each job run adds a single fork of overhead (that's the reasoning behind the +1)
parent 6a85fc38dd
commit 70bf78e29f
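Before the diff itself, a standalone sketch of the new fork-based math may help; the box size and defaults below are illustrative (the real implementations are `get_cpu_capacity`, `get_mem_capacity`, and `get_system_task_capacity` further down in this diff):

```
# Hypothetical 4-core, 16 GiB instance with the default fork settings
cpus, mem_bytes = 4, 16 * 1024 ** 3
forks_per_cpu, mem_per_fork_mb = 4, 100  # SYSTEM_TASK_FORKS_CPU / SYSTEM_TASK_FORKS_MEM defaults

cpu_cap = cpus * forks_per_cpu                                           # 16 forks (CPU-bound view)
mem_cap = max(1, (mem_bytes // 1024 // 1024 - 2048) // mem_per_fork_mb)  # 143 forks (memory-bound view)

def capacity(scale):  # scale is the new capacity_adjustment field
    lo, hi = min(cpu_cap, mem_cap), max(cpu_cap, mem_cap)
    return lo + (hi - lo) * scale

print(capacity(0.0))   # 16  -- committed to the conservative bound
print(capacity(1.0))   # 143 -- committed to the generous bound
```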
@@ -3977,8 +3977,10 @@ class InstanceSerializer(BaseSerializer):
     class Meta:
         model = Instance
-        fields = ("id", "type", "url", "related", "uuid", "hostname", "created", "modified",
-                  "version", "capacity", "consumed_capacity", "percent_capacity_remaining", "jobs_running")
         read_only_fields = ('uuid', 'hostname', 'version')
+        fields = ("id", "type", "url", "related", "uuid", "hostname", "created", "modified", 'capacity_adjustment',
+                  "version", "capacity", "consumed_capacity", "percent_capacity_remaining", "jobs_running",
+                  "cpu", "memory", "cpu_capacity", "mem_capacity", "enabled")

     def get_related(self, obj):
         res = super(InstanceSerializer, self).get_related(obj)
@@ -57,7 +57,7 @@ import pytz
 from wsgiref.util import FileWrapper

 # AWX
-from awx.main.tasks import send_notifications
+from awx.main.tasks import send_notifications, handle_ha_toplogy_changes
 from awx.main.access import get_user_queryset
 from awx.main.ha import is_ha_environment
 from awx.api.authentication import TokenGetAuthentication
@@ -560,7 +560,7 @@ class InstanceList(ListAPIView):
     new_in_320 = True


-class InstanceDetail(RetrieveAPIView):
+class InstanceDetail(RetrieveUpdateAPIView):

     view_name = _("Instance Detail")
     model = Instance
@@ -568,6 +568,20 @@ class InstanceDetail(RetrieveAPIView):
     new_in_320 = True

+    def update(self, request, *args, **kwargs):
+        r = super(InstanceDetail, self).update(request, *args, **kwargs)
+        if status.is_success(r.status_code):
+            obj = self.get_object()
+            if obj.enabled:
+                obj.refresh_capacity()
+            else:
+                obj.capacity = 0
+                obj.save()
+            handle_ha_toplogy_changes.apply_async()
+            r.data = InstanceSerializer(obj, context=self.get_serializer_context()).to_representation(obj)
+        return r
+

 class InstanceUnifiedJobsList(SubListAPIView):

     view_name = _("Instance Running Jobs")
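With `InstanceDetail` now a `RetrieveUpdateAPIView`, the two writable fields can be exercised straight over the API. A hypothetical exchange (the instance id and values are illustrative):

```
HTTP PATCH /api/v2/instances/1/ {'enabled': false}
    -> update() forces capacity to 0; the node stops taking new work

HTTP PATCH /api/v2/instances/1/ {'capacity_adjustment': '0.25'}
    -> update() calls refresh_capacity(), re-deriving capacity from the new scale
```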
@@ -2,12 +2,9 @@
 # All Rights Reserved.

 import sys
-from datetime import timedelta
 import logging

 from django.db import models
-from django.utils.timezone import now
-from django.db.models import Sum
 from django.conf import settings

 from awx.main.utils.filters import SmartFilter
@@ -1,31 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
-from django.db import migrations, models
-import awx.main.fields
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('main', '0017_v330_move_deprecated_stdout'),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name='instancegroup',
-            name='policy_instance_list',
-            field=awx.main.fields.JSONField(default=[], help_text='List of exact-match Instances that will always be automatically assigned to this group',
-                                            blank=True),
-        ),
-        migrations.AddField(
-            model_name='instancegroup',
-            name='policy_instance_minimum',
-            field=models.IntegerField(default=0, help_text='Static minimum number of Instances to automatically assign to this group'),
-        ),
-        migrations.AddField(
-            model_name='instancegroup',
-            name='policy_instance_percentage',
-            field=models.IntegerField(default=0, help_text='Percentage of Instances to automatically assign to this group'),
-        ),
-    ]
awx/main/migrations/0020_v330_instancegroup_policies.py (new file, 62 lines)
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+from decimal import Decimal
+import awx.main.fields
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('main', '0019_v330_custom_virtualenv'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='instancegroup',
+            name='policy_instance_list',
+            field=awx.main.fields.JSONField(default=[], help_text='List of exact-match Instances that will always be automatically assigned to this group',
+                                            blank=True),
+        ),
+        migrations.AddField(
+            model_name='instancegroup',
+            name='policy_instance_minimum',
+            field=models.IntegerField(default=0, help_text='Static minimum number of Instances to automatically assign to this group'),
+        ),
+        migrations.AddField(
+            model_name='instancegroup',
+            name='policy_instance_percentage',
+            field=models.IntegerField(default=0, help_text='Percentage of Instances to automatically assign to this group'),
+        ),
+        migrations.AddField(
+            model_name='instance',
+            name='capacity_adjustment',
+            field=models.DecimalField(decimal_places=2, default=Decimal('1.0'), max_digits=3),
+        ),
+        migrations.AddField(
+            model_name='instance',
+            name='cpu',
+            field=models.IntegerField(default=0, editable=False)
+        ),
+        migrations.AddField(
+            model_name='instance',
+            name='memory',
+            field=models.BigIntegerField(default=0, editable=False)
+        ),
+        migrations.AddField(
+            model_name='instance',
+            name='cpu_capacity',
+            field=models.IntegerField(default=0, editable=False)
+        ),
+        migrations.AddField(
+            model_name='instance',
+            name='mem_capacity',
+            field=models.IntegerField(default=0, editable=False)
+        ),
+        migrations.AddField(
+            model_name='instance',
+            name='enabled',
+            field=models.BooleanField(default=True)
+        )
+    ]
@@ -184,7 +184,7 @@ class AdHocCommand(UnifiedJob, JobNotificationMixin):
         # NOTE: We sorta have to assume the host count matches and that forks default to 5
         from awx.main.models.inventory import Host
         count_hosts = Host.objects.filter(enabled=True, inventory__ad_hoc_commands__pk=self.pk).count()
-        return min(count_hosts, 5 if self.forks == 0 else self.forks) * 10
+        return min(count_hosts, 5 if self.forks == 0 else self.forks) + 1

     def copy(self):
         data = {}
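The same `* 10` to `+ 1` reweighting is applied to jobs further down; a quick worked comparison under illustrative numbers:

```
# Illustrative numbers: an ad hoc command or job touching 20 hosts,
# with forks left at 0 (treated as the default of 5)
count_hosts, forks = 20, 0
old_impact = min(count_hosts, 5 if forks == 0 else forks) * 10   # 50 capacity units
new_impact = min(count_hosts, 5 if forks == 0 else forks) + 1    # 6: five forks + one fork of per-job overhead
```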
@@ -1,6 +1,8 @@
 # Copyright (c) 2015 Ansible, Inc.
 # All Rights Reserved.

+from decimal import Decimal
+
 from django.db import models, connection
 from django.db.models.signals import post_save, post_delete
 from django.dispatch import receiver
@@ -10,6 +12,7 @@ from django.utils.timezone import now, timedelta

 from solo.models import SingletonModel

+from awx import __version__ as awx_application_version
 from awx.api.versioning import reverse
 from awx.main.managers import InstanceManager, InstanceGroupManager
 from awx.main.fields import JSONField
@@ -17,6 +20,7 @@ from awx.main.models.inventory import InventoryUpdate
 from awx.main.models.jobs import Job
 from awx.main.models.projects import ProjectUpdate
 from awx.main.models.unified_jobs import UnifiedJob
+from awx.main.utils import get_cpu_capacity, get_mem_capacity, get_system_task_capacity

 __all__ = ('Instance', 'InstanceGroup', 'JobOrigin', 'TowerScheduleState',)
@@ -39,6 +43,30 @@ class Instance(models.Model):
         default=100,
         editable=False,
     )
+    capacity_adjustment = models.DecimalField(
+        default=Decimal(1.0),
+        max_digits=3,
+        decimal_places=2,
+    )
+    enabled = models.BooleanField(
+        default=True
+    )
+    cpu = models.IntegerField(
+        default=0,
+        editable=False,
+    )
+    memory = models.BigIntegerField(
+        default=0,
+        editable=False,
+    )
+    cpu_capacity = models.IntegerField(
+        default=0,
+        editable=False,
+    )
+    mem_capacity = models.IntegerField(
+        default=0,
+        editable=False,
+    )

     class Meta:
         app_label = 'main'
@@ -68,6 +96,20 @@ class Instance(models.Model):
         return Instance.objects.filter(rampart_groups__controller__instances=self).exists()

+    def refresh_capacity(self):
+        cpu = get_cpu_capacity()
+        mem = get_mem_capacity()
+        self.capacity = get_system_task_capacity(self.capacity_adjustment)
+        self.cpu = cpu[0]
+        self.memory = mem[0]
+        self.cpu_capacity = cpu[1]
+        self.mem_capacity = mem[1]
+        self.version = awx_application_version
+        self.save(update_fields=['capacity', 'version', 'modified', 'cpu',
+                                 'memory', 'cpu_capacity', 'mem_capacity'])
+

 class InstanceGroup(models.Model):
     """A model representing a Queue/Group of AWX Instances."""
     objects = InstanceGroupManager()
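A minimal sketch of tuning a node with these new fields from a Django shell (e.g. `awx-manage shell`); the hostname is illustrative:

```
from decimal import Decimal
from awx.main.models import Instance

inst = Instance.objects.get(hostname='awx-node-1')   # illustrative hostname
inst.capacity_adjustment = Decimal('0.0')            # commit to the smaller of cpu_capacity / mem_capacity
inst.save()
inst.refresh_capacity()   # re-derives capacity, cpu, memory, cpu_capacity, mem_capacity, version
```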
@@ -1602,7 +1602,7 @@ class InventoryUpdate(UnifiedJob, InventorySourceOptions, JobNotificationMixin,

     @property
     def task_impact(self):
-        return 50
+        return 1

     # InventoryUpdate credential required
     # Custom and SCM InventoryUpdate credential not required
@@ -623,7 +623,7 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
             count_hosts = 1
         else:
             count_hosts = Host.objects.filter(inventory__jobs__pk=self.pk).count()
-        return min(count_hosts, 5 if self.forks == 0 else self.forks) * 10
+        return min(count_hosts, 5 if self.forks == 0 else self.forks) + 1

     @property
     def successful_hosts(self):
@@ -1190,7 +1190,7 @@ class SystemJob(UnifiedJob, SystemJobOptions, JobNotificationMixin):

     @property
     def task_impact(self):
-        return 150
+        return 5

     @property
     def preferred_instance_groups(self):
@@ -492,7 +492,7 @@ class ProjectUpdate(UnifiedJob, ProjectOptions, JobNotificationMixin, TaskManage

     @property
     def task_impact(self):
-        return 0 if self.job_type == 'run' else 20
+        return 0 if self.job_type == 'run' else 1

     @property
     def result_stdout(self):
@@ -2,7 +2,6 @@
 # All Rights Reserved.

 # Python
-import codecs
 from collections import OrderedDict, namedtuple
 import ConfigParser
 import cStringIO
@@ -54,9 +53,8 @@ from awx.main.queue import CallbackQueueDispatcher
 from awx.main.expect import run, isolated_manager
 from awx.main.utils import (get_ansible_version, get_ssh_version, decrypt_field, update_scm_url,
                             check_proot_installed, build_proot_temp_dir, get_licenser,
-                            wrap_args_with_proot, get_system_task_capacity, OutputEventFilter,
-                            ignore_inventory_computed_fields, ignore_inventory_group_removal,
-                            get_type_for_model, extract_ansible_vars)
+                            wrap_args_with_proot, OutputEventFilter, ignore_inventory_computed_fields,
+                            ignore_inventory_group_removal, get_type_for_model, extract_ansible_vars)
 from awx.main.utils.reload import restart_local_services, stop_local_services
 from awx.main.utils.pglock import advisory_lock
 from awx.main.utils.ha import update_celery_worker_routes, register_celery_worker_queues
@@ -307,6 +305,7 @@ def cluster_node_heartbeat(self):
     instance_list = list(Instance.objects.filter(rampart_groups__controller__isnull=True).distinct())
     this_inst = None
     lost_instances = []
+
     for inst in list(instance_list):
         if inst.hostname == settings.CLUSTER_HOST_ID:
             this_inst = inst
@@ -316,11 +315,15 @@ def cluster_node_heartbeat(self):
             instance_list.remove(inst)
     if this_inst:
         startup_event = this_inst.is_lost(ref_time=nowtime)
-        if this_inst.capacity == 0:
+        if this_inst.capacity == 0 and this_inst.enabled:
             logger.warning('Rejoining the cluster as instance {}.'.format(this_inst.hostname))
-        this_inst.capacity = get_system_task_capacity()
-        this_inst.version = awx_application_version
-        this_inst.save(update_fields=['capacity', 'version', 'modified'])
+        if this_inst.enabled:
+            this_inst.refresh_capacity()
+            handle_ha_toplogy_changes.apply_async()
+        elif this_inst.capacity != 0 and not this_inst.enabled:
+            this_inst.capacity = 0
+            this_inst.save(update_fields=['capacity'])
+            handle_ha_toplogy_changes.apply_async()
         if startup_event:
             return
     else:
@@ -329,7 +332,7 @@ def cluster_node_heartbeat(self):
     for other_inst in instance_list:
         if other_inst.version == "":
             continue
-        if Version(other_inst.version.split('-', 1)[0]) > Version(awx_application_version) and not settings.DEBUG:
+        if Version(other_inst.version.split('-', 1)[0]) > Version(awx_application_version.split('-', 1)[0]) and not settings.DEBUG:
             logger.error("Host {} reports version {}, but this node {} is at {}, shutting down".format(other_inst.hostname,
                                                                                                        other_inst.version,
                                                                                                        this_inst.hostname,
@@ -1,9 +1,11 @@
-from awx.main.models import Job, Instance
-from django.test.utils import override_settings
 import pytest
+
+import mock
+import json
+
+from awx.main.models import Job, Instance
+from awx.main.tasks import cluster_node_heartbeat
+from django.test.utils import override_settings


 @pytest.mark.django_db
 def test_orphan_unified_job_creation(instance, inventory):
@@ -17,13 +19,19 @@ def test_orphan_unified_job_creation(instance, inventory):


 @pytest.mark.django_db
+@mock.patch('awx.main.utils.common.get_cpu_capacity', lambda: (2,8))
+@mock.patch('awx.main.utils.common.get_mem_capacity', lambda: (8000,62))
+@mock.patch('awx.main.tasks.handle_ha_toplogy_changes.apply_async', lambda: True)
 def test_job_capacity_and_with_inactive_node():
-    Instance.objects.create(hostname='test-1', capacity=50)
-    assert Instance.objects.total_capacity() == 50
-    Instance.objects.create(hostname='test-2', capacity=50)
-    assert Instance.objects.total_capacity() == 100
-    with override_settings(AWX_ACTIVE_NODE_TIME=0):
-        assert Instance.objects.total_capacity() < 100
+    i = Instance.objects.create(hostname='test-1')
+    i.refresh_capacity()
+    assert i.capacity == 62
+    i.enabled = False
+    i.save()
+    with override_settings(CLUSTER_HOST_ID=i.hostname):
+        cluster_node_heartbeat()
+    i = Instance.objects.get(id=i.id)
+    assert i.capacity == 0


 @pytest.mark.django_db
@@ -60,6 +60,7 @@ class TestAddRemoveCeleryWorkerQueues():
                                             static_queues, _worker_queues,
                                             groups, hostname,
                                             added_expected, removed_expected):
+        added_expected.append('tower_instance_router')
         instance = instance_generator(groups=groups, hostname=hostname)
         worker_queues = worker_queues_generator(_worker_queues)
         with mock.patch('awx.main.utils.ha.settings.AWX_CELERY_QUEUES_STATIC', static_queues):
@@ -20,6 +20,8 @@ import six
 import psutil
 from StringIO import StringIO

+from decimal import Decimal
+
 # Decorator
 from decorator import decorator
@@ -45,7 +47,7 @@ __all__ = ['get_object_or_400', 'get_object_or_403', 'camelcase_to_underscore',
            'ignore_inventory_computed_fields', 'ignore_inventory_group_removal',
            '_inventory_updates', 'get_pk_from_dict', 'getattrd', 'NoDefaultProvided',
            'get_current_apps', 'set_current_apps', 'OutputEventFilter',
-           'extract_ansible_vars', 'get_search_fields', 'get_system_task_capacity',
+           'extract_ansible_vars', 'get_search_fields', 'get_system_task_capacity', 'get_cpu_capacity', 'get_mem_capacity',
            'wrap_args_with_proot', 'build_proot_temp_dir', 'check_proot_installed', 'model_to_dict',
            'model_instance_diff', 'timestamp_apiformat', 'parse_yaml_or_json', 'RequireDebugTrueOrTest',
            'has_model_field_prefetched', 'set_environ', 'IllegalArgumentError', 'get_custom_venv_choices']
@@ -632,19 +634,52 @@ def parse_yaml_or_json(vars_str, silent_failure=True):
     return vars_dict


-@memoize()
-def get_system_task_capacity():
+def get_cpu_capacity():
+    from django.conf import settings
+    settings_forkcpu = getattr(settings, 'SYSTEM_TASK_FORKS_CPU', None)
+    env_forkcpu = os.getenv('SYSTEM_TASK_FORKS_CPU', None)
+    cpu = psutil.cpu_count()
+
+    if env_forkcpu:
+        forkcpu = int(env_forkcpu)
+    elif settings_forkcpu:
+        forkcpu = int(settings_forkcpu)
+    else:
+        forkcpu = 4
+    return (cpu, cpu * forkcpu)
+
+
+def get_mem_capacity():
+    from django.conf import settings
+    settings_forkmem = getattr(settings, 'SYSTEM_TASK_FORKS_MEM', None)
+    env_forkmem = os.getenv('SYSTEM_TASK_FORKS_MEM', None)
+    if env_forkmem:
+        forkmem = int(env_forkmem)
+    elif settings_forkmem:
+        forkmem = int(settings_forkmem)
+    else:
+        forkmem = 100
+
+    mem = psutil.virtual_memory().total
+    return (mem, max(1, ((mem / 1024 / 1024) - 2048) / forkmem))
+
+
+def get_system_task_capacity(scale=Decimal(1.0)):
     '''
     Measure system memory and use it as a baseline for determining the system's capacity
     '''
     from django.conf import settings
     if hasattr(settings, 'SYSTEM_TASK_CAPACITY'):
         return settings.SYSTEM_TASK_CAPACITY
-    mem = psutil.virtual_memory()
-    total_mem_value = mem.total / 1024 / 1024
-    if total_mem_value <= 2048:
-        return 50
-    return 50 + ((total_mem_value / 1024) - 2) * 75
+    settings_forks = getattr(settings, 'SYSTEM_TASK_FORKS_CAPACITY', None)
+    env_forks = os.getenv('SYSTEM_TASK_FORKS_CAPACITY', None)
+
+    if env_forks:
+        return int(env_forks)
+    elif settings_forks:
+        return int(settings_forks)
+
+    _, cpu_cap = get_cpu_capacity()
+    _, mem_cap = get_mem_capacity()
+    return min(mem_cap, cpu_cap) + ((max(mem_cap, cpu_cap) - min(mem_cap, cpu_cap)) * scale)


 _inventory_updates = threading.local()
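Plugging the mocked values from the updated unit test earlier in this diff into the new formula shows where its expected capacity of 62 comes from:

```
# get_cpu_capacity() mocked to (2, 8); get_mem_capacity() mocked to (8000, 62)
cpu_cap, mem_cap = 8, 62
scale = 1.0  # the default capacity_adjustment
capacity = min(mem_cap, cpu_cap) + (max(mem_cap, cpu_cap) - min(mem_cap, cpu_cap)) * scale
assert capacity == 62   # 8 + (62 - 8) * 1.0; a scale of 0.0 would give 8
```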
@@ -24,7 +24,7 @@ def _add_remove_celery_worker_queues(app, instance, worker_queues, worker_name):
                 queue['alias'] in settings.AWX_CELERY_QUEUES_STATIC:
             continue

-        if queue['name'] not in ig_names | set([instance.hostname]):
+        if queue['name'] not in ig_names | set([instance.hostname]) or not instance.enabled:
             app.control.cancel_consumer(queue['name'], reply=True, destination=[worker_name])
             removed_queues.append(queue['name'])
@@ -43,7 +43,6 @@ def update_celery_worker_routes(instance, conf):
         'awx.main.tasks.purge_old_stdout_files',
     ]
     routes_updated = {}
-
     # Instance is, effectively, a controller node
     if instance.is_controller():
         tasks.append('awx.main.tasks.awx_isolated_heartbeat')
@@ -639,9 +639,6 @@ AWX_PROOT_BASE_PATH = "/tmp"
 # Note: This setting may be overridden by database settings.
 AWX_ANSIBLE_CALLBACK_PLUGINS = ""

-# Time at which an HA node is considered active
-AWX_ACTIVE_NODE_TIME = 7200
-
 # Automatically remove nodes that have missed their heartbeats after some time
 AWX_AUTO_DEPROVISION_INSTANCES = False
@@ -28,6 +28,8 @@ It's important to point out a few existing things:
 * Existing old-style HA deployments will be transitioned automatically to the new HA system during the upgrade process to 3.1.
 * Manual projects will need to be synced to all instances by the customer

+Ansible Tower 3.3 adds support for container-based clusters using OpenShift or Kubernetes
+
 ## Important Changes

 * There is no concept of primary/secondary in the new Tower system. *All* systems are primary.
@@ -226,6 +228,47 @@ show up in api endpoints and stats monitoring. These groups can be removed with
 $ awx-manage unregister_queue --queuename=<name>
 ```

+### Configuring Instances and Instance Groups from the API
+
+Instance Groups can be created by posting to `/api/v2/instance_groups` as a System Admin.
+
+Once created, `Instances` can be associated with an Instance Group with:
+
+```
+HTTP POST /api/v2/instance_groups/x/instances/ {'id': y}
+```
+
+An `Instance` that is added to an `InstanceGroup` will automatically reconfigure itself to listen on the group's work queue. See the following
+section `Instance Group Policies` for more details.
+
+### Instance Group Policies
+
+Tower `Instances` can be configured to automatically join `Instance Groups` when they come online by defining a policy. These policies are evaluated for
+every new Instance that comes online.
+
+Instance Group Policies are controlled by 3 optional fields on an `Instance Group`:
+
+* `policy_instance_percentage`: This is a number between 0 - 100. It guarantees that this percentage of active Tower instances will be added
+  to this `Instance Group`. As new instances come online, if the number of Instances in this group relative to the total number of instances
+  is less than the given percentage, new ones will be added until the percentage condition is satisfied.
+* `policy_instance_minimum`: This policy attempts to keep at least this many `Instances` in the `Instance Group`. If the number of
+  available instances is lower than this minimum, all `Instances` will be placed in this `Instance Group`.
+* `policy_instance_list`: This is a fixed list of `Instance` names. These `Instances` will *always* be added to this `Instance Group`.
+  Further, by adding Instances to this list you are declaring that you will manually manage those Instances, and they will not be eligible under any other
+  policy. This means they will not be automatically added to any other `Instance Group` even if a policy would otherwise match them.
+
+> NOTES
+
+* `Instances` that are assigned directly to `Instance Groups` by posting to `/api/v2/instance_groups/x/instances` or
+  `/api/v2/instances/x/instance_groups` are automatically added to the `policy_instance_list`. This means they are subject to the
+  normal caveats for `policy_instance_list` and must be manually managed.
+* `policy_instance_percentage` and `policy_instance_minimum` work together. For example, if you have a `policy_instance_percentage` of
+  50% and a `policy_instance_minimum` of 2 and you start 6 `Instances`, 3 of them would be assigned to the `Instance Group`. If you reduce the number
+  of `Instances` to 2, both of them would be assigned to the `Instance Group` to satisfy `policy_instance_minimum`. In this way, you can set a lower
+  bound on the amount of available resources.
+* Policies don't actively prevent `Instances` from being associated with multiple `Instance Groups`, but this can effectively be achieved by making the percentages
+  sum to 100. If you have 4 `Instance Groups`, assign each a percentage value of 25 and the `Instances` will be distributed among them with no overlap.
+
 ### Status and Monitoring

 Tower itself reports as much status as it can via the api at `/api/v2/ping` in order to provide validation of the health
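As a hypothetical end-to-end illustration of the policy fields documented above (the group name and numbers are invented; the endpoint is the one documented in this diff):

```
HTTP POST /api/v2/instance_groups/ {'name': 'us-east', 'policy_instance_percentage': 50, 'policy_instance_minimum': 2}
```

With 6 instances online this group would claim 3 of them; if the cluster shrinks to 2 instances, the minimum takes over and both are assigned.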