Merge pull request #9957 from jbradberry/isolated-removal

Isolated removal

SUMMARY
Removal of the isolated nodes feature.
ISSUE TYPE

Feature Pull Request

COMPONENT NAME

API

AWX VERSION

Reviewed-by: Alan Rominger <arominge@redhat.com>
Reviewed-by: Jeff Bradberry <None>
Reviewed-by: Elyézer Rezende <None>
Reviewed-by: Bianca Henderson <beeankha@gmail.com>
softwarefactory-project-zuul[bot] 2021-04-29 19:15:43 +00:00 committed by GitHub
commit 6bea5dd294
50 changed files with 113 additions and 1740 deletions

View File

@@ -174,12 +174,7 @@ init:
. $(VENV_BASE)/awx/bin/activate; \
fi; \
$(MANAGEMENT_COMMAND) provision_instance --hostname=$(COMPOSE_HOST); \
$(MANAGEMENT_COMMAND) register_queue --queuename=tower --instance_percent=100;\
if [ "$(AWX_GROUP_QUEUES)" == "tower,thepentagon" ]; then \
$(MANAGEMENT_COMMAND) provision_instance --hostname=isolated; \
$(MANAGEMENT_COMMAND) register_queue --queuename='thepentagon' --hostnames=isolated --controller=tower; \
$(MANAGEMENT_COMMAND) generate_isolated_key > /awx_devel/awx/main/isolated/authorized_keys; \
fi;
$(MANAGEMENT_COMMAND) register_queue --queuename=tower --instance_percent=100;
# Refresh development environment after pulling new code.
refresh: clean requirements_dev version_file develop migrate
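Note: the development init target now provisions a single traditional instance and registers the tower queue; the AWX_GROUP_QUEUES branch that provisioned an isolated node and generated its SSH key is gone. A minimal sketch of the two surviving steps as Django management calls (the hostname is illustrative; deployments normally run these through awx-manage):

    from django.core.management import call_command

    # mirrors the simplified Makefile target above: one instance, one queue
    call_command('provision_instance', hostname='awx')
    call_command('register_queue', queuename='tower', instance_percent=100)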

View File

@@ -170,7 +170,7 @@ SUMMARIZABLE_FK_FIELDS = {
'inventory_source': ('id', 'name', 'source', 'last_updated', 'status'),
'role': ('id', 'role_field'),
'notification_template': DEFAULT_SUMMARY_FIELDS,
'instance_group': ('id', 'name', 'controller_id', 'is_container_group'),
'instance_group': ('id', 'name', 'is_container_group'),
'insights_credential': DEFAULT_SUMMARY_FIELDS,
'source_credential': DEFAULT_SUMMARY_FIELDS + ('kind', 'cloud', 'credential_type_id'),
'target_credential': DEFAULT_SUMMARY_FIELDS + ('kind', 'cloud', 'credential_type_id'),
@@ -4818,10 +4818,6 @@ class InstanceGroupSerializer(BaseSerializer):
)
jobs_total = serializers.IntegerField(help_text=_('Count of all jobs that target this instance group'), read_only=True)
instances = serializers.SerializerMethodField()
is_controller = serializers.BooleanField(help_text=_('Indicates whether instance group controls any other group'), read_only=True)
is_isolated = serializers.BooleanField(
help_text=_('Indicates whether instances in this group are isolated.' 'Isolated groups have a designated controller group.'), read_only=True
)
is_container_group = serializers.BooleanField(
required=False,
help_text=_('Indicates whether instances in this group are containerized.' 'Containerized groups have a designated Openshift or Kubernetes cluster.'),
@@ -4869,9 +4865,6 @@ class InstanceGroupSerializer(BaseSerializer):
"jobs_running",
"jobs_total",
"instances",
"controller",
"is_controller",
"is_isolated",
"is_container_group",
"credential",
"policy_instance_percentage",
@@ -4885,8 +4878,6 @@ class InstanceGroupSerializer(BaseSerializer):
res = super(InstanceGroupSerializer, self).get_related(obj)
res['jobs'] = self.reverse('api:instance_group_unified_jobs_list', kwargs={'pk': obj.pk})
res['instances'] = self.reverse('api:instance_group_instance_list', kwargs={'pk': obj.pk})
if obj.controller_id:
res['controller'] = self.reverse('api:instance_group_detail', kwargs={'pk': obj.controller_id})
if obj.credential:
res['credential'] = self.reverse('api:credential_detail', kwargs={'pk': obj.credential_id})
@@ -4898,10 +4889,6 @@ class InstanceGroupSerializer(BaseSerializer):
raise serializers.ValidationError(_('Duplicate entry {}.').format(instance_name))
if not Instance.objects.filter(hostname=instance_name).exists():
raise serializers.ValidationError(_('{} is not a valid hostname of an existing instance.').format(instance_name))
if Instance.objects.get(hostname=instance_name).is_isolated():
raise serializers.ValidationError(_('Isolated instances may not be added or removed from instances groups via the API.'))
if self.instance and self.instance.controller_id is not None:
raise serializers.ValidationError(_('Isolated instance group membership may not be managed via the API.'))
if value and self.instance and self.instance.is_container_group:
raise serializers.ValidationError(_('Containerized instances may not be managed via the API'))
return value
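Note: for API consumers, this drops the controller related link and the read-only is_controller/is_isolated flags from the instance group resource. An illustrative fragment of the remaining shape, using the field names from the serializer diff above (the values are made up):

    instance_group = {
        'id': 1,
        'name': 'tower',
        'jobs_running': 0,
        'jobs_total': 7,
        'instances': 1,
        'is_container_group': False,
        'credential': None,
        'policy_instance_percentage': 100,
    }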

View File

@@ -418,14 +418,6 @@ class InstanceGroupDetail(RelatedJobsPreventDeleteMixin, RetrieveUpdateDestroyAP
data.pop('policy_instance_list', None)
return super(InstanceGroupDetail, self).update_raw_data(data)
def destroy(self, request, *args, **kwargs):
instance = self.get_object()
if instance.controller is not None:
raise PermissionDenied(detail=_("Isolated Groups can not be removed from the API"))
if instance.controlled_groups.count():
raise PermissionDenied(detail=_("Instance Groups acting as a controller for an Isolated Group can not be removed from the API"))
return super(InstanceGroupDetail, self).destroy(request, *args, **kwargs)
class InstanceGroupUnifiedJobsList(SubListAPIView):

View File

@@ -85,15 +85,6 @@ class InstanceGroupMembershipMixin(object):
ig_obj.save(update_fields=['policy_instance_list'])
return response
def is_valid_relation(self, parent, sub, created=False):
if sub.is_isolated():
return {'error': _('Isolated instances may not be added or removed from instances groups via the API.')}
if self.parent_model is InstanceGroup:
ig_obj = self.get_parent_object()
if ig_obj.controller_id is not None:
return {'error': _('Isolated instance group membership may not be managed via the API.')}
return None
def unattach_validate(self, request):
(sub_id, res) = super(InstanceGroupMembershipMixin, self).unattach_validate(request)
if res:

View File

@@ -92,11 +92,7 @@ class SettingsRegistry(object):
continue
if kwargs.get('category_slug', None) in slugs_to_ignore:
continue
if (
read_only in {True, False}
and kwargs.get('read_only', False) != read_only
and setting not in ('INSTALL_UUID', 'AWX_ISOLATED_PRIVATE_KEY', 'AWX_ISOLATED_PUBLIC_KEY')
):
if read_only in {True, False} and kwargs.get('read_only', False) != read_only and setting != 'INSTALL_UUID':
# Note: Doesn't catch fields that set read_only via __init__;
# read-only field kwargs should always include read_only=True.
continue

View File

@@ -81,10 +81,8 @@ class SettingSingletonSerializer(serializers.Serializer):
if self.instance and not hasattr(self.instance, key):
continue
extra_kwargs = {}
# Make LICENSE and AWX_ISOLATED_KEY_GENERATION read-only here;
# LICENSE is only updated via /api/v2/config/
# AWX_ISOLATED_KEY_GENERATION is only set/unset via the setup playbook
if key in ('LICENSE', 'AWX_ISOLATED_KEY_GENERATION'):
# Make LICENSE read-only here; LICENSE is only updated via /api/v2/config/
if key == 'LICENSE':
extra_kwargs['read_only'] = True
field = settings_registry.get_setting_field(key, mixin_class=SettingFieldMixin, for_user=bool(category_slug == 'user'), **extra_kwargs)
fields[key] = field

View File

@@ -350,13 +350,8 @@ class SettingsWrapper(UserSettingsHolder):
if value is empty:
setting = None
setting_id = None
if not field.read_only or name in (
# these values are read-only - however - we *do* want
# to fetch their value from the database
'INSTALL_UUID',
'AWX_ISOLATED_PRIVATE_KEY',
'AWX_ISOLATED_PUBLIC_KEY',
):
# this value is read-only, however we *do* want to fetch its value from the database
if not field.read_only or name == 'INSTALL_UUID':
setting = Setting.objects.filter(key=name, user__isnull=True).order_by('pk').first()
if setting:
if getattr(field, 'encrypted', False):

View File

@@ -219,17 +219,11 @@ def projects_by_scm_type(since, **kwargs):
return counts
def _get_isolated_datetime(last_check):
if last_check:
return last_check.isoformat()
return last_check
@register('instance_info', '1.0', description=_('Cluster topology and capacity'))
@register('instance_info', '1.1', description=_('Cluster topology and capacity'))
def instance_info(since, include_hostnames=False, **kwargs):
info = {}
instances = models.Instance.objects.values_list('hostname').values(
'uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'hostname', 'last_isolated_check', 'enabled'
'uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'hostname', 'enabled'
)
for instance in instances:
consumed_capacity = sum(x.task_impact for x in models.UnifiedJob.objects.filter(execution_node=instance['hostname'], status__in=('running', 'waiting')))
@@ -240,7 +234,6 @@ def instance_info(since, include_hostnames=False, **kwargs):
'cpu': instance['cpu'],
'memory': instance['memory'],
'managed_by_policy': instance['managed_by_policy'],
'last_isolated_check': _get_isolated_datetime(instance['last_isolated_check']),
'enabled': instance['enabled'],
'consumed_capacity': consumed_capacity,
'remaining_capacity': instance['capacity'] - consumed_capacity,
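Note: the instance_info collector version is bumped from '1.0' to '1.1' because its per-instance payload loses last_isolated_check. A hypothetical consumer-side sketch for tolerating both payload versions (the helper name is invented):

    def normalize_instance_entry(entry):
        # 'last_isolated_check' appears only in version 1.0 payloads
        entry.pop('last_isolated_check', None)
        return entry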

View File

@@ -184,7 +184,6 @@ def metrics():
INSTANCE_INFO.labels(hostname=hostname, instance_uuid=uuid).info(
{
'enabled': str(instance_data[uuid]['enabled']),
'last_isolated_check': getattr(instance_data[uuid], 'last_isolated_check', 'None'),
'managed_by_policy': str(instance_data[uuid]['managed_by_policy']),
'version': instance_data[uuid]['version'],
}

View File

@@ -250,95 +250,6 @@ register(
category_slug='jobs',
)
register(
'AWX_ISOLATED_CHECK_INTERVAL',
field_class=fields.IntegerField,
min_value=0,
label=_('Isolated status check interval'),
help_text=_('The number of seconds to sleep between status checks for jobs running on isolated instances.'),
category=_('Jobs'),
category_slug='jobs',
unit=_('seconds'),
)
register(
'AWX_ISOLATED_LAUNCH_TIMEOUT',
field_class=fields.IntegerField,
min_value=0,
label=_('Isolated launch timeout'),
help_text=_(
'The timeout (in seconds) for launching jobs on isolated instances. '
'This includes the time needed to copy source control files (playbooks) to the isolated instance.'
),
category=_('Jobs'),
category_slug='jobs',
unit=_('seconds'),
)
register(
'AWX_ISOLATED_CONNECTION_TIMEOUT',
field_class=fields.IntegerField,
min_value=0,
default=10,
label=_('Isolated connection timeout'),
help_text=_(
'Ansible SSH connection timeout (in seconds) to use when communicating with isolated instances. '
'Value should be substantially greater than expected network latency.'
),
category=_('Jobs'),
category_slug='jobs',
unit=_('seconds'),
)
register(
'AWX_ISOLATED_HOST_KEY_CHECKING',
field_class=fields.BooleanField,
label=_('Isolated host key checking'),
help_text=_('When set to True, AWX will enforce strict host key checking for communication with isolated nodes.'),
category=_('Jobs'),
category_slug='jobs',
default=False,
)
register(
'AWX_ISOLATED_KEY_GENERATION',
field_class=fields.BooleanField,
default=True,
label=_('Generate RSA keys for isolated instances'),
help_text=_(
'If set, a random RSA key will be generated and distributed to '
'isolated instances. To disable this behavior and manage authentication '
'for isolated instances outside of Tower, disable this setting.'
), # noqa
category=_('Jobs'),
category_slug='jobs',
)
register(
'AWX_ISOLATED_PRIVATE_KEY',
field_class=fields.CharField,
default='',
allow_blank=True,
encrypted=True,
read_only=True,
label=_('The RSA private key for SSH traffic to isolated instances'),
help_text=_('The RSA private key for SSH traffic to isolated instances'), # noqa
category=_('Jobs'),
category_slug='jobs',
)
register(
'AWX_ISOLATED_PUBLIC_KEY',
field_class=fields.CharField,
default='',
allow_blank=True,
read_only=True,
label=_('The RSA public key for SSH traffic to isolated instances'),
help_text=_('The RSA public key for SSH traffic to isolated instances'), # noqa
category=_('Jobs'),
category_slug='jobs',
)
register(
'AWX_TASK_ENV',
field_class=fields.KeyValueField,
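Note: seven job-category settings are unregistered here, so any values for them still present in a settings file or in the database become inert. The removed names, collected for auditing old configurations:

    REMOVED_ISOLATED_SETTINGS = (
        'AWX_ISOLATED_CHECK_INTERVAL',
        'AWX_ISOLATED_LAUNCH_TIMEOUT',
        'AWX_ISOLATED_CONNECTION_TIMEOUT',
        'AWX_ISOLATED_HOST_KEY_CHECKING',
        'AWX_ISOLATED_KEY_GENERATION',
        'AWX_ISOLATED_PRIVATE_KEY',
        'AWX_ISOLATED_PUBLIC_KEY',
    )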

View File

@@ -1 +0,0 @@
authorized_keys

View File

@@ -1,365 +0,0 @@
import fnmatch
import json
import os
import shutil
import stat
import tempfile
import time
import logging
import datetime
from django.conf import settings
import ansible_runner
import awx
from awx.main.utils import get_system_task_capacity
logger = logging.getLogger('awx.isolated.manager')
playbook_logger = logging.getLogger('awx.isolated.manager.playbooks')
def set_pythonpath(venv_libdir, env):
env.pop('PYTHONPATH', None) # default to none if no python_ver matches
for version in os.listdir(venv_libdir):
if fnmatch.fnmatch(version, 'python[23].*'):
if os.path.isdir(os.path.join(venv_libdir, version)):
env['PYTHONPATH'] = os.path.join(venv_libdir, version, "site-packages") + ":"
break
class IsolatedManager(object):
def __init__(self, event_handler, canceled_callback=None, check_callback=None):
"""
:param event_handler: a callable used to persist event data from isolated nodes
:param canceled_callback: a callable - which returns `True` or `False`
- signifying if the job has been prematurely
canceled
"""
self.event_handler = event_handler
self.canceled_callback = canceled_callback
self.check_callback = check_callback
self.started_at = None
self.captured_command_artifact = False
self.instance = None
def build_inventory(self, hosts):
inventory = '\n'.join(['{} ansible_ssh_user={}'.format(host, settings.AWX_ISOLATED_USERNAME) for host in hosts])
return inventory
def build_runner_params(self, hosts, verbosity=1):
env = dict(os.environ.items())
env['ANSIBLE_RETRY_FILES_ENABLED'] = 'False'
env['ANSIBLE_HOST_KEY_CHECKING'] = str(settings.AWX_ISOLATED_HOST_KEY_CHECKING)
env['ANSIBLE_LIBRARY'] = os.path.join(os.path.dirname(awx.__file__), 'plugins', 'isolated')
env['ANSIBLE_COLLECTIONS_PATHS'] = settings.AWX_ANSIBLE_COLLECTIONS_PATHS
set_pythonpath(os.path.join(settings.ANSIBLE_VENV_PATH, 'lib'), env)
def finished_callback(runner_obj):
if runner_obj.status == 'failed' and runner_obj.config.playbook != 'check_isolated.yml':
# failed for clean_isolated.yml just means the playbook hasn't
# exited on the isolated host
stdout = runner_obj.stdout.read()
playbook_logger.error(stdout)
elif runner_obj.status == 'timeout':
# this means that the default idle timeout of
# (2 * AWX_ISOLATED_CONNECTION_TIMEOUT) was exceeded
# (meaning, we tried to sync with an isolated node, and we got
# no new output for 2 * AWX_ISOLATED_CONNECTION_TIMEOUT seconds)
# this _usually_ means SSH key auth from the controller ->
# isolated didn't work, and ssh is hung waiting on interactive
# input e.g.,
#
# awx@isolated's password:
stdout = runner_obj.stdout.read()
playbook_logger.error(stdout)
else:
playbook_logger.info(runner_obj.stdout.read())
return {
'project_dir': os.path.abspath(os.path.join(os.path.dirname(awx.__file__), 'playbooks')),
'inventory': self.build_inventory(hosts),
'envvars': env,
'finished_callback': finished_callback,
'verbosity': verbosity,
'cancel_callback': self.canceled_callback,
'settings': {
'job_timeout': settings.AWX_ISOLATED_LAUNCH_TIMEOUT,
'suppress_ansible_output': True,
},
}
def path_to(self, *args):
return os.path.join(self.private_data_dir, *args)
def run_management_playbook(self, playbook, private_data_dir, idle_timeout=None, **kw):
iso_dir = tempfile.mkdtemp(prefix=playbook, dir=private_data_dir)
params = self.runner_params.copy()
params.get('envvars', dict())['ANSIBLE_CALLBACK_WHITELIST'] = 'profile_tasks'
params['playbook'] = playbook
params['private_data_dir'] = iso_dir
if idle_timeout:
params['settings']['idle_timeout'] = idle_timeout
else:
params['settings'].pop('idle_timeout', None)
params.update(**kw)
if all([getattr(settings, 'AWX_ISOLATED_KEY_GENERATION', False) is True, getattr(settings, 'AWX_ISOLATED_PRIVATE_KEY', None)]):
params['ssh_key'] = settings.AWX_ISOLATED_PRIVATE_KEY
return ansible_runner.interface.run(**params)
def dispatch(self, playbook=None, module=None, module_args=None):
"""
Ship the runner payload to a remote host for isolated execution.
"""
self.handled_events = set()
self.started_at = time.time()
# exclude certain files from the rsync
rsync_exclude = [
# don't rsync source control metadata (it can be huge!)
'- /project/.git',
'- /project/.svn',
# don't rsync job events that are in the process of being written
'- /artifacts/job_events/*-partial.json.tmp',
# don't rsync the ssh_key FIFO
'- /env/ssh_key',
# don't rsync kube config files
'- .kubeconfig*',
]
for filename, data in (['.rsync-filter', '\n'.join(rsync_exclude)],):
path = self.path_to(filename)
with open(path, 'w') as f:
f.write(data)
os.chmod(path, stat.S_IRUSR)
extravars = {
'src': self.private_data_dir,
'dest': settings.AWX_ISOLATION_BASE_PATH,
'ident': self.ident,
'job_id': self.instance.id,
}
if playbook:
extravars['playbook'] = playbook
if module and module_args:
extravars['module'] = module
extravars['module_args'] = module_args
logger.debug('Starting job {} on isolated host with `run_isolated.yml` playbook.'.format(self.instance.id))
runner_obj = self.run_management_playbook(
'run_isolated.yml', self.private_data_dir, idle_timeout=max(60, 2 * settings.AWX_ISOLATED_CONNECTION_TIMEOUT), extravars=extravars
)
if runner_obj.status == 'failed':
self.instance.result_traceback = runner_obj.stdout.read()
self.instance.save(update_fields=['result_traceback'])
return 'error', runner_obj.rc
return runner_obj.status, runner_obj.rc
def check(self, interval=None):
"""
Repeatedly poll the isolated node to determine if the job has run.
On success, copy job artifacts to the controlling node.
On failure, continue to poll the isolated node (until the job timeout
is exceeded).
For a completed job run, this function returns (status, rc),
representing the status and return code of the isolated
`ansible-playbook` run.
:param interval: an interval (in seconds) to wait between status polls
"""
interval = interval if interval is not None else settings.AWX_ISOLATED_CHECK_INTERVAL
extravars = {'src': self.private_data_dir, 'job_id': self.instance.id}
status = 'failed'
rc = None
last_check = time.time()
while status == 'failed':
canceled = self.canceled_callback() if self.canceled_callback else False
if not canceled and time.time() - last_check < interval:
# If the job isn't canceled, but we haven't waited `interval` seconds, wait longer
time.sleep(1)
continue
if canceled:
logger.warning('Isolated job {} was manually canceled.'.format(self.instance.id))
logger.debug('Checking on isolated job {} with `check_isolated.yml`.'.format(self.instance.id))
time_start = datetime.datetime.now()
runner_obj = self.run_management_playbook('check_isolated.yml', self.private_data_dir, extravars=extravars)
time_end = datetime.datetime.now()
time_diff = time_end - time_start
logger.debug('Finished checking on isolated job {} with `check_isolated.yml` took {} seconds.'.format(self.instance.id, time_diff.total_seconds()))
status, rc = runner_obj.status, runner_obj.rc
if self.check_callback is not None and not self.captured_command_artifact:
command_path = self.path_to('artifacts', self.ident, 'command')
# If the configuration artifact has been synced back, update the model
if os.path.exists(command_path):
try:
with open(command_path, 'r') as f:
data = json.load(f)
self.check_callback(data)
self.captured_command_artifact = True
except json.decoder.JSONDecodeError: # Just in case it's not fully here yet.
pass
self.consume_events()
last_check = time.time()
if status == 'successful':
status_path = self.path_to('artifacts', self.ident, 'status')
rc_path = self.path_to('artifacts', self.ident, 'rc')
if os.path.exists(status_path):
with open(status_path, 'r') as f:
status = f.readline()
with open(rc_path, 'r') as f:
rc = int(f.readline())
else:
# if there's no status file, it means that runner _probably_
# exited with a traceback (which should be logged to
# daemon.log) Record it so we can see how runner failed.
daemon_path = self.path_to('daemon.log')
if os.path.exists(daemon_path):
with open(daemon_path, 'r') as f:
self.instance.result_traceback = f.read()
self.instance.save(update_fields=['result_traceback'])
else:
logger.error('Failed to rsync daemon.log (is ansible-runner installed on the isolated host?)')
status = 'failed'
rc = 1
# consume events one last time just to be sure we didn't miss anything
# in the final sync
self.consume_events()
return status, rc
def consume_events(self):
# discover new events and ingest them
events_path = self.path_to('artifacts', self.ident, 'job_events')
# it's possible that `events_path` doesn't exist *yet*, because runner
# hasn't actually written any events yet (if you ran e.g., a sleep 30)
# only attempt to consume events if any were rsynced back
if os.path.exists(events_path):
for event in set(os.listdir(events_path)) - self.handled_events:
path = os.path.join(events_path, event)
if os.path.exists(path) and os.path.isfile(path):
try:
event_data = json.load(open(os.path.join(events_path, event), 'r'))
except json.decoder.JSONDecodeError:
# This means the event we got back isn't valid JSON
# that can happen if runner is still partially
# writing an event file while it's rsyncing
# these event writes are _supposed_ to be atomic
# but it doesn't look like they actually are in
# practice
# in this scenario, just ignore this event and try it
# again on the next sync
continue
self.event_handler(event_data)
self.handled_events.add(event)
def cleanup(self):
extravars = {
'private_data_dir': self.private_data_dir,
'cleanup_dirs': [
self.private_data_dir,
],
}
logger.debug('Cleaning up job {} on isolated host with `clean_isolated.yml` playbook.'.format(self.instance.id))
self.run_management_playbook('clean_isolated.yml', self.private_data_dir, extravars=extravars)
@classmethod
def update_capacity(cls, instance, task_result):
instance.version = 'ansible-runner-{}'.format(task_result['version'])
if instance.capacity == 0 and task_result['capacity_cpu']:
logger.warning('Isolated instance {} has re-joined.'.format(instance.hostname))
instance.cpu = int(task_result['cpu'])
instance.memory = int(task_result['mem'])
instance.cpu_capacity = int(task_result['capacity_cpu'])
instance.mem_capacity = int(task_result['capacity_mem'])
instance.capacity = get_system_task_capacity(
scale=instance.capacity_adjustment, cpu_capacity=int(task_result['capacity_cpu']), mem_capacity=int(task_result['capacity_mem'])
)
instance.save(update_fields=['cpu', 'memory', 'cpu_capacity', 'mem_capacity', 'capacity', 'version', 'modified'])
def health_check(self, instance_qs):
"""
:param instance_qs: List of Django objects representing the
isolated instances to manage
Runs playbook that will
- determine if instance is reachable
- find the instance capacity
- clean up orphaned private files
Performs save on each instance to update its capacity.
"""
instance_qs = [i for i in instance_qs if i.enabled]
if not len(instance_qs):
return
try:
private_data_dir = tempfile.mkdtemp(prefix='awx_iso_heartbeat_', dir=settings.AWX_ISOLATION_BASE_PATH)
self.runner_params = self.build_runner_params([instance.hostname for instance in instance_qs])
self.runner_params['private_data_dir'] = private_data_dir
self.runner_params['forks'] = len(instance_qs)
runner_obj = self.run_management_playbook('heartbeat_isolated.yml', private_data_dir)
for instance in instance_qs:
task_result = {}
try:
task_result = runner_obj.get_fact_cache(instance.hostname)
except Exception:
logger.exception('Failed to read status from isolated instances')
if 'awx_capacity_cpu' in task_result and 'awx_capacity_mem' in task_result:
task_result = {
'cpu': task_result['awx_cpu'],
'mem': task_result['awx_mem'],
'capacity_cpu': task_result['awx_capacity_cpu'],
'capacity_mem': task_result['awx_capacity_mem'],
'version': task_result['awx_capacity_version'],
}
IsolatedManager.update_capacity(instance, task_result)
logger.debug('Isolated instance {} successful heartbeat'.format(instance.hostname))
elif instance.capacity == 0:
logger.debug('Isolated instance {} previously marked as lost, could not re-join.'.format(instance.hostname))
else:
logger.warning('Could not update status of isolated instance {}'.format(instance.hostname))
if instance.is_lost(isolated=True):
instance.capacity = 0
instance.save(update_fields=['capacity'])
logger.error('Isolated instance {} last checked in at {}, marked as lost.'.format(instance.hostname, instance.modified))
finally:
if os.path.exists(private_data_dir):
shutil.rmtree(private_data_dir)
def run(self, instance, private_data_dir, playbook, module, module_args, ident=None):
"""
Run a job on an isolated host.
:param instance: a `model.Job` instance
:param private_data_dir: an absolute path on the local file system
where job-specific data should be written
(i.e., `/tmp/awx_N_xyz/`)
:param playbook: the playbook to run
:param module: the module to run
:param module_args: the module args to use
For a completed job run, this function returns (status, rc),
representing the status and return code of the isolated
`ansible-playbook` run.
"""
self.ident = ident
self.instance = instance
self.private_data_dir = private_data_dir
self.runner_params = self.build_runner_params([instance.execution_node], verbosity=min(5, self.instance.verbosity))
status, rc = self.dispatch(playbook, module, module_args)
if status == 'successful':
status, rc = self.check()
return status, rc
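Note: this deletes awx/main/isolated/manager.py outright. For readers skimming the removal, the manager's lifecycle condensed from its own docstrings above (variable names illustrative; none of this exists after the PR):

    mgr = IsolatedManager(event_handler, canceled_callback=canceled_cb)
    # dispatch() rsyncs the private data dir and launches run_isolated.yml;
    # check() polls check_isolated.yml every AWX_ISOLATED_CHECK_INTERVAL seconds;
    # cleanup() runs clean_isolated.yml on the isolated host afterwards.
    status, rc = mgr.run(job, private_data_dir, playbook, module, module_args, ident=ident)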

View File

@@ -1,38 +0,0 @@
# Copyright (c) 2015 Ansible, Inc.
# All Rights Reserved
import datetime
from django.utils.encoding import smart_str
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import rsa
from django.conf import settings
from django.core.management.base import BaseCommand
from awx.conf.models import Setting
class Command(BaseCommand):
"""Generate and store a randomized RSA key for SSH traffic to isolated instances"""
help = 'Generates and stores a randomized RSA key for SSH traffic to isolated instances'
def handle(self, *args, **kwargs):
if getattr(settings, 'AWX_ISOLATED_PRIVATE_KEY', False):
print(settings.AWX_ISOLATED_PUBLIC_KEY)
return
key = rsa.generate_private_key(public_exponent=65537, key_size=4096, backend=default_backend())
Setting.objects.create(
key='AWX_ISOLATED_PRIVATE_KEY',
value=key.private_bytes(
encoding=serialization.Encoding.PEM, format=serialization.PrivateFormat.TraditionalOpenSSL, encryption_algorithm=serialization.NoEncryption()
),
).save()
pemfile = Setting.objects.create(
key='AWX_ISOLATED_PUBLIC_KEY',
value=smart_str(key.public_key().public_bytes(encoding=serialization.Encoding.OpenSSH, format=serialization.PublicFormat.OpenSSH))
+ " generated-by-awx@%s" % datetime.datetime.utcnow().isoformat(),
)
pemfile.save()
print(pemfile.value)

View File

@@ -10,7 +10,6 @@ class Ungrouped(object):
name = 'ungrouped'
policy_instance_percentage = None
policy_instance_minimum = None
controller = None
@property
def instances(self):
@@ -18,7 +17,7 @@
@property
def capacity(self):
return sum([x.capacity for x in self.instances])
return sum(x.capacity for x in self.instances)
class Command(BaseCommand):
@@ -38,8 +37,6 @@ class Command(BaseCommand):
fmt += ' policy={0.policy_instance_percentage}%'
if instance_group.policy_instance_minimum:
fmt += ' policy>={0.policy_instance_minimum}'
if instance_group.controller:
fmt += ' controller={0.controller.name}'
print((fmt + ']').format(instance_group))
for x in instance_group.instances.all():
color = '\033[92m'
@@ -48,8 +45,6 @@ class Command(BaseCommand):
if x.enabled is False:
color = '\033[90m[DISABLED] '
fmt = '\t' + color + '{0.hostname} capacity={0.capacity} version={1}'
if x.last_isolated_check:
fmt += ' last_isolated_check="{0.last_isolated_check:%Y-%m-%d %H:%M:%S}"'
if x.capacity:
fmt += ' heartbeat="{0.modified:%Y-%m-%d %H:%M:%S}"'
print((fmt + '\033[0m').format(x, x.version or '?'))

View File

@@ -1,13 +1,11 @@
# Copyright (c) 2015 Ansible, Inc.
# All Rights Reserved
from uuid import uuid4
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from awx.main.models import Instance
from django.conf import settings
from django.db import transaction
from django.core.management.base import BaseCommand, CommandError
class Command(BaseCommand):
@@ -20,7 +18,6 @@ class Command(BaseCommand):
def add_arguments(self, parser):
parser.add_argument('--hostname', dest='hostname', type=str, help='Hostname used during provisioning')
parser.add_argument('--is-isolated', dest='is_isolated', action='store_true', help='Specify whether the instance is isolated')
def _register_hostname(self, hostname):
if not hostname:
@@ -36,10 +33,7 @@ class Command(BaseCommand):
def handle(self, **options):
if not options.get('hostname'):
raise CommandError("Specify `--hostname` to use this command.")
if options['is_isolated']:
self.uuid = str(uuid4())
else:
self.uuid = settings.SYSTEM_UUID
self.uuid = settings.SYSTEM_UUID
self.changed = False
self._register_hostname(options.get('hostname'))
if self.changed:

View File

@@ -17,10 +17,9 @@ class InstanceNotFound(Exception):
class RegisterQueue:
def __init__(self, queuename, controller, instance_percent, inst_min, hostname_list, is_container_group=None):
def __init__(self, queuename, instance_percent, inst_min, hostname_list, is_container_group=None):
self.instance_not_found_err = None
self.queuename = queuename
self.controller = controller
self.instance_percent = instance_percent
self.instance_min = inst_min
self.hostname_list = hostname_list
@@ -46,20 +45,6 @@ class RegisterQueue:
return (ig, created, changed)
def update_instance_group_controller(self, ig):
changed = False
control_ig = None
if self.controller:
control_ig = InstanceGroup.objects.filter(name=self.controller).first()
if control_ig and ig.controller_id != control_ig.pk:
ig.controller = control_ig
ig.save()
changed = True
return (control_ig, changed)
def add_instances_to_group(self, ig):
changed = False
@@ -88,26 +73,20 @@ class RegisterQueue:
with advisory_lock('cluster_policy_lock'):
with transaction.atomic():
changed2 = False
changed3 = False
(ig, created, changed1) = self.get_create_update_instance_group()
if created:
print("Creating instance group {}".format(ig.name))
elif not created:
print("Instance Group already registered {}".format(ig.name))
if self.controller:
(ig_ctrl, changed2) = self.update_instance_group_controller(ig)
if changed2:
print("Set controller group {} on {}.".format(self.controller, self.queuename))
try:
(instances, changed3) = self.add_instances_to_group(ig)
(instances, changed2) = self.add_instances_to_group(ig)
for i in instances:
print("Added instance {} to {}".format(i.hostname, ig.name))
except InstanceNotFound as e:
self.instance_not_found_err = e
if any([changed1, changed2, changed3]):
if changed1 or changed2:
print('(changed: True)')
@@ -117,7 +96,6 @@ class Command(BaseCommand):
parser.add_argument(
'--hostnames', dest='hostnames', type=str, help='Comma-Delimited Hosts to add to the Queue (will not remove already assigned instances)'
)
parser.add_argument('--controller', dest='controller', type=str, default='', help='The controlling group (makes this an isolated group)')
parser.add_argument(
'--instance_percent', dest='instance_percent', type=int, default=0, help='The percentage of active instances that will be assigned to this group'
),
@@ -133,14 +111,13 @@ class Command(BaseCommand):
queuename = options.get('queuename')
if not queuename:
raise CommandError("Specify `--queuename` to use this command.")
ctrl = options.get('controller')
inst_per = options.get('instance_percent')
instance_min = options.get('instance_minimum')
hostname_list = []
if options.get('hostnames'):
hostname_list = options.get('hostnames').split(",")
rq = RegisterQueue(queuename, ctrl, inst_per, instance_min, hostname_list)
rq = RegisterQueue(queuename, inst_per, instance_min, hostname_list)
rq.register()
if rq.instance_not_found_err:
print(rq.instance_not_found_err.message)
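Note: RegisterQueue loses its controller positional argument, and the --controller CLI flag is removed. The new call shape, matching the instance manager change later in this diff:

    RegisterQueue('tower', 100, 0, [], is_container_group=False).register()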

View File

@@ -10,7 +10,6 @@ from datetime import datetime as dt
from django.core.management.base import BaseCommand
from django.db import connection
from django.db.models import Q
from django.db.migrations.executor import MigrationExecutor
from awx.main.analytics.broadcast_websocket import (
@@ -140,7 +139,7 @@ class Command(BaseCommand):
data[family.name] = family.samples[0].value
me = Instance.objects.me()
hostnames = [i.hostname for i in Instance.objects.exclude(Q(hostname=me.hostname) | Q(rampart_groups__controller__isnull=False))]
hostnames = [i.hostname for i in Instance.objects.exclude(hostname=me.hostname)]
host_stats = Command.get_connection_status(me, hostnames, data)
lines = Command._format_lines(host_stats)

View File

@@ -1,47 +0,0 @@
import os
import shutil
import sys
import tempfile
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
import ansible_runner
from awx.main.isolated.manager import set_pythonpath
class Command(BaseCommand):
"""Tests SSH connectivity between a controller and target isolated node"""
help = 'Tests SSH connectivity between a controller and target isolated node'
def add_arguments(self, parser):
parser.add_argument('--hostname', dest='hostname', type=str, help='Hostname of an isolated node')
def handle(self, *args, **options):
hostname = options.get('hostname')
if not hostname:
raise CommandError("--hostname is a required argument")
try:
path = tempfile.mkdtemp(prefix='awx_isolated_ssh', dir=settings.AWX_ISOLATION_BASE_PATH)
ssh_key = None
if all([getattr(settings, 'AWX_ISOLATED_KEY_GENERATION', False) is True, getattr(settings, 'AWX_ISOLATED_PRIVATE_KEY', None)]):
ssh_key = settings.AWX_ISOLATED_PRIVATE_KEY
env = dict(os.environ.items())
env['ANSIBLE_HOST_KEY_CHECKING'] = str(settings.AWX_ISOLATED_HOST_KEY_CHECKING)
set_pythonpath(os.path.join(settings.ANSIBLE_VENV_PATH, 'lib'), env)
res = ansible_runner.interface.run(
private_data_dir=path,
host_pattern='all',
inventory='{} ansible_ssh_user={}'.format(hostname, settings.AWX_ISOLATED_USERNAME),
module='shell',
module_args='ansible-runner --version',
envvars=env,
verbosity=3,
ssh_key=ssh_key,
)
sys.exit(res.rc)
finally:
shutil.rmtree(path)

View File

@@ -142,7 +142,7 @@ class InstanceManager(models.Manager):
pod_ip = os.environ.get('MY_POD_IP')
registered = self.register(ip_address=pod_ip)
is_container_group = settings.IS_K8S
RegisterQueue('tower', None, 100, 0, [], is_container_group).register()
RegisterQueue('tower', 100, 0, [], is_container_group).register()
return registered
else:
return (False, self.me())
@@ -155,9 +155,6 @@
# NOTE: TODO: Likely to repurpose this once standalone ramparts are a thing
return "tower"
def all_non_isolated(self):
return self.exclude(rampart_groups__controller__isnull=False)
class InstanceGroupManager(models.Manager):
"""A custom manager class for the Instance model.

View File

@@ -0,0 +1,26 @@
# Generated by Django 2.2.16 on 2021-04-21 15:02
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0138_custom_inventory_scripts_removal'),
]
operations = [
migrations.RemoveField(
model_name='instance',
name='last_isolated_check',
),
migrations.RemoveField(
model_name='instancegroup',
name='controller',
),
migrations.AlterField(
model_name='unifiedjob',
name='controller_node',
field=models.TextField(blank=True, default='', editable=False, help_text='The instance that managed the execution environment.'),
),
]
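Note: the new migration drops Instance.last_isolated_check and InstanceGroup.controller, and rewords the controller_node help text. A minimal sketch of applying it in a development environment (assumes a configured AWX Django settings module):

    from django.core.management import call_command

    call_command('migrate', 'main')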

View File

@@ -146,10 +146,6 @@ class AdHocCommand(UnifiedJob, JobNotificationMixin):
return RunAdHocCommand
@classmethod
def supports_isolation(cls):
return True
@property
def is_container_group_task(self):
return bool(self.instance_group and self.instance_group.is_container_group)

View File

@@ -1,7 +1,6 @@
# Copyright (c) 2015 Ansible, Inc.
# All Rights Reserved.
import random
from decimal import Decimal
from django.core.validators import MinValueValidator
@@ -63,10 +62,6 @@ class Instance(HasPolicyEditsMixin, BaseModel):
)
created = models.DateTimeField(auto_now_add=True)
modified = models.DateTimeField(auto_now=True)
last_isolated_check = models.DateTimeField(
null=True,
editable=False,
)
version = models.CharField(max_length=120, blank=True)
capacity = models.PositiveIntegerField(
default=100,
@@ -128,20 +123,12 @@ class Instance(HasPolicyEditsMixin, BaseModel):
def jobs_total(self):
return UnifiedJob.objects.filter(execution_node=self.hostname).count()
def is_lost(self, ref_time=None, isolated=False):
def is_lost(self, ref_time=None):
if ref_time is None:
ref_time = now()
grace_period = 120
if isolated:
grace_period = settings.AWX_ISOLATED_PERIODIC_CHECK * 2
return self.modified < ref_time - timedelta(seconds=grace_period)
def is_controller(self):
return Instance.objects.filter(rampart_groups__controller__instances=self).exists()
def is_isolated(self):
return self.rampart_groups.filter(controller__isnull=False).exists()
def refresh_capacity(self):
if settings.IS_K8S:
self.capacity = self.cpu = self.memory = self.cpu_capacity = self.mem_capacity = 0 # noqa
@@ -185,15 +172,6 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
editable=False,
help_text=_('Instances that are members of this InstanceGroup'),
)
controller = models.ForeignKey(
'InstanceGroup',
related_name='controlled_groups',
help_text=_('Instance Group to remotely control this group.'),
editable=False,
default=None,
null=True,
on_delete=models.CASCADE,
)
is_container_group = models.BooleanField(default=False)
credential = models.ForeignKey(
'Credential',
@@ -215,7 +193,7 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
default=[], blank=True, help_text=_("List of exact-match Instances that will always be automatically assigned to this group")
)
POLICY_FIELDS = frozenset(('policy_instance_list', 'policy_instance_minimum', 'policy_instance_percentage', 'controller'))
POLICY_FIELDS = frozenset(('policy_instance_list', 'policy_instance_minimum', 'policy_instance_percentage'))
def get_absolute_url(self, request=None):
return reverse('api:instance_group_detail', kwargs={'pk': self.pk}, request=request)
@@ -232,14 +210,6 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
def jobs_total(self):
return UnifiedJob.objects.filter(instance_group=self).count()
@property
def is_controller(self):
return self.controlled_groups.exists()
@property
def is_isolated(self):
return bool(self.controller)
'''
RelatedJobsMixin
'''
@@ -271,9 +241,6 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
largest_instance = i
return largest_instance
def choose_online_controller_node(self):
return random.choice(list(self.controller.instances.filter(capacity__gt=0, enabled=True).values_list('hostname', flat=True)))
def set_default_policy_fields(self):
self.policy_instance_list = []
self.policy_instance_minimum = 0

View File

@@ -587,10 +587,6 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
return RunJob
@classmethod
def supports_isolation(cls):
return True
def _global_timeout_setting(self):
return 'DEFAULT_JOB_TIMEOUT'
@@ -759,7 +755,7 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
@property
def can_run_containerized(self):
return any([ig for ig in self.preferred_instance_groups if ig.is_container_group])
return True
@property
def is_container_group_task(self):

View File

@@ -588,7 +588,7 @@ class UnifiedJob(
blank=True,
default='',
editable=False,
help_text=_("The instance that managed the isolated execution environment."),
help_text=_("The instance that managed the execution environment."),
)
notifications = models.ManyToManyField(
'Notification',
@@ -737,10 +737,6 @@ class UnifiedJob(
def _get_task_class(cls):
raise NotImplementedError # Implement in subclasses.
@classmethod
def supports_isolation(cls):
return False
@property
def can_run_containerized(self):
return False
@@ -1402,12 +1398,11 @@ class UnifiedJob(
@property
def preferred_instance_groups(self):
"""
Return Instance/Rampart Groups preferred by this unified job templates
Return Instance/Rampart Groups preferred by this unified job template
"""
if not self.unified_job_template:
return []
template_groups = [x for x in self.unified_job_template.instance_groups.all()]
return template_groups
return list(self.unified_job_template.instance_groups.all())
@property
def global_instance_groups(self):
@@ -1467,9 +1462,6 @@ class UnifiedJob(
def get_queue_name(self):
return self.controller_node or self.execution_node or get_local_queuename()
def is_isolated(self):
return bool(self.controller_node)
@property
def is_container_group_task(self):
return False

View File

@@ -6,7 +6,6 @@ from datetime import timedelta
import logging
import uuid
import json
import random
from types import SimpleNamespace
# Django
@@ -253,14 +252,6 @@ class TaskManager:
}
dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]
controller_node = None
if task.supports_isolation() and rampart_group.controller_id:
try:
controller_node = rampart_group.choose_online_controller_node()
except IndexError:
logger.debug("No controllers available in group {} to run {}".format(rampart_group.name, task.log_format))
return
task.status = 'waiting'
(start_status, opts) = task.pre_start()
@@ -277,38 +268,24 @@ class TaskManager:
task.send_notification_templates('running')
logger.debug('Transitioning %s to running status.', task.log_format)
schedule_task_manager()
elif not task.supports_isolation() and rampart_group.controller_id:
# non-Ansible jobs on isolated instances run on controller
task.instance_group = rampart_group.controller
task.execution_node = random.choice(list(rampart_group.controller.instances.all().values_list('hostname', flat=True)))
logger.debug('Submitting isolated {} to queue {} on node {}.'.format(task.log_format, task.instance_group.name, task.execution_node))
elif controller_node:
task.instance_group = rampart_group
task.execution_node = instance.hostname
task.controller_node = controller_node
logger.debug('Submitting isolated {} to queue {} controlled by {}.'.format(task.log_format, task.execution_node, controller_node))
elif rampart_group.is_container_group:
# find one real, non-containerized instance with capacity to
# act as the controller for k8s API interaction
match = None
for group in InstanceGroup.objects.all():
if group.is_container_group or group.controller_id:
continue
for group in InstanceGroup.objects.filter(is_container_group=False):
match = group.fit_task_to_most_remaining_capacity_instance(task, group.instances.all())
if match:
break
task.instance_group = rampart_group
if match is None:
logger.warn('No available capacity to run containerized <{}>.'.format(task.log_format))
elif task.can_run_containerized and any(ig.is_container_group for ig in task.preferred_instance_groups):
task.controller_node = match.hostname
else:
if task.supports_isolation():
task.controller_node = match.hostname
else:
# project updates and inventory updates don't *actually* run in pods,
# so just pick *any* non-isolated, non-containerized host and use it
# as the execution node
task.execution_node = match.hostname
logger.debug('Submitting containerized {} to queue {}.'.format(task.log_format, task.execution_node))
# project updates and inventory updates don't *actually* run in pods, so
# just pick *any* non-containerized host and use it as the execution node
task.execution_node = match.hostname
logger.debug('Submitting containerized {} to queue {}.'.format(task.log_format, task.execution_node))
else:
task.instance_group = rampart_group
if instance is not None:
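Note: because the rendered diff interleaves removed and added lines, the resulting container-group branch is easier to read in one piece. A condensed reconstruction, with indentation restored (the hunk above remains the authoritative text):

    elif rampart_group.is_container_group:
        # find one non-containerized instance with capacity to act as the
        # controller for k8s API interaction
        match = None
        for group in InstanceGroup.objects.filter(is_container_group=False):
            match = group.fit_task_to_most_remaining_capacity_instance(task, group.instances.all())
            if match:
                break
        task.instance_group = rampart_group
        if match is None:
            logger.warn('No available capacity to run containerized <{}>.'.format(task.log_format))
        elif task.can_run_containerized and any(ig.is_container_group for ig in task.preferred_instance_groups):
            task.controller_node = match.hostname
        else:
            # project updates and inventory updates don't *actually* run in pods, so
            # just pick *any* non-containerized host and use it as the execution node
            task.execution_node = match.hostname
            logger.debug('Submitting containerized {} to queue {}.'.format(task.log_format, task.execution_node))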

View File

@@ -33,7 +33,7 @@ import subprocess
from django.conf import settings
from django.db import transaction, DatabaseError, IntegrityError, ProgrammingError, connection
from django.db.models.fields.related import ForeignKey
from django.utils.timezone import now, timedelta
from django.utils.timezone import now
from django.utils.encoding import smart_str
from django.contrib.auth.models import User
from django.utils.translation import ugettext_lazy as _, gettext_noop
@@ -84,7 +84,6 @@ from awx.main.models import (
from awx.main.constants import ACTIVE_STATES
from awx.main.exceptions import AwxTaskError, PostRunError
from awx.main.queue import CallbackQueueDispatcher
from awx.main.isolated import manager as isolated_manager
from awx.main.dispatch.publish import task
from awx.main.dispatch import get_local_queuename, reaper
from awx.main.utils import (
@@ -170,8 +169,6 @@ def dispatch_startup():
#
apply_cluster_membership_policies()
cluster_node_heartbeat()
if Instance.objects.me().is_controller():
awx_isolated_heartbeat()
Metrics().clear_values()
# Update Tower's rsyslog.conf file based on loggins settings in the db
@@ -205,13 +202,8 @@ def apply_cluster_membership_policies():
started_compute = time.time()
all_instances = list(Instance.objects.order_by('id'))
all_groups = list(InstanceGroup.objects.prefetch_related('instances'))
iso_hostnames = set([])
for ig in all_groups:
if ig.controller_id is not None:
iso_hostnames.update(ig.policy_instance_list)
considered_instances = [inst for inst in all_instances if inst.hostname not in iso_hostnames]
total_instances = len(considered_instances)
total_instances = len(all_instances)
actual_groups = []
actual_instances = []
Group = namedtuple('Group', ['obj', 'instances', 'prior_instances'])
@@ -232,18 +224,12 @@ def apply_cluster_membership_policies():
if group_actual.instances:
logger.debug("Policy List, adding Instances {} to Group {}".format(group_actual.instances, ig.name))
if ig.controller_id is None:
actual_groups.append(group_actual)
else:
# For isolated groups, _only_ apply the policy_instance_list
# do not add to in-memory list, so minimum rules not applied
logger.debug('Committing instances to isolated group {}'.format(ig.name))
ig.instances.set(group_actual.instances)
actual_groups.append(group_actual)
# Process Instance minimum policies next, since it represents a concrete lower bound to the
# number of instances to make available to instance groups
actual_instances = [Node(obj=i, groups=[]) for i in considered_instances if i.managed_by_policy]
logger.debug("Total non-isolated instances:{} available for policy: {}".format(total_instances, len(actual_instances)))
actual_instances = [Node(obj=i, groups=[]) for i in all_instances if i.managed_by_policy]
logger.debug("Total instances: {}, available for policy: {}".format(total_instances, len(actual_instances)))
for g in sorted(actual_groups, key=lambda x: len(x.instances)):
policy_min_added = []
for i in sorted(actual_instances, key=lambda x: len(x.groups)):
@@ -285,7 +271,7 @@ def apply_cluster_membership_policies():
logger.debug('Cluster policy no-op finished in {} seconds'.format(time.time() - started_compute))
return
# On a differential basis, apply instances to non-isolated groups
# On a differential basis, apply instances to groups
with transaction.atomic():
for g in actual_groups:
if g.obj.is_container_group:
@@ -419,7 +405,7 @@ def cleanup_execution_environment_images():
def cluster_node_heartbeat():
logger.debug("Cluster node heartbeat task.")
nowtime = now()
instance_list = list(Instance.objects.all_non_isolated())
instance_list = list(Instance.objects.all())
this_inst = None
lost_instances = []
@@ -503,30 +489,6 @@ def awx_k8s_reaper():
logger.exception("Failed to delete orphaned pod {} from {}".format(job.log_format, group))
@task(queue=get_local_queuename)
def awx_isolated_heartbeat():
local_hostname = settings.CLUSTER_HOST_ID
logger.debug("Controlling node checking for any isolated management tasks.")
poll_interval = settings.AWX_ISOLATED_PERIODIC_CHECK
# Get isolated instances not checked since poll interval - some buffer
nowtime = now()
accept_before = nowtime - timedelta(seconds=(poll_interval - 10))
isolated_instance_qs = Instance.objects.filter(
rampart_groups__controller__instances__hostname=local_hostname,
)
isolated_instance_qs = isolated_instance_qs.filter(last_isolated_check__lt=accept_before) | isolated_instance_qs.filter(last_isolated_check=None)
# Fast pass of isolated instances, claiming the nodes to update
with transaction.atomic():
for isolated_instance in isolated_instance_qs:
isolated_instance.last_isolated_check = nowtime
# Prevent modified time from being changed, as in normal heartbeat
isolated_instance.save(update_fields=['last_isolated_check'])
# Slow pass looping over isolated IGs and their isolated instances
if len(isolated_instance_qs) > 0:
logger.debug("Managing isolated instances {}.".format(','.join([inst.hostname for inst in isolated_instance_qs])))
isolated_manager.IsolatedManager(CallbackQueueDispatcher.dispatch).health_check(isolated_instance_qs)
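Note: awx_isolated_heartbeat was the controller-side periodic task for isolated nodes: a fast pass atomically claimed due instances by stamping last_isolated_check, then a slow pass ran IsolatedManager.health_check over them. The pattern, paraphrased from the removal above (variable names illustrative; ordinary instances keep heartbeating through cluster_node_heartbeat):

    with transaction.atomic():
        for inst in due_instances:
            inst.last_isolated_check = nowtime
            inst.save(update_fields=['last_isolated_check'])
    IsolatedManager(CallbackQueueDispatcher.dispatch).health_check(due_instances)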
@task(queue=get_local_queuename)
def awx_periodic_scheduler():
with advisory_lock('awx_periodic_scheduler_lock', wait=False) as acquired:
@@ -1017,7 +979,7 @@ class BaseTask(object):
else:
env['PATH'] = os.path.join(settings.AWX_VENV_PATH, "bin")
def build_env(self, instance, private_data_dir, isolated, private_data_files=None):
def build_env(self, instance, private_data_dir, private_data_files=None):
"""
Build environment dictionary for ansible-playbook.
"""
@@ -1138,7 +1100,7 @@ class BaseTask(object):
"""
instance.log_lifecycle("post_run")
def final_run_hook(self, instance, status, private_data_dir, fact_modification_times, isolated_manager_instance=None):
def final_run_hook(self, instance, status, private_data_dir, fact_modification_times):
"""
Hook for any steps to run after job/task is marked as complete.
"""
@@ -1284,16 +1246,6 @@ class BaseTask(object):
if result_traceback:
self.instance = self.update_model(self.instance.pk, result_traceback=result_traceback)
def check_handler(self, config):
"""
IsolatedManager callback triggered by the repeated checks of the isolated node
"""
job_env = build_safe_env(config['env'])
for k, v in self.safe_cred_env.items():
if k in job_env:
job_env[k] = v
self.instance = self.update_model(self.instance.pk, job_args=json.dumps(config['command']), job_cwd=config['cwd'], job_env=job_env)
@with_path_cleanup
def run(self, pk, **kwargs):
"""
@@ -1321,7 +1273,6 @@ class BaseTask(object):
self.safe_env = {}
self.safe_cred_env = {}
private_data_dir = None
isolated_manager_instance = None
# store a reference to the parent workflow job (if any) so we can include
# it in event data JSON
@@ -1329,7 +1280,6 @@ class BaseTask(object):
self.parent_workflow_job_id = self.instance.get_workflow_job().id
try:
isolated = self.instance.is_isolated()
self.instance.send_notification_templates("running")
private_data_dir = self.build_private_data_dir(self.instance)
self.pre_run_hook(self.instance, private_data_dir)
@@ -1363,7 +1313,7 @@ class BaseTask(object):
passwords = self.build_passwords(self.instance, kwargs)
self.build_extra_vars_file(self.instance, private_data_dir)
args = self.build_args(self.instance, private_data_dir, passwords)
env = self.build_env(self.instance, private_data_dir, isolated, private_data_files=private_data_files)
env = self.build_env(self.instance, private_data_dir, private_data_files=private_data_files)
self.safe_env = build_safe_env(env)
credentials = self.build_credentials_list(self.instance)
@@ -1464,7 +1414,7 @@ class BaseTask(object):
self.instance = self.update_model(pk, status=status, emitted_events=self.event_ct, **extra_update_fields)
try:
self.final_run_hook(self.instance, status, private_data_dir, fact_modification_times, isolated_manager_instance=isolated_manager_instance)
self.final_run_hook(self.instance, status, private_data_dir, fact_modification_times)
except Exception:
logger.exception('{} Final run hook errored.'.format(self.instance.log_format))
@@ -1549,11 +1499,11 @@ class RunJob(BaseTask):
return passwords
def build_env(self, job, private_data_dir, isolated=False, private_data_files=None):
def build_env(self, job, private_data_dir, private_data_files=None):
"""
Build environment dictionary for ansible-playbook.
"""
env = super(RunJob, self).build_env(job, private_data_dir, isolated=isolated, private_data_files=private_data_files)
env = super(RunJob, self).build_env(job, private_data_dir, private_data_files=private_data_files)
if private_data_files is None:
private_data_files = {}
# Set environment variables needed for inventory and job event
@@ -1564,10 +1514,9 @@ class RunJob(BaseTask):
env['PROJECT_REVISION'] = job.project.scm_revision
env['ANSIBLE_RETRY_FILES_ENABLED'] = "False"
env['MAX_EVENT_RES'] = str(settings.MAX_EVENT_RES_DATA)
if not isolated:
if hasattr(settings, 'AWX_ANSIBLE_CALLBACK_PLUGINS') and settings.AWX_ANSIBLE_CALLBACK_PLUGINS:
env['ANSIBLE_CALLBACK_PLUGINS'] = ':'.join(settings.AWX_ANSIBLE_CALLBACK_PLUGINS)
env['AWX_HOST'] = settings.TOWER_URL_BASE
if hasattr(settings, 'AWX_ANSIBLE_CALLBACK_PLUGINS') and settings.AWX_ANSIBLE_CALLBACK_PLUGINS:
env['ANSIBLE_CALLBACK_PLUGINS'] = ':'.join(settings.AWX_ANSIBLE_CALLBACK_PLUGINS)
env['AWX_HOST'] = settings.TOWER_URL_BASE
# Create a directory for ControlPath sockets that is unique to each job
cp_dir = os.path.join(private_data_dir, 'cp')
@@ -1798,9 +1747,6 @@ class RunJob(BaseTask):
if sync_needs:
pu_ig = job.instance_group
pu_en = job.execution_node
if job.is_isolated() is True:
pu_ig = pu_ig.controller
pu_en = settings.CLUSTER_HOST_ID
sync_metafields = dict(
launch_type="sync",
@@ -1855,7 +1801,7 @@ class RunJob(BaseTask):
# ran inside of the event saving code
update_smart_memberships_for_inventory(job.inventory)
def final_run_hook(self, job, status, private_data_dir, fact_modification_times, isolated_manager_instance=None):
def final_run_hook(self, job, status, private_data_dir, fact_modification_times):
super(RunJob, self).final_run_hook(job, status, private_data_dir, fact_modification_times)
if not private_data_dir:
# If there's no private data dir, that means we didn't get into the
@@ -1867,8 +1813,6 @@ class RunJob(BaseTask):
os.path.join(private_data_dir, 'artifacts', 'fact_cache'),
fact_modification_times,
)
if isolated_manager_instance and not job.is_container_group_task:
isolated_manager_instance.cleanup()
try:
inventory = job.inventory
@@ -1932,11 +1876,11 @@ class RunProjectUpdate(BaseTask):
passwords['scm_password'] = project_update.credential.get_input('password', default='')
return passwords
def build_env(self, project_update, private_data_dir, isolated=False, private_data_files=None):
def build_env(self, project_update, private_data_dir, private_data_files=None):
"""
Build environment dictionary for ansible-playbook.
"""
env = super(RunProjectUpdate, self).build_env(project_update, private_data_dir, isolated=isolated, private_data_files=private_data_files)
env = super(RunProjectUpdate, self).build_env(project_update, private_data_dir, private_data_files=private_data_files)
env['ANSIBLE_RETRY_FILES_ENABLED'] = str(False)
env['ANSIBLE_ASK_PASS'] = str(False)
env['ANSIBLE_BECOME_ASK_PASS'] = str(False)
@@ -2391,14 +2335,14 @@ class RunInventoryUpdate(BaseTask):
injector = InventorySource.injectors[inventory_update.source]()
return injector.build_private_data(inventory_update, private_data_dir)
def build_env(self, inventory_update, private_data_dir, isolated, private_data_files=None):
def build_env(self, inventory_update, private_data_dir, private_data_files=None):
"""Build environment dictionary for ansible-inventory.
Most environment variables related to credentials or configuration
are accomplished by the inventory source injectors (in this method)
or custom credential type injectors (in main run method).
"""
env = super(RunInventoryUpdate, self).build_env(inventory_update, private_data_dir, isolated, private_data_files=private_data_files)
env = super(RunInventoryUpdate, self).build_env(inventory_update, private_data_dir, private_data_files=private_data_files)
if private_data_files is None:
private_data_files = {}
@@ -2715,11 +2659,11 @@ class RunAdHocCommand(BaseTask):
passwords[field] = value
return passwords
def build_env(self, ad_hoc_command, private_data_dir, isolated=False, private_data_files=None):
def build_env(self, ad_hoc_command, private_data_dir, private_data_files=None):
"""
Build environment dictionary for ansible.
"""
env = super(RunAdHocCommand, self).build_env(ad_hoc_command, private_data_dir, isolated=isolated, private_data_files=private_data_files)
env = super(RunAdHocCommand, self).build_env(ad_hoc_command, private_data_dir, private_data_files=private_data_files)
# Set environment variables needed for inventory and ad hoc event
# callbacks to work.
env['AD_HOC_COMMAND_ID'] = str(ad_hoc_command.pk)
@@ -2825,11 +2769,6 @@ class RunAdHocCommand(BaseTask):
d[r'Password:\s*?$'] = 'ssh_password'
return d
def final_run_hook(self, adhoc_job, status, private_data_dir, fact_modification_times, isolated_manager_instance=None):
super(RunAdHocCommand, self).final_run_hook(adhoc_job, status, private_data_dir, fact_modification_times)
if isolated_manager_instance:
isolated_manager_instance.cleanup()
@task(queue=get_local_queuename)
class RunSystemJob(BaseTask):
@@ -2871,8 +2810,8 @@ class RunSystemJob(BaseTask):
os.chmod(path, stat.S_IRUSR)
return path
def build_env(self, instance, private_data_dir, isolated=False, private_data_files=None):
base_env = super(RunSystemJob, self).build_env(instance, private_data_dir, isolated=isolated, private_data_files=private_data_files)
def build_env(self, instance, private_data_dir, private_data_files=None):
base_env = super(RunSystemJob, self).build_env(instance, private_data_dir, private_data_files=private_data_files)
# TODO: this is able to run by turning off isolation
# the goal is to run it in a container instead
env = dict(os.environ.items())

View File

@ -20,13 +20,7 @@ def tower_instance_group():
@pytest.fixture
def instance():
instance = Instance.objects.create(hostname='iso')
return instance
@pytest.fixture
def non_iso_instance():
return Instance.objects.create(hostname='iamnotanisolatedinstance')
return Instance.objects.create(hostname='instance')
@pytest.fixture
@ -36,15 +30,6 @@ def instance_group(job_factory):
return ig
@pytest.fixture
def isolated_instance_group(instance_group, instance):
ig = InstanceGroup(name="iso", controller=instance_group)
ig.save()
ig.instances.set([instance])
ig.save()
return ig
@pytest.fixture
def containerized_instance_group(instance_group, kube_credential):
ig = InstanceGroup(name="container")
@ -97,26 +82,6 @@ def source_model(request):
return request.getfixturevalue(request.param)
@pytest.mark.django_db
def test_instance_group_is_controller(instance_group, isolated_instance_group, non_iso_instance):
assert not isolated_instance_group.is_controller
assert instance_group.is_controller
instance_group.instances.set([non_iso_instance])
assert instance_group.is_controller
@pytest.mark.django_db
def test_instance_group_is_isolated(instance_group, isolated_instance_group):
assert not instance_group.is_isolated
assert isolated_instance_group.is_isolated
isolated_instance_group.instances.set([])
assert isolated_instance_group.is_isolated
@pytest.mark.django_db
def test_delete_instance_group_jobs(delete, instance_group_jobs_successful, instance_group, admin):
url = reverse("api:instance_group_detail", kwargs={'pk': instance_group.pk})
@ -160,61 +125,6 @@ def test_delete_rename_tower_instance_group_prevented(delete, options, tower_ins
patch(url, {'name': 'foobar'}, super_user, expect=200)
@pytest.mark.django_db
def test_prevent_delete_iso_and_control_groups(delete, isolated_instance_group, admin):
iso_url = reverse("api:instance_group_detail", kwargs={'pk': isolated_instance_group.pk})
controller_url = reverse("api:instance_group_detail", kwargs={'pk': isolated_instance_group.controller.pk})
delete(iso_url, None, admin, expect=403)
delete(controller_url, None, admin, expect=403)
@pytest.mark.django_db
def test_prevent_isolated_instance_added_to_non_isolated_instance_group(post, admin, instance, instance_group, isolated_instance_group):
url = reverse("api:instance_group_instance_list", kwargs={'pk': instance_group.pk})
assert True is instance.is_isolated()
resp = post(url, {'associate': True, 'id': instance.id}, admin, expect=400)
assert u"Isolated instances may not be added or removed from instances groups via the API." == resp.data['error']
@pytest.mark.django_db
def test_prevent_isolated_instance_added_to_non_isolated_instance_group_via_policy_list(patch, admin, instance, instance_group, isolated_instance_group):
url = reverse("api:instance_group_detail", kwargs={'pk': instance_group.pk})
assert True is instance.is_isolated()
resp = patch(url, {'policy_instance_list': [instance.hostname]}, user=admin, expect=400)
assert [u"Isolated instances may not be added or removed from instances groups via the API."] == resp.data['policy_instance_list']
assert instance_group.policy_instance_list == []
@pytest.mark.django_db
def test_prevent_isolated_instance_removal_from_isolated_instance_group(post, admin, instance, instance_group, isolated_instance_group):
url = reverse("api:instance_group_instance_list", kwargs={'pk': isolated_instance_group.pk})
assert True is instance.is_isolated()
resp = post(url, {'disassociate': True, 'id': instance.id}, admin, expect=400)
assert u"Isolated instances may not be added or removed from instances groups via the API." == resp.data['error']
@pytest.mark.django_db
def test_prevent_non_isolated_instance_added_to_isolated_instance_group(post, admin, non_iso_instance, isolated_instance_group):
url = reverse("api:instance_group_instance_list", kwargs={'pk': isolated_instance_group.pk})
assert False is non_iso_instance.is_isolated()
resp = post(url, {'associate': True, 'id': non_iso_instance.id}, admin, expect=400)
assert u"Isolated instance group membership may not be managed via the API." == resp.data['error']
@pytest.mark.django_db
def test_prevent_non_isolated_instance_added_to_isolated_instance_group_via_policy_list(patch, admin, non_iso_instance, isolated_instance_group):
url = reverse("api:instance_group_detail", kwargs={'pk': isolated_instance_group.pk})
assert False is non_iso_instance.is_isolated()
resp = patch(url, {'policy_instance_list': [non_iso_instance.hostname]}, user=admin, expect=400)
assert [u"Isolated instance group membership may not be managed via the API."] == resp.data['policy_instance_list']
assert isolated_instance_group.policy_instance_list == []
@pytest.mark.django_db
@pytest.mark.parametrize('source_model', ['job_template', 'inventory', 'organization'], indirect=True)
def test_instance_group_order_persistence(get, post, admin, source_model):
@ -222,7 +132,7 @@ def test_instance_group_order_persistence(get, post, admin, source_model):
total = 5
pks = list(range(total))
random.shuffle(pks)
instances = [InstanceGroup.objects.create(name='iso-%d' % i) for i in pks]
instances = [InstanceGroup.objects.create(name='group-%d' % i) for i in pks]
view_name = camelcase_to_underscore(source_model.__class__.__name__)
url = reverse('api:{}_instance_groups_list'.format(view_name), kwargs={'pk': source_model.pk})
@ -252,7 +162,6 @@ def test_instance_group_update_fields(patch, instance, instance_group, admin, co
# instance group (not containerized)
ig_url = reverse("api:instance_group_detail", kwargs={'pk': instance_group.pk})
assert not instance_group.is_container_group
assert not containerized_instance_group.is_isolated
resp = patch(ig_url, {'policy_instance_percentage': 15}, admin, expect=200)
assert 15 == resp.data['policy_instance_percentage']
resp = patch(ig_url, {'policy_instance_minimum': 15}, admin, expect=200)
@ -263,7 +172,6 @@ def test_instance_group_update_fields(patch, instance, instance_group, admin, co
# containerized instance group
cg_url = reverse("api:instance_group_detail", kwargs={'pk': containerized_instance_group.pk})
assert containerized_instance_group.is_container_group
assert not containerized_instance_group.is_isolated
resp = patch(cg_url, {'policy_instance_percentage': 15}, admin, expect=400)
assert ["Containerized instances may not be managed via the API"] == resp.data['policy_instance_percentage']
resp = patch(cg_url, {'policy_instance_minimum': 15}, admin, expect=400)

View File

@ -5,8 +5,6 @@
# Python
import pytest
from django.conf import settings
# AWX
from awx.api.versioning import reverse
from awx.conf.models import Setting
@ -322,60 +320,6 @@ def test_logging_aggregator_connection_test_valid(put, post, admin):
post(url, {}, user=admin, expect=202)
@pytest.mark.django_db
@pytest.mark.parametrize(
'setting_name',
[
'AWX_ISOLATED_CHECK_INTERVAL',
'AWX_ISOLATED_LAUNCH_TIMEOUT',
'AWX_ISOLATED_CONNECTION_TIMEOUT',
],
)
def test_isolated_job_setting_validation(get, patch, admin, setting_name):
url = reverse('api:setting_singleton_detail', kwargs={'category_slug': 'jobs'})
patch(url, user=admin, data={setting_name: -1}, expect=400)
data = get(url, user=admin).data
assert data[setting_name] != -1
@pytest.mark.django_db
@pytest.mark.parametrize(
'key, expected',
[
['AWX_ISOLATED_PRIVATE_KEY', '$encrypted$'],
['AWX_ISOLATED_PUBLIC_KEY', 'secret'],
],
)
def test_isolated_keys_readonly(get, patch, delete, admin, key, expected):
Setting.objects.create(key=key, value='secret').save()
assert getattr(settings, key) == 'secret'
url = reverse('api:setting_singleton_detail', kwargs={'category_slug': 'jobs'})
resp = get(url, user=admin)
assert resp.data[key] == expected
patch(url, user=admin, data={key: 'new-secret'})
assert getattr(settings, key) == 'secret'
delete(url, user=admin)
assert getattr(settings, key) == 'secret'
@pytest.mark.django_db
def test_isolated_key_flag_readonly(get, patch, delete, admin):
settings.AWX_ISOLATED_KEY_GENERATION = True
url = reverse('api:setting_singleton_detail', kwargs={'category_slug': 'jobs'})
resp = get(url, user=admin)
assert resp.data['AWX_ISOLATED_KEY_GENERATION'] is True
patch(url, user=admin, data={'AWX_ISOLATED_KEY_GENERATION': False})
assert settings.AWX_ISOLATED_KEY_GENERATION is True
delete(url, user=admin)
assert settings.AWX_ISOLATED_KEY_GENERATION is True
@pytest.mark.django_db
@pytest.mark.parametrize('headers', [True, False])
def test_saml_x509cert_validation(patch, get, admin, headers):

View File

@ -2,10 +2,8 @@ import pytest
from unittest import mock
import os
from django.utils.timezone import now, timedelta
from awx.main.tasks import RunProjectUpdate, RunInventoryUpdate, awx_isolated_heartbeat, isolated_manager
from awx.main.models import ProjectUpdate, InventoryUpdate, InventorySource, Instance, InstanceGroup
from awx.main.tasks import RunProjectUpdate, RunInventoryUpdate
from awx.main.models import ProjectUpdate, InventoryUpdate, InventorySource
@pytest.fixture
@ -70,61 +68,3 @@ class TestDependentInventoryUpdate:
# Verify that it bails after 1st update, detecting a cancel
assert is2.inventory_updates.count() == 0
iu_run_mock.assert_called_once()
class MockSettings:
AWX_ISOLATED_PERIODIC_CHECK = 60
CLUSTER_HOST_ID = 'tower_1'
@pytest.mark.django_db
class TestIsolatedManagementTask:
@pytest.fixture
def control_group(self):
return InstanceGroup.objects.create(name='alpha')
@pytest.fixture
def control_instance(self, control_group):
return control_group.instances.create(hostname='tower_1')
@pytest.fixture
def needs_updating(self, control_group):
ig = InstanceGroup.objects.create(name='thepentagon', controller=control_group)
inst = ig.instances.create(hostname='isolated', capacity=103)
inst.last_isolated_check = now() - timedelta(seconds=MockSettings.AWX_ISOLATED_PERIODIC_CHECK)
inst.save()
return ig
@pytest.fixture
def just_updated(self, control_group):
ig = InstanceGroup.objects.create(name='thepentagon', controller=control_group)
inst = ig.instances.create(hostname='isolated', capacity=103)
inst.last_isolated_check = now()
inst.save()
return inst
@pytest.fixture
def old_version(self, control_group):
ig = InstanceGroup.objects.create(name='thepentagon', controller=control_group)
inst = ig.instances.create(hostname='isolated-old', capacity=103)
inst.save()
return inst
def test_takes_action(self, control_instance, needs_updating):
original_isolated_instance = needs_updating.instances.all().first()
with mock.patch('awx.main.tasks.settings', MockSettings()):
with mock.patch.object(isolated_manager.IsolatedManager, 'health_check') as check_mock:
awx_isolated_heartbeat()
iso_instance = Instance.objects.get(hostname='isolated')
call_args, _ = check_mock.call_args
assert call_args[0][0] == iso_instance
assert iso_instance.last_isolated_check > original_isolated_instance.last_isolated_check
assert iso_instance.modified == original_isolated_instance.modified
def test_does_not_take_action(self, control_instance, just_updated):
with mock.patch('awx.main.tasks.settings', MockSettings()):
with mock.patch.object(isolated_manager.IsolatedManager, 'health_check') as check_mock:
awx_isolated_heartbeat()
iso_instance = Instance.objects.get(hostname='isolated')
check_mock.assert_not_called()
assert iso_instance.capacity == 103

View File

@ -645,102 +645,6 @@ class TestAdhocRun(TestJobExecution):
assert extra_vars['awx_user_name'] == "angry-spud"
@pytest.mark.skip(reason="Isolated code path needs updating after runner integration")
class TestIsolatedExecution(TestJobExecution):
ISOLATED_HOST = 'some-isolated-host'
ISOLATED_CONTROLLER_HOST = 'some-isolated-controller-host'
@pytest.fixture
def job(self):
job = Job(pk=1, id=1, project=Project(), inventory=Inventory(), job_template=JobTemplate(id=1, name='foo'))
job.controller_node = self.ISOLATED_CONTROLLER_HOST
job.execution_node = self.ISOLATED_HOST
return job
def test_with_ssh_credentials(self, job):
ssh = CredentialType.defaults['ssh']()
credential = Credential(pk=1, credential_type=ssh, inputs={'username': 'bob', 'password': 'secret', 'ssh_key_data': self.EXAMPLE_PRIVATE_KEY})
credential.inputs['password'] = encrypt_field(credential, 'password')
job.credentials.add(credential)
private_data = tempfile.mkdtemp(prefix='awx_')
self.task.build_private_data_dir = mock.Mock(return_value=private_data)
def _mock_job_artifacts(*args, **kw):
artifacts = os.path.join(private_data, 'artifacts')
if not os.path.exists(artifacts):
os.makedirs(artifacts)
if 'run_isolated.yml' in args[0]:
for filename, data in (
['status', 'successful'],
['rc', '0'],
['stdout', 'IT WORKED!'],
):
with open(os.path.join(artifacts, filename), 'w') as f:
f.write(data)
return ('successful', 0)
self.run_pexpect.side_effect = _mock_job_artifacts
self.task.run(self.pk)
playbook_run = self.run_pexpect.call_args_list[0][0]
assert ' '.join(playbook_run[0]).startswith(
' '.join(
[
'ansible-playbook',
'run_isolated.yml',
'-u',
settings.AWX_ISOLATED_USERNAME,
'-T',
str(settings.AWX_ISOLATED_CONNECTION_TIMEOUT),
'-i',
self.ISOLATED_HOST + ',',
'-e',
]
)
)
extra_vars = playbook_run[0][playbook_run[0].index('-e') + 1]
extra_vars = json.loads(extra_vars)
assert extra_vars['dest'] == '/tmp'
assert extra_vars['src'] == private_data
def test_systemctl_failure(self):
# If systemctl fails, read the contents of `artifacts/systemctl_logs`
mock_get = mock.Mock()
ssh = CredentialType.defaults['ssh']()
credential = Credential(
pk=1,
credential_type=ssh,
inputs={
'username': 'bob',
},
)
self.instance.credentials.add(credential)
private_data = tempfile.mkdtemp(prefix='awx_')
self.task.build_private_data_dir = mock.Mock(return_value=private_data)
inventory = json.dumps({"all": {"hosts": ["localhost"]}})
def _mock_job_artifacts(*args, **kw):
artifacts = os.path.join(private_data, 'artifacts')
if not os.path.exists(artifacts):
os.makedirs(artifacts)
if 'run_isolated.yml' in args[0]:
for filename, data in (['daemon.log', 'ERROR IN RUN.PY'],):
with open(os.path.join(artifacts, filename), 'w') as f:
f.write(data)
return ('successful', 0)
self.run_pexpect.side_effect = _mock_job_artifacts
with mock.patch('time.sleep'):
with mock.patch('requests.get') as mock_get:
mock_get.return_value = mock.Mock(content=inventory)
with pytest.raises(Exception):
self.task.run(self.pk, self.ISOLATED_HOST)
class TestJobCredentials(TestJobExecution):
@pytest.fixture
def job(self, execution_environment):
@ -1625,7 +1529,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
env = task.build_env(inventory_update, private_data_dir, private_data_files)
assert 'AWS_ACCESS_KEY_ID' not in env
assert 'AWS_SECRET_ACCESS_KEY' not in env
@ -1645,7 +1549,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
env = task.build_env(inventory_update, private_data_dir, private_data_files)
safe_env = build_safe_env(env)
@ -1669,7 +1573,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
env = task.build_env(inventory_update, private_data_dir, private_data_files)
safe_env = {}
credentials = task.build_credentials_list(inventory_update)
@ -1706,7 +1610,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
env = task.build_env(inventory_update, private_data_dir, private_data_files)
safe_env = build_safe_env(env)
@ -1736,7 +1640,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
env = task.build_env(inventory_update, private_data_dir, private_data_files)
safe_env = build_safe_env(env)
@ -1763,7 +1667,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
def run(expected_gce_zone):
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
env = task.build_env(inventory_update, private_data_dir, private_data_files)
safe_env = {}
credentials = task.build_credentials_list(inventory_update)
for credential in credentials:
@ -1797,7 +1701,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
env = task.build_env(inventory_update, private_data_dir, private_data_files)
path = os.path.join(private_data_dir, os.path.basename(env['OS_CLIENT_CONFIG_FILE']))
shade_config = open(path, 'r').read()
@ -1832,7 +1736,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
env = task.build_env(inventory_update, private_data_dir, private_data_files)
safe_env = build_safe_env(env)
assert env["FOREMAN_SERVER"] == "https://example.org"
@ -1856,7 +1760,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
inventory_update.get_cloud_credential = get_cred
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
env = task.build_env(inventory_update, private_data_dir, False)
env = task.build_env(inventory_update, private_data_dir)
safe_env = build_safe_env(env)
@ -1888,7 +1792,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
inventory_update.get_cloud_credential = get_cred
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
env = task.build_env(inventory_update, private_data_dir, False)
env = task.build_env(inventory_update, private_data_dir)
safe_env = {}
credentials = task.build_credentials_list(inventory_update)
for credential in credentials:
@ -1919,7 +1823,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
settings.AWX_TASK_ENV = {'FOO': 'BAR'}
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
env = task.build_env(inventory_update, private_data_dir, private_data_files)
assert env['FOO'] == 'BAR'

View File

@ -32,13 +32,7 @@ def unwrap_broadcast_msg(payload: dict):
def get_broadcast_hosts():
Instance = apps.get_model('main', 'Instance')
instances = (
Instance.objects.filter(rampart_groups__controller__isnull=True)
.exclude(hostname=Instance.objects.me().hostname)
.order_by('hostname')
.values('hostname', 'ip_address')
.distinct()
)
instances = Instance.objects.exclude(hostname=Instance.objects.me().hostname).order_by('hostname').values('hostname', 'ip_address').distinct()
return {i['hostname']: i['ip_address'] or i['hostname'] for i in instances}
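To see what the simplified comprehension above produces, here is a sketch with hypothetical rows (the real code pulls them from the Instance model): every registered instance except the local node becomes a broadcast peer, falling back to the hostname when no IP address is recorded.

```python
# Hypothetical instance rows; after this PR there is no controller filter to apply.
instances = [
    {'hostname': 'awx-1', 'ip_address': '10.0.0.1'},  # the local node, excluded below
    {'hostname': 'awx-2', 'ip_address': '10.0.0.2'},
    {'hostname': 'awx-3', 'ip_address': None},        # falls back to its hostname
]
me = 'awx-1'
broadcast = {i['hostname']: i['ip_address'] or i['hostname']
             for i in instances if i['hostname'] != me}
assert broadcast == {'awx-2': '10.0.0.2', 'awx-3': 'awx-3'}
```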

View File

@ -1,70 +0,0 @@
---
# The following variables will be set by the runner of this playbook:
# src: /tmp/some/path/private_data_dir/
- name: Poll for status of active job.
hosts: all
gather_facts: false
collections:
- ansible.posix
tasks:
- name: "Output job the playbook is running for"
debug:
msg: "Checking on job {{ job_id }}"
- name: Determine if daemon process is alive.
shell: "ansible-runner is-alive {{src}}"
register: is_alive
ignore_errors: true
- name: Copy artifacts from the isolated host.
synchronize:
src: "{{src}}/artifacts/"
dest: "{{src}}/artifacts/"
mode: pull
delete: true
recursive: true
when: ansible_kubectl_config is not defined
- name: Copy daemon log from the isolated host
synchronize:
src: "{{src}}/daemon.log"
dest: "{{src}}/daemon.log"
mode: pull
when: ansible_kubectl_config is not defined
- name: Copy artifacts from pod
synchronize:
src: "{{src}}/artifacts/"
dest: "{{src}}/artifacts/"
mode: pull
delete: true
recursive: true
set_remote_user: false
rsync_opts:
- "--blocking-io"
- "--rsh=$RSH"
environment:
RSH: "oc rsh --config={{ ansible_kubectl_config }}"
delegate_to: localhost
when: ansible_kubectl_config is defined
- name: Copy daemon log from pod
synchronize:
src: "{{src}}/daemon.log"
dest: "{{src}}/daemon.log"
mode: pull
set_remote_user: false
rsync_opts:
- "--blocking-io"
- "--rsh=$RSH"
environment:
RSH: "oc rsh --config={{ ansible_kubectl_config }}"
delegate_to: localhost
when: ansible_kubectl_config is defined
- name: Fail if previous check determined that process is not alive.
fail:
msg: "isolated task is still running"
when: "is_alive.rc == 0"

View File

@ -1,31 +0,0 @@
---
# The following variables will be set by the runner of this playbook:
# cleanup_dirs: ['/tmp/path/private_data_dir/', '/tmp//path/proot_temp_dir/']
# private_data_dir: '/tmp/path/private_data_dir/'
- name: Clean up from isolated job run.
hosts: all
gather_facts: false
tasks:
- name: cancel the job
command: "ansible-runner stop {{private_data_dir}}"
ignore_errors: true
- name: remove build artifacts
file:
path: '{{item}}'
state: absent
register: result
with_items: "{{cleanup_dirs}}"
until: result is succeeded
ignore_errors: true
retries: 3
delay: 5
- name: fail if build artifacts were not cleaned
fail:
msg: 'Unable to cleanup build artifacts'
when: not result is succeeded

View File

@ -1,12 +0,0 @@
---
- name: Periodic background status check of isolated instances.
hosts: all
gather_facts: false
tasks:
- name: Get capacity of the instance
awx_capacity:
- name: Remove any stale temporary files
awx_isolated_cleanup:

View File

@ -1,61 +0,0 @@
---
# The following variables will be set by the runner of this playbook:
# src: /tmp/some/path/private_data_dir
# dest: /tmp/some/path/
- name: Prepare data, dispatch job in isolated environment.
hosts: all
gather_facts: false
vars:
secret: "{{ lookup('pipe', 'cat ' + src + '/env/ssh_key') }}"
collections:
- ansible.posix
tasks:
- name: "Output job the playbook is running for"
debug:
msg: "Checking on job {{ job_id }}"
- name: synchronize job environment with isolated host
synchronize:
copy_links: true
src: "{{ src }}"
dest: "{{ dest }}"
when: ansible_kubectl_config is not defined
- name: synchronize job environment with remote job container
synchronize:
copy_links: true
src: "{{ src }}"
dest: "{{ dest }}"
set_remote_user: false
rsync_opts:
- "--blocking-io"
- "--rsh=$RSH"
environment:
RSH: "oc rsh --config={{ ansible_kubectl_config }}"
delegate_to: localhost
when: ansible_kubectl_config is defined
- local_action: stat path="{{src}}/env/ssh_key"
register: key
- name: create a named pipe for secret environment data
command: "mkfifo {{src}}/env/ssh_key"
when: key.stat.exists
- name: spawn the playbook
command: "ansible-runner start {{src}} -p '{{playbook}}' -i {{ident}}"
when: playbook is defined
- name: spawn the adhoc command
command: "ansible-runner start {{src}} -m {{module}} -a {{module_args}} -i {{ident}}"
when: module is defined
- name: write the secret environment data
mkfifo:
content: "{{secret}}"
path: "{{src}}/env/ssh_key"
when: key.stat.exists
no_log: true

View File

@ -1,73 +0,0 @@
# Copyright (c) 2017 Ansible by Red Hat
#
# This file is part of Ansible Tower, but depends on code imported from Ansible.
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
from ansible.module_utils._text import to_text
from ansible.module_utils.basic import AnsibleModule
import subprocess
import os
import psutil
def get_cpu_capacity():
env_forkcpu = os.getenv('SYSTEM_TASK_FORKS_CPU', None)
cpu = psutil.cpu_count()
if env_forkcpu:
forkcpu = int(env_forkcpu)
else:
forkcpu = 4
return (cpu, cpu * forkcpu)
def get_mem_capacity():
env_forkmem = os.getenv('SYSTEM_TASK_FORKS_MEM', None)
if env_forkmem:
forkmem = int(env_forkmem)
else:
forkmem = 100
mem = psutil.virtual_memory().total
return (mem, max(1, ((mem / 1024 / 1024) - 2048) / forkmem))
def main():
module = AnsibleModule(argument_spec=dict())
ar = module.get_bin_path('ansible-runner', required=True)
try:
version = subprocess.check_output([ar, '--version'], stderr=subprocess.STDOUT).strip()
except subprocess.CalledProcessError as e:
module.fail_json(msg=to_text(e))
return
# NOTE: Duplicated with awx.main.utils.common capacity utilities
cpu, capacity_cpu = get_cpu_capacity()
mem, capacity_mem = get_mem_capacity()
# Module never results in a change
module.exit_json(
changed=False,
capacity_cpu=capacity_cpu,
capacity_mem=capacity_mem,
version=version,
ansible_facts=dict(awx_cpu=cpu, awx_mem=mem, awx_capacity_cpu=capacity_cpu, awx_capacity_mem=capacity_mem, awx_capacity_version=version),
)
if __name__ == '__main__':
main()
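A worked example of the memory-capacity arithmetic in `get_mem_capacity` above, with assumed values (16 GiB of RAM and the default budget of 100 MB per fork):

```python
mem = 16 * 1024**3                  # bytes, as psutil.virtual_memory().total reports
forkmem = 100                       # MB of memory budgeted per fork (the default)
capacity = max(1, ((mem / 1024 / 1024) - 2048) / forkmem)
print(capacity)                     # 143.36, i.e. about 143 forks after 2 GiB of headroom
```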

View File

@ -1,68 +0,0 @@
# Copyright (c) 2017 Ansible by Red Hat
#
# This file is part of Ansible Tower, but depends on code imported from Ansible.
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
from ansible.module_utils.basic import AnsibleModule
import glob
import os
import re
import shutil
import datetime
import subprocess
def main():
module = AnsibleModule(argument_spec=dict())
changed = False
paths_removed = set([])
# If a folder was last modified before this datetime, it will always be deleted
folder_cutoff = datetime.datetime.now() - datetime.timedelta(days=7)
# If a folder does not have an associated job running and is older than
# this datetime, then it will be deleted because its job has finished
job_cutoff = datetime.datetime.now() - datetime.timedelta(hours=1)
for search_pattern in ['/tmp/awx_[0-9]*_*', '/tmp/ansible_runner_pi_*']:
for path in glob.iglob(search_pattern):
st = os.stat(path)
modtime = datetime.datetime.fromtimestamp(st.st_mtime)
if modtime > job_cutoff:
continue
elif modtime > folder_cutoff:
try:
re_match = re.match(r'\/tmp\/awx_\d+_.+', path)
if re_match is not None:
try:
if subprocess.check_call(['ansible-runner', 'is-alive', path]) == 0:
continue
except subprocess.CalledProcessError:
# the job isn't running anymore, clean up this path
module.debug('Deleting path {} because its job has completed.'.format(path))
except (ValueError, IndexError):
continue
else:
module.debug('Deleting path {} because modification date is too old.'.format(path))
changed = True
paths_removed.add(path)
shutil.rmtree(path)
module.exit_json(changed=changed, paths_removed=list(paths_removed))
if __name__ == '__main__':
main()

View File

@ -1,31 +0,0 @@
import os
import stat
from ansible.module_utils.basic import AnsibleModule
#
# the purpose of this plugin is to call mkfifo and
# write raw SSH key data into the fifo created on the remote isolated host
#
def main():
module = AnsibleModule(argument_spec={'path': {'required': True, 'type': 'str'}, 'content': {'required': True, 'type': 'str'}}, supports_check_mode=False)
path = module.params['path']
os.chmod(path, stat.S_IRUSR | stat.S_IWUSR)
with open(path, 'w') as fifo:
data = module.params['content']
if 'OPENSSH PRIVATE KEY' in data and not data.endswith('\n'):
# we use ansible's lookup() to read this file from the disk,
# but ansible's lookup() *strips* newlines
# OpenSSH wants certain private keys to end with a newline (or it
# won't accept them)
data += '\n'
fifo.write(data)
module.exit_json(dest=path, changed=True)
if __name__ == '__main__':
main()
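The newline fix-up in the module above is easy to miss; a minimal illustration with fake key text:

```python
# Ansible's lookup() strips the trailing newline that OpenSSH requires on
# certain private keys, so it must be re-appended before writing to the fifo.
data = '-----BEGIN OPENSSH PRIVATE KEY-----\nabc\n-----END OPENSSH PRIVATE KEY-----'
if 'OPENSSH PRIVATE KEY' in data and not data.endswith('\n'):
    data += '\n'
assert data.endswith('KEY-----\n')
```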

View File

@ -126,7 +126,7 @@ LOGIN_URL = '/api/login/'
PROJECTS_ROOT = '/var/lib/awx/projects/'
# Absolute filesystem path to the directory to host collections for
# running inventory imports, isolated playbooks
# running inventory imports
AWX_ANSIBLE_COLLECTIONS_PATHS = os.path.join(BASE_DIR, 'vendor', 'awx_ansible_collections')
# Absolute filesystem path to the directory for job status stdout (default for
@ -408,23 +408,6 @@ AUTH_BASIC_ENABLED = True
# when trying to access a UI page that requires authentication.
LOGIN_REDIRECT_OVERRIDE = ''
# Default to skipping isolated host key checking (the initial connection will
# hang on an interactive "The authenticity of host example.org can't be
# established" message)
AWX_ISOLATED_HOST_KEY_CHECKING = False
# The number of seconds to sleep between status checks for jobs running on isolated nodes
AWX_ISOLATED_CHECK_INTERVAL = 30
# The timeout (in seconds) for launching jobs on isolated nodes
AWX_ISOLATED_LAUNCH_TIMEOUT = 600
# Ansible connection timeout (in seconds) for communicating with isolated instances
AWX_ISOLATED_CONNECTION_TIMEOUT = 10
# The time (in seconds) between the periodic isolated heartbeat status check
AWX_ISOLATED_PERIODIC_CHECK = 600
DEVSERVER_DEFAULT_ADDR = '0.0.0.0'
DEVSERVER_DEFAULT_PORT = '8013'
@ -440,7 +423,6 @@ CELERYBEAT_SCHEDULE = {
'k8s_reaper': {'task': 'awx.main.tasks.awx_k8s_reaper', 'schedule': timedelta(seconds=60), 'options': {'expires': 50}},
'send_subsystem_metrics': {'task': 'awx.main.analytics.analytics_tasks.send_subsystem_metrics', 'schedule': timedelta(seconds=20)},
'cleanup_images': {'task': 'awx.main.tasks.cleanup_execution_environment_images', 'schedule': timedelta(hours=3)},
# 'isolated_heartbeat': set up at the end of production.py and development.py
}
# Django Caching Configuration
@ -854,12 +836,6 @@ LOGGING = {
'filename': os.path.join(LOG_ROOT, 'tower_rbac_migrations.log'),
'formatter': 'simple',
},
'isolated_manager': {
'level': 'WARNING',
'class': 'logging.handlers.WatchedFileHandler',
'filename': os.path.join(LOG_ROOT, 'isolated_manager.log'),
'formatter': 'simple',
},
'job_lifecycle': {
'level': 'DEBUG',
'class': 'logging.handlers.WatchedFileHandler',
@ -881,8 +857,6 @@ LOGGING = {
'awx.main.dispatch': {'handlers': ['dispatcher']},
'awx.main.consumers': {'handlers': ['console', 'file', 'tower_warnings'], 'level': 'INFO'},
'awx.main.wsbroadcast': {'handlers': ['wsbroadcast']},
'awx.isolated.manager': {'level': 'WARNING', 'handlers': ['console', 'file', 'isolated_manager'], 'propagate': True},
'awx.isolated.manager.playbooks': {'handlers': ['management_playbooks'], 'propagate': False},
'awx.main.commands.inventory_import': {'handlers': ['inventory_import'], 'propagate': False},
'awx.main.tasks': {'handlers': ['task_system', 'external_logger'], 'propagate': False},
'awx.main.analytics': {'handlers': ['task_system', 'external_logger'], 'level': 'INFO', 'propagate': False},

View File

@ -35,9 +35,6 @@ LOGGING['handlers']['console']['()'] = 'awx.main.utils.handlers.ColorHandler' #
LOGGING['handlers']['task_system'] = LOGGING['handlers']['console'].copy() # noqa
COLOR_LOGS = True
# Pipe management playbook output to console
LOGGING['loggers']['awx.isolated.manager.playbooks']['propagate'] = True # noqa
# celery is annoyingly loud when docker containers start
LOGGING['loggers'].pop('celery', None) # noqa
# avoid awx.main.dispatch WARNING-level scaling worker up/down messages
@ -67,10 +64,6 @@ CALLBACK_QUEUE = "callback_tasks"
# Note: This setting may be overridden by database settings.
AWX_ROLES_ENABLED = True
AWX_ISOLATED_USERNAME = 'root'
AWX_ISOLATED_CHECK_INTERVAL = 1
AWX_ISOLATED_PERIODIC_CHECK = 30
# Disable Pendo on the UI for development/test.
# Note: This setting may be overridden by database settings.
PENDO_TRACKING_STATE = "off"
@ -136,17 +129,6 @@ if "pytest" in sys.modules:
}
}
CELERYBEAT_SCHEDULE.update(
{ # noqa
'isolated_heartbeat': {
'task': 'awx.main.tasks.awx_isolated_heartbeat',
'schedule': timedelta(seconds=AWX_ISOLATED_PERIODIC_CHECK), # noqa
'options': {'expires': AWX_ISOLATED_PERIODIC_CHECK * 2}, # noqa
}
}
)
CLUSTER_HOST_ID = socket.gethostname()
AWX_CALLBACK_PROFILE = True

View File

@ -40,8 +40,6 @@ ANSIBLE_VENV_PATH = os.path.join(BASE_VENV_PATH, "ansible")
# Tower base virtualenv paths and enablement
AWX_VENV_PATH = os.path.join(BASE_VENV_PATH, "awx")
AWX_ISOLATED_USERNAME = 'awx'
# Store a snapshot of default settings at this point before loading any
# customizable config files.
DEFAULTS_SNAPSHOT = {}
@ -91,14 +89,4 @@ except IOError:
# The below runs AFTER all of the custom settings are imported.
CELERYBEAT_SCHEDULE.update(
{ # noqa
'isolated_heartbeat': {
'task': 'awx.main.tasks.awx_isolated_heartbeat',
'schedule': timedelta(seconds=AWX_ISOLATED_PERIODIC_CHECK), # noqa
'options': {'expires': AWX_ISOLATED_PERIODIC_CHECK * 2}, # noqa
}
}
)
DATABASES['default'].setdefault('OPTIONS', dict()).setdefault('application_name', f'{CLUSTER_HOST_ID}-{os.getpid()}-{" ".join(sys.argv)}'[:63]) # noqa

View File

@ -71,62 +71,6 @@ Recommendations and constraints:
- Do not name any instance the same as a group name.
### Security-Isolated Rampart Groups
In Tower versions 3.2+, customers may optionally define isolated groups inside of security-restricted networking zones from which to run jobs and ad hoc commands. Instances in these groups will _not_ have a full install of Tower, but will have a minimal set of utilities used to run jobs. Isolated groups must be specified in the inventory file prefixed with `isolated_group_`. An example inventory file is shown below:
```
[tower]
towerA
towerB
towerC
[instance_group_security]
towerB
towerC
[isolated_group_govcloud]
isolatedA
isolatedB
[isolated_group_govcloud:vars]
controller=security
```
In the isolated rampart model, "controller" instances interact with "isolated" instances via a series of Ansible playbooks over SSH. At installation time, a randomized RSA key is generated and distributed as an authorized key to all "isolated" instances. The private half of the key is encrypted and stored within Tower, and is used to authenticate from "controller" instances to "isolated" instances when jobs are run.
When a job is scheduled to run on an "isolated" instance:
* The "controller" instance compiles metadata required to run the job and copies it to the "isolated" instance via `rsync` (any related project or inventory updates are run on the controller instance). This metadata includes:
- the entire SCM checkout directory for the project
- a static inventory file
- pexpect passwords
- environment variables
- the `ansible`/`ansible-playbook` command invocation, _i.e._, `ansible-playbook -i /path/to/inventory /path/to/playbook.yml -e ...`
* Once the metadata has been `rsync`ed to the isolated host, the "controller instance" starts a process on the "isolated" instance which consumes the metadata and starts running `ansible`/`ansible-playbook`. As the playbook runs, job artifacts (such as `stdout` and job events) are written to disk on the "isolated" instance.
* While the job runs on the "isolated" instance, the "controller" instance periodically copies job artifacts (`stdout` and job events) from the "isolated" instance using `rsync`. It consumes these until the job finishes running on the "isolated" instance.
Isolated groups are architected such that they may exist inside of a VPC with security rules that _only_ permit the instances in its `controller` group to access them; only ingress SSH traffic from "controller" instances to "isolated" instances is required.
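A heavily simplified sketch of that dispatch loop (the hostname, paths, and bare `rsync`/`ssh` subprocess calls are illustrative; the real controller drives all of this through the management playbooks removed elsewhere in this PR):

```python
import subprocess

ISOLATED = 'isolatedA'               # hypothetical isolated host
SRC = '/tmp/awx_42_abc123/'          # hypothetical private data dir

# 1. Push the compiled job metadata to the isolated instance.
subprocess.check_call(['rsync', '-az', SRC, f'{ISOLATED}:{SRC}'])

# 2. Start the run; artifacts accumulate under SRC/artifacts on the far side.
subprocess.check_call(['ssh', ISOLATED, f'ansible-runner start {SRC} -p run.yml'])

# 3. Poll: `is-alive` exits 0 while the run is going; pull artifacts on each pass.
while subprocess.call(['ssh', ISOLATED, f'ansible-runner is-alive {SRC}']) == 0:
    subprocess.check_call(['rsync', '-az', f'{ISOLATED}:{SRC}artifacts/',
                           f'{SRC}artifacts/'])
```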
Recommendations for system configuration with isolated groups:
- Do not create a group named `isolated_group_tower`.
- Do not put any isolated instances inside the `tower` group or other ordinary instance groups.
- Define the `controller` variable as either a group var or as a hostvar on all the instances in the isolated group. Please _do not_ allow isolated instances in the same group to have a different value for this variable; the behavior in this case cannot be predicted.
- Do not put an isolated instance in more than one isolated group.
Isolated Instance Authentication
--------------------------------
At installation time, by default, a randomized RSA key is generated and distributed as an authorized key to all "isolated" instances. The private half of the key is encrypted and stored within Tower, and is used to authenticate from "controller" instances to "isolated" instances when jobs are run.
For users who wish to manage SSH authentication from controlling instances to isolated instances via some system _outside_ of Tower (such as externally-managed, password-less SSH keys), this behavior can be disabled by unsetting two Tower API settings values:
`HTTP PATCH /api/v2/settings/jobs/ {'AWX_ISOLATED_PRIVATE_KEY': '', 'AWX_ISOLATED_PUBLIC_KEY': ''}`
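The same call expressed with Python `requests` (host and token are placeholders):

```python
import requests

requests.patch(
    'https://tower.example.org/api/v2/settings/jobs/',
    headers={'Authorization': 'Bearer <token>'},  # any admin-level auth works
    json={'AWX_ISOLATED_PRIVATE_KEY': '', 'AWX_ISOLATED_PUBLIC_KEY': ''},
)
```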
### Provisioning and Deprovisioning Instances and Groups
* **Provisioning** - Provisioning Instances after installation is supported by updating the `inventory` file and re-running the setup playbook. It's important that this file contain all passwords and information used when installing the cluster, or other instances may be reconfigured (this can be done intentionally).

View File

@ -1,13 +1,11 @@
# Container Groups
In a traditional AWX installation, jobs (ansible-playbook runs) are executed
either directly on a member of the cluster or on a pre-provisioned "isolated"
node.
The concept of a Container Group (working name) allows for job environments to
be provisioned on-demand as a Pod that exists only for the duration of the
playbook run. This is known as the ephemeral execution model and ensures a clean
environment for every job run.
In a traditional AWX installation, jobs (ansible-playbook runs) are
executed directly on a member of the cluster. The concept of a
Container Group (working name) allows for job environments to be
provisioned on-demand as a Pod that exists only for the duration of
the playbook run. This is known as the ephemeral execution model and
ensures a clean environment for every job run.
In some cases it is desirable to have the execution environment be "always-on"; this is done by manually creating an instance through the AWX API or UI.

View File

@ -1,13 +1,11 @@
# Container Groups
In a traditional AWX installation, jobs (ansible-playbook runs) are executed
either directly on a member of the cluster or on a pre-provisioned "isolated"
node.
The concept of a Container Group (working name) allows for job environments to
be provisioned on-demand as a Pod that exists only for the duration of the
playbook run. This is known as the ephemeral execution model and ensures a clean
environment for every job run.
In a traditional AWX installation, jobs (ansible-playbook runs) are
executed directly on a member of the cluster. The concept of a
Container Group (working name) allows for job environments to be
provisioned on-demand as a Pod that exists only for the duration of
the playbook run. This is known as the ephemeral execution model and
ensures a clean environment for every job run.
## Configuration

View File

@ -157,17 +157,6 @@ One of the most important tasks in a clustered AWX installation is the periodic
If a node in an AWX cluster discovers that one of its peers has not updated its heartbeat within a certain grace period, it is assumed to be offline, and its capacity is set to zero to avoid scheduling new tasks on that node. Additionally, jobs allegedly running or scheduled to run on that node are assumed to be lost, and "reaped", or marked as failed.
#### Isolated Tasks and Their Heartbeats
AWX reports as much status as it can via the browsable API at `/api/v2/ping` in order to provide validation of the health of an instance, including the timestamps of the last heartbeat. Since isolated nodes don't have access to the AWX database, their heartbeats are performed by controller nodes instead. A periodic task, `awx_isolated_heartbeat`, is responsible for periodically connecting from a controller to each isolated node and retrieving its capacity (via SSH).
When a job is scheduled to run on an isolated instance, the controller instance puts together the metadata required to run the job and then transfers it to the isolated instance. Once the metadata has been synchronized to the isolated host, the controller instance starts a process on the isolated instance, which consumes the metadata and starts running `ansible/ansible-playbook`. As the playbook runs, job artifacts (such as `stdout` and job events) are written to disk on the isolated instance.
While the job runs on the isolated instance, the controller instance periodically checks for and copies the job artifacts (_e.g._, `stdout` and job events) that it produces, processing these until the job finishes running.
To read more about Isolated Instances, refer to the [Isolated Instance Groups](https://docs.ansible.com/ansible-tower/latest/html/administration/clustering.html#isolated-instance-groups) section of the Clustering page in the Ansible Tower Administration guide.
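A hedged sketch of the staleness test implied by that description (the function name and plain-`datetime` handling are illustrative; the removed task works against the Instance model and `AWX_ISOLATED_PERIODIC_CHECK`):

```python
from datetime import datetime, timedelta, timezone

def needs_check(last_isolated_check, interval_seconds):
    """True when an isolated node's last capacity check is older than the interval."""
    cutoff = datetime.now(timezone.utc) - timedelta(seconds=interval_seconds)
    return last_isolated_check is None or last_isolated_check < cutoff

# Example: last checked 11 minutes ago, against the default 600-second interval.
assert needs_check(datetime.now(timezone.utc) - timedelta(minutes=11), 600)
```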
## AWX Jobs
### Unified Jobs

View File

@ -58,7 +58,6 @@ LOGGING['loggers']['django_auth_ldap']['handlers'] = ['console']
LOGGING['loggers']['social']['handlers'] = ['console']
LOGGING['loggers']['system_tracking_migrations']['handlers'] = ['console']
LOGGING['loggers']['rbac_migrations']['handlers'] = ['console']
LOGGING['loggers']['awx.isolated.manager.playbooks']['handlers'] = ['console']
LOGGING['handlers']['callback_receiver'] = {'class': 'logging.NullHandler'}
LOGGING['handlers']['task_system'] = {'class': 'logging.NullHandler'}
LOGGING['handlers']['tower_warnings'] = {'class': 'logging.NullHandler'}

View File

@ -1,17 +0,0 @@
---
version: '2'
services:
# Primary Tower Development Container link
awx:
environment:
AWX_GROUP_QUEUES: tower,thepentagon
links:
- isolated
# Isolated Rampart Container
isolated:
image: ${DEV_DOCKER_TAG_BASE}/awx_isolated:${TAG}
container_name: tools_isolated_1
hostname: isolated
volumes:
- "../awx/main/isolated:/awx_devel"
privileged: true

View File

@ -1,20 +0,0 @@
ARG TAG=latest
FROM ansible/awx_devel:${TAG}
RUN dnf install -y gcc python36-devel openssh-server
RUN python3 -m ensurepip && pip3 install "virtualenv < 20" ansible-runner
RUN dnf remove -y gcc python36-devel && rm -rf /var/cache/dnf
RUN rm -f /etc/ssh/ssh_host_ecdsa_key /etc/ssh/ssh_host_rsa_key
RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_ecdsa_key
RUN ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key
RUN sed -i "s/#UsePrivilegeSeparation.*/UsePrivilegeSeparation no/g" /etc/ssh/sshd_config
RUN sed -i "s/UsePAM.*/UsePAM yes/g" /etc/ssh/sshd_config
RUN sed -i "s/#StrictModes.*/StrictModes no/g" /etc/ssh/sshd_config
RUN mkdir -p /root/.ssh
RUN ln -s /awx_devel/authorized_keys /root/.ssh/authorized_keys
ENTRYPOINT ["tini", "--"]
CMD ["/usr/sbin/sshd", "-D"]
EXPOSE 22

View File

@ -1,67 +0,0 @@
## Instructions on using an isolated node
The building of the isolated node is done in the `make docker-compose-build`
target. Its image uses a different tag from the tools_awx container.
Given that the images are built, you can run the combined docker compose target. This uses
the base `docker-compose.yml` with modifications found in `docker-isolated-override.yml`.
You will still need to set COMPOSE_TAG to whatever your intended
base branch is. For example:
```bash
make docker-isolated COMPOSE_TAG=devel
```
This will automatically exchange the keys in order for the `tools_awx_1`
container to access the `tools_isolated_1` container over ssh.
After that, it will bring up all the containers like the normal docker-compose
workflow.
### Running a job on the Isolated Node
Create a job template that runs normally. Add the id of the instance
group named `thepentagon` to the JT's instance groups. To do this, POST
the id (probably id=2) to `/api/v2/job_templates/N/instance_groups/`.
After that, run the job template.
The models are automatically created when running the Makefile target,
and they are structured as follows:
+-------+     +-------------+
| tower |<----+ thepentagon |
+-------+     +-------------+
    ^               ^
    |               |
    |               |
+---+---+     +-----+----+
| tower |     | isolated |
+-------+     +----------+
The `controller` for the group "thepentagon" and all hosts therein is
determined by a ForeignKey within the instance group.
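In ORM terms, that relationship looked roughly like this before removal (a sketch based on the fixtures deleted earlier in this diff):

```python
from awx.main.models import InstanceGroup  # as it existed before this PR

tower = InstanceGroup.objects.create(name='tower')
iso = InstanceGroup.objects.create(name='thepentagon', controller=tower)

assert iso.controller == tower   # the ForeignKey mentioned above
assert tower.is_controller       # convenience property, also removed by this PR
```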
### Run a playbook
In order to run an isolated job, associate the instance group `thepentagon` with
a job template, inventory, or organization, then run a job that derives from
that resource. You should be able to confirm success by inspecting the
`instance_group` of the job.
#### Advanced Manual Testing
If you want to run a job manually inside of the isolated container with this
tooling, you need a private data directory. Normal isolated job runs will
clean up their private data directory, but you can temporarily disable this
by disabling some parts of the cleanup_isolated.yml playbook.
Example location of a private data directory:
`/tmp/awx_29_OM6Mnx/`
The following command would run the playbook corresponding to that job.
```bash
ansible-runner start /tmp/awx_29_OM6Mnx/ -p some_playbook.yml
```
Other ansible-runner commands include `start`, `is-alive`, and `stop`.