Mirror of https://github.com/ansible/awx.git (synced 2026-01-10 15:32:07 -03:30)
Merge pull request #9957 from jbradberry/isolated-removal
Isolated removal

SUMMARY
Removal of the isolated nodes feature.

ISSUE TYPE
Feature Pull Request

COMPONENT NAME
API

AWX VERSION

Reviewed-by: Alan Rominger <arominge@redhat.com>
Reviewed-by: Jeff Bradberry <None>
Reviewed-by: Elyézer Rezende <None>
Reviewed-by: Bianca Henderson <beeankha@gmail.com>
This commit is contained in: commit 6bea5dd294
Makefile (7 changed lines)
@ -174,12 +174,7 @@ init:
. $(VENV_BASE)/awx/bin/activate; \
fi; \
$(MANAGEMENT_COMMAND) provision_instance --hostname=$(COMPOSE_HOST); \
$(MANAGEMENT_COMMAND) register_queue --queuename=tower --instance_percent=100;\
if [ "$(AWX_GROUP_QUEUES)" == "tower,thepentagon" ]; then \
$(MANAGEMENT_COMMAND) provision_instance --hostname=isolated; \
$(MANAGEMENT_COMMAND) register_queue --queuename='thepentagon' --hostnames=isolated --controller=tower; \
$(MANAGEMENT_COMMAND) generate_isolated_key > /awx_devel/awx/main/isolated/authorized_keys; \
fi;
$(MANAGEMENT_COMMAND) register_queue --queuename=tower --instance_percent=100;

# Refresh development environment after pulling new code.
refresh: clean requirements_dev version_file develop migrate
@ -170,7 +170,7 @@ SUMMARIZABLE_FK_FIELDS = {
'inventory_source': ('id', 'name', 'source', 'last_updated', 'status'),
'role': ('id', 'role_field'),
'notification_template': DEFAULT_SUMMARY_FIELDS,
'instance_group': ('id', 'name', 'controller_id', 'is_container_group'),
'instance_group': ('id', 'name', 'is_container_group'),
'insights_credential': DEFAULT_SUMMARY_FIELDS,
'source_credential': DEFAULT_SUMMARY_FIELDS + ('kind', 'cloud', 'credential_type_id'),
'target_credential': DEFAULT_SUMMARY_FIELDS + ('kind', 'cloud', 'credential_type_id'),
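With controller_id dropped from the instance_group entry above, related instance groups are now summarized by id, name, and container-group flag only. A minimal sketch of what that fragment of an API response's summary_fields could look like after this change (the values and the surrounding key are illustrative, not taken from the diff):

# Hypothetical summary_fields fragment for an object whose instance group is the
# default queue; note there is no longer a controller_id key.
summary_fields = {
    'instance_group': {
        'id': 1,
        'name': 'tower',
        'is_container_group': False,
    }
}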
@ -4818,10 +4818,6 @@ class InstanceGroupSerializer(BaseSerializer):
|
||||
)
|
||||
jobs_total = serializers.IntegerField(help_text=_('Count of all jobs that target this instance group'), read_only=True)
|
||||
instances = serializers.SerializerMethodField()
|
||||
is_controller = serializers.BooleanField(help_text=_('Indicates whether instance group controls any other group'), read_only=True)
|
||||
is_isolated = serializers.BooleanField(
|
||||
help_text=_('Indicates whether instances in this group are isolated.' 'Isolated groups have a designated controller group.'), read_only=True
|
||||
)
|
||||
is_container_group = serializers.BooleanField(
|
||||
required=False,
|
||||
help_text=_('Indicates whether instances in this group are containerized.' 'Containerized groups have a designated Openshift or Kubernetes cluster.'),
|
||||
@ -4869,9 +4865,6 @@ class InstanceGroupSerializer(BaseSerializer):
|
||||
"jobs_running",
|
||||
"jobs_total",
|
||||
"instances",
|
||||
"controller",
|
||||
"is_controller",
|
||||
"is_isolated",
|
||||
"is_container_group",
|
||||
"credential",
|
||||
"policy_instance_percentage",
|
||||
@ -4885,8 +4878,6 @@ class InstanceGroupSerializer(BaseSerializer):
res = super(InstanceGroupSerializer, self).get_related(obj)
res['jobs'] = self.reverse('api:instance_group_unified_jobs_list', kwargs={'pk': obj.pk})
res['instances'] = self.reverse('api:instance_group_instance_list', kwargs={'pk': obj.pk})
if obj.controller_id:
res['controller'] = self.reverse('api:instance_group_detail', kwargs={'pk': obj.controller_id})
if obj.credential:
res['credential'] = self.reverse('api:credential_detail', kwargs={'pk': obj.credential_id})
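After this hunk, get_related no longer emits a controller link; the related block for an instance group is reduced to jobs, instances, and, when a credential is attached, credential. A rough sketch of the resulting links (URLs and primary keys are illustrative):

# Hypothetical 'related' links for instance group pk=1 after the change.
related = {
    'jobs': '/api/v2/instance_groups/1/jobs/',
    'instances': '/api/v2/instance_groups/1/instances/',
    'credential': '/api/v2/credentials/3/',  # present only when obj.credential is set
}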
@ -4898,10 +4889,6 @@ class InstanceGroupSerializer(BaseSerializer):
|
||||
raise serializers.ValidationError(_('Duplicate entry {}.').format(instance_name))
|
||||
if not Instance.objects.filter(hostname=instance_name).exists():
|
||||
raise serializers.ValidationError(_('{} is not a valid hostname of an existing instance.').format(instance_name))
|
||||
if Instance.objects.get(hostname=instance_name).is_isolated():
|
||||
raise serializers.ValidationError(_('Isolated instances may not be added or removed from instances groups via the API.'))
|
||||
if self.instance and self.instance.controller_id is not None:
|
||||
raise serializers.ValidationError(_('Isolated instance group membership may not be managed via the API.'))
|
||||
if value and self.instance and self.instance.is_container_group:
|
||||
raise serializers.ValidationError(_('Containerized instances may not be managed via the API'))
|
||||
return value
|
||||
|
||||
@ -418,14 +418,6 @@ class InstanceGroupDetail(RelatedJobsPreventDeleteMixin, RetrieveUpdateDestroyAP
|
||||
data.pop('policy_instance_list', None)
|
||||
return super(InstanceGroupDetail, self).update_raw_data(data)
|
||||
|
||||
def destroy(self, request, *args, **kwargs):
|
||||
instance = self.get_object()
|
||||
if instance.controller is not None:
|
||||
raise PermissionDenied(detail=_("Isolated Groups can not be removed from the API"))
|
||||
if instance.controlled_groups.count():
|
||||
raise PermissionDenied(detail=_("Instance Groups acting as a controller for an Isolated Group can not be removed from the API"))
|
||||
return super(InstanceGroupDetail, self).destroy(request, *args, **kwargs)
|
||||
|
||||
|
||||
class InstanceGroupUnifiedJobsList(SubListAPIView):
|
||||
|
||||
|
||||
@ -85,15 +85,6 @@ class InstanceGroupMembershipMixin(object):
|
||||
ig_obj.save(update_fields=['policy_instance_list'])
|
||||
return response
|
||||
|
||||
def is_valid_relation(self, parent, sub, created=False):
|
||||
if sub.is_isolated():
|
||||
return {'error': _('Isolated instances may not be added or removed from instances groups via the API.')}
|
||||
if self.parent_model is InstanceGroup:
|
||||
ig_obj = self.get_parent_object()
|
||||
if ig_obj.controller_id is not None:
|
||||
return {'error': _('Isolated instance group membership may not be managed via the API.')}
|
||||
return None
|
||||
|
||||
def unattach_validate(self, request):
|
||||
(sub_id, res) = super(InstanceGroupMembershipMixin, self).unattach_validate(request)
|
||||
if res:
|
||||
|
||||
@ -92,11 +92,7 @@ class SettingsRegistry(object):
|
||||
continue
|
||||
if kwargs.get('category_slug', None) in slugs_to_ignore:
|
||||
continue
|
||||
if (
|
||||
read_only in {True, False}
|
||||
and kwargs.get('read_only', False) != read_only
|
||||
and setting not in ('INSTALL_UUID', 'AWX_ISOLATED_PRIVATE_KEY', 'AWX_ISOLATED_PUBLIC_KEY')
|
||||
):
|
||||
if read_only in {True, False} and kwargs.get('read_only', False) != read_only and setting != 'INSTALL_UUID':
|
||||
# Note: Doesn't catch fields that set read_only via __init__;
|
||||
# read-only field kwargs should always include read_only=True.
|
||||
continue
|
||||
|
||||
@ -81,10 +81,8 @@ class SettingSingletonSerializer(serializers.Serializer):
|
||||
if self.instance and not hasattr(self.instance, key):
|
||||
continue
|
||||
extra_kwargs = {}
|
||||
# Make LICENSE and AWX_ISOLATED_KEY_GENERATION read-only here;
|
||||
# LICENSE is only updated via /api/v2/config/
|
||||
# AWX_ISOLATED_KEY_GENERATION is only set/unset via the setup playbook
|
||||
if key in ('LICENSE', 'AWX_ISOLATED_KEY_GENERATION'):
|
||||
# Make LICENSE read-only here; LICENSE is only updated via /api/v2/config/
|
||||
if key == 'LICENSE':
|
||||
extra_kwargs['read_only'] = True
|
||||
field = settings_registry.get_setting_field(key, mixin_class=SettingFieldMixin, for_user=bool(category_slug == 'user'), **extra_kwargs)
|
||||
fields[key] = field
|
||||
|
||||
@ -350,13 +350,8 @@ class SettingsWrapper(UserSettingsHolder):
|
||||
if value is empty:
|
||||
setting = None
|
||||
setting_id = None
|
||||
if not field.read_only or name in (
|
||||
# these values are read-only - however - we *do* want
|
||||
# to fetch their value from the database
|
||||
'INSTALL_UUID',
|
||||
'AWX_ISOLATED_PRIVATE_KEY',
|
||||
'AWX_ISOLATED_PUBLIC_KEY',
|
||||
):
|
||||
# this value is read-only, however we *do* want to fetch its value from the database
|
||||
if not field.read_only or name == 'INSTALL_UUID':
|
||||
setting = Setting.objects.filter(key=name, user__isnull=True).order_by('pk').first()
|
||||
if setting:
|
||||
if getattr(field, 'encrypted', False):
|
||||
|
||||
@ -219,17 +219,11 @@ def projects_by_scm_type(since, **kwargs):
|
||||
return counts
|
||||
|
||||
|
||||
def _get_isolated_datetime(last_check):
|
||||
if last_check:
|
||||
return last_check.isoformat()
|
||||
return last_check
|
||||
|
||||
|
||||
@register('instance_info', '1.0', description=_('Cluster topology and capacity'))
|
||||
@register('instance_info', '1.1', description=_('Cluster topology and capacity'))
|
||||
def instance_info(since, include_hostnames=False, **kwargs):
|
||||
info = {}
|
||||
instances = models.Instance.objects.values_list('hostname').values(
|
||||
'uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'hostname', 'last_isolated_check', 'enabled'
|
||||
'uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'hostname', 'enabled'
|
||||
)
|
||||
for instance in instances:
|
||||
consumed_capacity = sum(x.task_impact for x in models.UnifiedJob.objects.filter(execution_node=instance['hostname'], status__in=('running', 'waiting')))
|
||||
@ -240,7 +234,6 @@ def instance_info(since, include_hostnames=False, **kwargs):
'cpu': instance['cpu'],
'memory': instance['memory'],
'managed_by_policy': instance['managed_by_policy'],
'last_isolated_check': _get_isolated_datetime(instance['last_isolated_check']),
'enabled': instance['enabled'],
'consumed_capacity': consumed_capacity,
'remaining_capacity': instance['capacity'] - consumed_capacity,
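With last_isolated_check removed from both the values() query and the per-instance payload, each collected instance_info entry carries only the remaining keys. A trimmed, hypothetical example of one entry, limited to the keys visible in this hunk (the values and the uuid key are made up):

# Illustrative instance_info entry after the change.
info['example-uuid'] = {
    'cpu': 8,
    'memory': 16858796032,
    'managed_by_policy': True,
    'enabled': True,
    'consumed_capacity': 0,
    'remaining_capacity': 57,
}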
@ -184,7 +184,6 @@ def metrics():
|
||||
INSTANCE_INFO.labels(hostname=hostname, instance_uuid=uuid).info(
|
||||
{
|
||||
'enabled': str(instance_data[uuid]['enabled']),
|
||||
'last_isolated_check': getattr(instance_data[uuid], 'last_isolated_check', 'None'),
|
||||
'managed_by_policy': str(instance_data[uuid]['managed_by_policy']),
|
||||
'version': instance_data[uuid]['version'],
|
||||
}
|
||||
|
||||
@ -250,95 +250,6 @@ register(
|
||||
category_slug='jobs',
|
||||
)
|
||||
|
||||
register(
|
||||
'AWX_ISOLATED_CHECK_INTERVAL',
|
||||
field_class=fields.IntegerField,
|
||||
min_value=0,
|
||||
label=_('Isolated status check interval'),
|
||||
help_text=_('The number of seconds to sleep between status checks for jobs running on isolated instances.'),
|
||||
category=_('Jobs'),
|
||||
category_slug='jobs',
|
||||
unit=_('seconds'),
|
||||
)
|
||||
|
||||
register(
|
||||
'AWX_ISOLATED_LAUNCH_TIMEOUT',
|
||||
field_class=fields.IntegerField,
|
||||
min_value=0,
|
||||
label=_('Isolated launch timeout'),
|
||||
help_text=_(
|
||||
'The timeout (in seconds) for launching jobs on isolated instances. '
|
||||
'This includes the time needed to copy source control files (playbooks) to the isolated instance.'
|
||||
),
|
||||
category=_('Jobs'),
|
||||
category_slug='jobs',
|
||||
unit=_('seconds'),
|
||||
)
|
||||
|
||||
register(
|
||||
'AWX_ISOLATED_CONNECTION_TIMEOUT',
|
||||
field_class=fields.IntegerField,
|
||||
min_value=0,
|
||||
default=10,
|
||||
label=_('Isolated connection timeout'),
|
||||
help_text=_(
|
||||
'Ansible SSH connection timeout (in seconds) to use when communicating with isolated instances. '
|
||||
'Value should be substantially greater than expected network latency.'
|
||||
),
|
||||
category=_('Jobs'),
|
||||
category_slug='jobs',
|
||||
unit=_('seconds'),
|
||||
)
|
||||
|
||||
register(
|
||||
'AWX_ISOLATED_HOST_KEY_CHECKING',
|
||||
field_class=fields.BooleanField,
|
||||
label=_('Isolated host key checking'),
|
||||
help_text=_('When set to True, AWX will enforce strict host key checking for communication with isolated nodes.'),
|
||||
category=_('Jobs'),
|
||||
category_slug='jobs',
|
||||
default=False,
|
||||
)
|
||||
|
||||
register(
|
||||
'AWX_ISOLATED_KEY_GENERATION',
|
||||
field_class=fields.BooleanField,
|
||||
default=True,
|
||||
label=_('Generate RSA keys for isolated instances'),
|
||||
help_text=_(
|
||||
'If set, a random RSA key will be generated and distributed to '
|
||||
'isolated instances. To disable this behavior and manage authentication '
|
||||
'for isolated instances outside of Tower, disable this setting.'
|
||||
), # noqa
|
||||
category=_('Jobs'),
|
||||
category_slug='jobs',
|
||||
)
|
||||
|
||||
register(
|
||||
'AWX_ISOLATED_PRIVATE_KEY',
|
||||
field_class=fields.CharField,
|
||||
default='',
|
||||
allow_blank=True,
|
||||
encrypted=True,
|
||||
read_only=True,
|
||||
label=_('The RSA private key for SSH traffic to isolated instances'),
|
||||
help_text=_('The RSA private key for SSH traffic to isolated instances'), # noqa
|
||||
category=_('Jobs'),
|
||||
category_slug='jobs',
|
||||
)
|
||||
|
||||
register(
|
||||
'AWX_ISOLATED_PUBLIC_KEY',
|
||||
field_class=fields.CharField,
|
||||
default='',
|
||||
allow_blank=True,
|
||||
read_only=True,
|
||||
label=_('The RSA public key for SSH traffic to isolated instances'),
|
||||
help_text=_('The RSA public key for SSH traffic to isolated instances'), # noqa
|
||||
category=_('Jobs'),
|
||||
category_slug='jobs',
|
||||
)
|
||||
|
||||
register(
|
||||
'AWX_TASK_ENV',
|
||||
field_class=fields.KeyValueField,
|
||||
|
||||
awx/main/isolated/.gitignore (vendored, 1 changed line)
@ -1 +0,0 @@
authorized_keys
@ -1,365 +0,0 @@
|
||||
import fnmatch
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import stat
|
||||
import tempfile
|
||||
import time
|
||||
import logging
|
||||
import datetime
|
||||
|
||||
from django.conf import settings
|
||||
import ansible_runner
|
||||
|
||||
import awx
|
||||
from awx.main.utils import get_system_task_capacity
|
||||
|
||||
logger = logging.getLogger('awx.isolated.manager')
|
||||
playbook_logger = logging.getLogger('awx.isolated.manager.playbooks')
|
||||
|
||||
|
||||
def set_pythonpath(venv_libdir, env):
|
||||
env.pop('PYTHONPATH', None) # default to none if no python_ver matches
|
||||
for version in os.listdir(venv_libdir):
|
||||
if fnmatch.fnmatch(version, 'python[23].*'):
|
||||
if os.path.isdir(os.path.join(venv_libdir, version)):
|
||||
env['PYTHONPATH'] = os.path.join(venv_libdir, version, "site-packages") + ":"
|
||||
break
|
||||
|
||||
|
||||
class IsolatedManager(object):
|
||||
def __init__(self, event_handler, canceled_callback=None, check_callback=None):
|
||||
"""
|
||||
:param event_handler: a callable used to persist event data from isolated nodes
|
||||
:param canceled_callback: a callable - which returns `True` or `False`
|
||||
- signifying if the job has been prematurely
|
||||
canceled
|
||||
"""
|
||||
self.event_handler = event_handler
|
||||
self.canceled_callback = canceled_callback
|
||||
self.check_callback = check_callback
|
||||
self.started_at = None
|
||||
self.captured_command_artifact = False
|
||||
self.instance = None
|
||||
|
||||
def build_inventory(self, hosts):
|
||||
inventory = '\n'.join(['{} ansible_ssh_user={}'.format(host, settings.AWX_ISOLATED_USERNAME) for host in hosts])
|
||||
|
||||
return inventory
|
||||
|
||||
def build_runner_params(self, hosts, verbosity=1):
|
||||
env = dict(os.environ.items())
|
||||
env['ANSIBLE_RETRY_FILES_ENABLED'] = 'False'
|
||||
env['ANSIBLE_HOST_KEY_CHECKING'] = str(settings.AWX_ISOLATED_HOST_KEY_CHECKING)
|
||||
env['ANSIBLE_LIBRARY'] = os.path.join(os.path.dirname(awx.__file__), 'plugins', 'isolated')
|
||||
env['ANSIBLE_COLLECTIONS_PATHS'] = settings.AWX_ANSIBLE_COLLECTIONS_PATHS
|
||||
set_pythonpath(os.path.join(settings.ANSIBLE_VENV_PATH, 'lib'), env)
|
||||
|
||||
def finished_callback(runner_obj):
|
||||
if runner_obj.status == 'failed' and runner_obj.config.playbook != 'check_isolated.yml':
|
||||
# failed for clean_isolated.yml just means the playbook hasn't
|
||||
# exited on the isolated host
|
||||
stdout = runner_obj.stdout.read()
|
||||
playbook_logger.error(stdout)
|
||||
elif runner_obj.status == 'timeout':
|
||||
# this means that the default idle timeout of
|
||||
# (2 * AWX_ISOLATED_CONNECTION_TIMEOUT) was exceeded
|
||||
# (meaning, we tried to sync with an isolated node, and we got
|
||||
# no new output for 2 * AWX_ISOLATED_CONNECTION_TIMEOUT seconds)
|
||||
# this _usually_ means SSH key auth from the controller ->
|
||||
# isolated didn't work, and ssh is hung waiting on interactive
|
||||
# input e.g.,
|
||||
#
|
||||
# awx@isolated's password:
|
||||
stdout = runner_obj.stdout.read()
|
||||
playbook_logger.error(stdout)
|
||||
else:
|
||||
playbook_logger.info(runner_obj.stdout.read())
|
||||
|
||||
return {
|
||||
'project_dir': os.path.abspath(os.path.join(os.path.dirname(awx.__file__), 'playbooks')),
|
||||
'inventory': self.build_inventory(hosts),
|
||||
'envvars': env,
|
||||
'finished_callback': finished_callback,
|
||||
'verbosity': verbosity,
|
||||
'cancel_callback': self.canceled_callback,
|
||||
'settings': {
|
||||
'job_timeout': settings.AWX_ISOLATED_LAUNCH_TIMEOUT,
|
||||
'suppress_ansible_output': True,
|
||||
},
|
||||
}
|
||||
|
||||
def path_to(self, *args):
|
||||
return os.path.join(self.private_data_dir, *args)
|
||||
|
||||
def run_management_playbook(self, playbook, private_data_dir, idle_timeout=None, **kw):
|
||||
iso_dir = tempfile.mkdtemp(prefix=playbook, dir=private_data_dir)
|
||||
params = self.runner_params.copy()
|
||||
params.get('envvars', dict())['ANSIBLE_CALLBACK_WHITELIST'] = 'profile_tasks'
|
||||
params['playbook'] = playbook
|
||||
params['private_data_dir'] = iso_dir
|
||||
if idle_timeout:
|
||||
params['settings']['idle_timeout'] = idle_timeout
|
||||
else:
|
||||
params['settings'].pop('idle_timeout', None)
|
||||
params.update(**kw)
|
||||
if all([getattr(settings, 'AWX_ISOLATED_KEY_GENERATION', False) is True, getattr(settings, 'AWX_ISOLATED_PRIVATE_KEY', None)]):
|
||||
params['ssh_key'] = settings.AWX_ISOLATED_PRIVATE_KEY
|
||||
return ansible_runner.interface.run(**params)
|
||||
|
||||
def dispatch(self, playbook=None, module=None, module_args=None):
|
||||
"""
|
||||
Ship the runner payload to a remote host for isolated execution.
|
||||
"""
|
||||
self.handled_events = set()
|
||||
self.started_at = time.time()
|
||||
|
||||
# exclude certain files from the rsync
|
||||
rsync_exclude = [
|
||||
# don't rsync source control metadata (it can be huge!)
|
||||
'- /project/.git',
|
||||
'- /project/.svn',
|
||||
# don't rsync job events that are in the process of being written
|
||||
'- /artifacts/job_events/*-partial.json.tmp',
|
||||
# don't rsync the ssh_key FIFO
|
||||
'- /env/ssh_key',
|
||||
# don't rsync kube config files
|
||||
'- .kubeconfig*',
|
||||
]
|
||||
|
||||
for filename, data in (['.rsync-filter', '\n'.join(rsync_exclude)],):
|
||||
path = self.path_to(filename)
|
||||
with open(path, 'w') as f:
|
||||
f.write(data)
|
||||
os.chmod(path, stat.S_IRUSR)
|
||||
|
||||
extravars = {
|
||||
'src': self.private_data_dir,
|
||||
'dest': settings.AWX_ISOLATION_BASE_PATH,
|
||||
'ident': self.ident,
|
||||
'job_id': self.instance.id,
|
||||
}
|
||||
if playbook:
|
||||
extravars['playbook'] = playbook
|
||||
if module and module_args:
|
||||
extravars['module'] = module
|
||||
extravars['module_args'] = module_args
|
||||
|
||||
logger.debug('Starting job {} on isolated host with `run_isolated.yml` playbook.'.format(self.instance.id))
|
||||
runner_obj = self.run_management_playbook(
|
||||
'run_isolated.yml', self.private_data_dir, idle_timeout=max(60, 2 * settings.AWX_ISOLATED_CONNECTION_TIMEOUT), extravars=extravars
|
||||
)
|
||||
|
||||
if runner_obj.status == 'failed':
|
||||
self.instance.result_traceback = runner_obj.stdout.read()
|
||||
self.instance.save(update_fields=['result_traceback'])
|
||||
return 'error', runner_obj.rc
|
||||
|
||||
return runner_obj.status, runner_obj.rc
|
||||
|
||||
def check(self, interval=None):
|
||||
"""
|
||||
Repeatedly poll the isolated node to determine if the job has run.
|
||||
|
||||
On success, copy job artifacts to the controlling node.
|
||||
On failure, continue to poll the isolated node (until the job timeout
|
||||
is exceeded).
|
||||
|
||||
For a completed job run, this function returns (status, rc),
|
||||
representing the status and return code of the isolated
|
||||
`ansible-playbook` run.
|
||||
|
||||
:param interval: an interval (in seconds) to wait between status polls
|
||||
"""
|
||||
interval = interval if interval is not None else settings.AWX_ISOLATED_CHECK_INTERVAL
|
||||
extravars = {'src': self.private_data_dir, 'job_id': self.instance.id}
|
||||
status = 'failed'
|
||||
rc = None
|
||||
last_check = time.time()
|
||||
|
||||
while status == 'failed':
|
||||
canceled = self.canceled_callback() if self.canceled_callback else False
|
||||
if not canceled and time.time() - last_check < interval:
|
||||
# If the job isn't canceled, but we haven't waited `interval` seconds, wait longer
|
||||
time.sleep(1)
|
||||
continue
|
||||
|
||||
if canceled:
|
||||
logger.warning('Isolated job {} was manually canceled.'.format(self.instance.id))
|
||||
|
||||
logger.debug('Checking on isolated job {} with `check_isolated.yml`.'.format(self.instance.id))
|
||||
time_start = datetime.datetime.now()
|
||||
runner_obj = self.run_management_playbook('check_isolated.yml', self.private_data_dir, extravars=extravars)
|
||||
time_end = datetime.datetime.now()
|
||||
time_diff = time_end - time_start
|
||||
logger.debug('Finished checking on isolated job {} with `check_isolated.yml` took {} seconds.'.format(self.instance.id, time_diff.total_seconds()))
|
||||
status, rc = runner_obj.status, runner_obj.rc
|
||||
|
||||
if self.check_callback is not None and not self.captured_command_artifact:
|
||||
command_path = self.path_to('artifacts', self.ident, 'command')
|
||||
# If the configuration artifact has been synced back, update the model
|
||||
if os.path.exists(command_path):
|
||||
try:
|
||||
with open(command_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
self.check_callback(data)
|
||||
self.captured_command_artifact = True
|
||||
except json.decoder.JSONDecodeError: # Just in case it's not fully here yet.
|
||||
pass
|
||||
|
||||
self.consume_events()
|
||||
|
||||
last_check = time.time()
|
||||
|
||||
if status == 'successful':
|
||||
status_path = self.path_to('artifacts', self.ident, 'status')
|
||||
rc_path = self.path_to('artifacts', self.ident, 'rc')
|
||||
if os.path.exists(status_path):
|
||||
with open(status_path, 'r') as f:
|
||||
status = f.readline()
|
||||
with open(rc_path, 'r') as f:
|
||||
rc = int(f.readline())
|
||||
else:
|
||||
# if there's no status file, it means that runner _probably_
|
||||
# exited with a traceback (which should be logged to
|
||||
# daemon.log) Record it so we can see how runner failed.
|
||||
daemon_path = self.path_to('daemon.log')
|
||||
if os.path.exists(daemon_path):
|
||||
with open(daemon_path, 'r') as f:
|
||||
self.instance.result_traceback = f.read()
|
||||
self.instance.save(update_fields=['result_traceback'])
|
||||
else:
|
||||
logger.error('Failed to rsync daemon.log (is ansible-runner installed on the isolated host?)')
|
||||
status = 'failed'
|
||||
rc = 1
|
||||
|
||||
# consume events one last time just to be sure we didn't miss anything
|
||||
# in the final sync
|
||||
self.consume_events()
|
||||
|
||||
return status, rc
|
||||
|
||||
def consume_events(self):
|
||||
# discover new events and ingest them
|
||||
events_path = self.path_to('artifacts', self.ident, 'job_events')
|
||||
|
||||
# it's possible that `events_path` doesn't exist *yet*, because runner
|
||||
# hasn't actually written any events yet (if you ran e.g., a sleep 30)
|
||||
# only attempt to consume events if any were rsynced back
|
||||
if os.path.exists(events_path):
|
||||
for event in set(os.listdir(events_path)) - self.handled_events:
|
||||
path = os.path.join(events_path, event)
|
||||
if os.path.exists(path) and os.path.isfile(path):
|
||||
try:
|
||||
event_data = json.load(open(os.path.join(events_path, event), 'r'))
|
||||
except json.decoder.JSONDecodeError:
|
||||
# This means the event we got back isn't valid JSON
|
||||
# that can happen if runner is still partially
|
||||
# writing an event file while it's rsyncing
|
||||
# these event writes are _supposed_ to be atomic
|
||||
# but it doesn't look like they actually are in
|
||||
# practice
|
||||
# in this scenario, just ignore this event and try it
|
||||
# again on the next sync
|
||||
continue
|
||||
self.event_handler(event_data)
|
||||
self.handled_events.add(event)
|
||||
|
||||
def cleanup(self):
|
||||
extravars = {
|
||||
'private_data_dir': self.private_data_dir,
|
||||
'cleanup_dirs': [
|
||||
self.private_data_dir,
|
||||
],
|
||||
}
|
||||
logger.debug('Cleaning up job {} on isolated host with `clean_isolated.yml` playbook.'.format(self.instance.id))
|
||||
self.run_management_playbook('clean_isolated.yml', self.private_data_dir, extravars=extravars)
|
||||
|
||||
@classmethod
|
||||
def update_capacity(cls, instance, task_result):
|
||||
instance.version = 'ansible-runner-{}'.format(task_result['version'])
|
||||
|
||||
if instance.capacity == 0 and task_result['capacity_cpu']:
|
||||
logger.warning('Isolated instance {} has re-joined.'.format(instance.hostname))
|
||||
instance.cpu = int(task_result['cpu'])
|
||||
instance.memory = int(task_result['mem'])
|
||||
instance.cpu_capacity = int(task_result['capacity_cpu'])
|
||||
instance.mem_capacity = int(task_result['capacity_mem'])
|
||||
instance.capacity = get_system_task_capacity(
|
||||
scale=instance.capacity_adjustment, cpu_capacity=int(task_result['capacity_cpu']), mem_capacity=int(task_result['capacity_mem'])
|
||||
)
|
||||
instance.save(update_fields=['cpu', 'memory', 'cpu_capacity', 'mem_capacity', 'capacity', 'version', 'modified'])
|
||||
|
||||
def health_check(self, instance_qs):
|
||||
"""
|
||||
:param instance_qs: List of Django objects representing the
|
||||
isolated instances to manage
|
||||
Runs playbook that will
|
||||
- determine if instance is reachable
|
||||
- find the instance capacity
|
||||
- clean up orphaned private files
|
||||
Performs save on each instance to update its capacity.
|
||||
"""
|
||||
instance_qs = [i for i in instance_qs if i.enabled]
|
||||
if not len(instance_qs):
|
||||
return
|
||||
try:
|
||||
private_data_dir = tempfile.mkdtemp(prefix='awx_iso_heartbeat_', dir=settings.AWX_ISOLATION_BASE_PATH)
|
||||
self.runner_params = self.build_runner_params([instance.hostname for instance in instance_qs])
|
||||
self.runner_params['private_data_dir'] = private_data_dir
|
||||
self.runner_params['forks'] = len(instance_qs)
|
||||
runner_obj = self.run_management_playbook('heartbeat_isolated.yml', private_data_dir)
|
||||
|
||||
for instance in instance_qs:
|
||||
task_result = {}
|
||||
try:
|
||||
task_result = runner_obj.get_fact_cache(instance.hostname)
|
||||
except Exception:
|
||||
logger.exception('Failed to read status from isolated instances')
|
||||
if 'awx_capacity_cpu' in task_result and 'awx_capacity_mem' in task_result:
|
||||
task_result = {
|
||||
'cpu': task_result['awx_cpu'],
|
||||
'mem': task_result['awx_mem'],
|
||||
'capacity_cpu': task_result['awx_capacity_cpu'],
|
||||
'capacity_mem': task_result['awx_capacity_mem'],
|
||||
'version': task_result['awx_capacity_version'],
|
||||
}
|
||||
IsolatedManager.update_capacity(instance, task_result)
|
||||
logger.debug('Isolated instance {} successful heartbeat'.format(instance.hostname))
|
||||
elif instance.capacity == 0:
|
||||
logger.debug('Isolated instance {} previously marked as lost, could not re-join.'.format(instance.hostname))
|
||||
else:
|
||||
logger.warning('Could not update status of isolated instance {}'.format(instance.hostname))
|
||||
if instance.is_lost(isolated=True):
|
||||
instance.capacity = 0
|
||||
instance.save(update_fields=['capacity'])
|
||||
logger.error('Isolated instance {} last checked in at {}, marked as lost.'.format(instance.hostname, instance.modified))
|
||||
finally:
|
||||
if os.path.exists(private_data_dir):
|
||||
shutil.rmtree(private_data_dir)
|
||||
|
||||
def run(self, instance, private_data_dir, playbook, module, module_args, ident=None):
|
||||
"""
|
||||
Run a job on an isolated host.
|
||||
|
||||
:param instance: a `model.Job` instance
|
||||
:param private_data_dir: an absolute path on the local file system
|
||||
where job-specific data should be written
|
||||
(i.e., `/tmp/awx_N_xyz/`)
|
||||
:param playbook: the playbook to run
|
||||
:param module: the module to run
|
||||
:param module_args: the module args to use
|
||||
|
||||
For a completed job run, this function returns (status, rc),
|
||||
representing the status and return code of the isolated
|
||||
`ansible-playbook` run.
|
||||
"""
|
||||
self.ident = ident
|
||||
self.instance = instance
|
||||
self.private_data_dir = private_data_dir
|
||||
self.runner_params = self.build_runner_params([instance.execution_node], verbosity=min(5, self.instance.verbosity))
|
||||
|
||||
status, rc = self.dispatch(playbook, module, module_args)
|
||||
if status == 'successful':
|
||||
status, rc = self.check()
|
||||
return status, rc
|
||||
@ -1,38 +0,0 @@
|
||||
# Copyright (c) 2015 Ansible, Inc.
|
||||
# All Rights Reserved
|
||||
import datetime
|
||||
from django.utils.encoding import smart_str
|
||||
|
||||
from cryptography.hazmat.backends import default_backend
|
||||
from cryptography.hazmat.primitives import serialization
|
||||
from cryptography.hazmat.primitives.asymmetric import rsa
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from awx.conf.models import Setting
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
"""Generate and store a randomized RSA key for SSH traffic to isolated instances"""
|
||||
|
||||
help = 'Generates and stores a randomized RSA key for SSH traffic to isolated instances'
|
||||
|
||||
def handle(self, *args, **kwargs):
|
||||
if getattr(settings, 'AWX_ISOLATED_PRIVATE_KEY', False):
|
||||
print(settings.AWX_ISOLATED_PUBLIC_KEY)
|
||||
return
|
||||
|
||||
key = rsa.generate_private_key(public_exponent=65537, key_size=4096, backend=default_backend())
|
||||
Setting.objects.create(
|
||||
key='AWX_ISOLATED_PRIVATE_KEY',
|
||||
value=key.private_bytes(
|
||||
encoding=serialization.Encoding.PEM, format=serialization.PrivateFormat.TraditionalOpenSSL, encryption_algorithm=serialization.NoEncryption()
|
||||
),
|
||||
).save()
|
||||
pemfile = Setting.objects.create(
|
||||
key='AWX_ISOLATED_PUBLIC_KEY',
|
||||
value=smart_str(key.public_key().public_bytes(encoding=serialization.Encoding.OpenSSH, format=serialization.PublicFormat.OpenSSH))
|
||||
+ " generated-by-awx@%s" % datetime.datetime.utcnow().isoformat(),
|
||||
)
|
||||
pemfile.save()
|
||||
print(pemfile.value)
|
||||
@ -10,7 +10,6 @@ class Ungrouped(object):
|
||||
name = 'ungrouped'
|
||||
policy_instance_percentage = None
|
||||
policy_instance_minimum = None
|
||||
controller = None
|
||||
|
||||
@property
|
||||
def instances(self):
|
||||
@ -18,7 +17,7 @@ class Ungrouped(object):
|
||||
|
||||
@property
|
||||
def capacity(self):
|
||||
return sum([x.capacity for x in self.instances])
|
||||
return sum(x.capacity for x in self.instances)
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
@ -38,8 +37,6 @@ class Command(BaseCommand):
|
||||
fmt += ' policy={0.policy_instance_percentage}%'
|
||||
if instance_group.policy_instance_minimum:
|
||||
fmt += ' policy>={0.policy_instance_minimum}'
|
||||
if instance_group.controller:
|
||||
fmt += ' controller={0.controller.name}'
|
||||
print((fmt + ']').format(instance_group))
|
||||
for x in instance_group.instances.all():
|
||||
color = '\033[92m'
|
||||
@ -48,8 +45,6 @@ class Command(BaseCommand):
|
||||
if x.enabled is False:
|
||||
color = '\033[90m[DISABLED] '
|
||||
fmt = '\t' + color + '{0.hostname} capacity={0.capacity} version={1}'
|
||||
if x.last_isolated_check:
|
||||
fmt += ' last_isolated_check="{0.last_isolated_check:%Y-%m-%d %H:%M:%S}"'
|
||||
if x.capacity:
|
||||
fmt += ' heartbeat="{0.modified:%Y-%m-%d %H:%M:%S}"'
|
||||
print((fmt + '\033[0m').format(x, x.version or '?'))
|
||||
|
||||
@ -1,13 +1,11 @@
|
||||
# Copyright (c) 2015 Ansible, Inc.
|
||||
# All Rights Reserved
|
||||
|
||||
from uuid import uuid4
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from django.db import transaction
|
||||
|
||||
from awx.main.models import Instance
|
||||
from django.conf import settings
|
||||
|
||||
from django.db import transaction
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
@ -20,7 +18,6 @@ class Command(BaseCommand):
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument('--hostname', dest='hostname', type=str, help='Hostname used during provisioning')
|
||||
parser.add_argument('--is-isolated', dest='is_isolated', action='store_true', help='Specify whether the instance is isolated')
|
||||
|
||||
def _register_hostname(self, hostname):
|
||||
if not hostname:
|
||||
@ -36,10 +33,7 @@ class Command(BaseCommand):
|
||||
def handle(self, **options):
|
||||
if not options.get('hostname'):
|
||||
raise CommandError("Specify `--hostname` to use this command.")
|
||||
if options['is_isolated']:
|
||||
self.uuid = str(uuid4())
|
||||
else:
|
||||
self.uuid = settings.SYSTEM_UUID
|
||||
self.uuid = settings.SYSTEM_UUID
|
||||
self.changed = False
|
||||
self._register_hostname(options.get('hostname'))
|
||||
if self.changed:
|
||||
|
||||
@ -17,10 +17,9 @@ class InstanceNotFound(Exception):
|
||||
|
||||
|
||||
class RegisterQueue:
|
||||
def __init__(self, queuename, controller, instance_percent, inst_min, hostname_list, is_container_group=None):
|
||||
def __init__(self, queuename, instance_percent, inst_min, hostname_list, is_container_group=None):
|
||||
self.instance_not_found_err = None
|
||||
self.queuename = queuename
|
||||
self.controller = controller
|
||||
self.instance_percent = instance_percent
|
||||
self.instance_min = inst_min
|
||||
self.hostname_list = hostname_list
|
||||
@ -46,20 +45,6 @@ class RegisterQueue:
|
||||
|
||||
return (ig, created, changed)
|
||||
|
||||
def update_instance_group_controller(self, ig):
|
||||
changed = False
|
||||
control_ig = None
|
||||
|
||||
if self.controller:
|
||||
control_ig = InstanceGroup.objects.filter(name=self.controller).first()
|
||||
|
||||
if control_ig and ig.controller_id != control_ig.pk:
|
||||
ig.controller = control_ig
|
||||
ig.save()
|
||||
changed = True
|
||||
|
||||
return (control_ig, changed)
|
||||
|
||||
def add_instances_to_group(self, ig):
|
||||
changed = False
|
||||
|
||||
@ -88,26 +73,20 @@ class RegisterQueue:
|
||||
with advisory_lock('cluster_policy_lock'):
|
||||
with transaction.atomic():
|
||||
changed2 = False
|
||||
changed3 = False
|
||||
(ig, created, changed1) = self.get_create_update_instance_group()
|
||||
if created:
|
||||
print("Creating instance group {}".format(ig.name))
|
||||
elif not created:
|
||||
print("Instance Group already registered {}".format(ig.name))
|
||||
|
||||
if self.controller:
|
||||
(ig_ctrl, changed2) = self.update_instance_group_controller(ig)
|
||||
if changed2:
|
||||
print("Set controller group {} on {}.".format(self.controller, self.queuename))
|
||||
|
||||
try:
|
||||
(instances, changed3) = self.add_instances_to_group(ig)
|
||||
(instances, changed2) = self.add_instances_to_group(ig)
|
||||
for i in instances:
|
||||
print("Added instance {} to {}".format(i.hostname, ig.name))
|
||||
except InstanceNotFound as e:
|
||||
self.instance_not_found_err = e
|
||||
|
||||
if any([changed1, changed2, changed3]):
|
||||
if changed1 or changed2:
|
||||
print('(changed: True)')
|
||||
|
||||
|
||||
@ -117,7 +96,6 @@ class Command(BaseCommand):
|
||||
parser.add_argument(
|
||||
'--hostnames', dest='hostnames', type=str, help='Comma-Delimited Hosts to add to the Queue (will not remove already assigned instances)'
|
||||
)
|
||||
parser.add_argument('--controller', dest='controller', type=str, default='', help='The controlling group (makes this an isolated group)')
|
||||
parser.add_argument(
|
||||
'--instance_percent', dest='instance_percent', type=int, default=0, help='The percentage of active instances that will be assigned to this group'
|
||||
),
|
||||
@ -133,14 +111,13 @@ class Command(BaseCommand):
queuename = options.get('queuename')
if not queuename:
raise CommandError("Specify `--queuename` to use this command.")
ctrl = options.get('controller')
inst_per = options.get('instance_percent')
instance_min = options.get('instance_minimum')
hostname_list = []
if options.get('hostnames'):
hostname_list = options.get('hostnames').split(",")

rq = RegisterQueue(queuename, ctrl, inst_per, instance_min, hostname_list)
rq = RegisterQueue(queuename, inst_per, instance_min, hostname_list)
rq.register()
if rq.instance_not_found_err:
print(rq.instance_not_found_err.message)
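For reference, a minimal sketch of registering a queue from a Django shell after this change; the hostnames here are hypothetical, and the --controller option that the old Makefile passed no longer exists:

from django.core.management import call_command

# Register the default queue and attach two (hypothetical) instances to it.
call_command(
    'register_queue',
    queuename='tower',
    instance_percent=100,
    hostnames='awx-1.example.org,awx-2.example.org',
)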
@ -10,7 +10,6 @@ from datetime import datetime as dt
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db import connection
|
||||
from django.db.models import Q
|
||||
from django.db.migrations.executor import MigrationExecutor
|
||||
|
||||
from awx.main.analytics.broadcast_websocket import (
|
||||
@ -140,7 +139,7 @@ class Command(BaseCommand):
|
||||
data[family.name] = family.samples[0].value
|
||||
|
||||
me = Instance.objects.me()
|
||||
hostnames = [i.hostname for i in Instance.objects.exclude(Q(hostname=me.hostname) | Q(rampart_groups__controller__isnull=False))]
|
||||
hostnames = [i.hostname for i in Instance.objects.exclude(hostname=me.hostname)]
|
||||
|
||||
host_stats = Command.get_connection_status(me, hostnames, data)
|
||||
lines = Command._format_lines(host_stats)
|
||||
|
||||
@ -1,47 +0,0 @@
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
import ansible_runner
|
||||
|
||||
from awx.main.isolated.manager import set_pythonpath
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
"""Tests SSH connectivity between a controller and target isolated node"""
|
||||
|
||||
help = 'Tests SSH connectivity between a controller and target isolated node'
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument('--hostname', dest='hostname', type=str, help='Hostname of an isolated node')
|
||||
|
||||
def handle(self, *args, **options):
|
||||
hostname = options.get('hostname')
|
||||
if not hostname:
|
||||
raise CommandError("--hostname is a required argument")
|
||||
|
||||
try:
|
||||
path = tempfile.mkdtemp(prefix='awx_isolated_ssh', dir=settings.AWX_ISOLATION_BASE_PATH)
|
||||
ssh_key = None
|
||||
if all([getattr(settings, 'AWX_ISOLATED_KEY_GENERATION', False) is True, getattr(settings, 'AWX_ISOLATED_PRIVATE_KEY', None)]):
|
||||
ssh_key = settings.AWX_ISOLATED_PRIVATE_KEY
|
||||
env = dict(os.environ.items())
|
||||
env['ANSIBLE_HOST_KEY_CHECKING'] = str(settings.AWX_ISOLATED_HOST_KEY_CHECKING)
|
||||
set_pythonpath(os.path.join(settings.ANSIBLE_VENV_PATH, 'lib'), env)
|
||||
res = ansible_runner.interface.run(
|
||||
private_data_dir=path,
|
||||
host_pattern='all',
|
||||
inventory='{} ansible_ssh_user={}'.format(hostname, settings.AWX_ISOLATED_USERNAME),
|
||||
module='shell',
|
||||
module_args='ansible-runner --version',
|
||||
envvars=env,
|
||||
verbosity=3,
|
||||
ssh_key=ssh_key,
|
||||
)
|
||||
sys.exit(res.rc)
|
||||
finally:
|
||||
shutil.rmtree(path)
|
||||
@ -142,7 +142,7 @@ class InstanceManager(models.Manager):
|
||||
pod_ip = os.environ.get('MY_POD_IP')
|
||||
registered = self.register(ip_address=pod_ip)
|
||||
is_container_group = settings.IS_K8S
|
||||
RegisterQueue('tower', None, 100, 0, [], is_container_group).register()
|
||||
RegisterQueue('tower', 100, 0, [], is_container_group).register()
|
||||
return registered
|
||||
else:
|
||||
return (False, self.me())
|
||||
@ -155,9 +155,6 @@ class InstanceManager(models.Manager):
|
||||
# NOTE: TODO: Likely to repurpose this once standalone ramparts are a thing
|
||||
return "tower"
|
||||
|
||||
def all_non_isolated(self):
|
||||
return self.exclude(rampart_groups__controller__isnull=False)
|
||||
|
||||
|
||||
class InstanceGroupManager(models.Manager):
|
||||
"""A custom manager class for the Instance model.
|
||||
|
||||
awx/main/migrations/0139_isolated_removal.py (new file, 26 lines)
@ -0,0 +1,26 @@
# Generated by Django 2.2.16 on 2021-04-21 15:02

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('main', '0138_custom_inventory_scripts_removal'),
]

operations = [
migrations.RemoveField(
model_name='instance',
name='last_isolated_check',
),
migrations.RemoveField(
model_name='instancegroup',
name='controller',
),
migrations.AlterField(
model_name='unifiedjob',
name='controller_node',
field=models.TextField(blank=True, default='', editable=False, help_text='The instance that managed the execution environment.'),
),
]
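This new migration only drops Instance.last_isolated_check and InstanceGroup.controller and rewords the controller_node help text. A minimal sketch of applying it in a development environment, assuming the standard Django migrate machinery is available from a Django shell:

from django.core.management import call_command

# Apply 'main' app migrations up to and including this one.
call_command('migrate', 'main', '0139_isolated_removal')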
@ -146,10 +146,6 @@ class AdHocCommand(UnifiedJob, JobNotificationMixin):
|
||||
|
||||
return RunAdHocCommand
|
||||
|
||||
@classmethod
|
||||
def supports_isolation(cls):
|
||||
return True
|
||||
|
||||
@property
|
||||
def is_container_group_task(self):
|
||||
return bool(self.instance_group and self.instance_group.is_container_group)
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
# Copyright (c) 2015 Ansible, Inc.
|
||||
# All Rights Reserved.
|
||||
|
||||
import random
|
||||
from decimal import Decimal
|
||||
|
||||
from django.core.validators import MinValueValidator
|
||||
@ -63,10 +62,6 @@ class Instance(HasPolicyEditsMixin, BaseModel):
|
||||
)
|
||||
created = models.DateTimeField(auto_now_add=True)
|
||||
modified = models.DateTimeField(auto_now=True)
|
||||
last_isolated_check = models.DateTimeField(
|
||||
null=True,
|
||||
editable=False,
|
||||
)
|
||||
version = models.CharField(max_length=120, blank=True)
|
||||
capacity = models.PositiveIntegerField(
|
||||
default=100,
|
||||
@ -128,20 +123,12 @@ class Instance(HasPolicyEditsMixin, BaseModel):
def jobs_total(self):
return UnifiedJob.objects.filter(execution_node=self.hostname).count()

def is_lost(self, ref_time=None, isolated=False):
def is_lost(self, ref_time=None):
if ref_time is None:
ref_time = now()
grace_period = 120
if isolated:
grace_period = settings.AWX_ISOLATED_PERIODIC_CHECK * 2
return self.modified < ref_time - timedelta(seconds=grace_period)

def is_controller(self):
return Instance.objects.filter(rampart_groups__controller__instances=self).exists()

def is_isolated(self):
return self.rampart_groups.filter(controller__isnull=False).exists()

def refresh_capacity(self):
if settings.IS_K8S:
self.capacity = self.cpu = self.memory = self.cpu_capacity = self.mem_capacity = 0 # noqa
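With the isolated branch removed from is_lost above, the lost-instance check reduces to a flat 120-second grace period measured against the heartbeat-maintained modified timestamp. A standalone sketch of that logic for illustration (this is not the model method itself, just the same comparison pulled out into a free function):

from datetime import timedelta
from django.utils.timezone import now

def is_lost(modified, ref_time=None, grace_period=120):
    # An instance is lost when its last heartbeat (modified) is older than the grace period.
    ref_time = ref_time or now()
    return modified < ref_time - timedelta(seconds=grace_period)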
@ -185,15 +172,6 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
|
||||
editable=False,
|
||||
help_text=_('Instances that are members of this InstanceGroup'),
|
||||
)
|
||||
controller = models.ForeignKey(
|
||||
'InstanceGroup',
|
||||
related_name='controlled_groups',
|
||||
help_text=_('Instance Group to remotely control this group.'),
|
||||
editable=False,
|
||||
default=None,
|
||||
null=True,
|
||||
on_delete=models.CASCADE,
|
||||
)
|
||||
is_container_group = models.BooleanField(default=False)
|
||||
credential = models.ForeignKey(
|
||||
'Credential',
|
||||
@ -215,7 +193,7 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
|
||||
default=[], blank=True, help_text=_("List of exact-match Instances that will always be automatically assigned to this group")
|
||||
)
|
||||
|
||||
POLICY_FIELDS = frozenset(('policy_instance_list', 'policy_instance_minimum', 'policy_instance_percentage', 'controller'))
|
||||
POLICY_FIELDS = frozenset(('policy_instance_list', 'policy_instance_minimum', 'policy_instance_percentage'))
|
||||
|
||||
def get_absolute_url(self, request=None):
|
||||
return reverse('api:instance_group_detail', kwargs={'pk': self.pk}, request=request)
|
||||
@ -232,14 +210,6 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
|
||||
def jobs_total(self):
|
||||
return UnifiedJob.objects.filter(instance_group=self).count()
|
||||
|
||||
@property
|
||||
def is_controller(self):
|
||||
return self.controlled_groups.exists()
|
||||
|
||||
@property
|
||||
def is_isolated(self):
|
||||
return bool(self.controller)
|
||||
|
||||
'''
|
||||
RelatedJobsMixin
|
||||
'''
|
||||
@ -271,9 +241,6 @@ class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
|
||||
largest_instance = i
|
||||
return largest_instance
|
||||
|
||||
def choose_online_controller_node(self):
|
||||
return random.choice(list(self.controller.instances.filter(capacity__gt=0, enabled=True).values_list('hostname', flat=True)))
|
||||
|
||||
def set_default_policy_fields(self):
|
||||
self.policy_instance_list = []
|
||||
self.policy_instance_minimum = 0
|
||||
|
||||
@ -587,10 +587,6 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
|
||||
|
||||
return RunJob
|
||||
|
||||
@classmethod
|
||||
def supports_isolation(cls):
|
||||
return True
|
||||
|
||||
def _global_timeout_setting(self):
|
||||
return 'DEFAULT_JOB_TIMEOUT'
|
||||
|
||||
@ -759,7 +755,7 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
|
||||
|
||||
@property
|
||||
def can_run_containerized(self):
|
||||
return any([ig for ig in self.preferred_instance_groups if ig.is_container_group])
|
||||
return True
|
||||
|
||||
@property
|
||||
def is_container_group_task(self):
|
||||
|
||||
@ -588,7 +588,7 @@ class UnifiedJob(
|
||||
blank=True,
|
||||
default='',
|
||||
editable=False,
|
||||
help_text=_("The instance that managed the isolated execution environment."),
|
||||
help_text=_("The instance that managed the execution environment."),
|
||||
)
|
||||
notifications = models.ManyToManyField(
|
||||
'Notification',
|
||||
@ -737,10 +737,6 @@ class UnifiedJob(
|
||||
def _get_task_class(cls):
|
||||
raise NotImplementedError # Implement in subclasses.
|
||||
|
||||
@classmethod
|
||||
def supports_isolation(cls):
|
||||
return False
|
||||
|
||||
@property
|
||||
def can_run_containerized(self):
|
||||
return False
|
||||
@ -1402,12 +1398,11 @@ class UnifiedJob(
|
||||
@property
|
||||
def preferred_instance_groups(self):
|
||||
"""
|
||||
Return Instance/Rampart Groups preferred by this unified job templates
|
||||
Return Instance/Rampart Groups preferred by this unified job template
|
||||
"""
|
||||
if not self.unified_job_template:
|
||||
return []
|
||||
template_groups = [x for x in self.unified_job_template.instance_groups.all()]
|
||||
return template_groups
|
||||
return list(self.unified_job_template.instance_groups.all())
|
||||
|
||||
@property
|
||||
def global_instance_groups(self):
|
||||
@ -1467,9 +1462,6 @@ class UnifiedJob(
|
||||
def get_queue_name(self):
|
||||
return self.controller_node or self.execution_node or get_local_queuename()
|
||||
|
||||
def is_isolated(self):
|
||||
return bool(self.controller_node)
|
||||
|
||||
@property
|
||||
def is_container_group_task(self):
|
||||
return False
|
||||
|
||||
@ -6,7 +6,6 @@ from datetime import timedelta
|
||||
import logging
|
||||
import uuid
|
||||
import json
|
||||
import random
|
||||
from types import SimpleNamespace
|
||||
|
||||
# Django
|
||||
@ -253,14 +252,6 @@ class TaskManager:
|
||||
}
|
||||
dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]
|
||||
|
||||
controller_node = None
|
||||
if task.supports_isolation() and rampart_group.controller_id:
|
||||
try:
|
||||
controller_node = rampart_group.choose_online_controller_node()
|
||||
except IndexError:
|
||||
logger.debug("No controllers available in group {} to run {}".format(rampart_group.name, task.log_format))
|
||||
return
|
||||
|
||||
task.status = 'waiting'
|
||||
|
||||
(start_status, opts) = task.pre_start()
|
||||
@ -277,38 +268,24 @@ class TaskManager:
|
||||
task.send_notification_templates('running')
|
||||
logger.debug('Transitioning %s to running status.', task.log_format)
|
||||
schedule_task_manager()
|
||||
elif not task.supports_isolation() and rampart_group.controller_id:
|
||||
# non-Ansible jobs on isolated instances run on controller
|
||||
task.instance_group = rampart_group.controller
|
||||
task.execution_node = random.choice(list(rampart_group.controller.instances.all().values_list('hostname', flat=True)))
|
||||
logger.debug('Submitting isolated {} to queue {} on node {}.'.format(task.log_format, task.instance_group.name, task.execution_node))
|
||||
elif controller_node:
|
||||
task.instance_group = rampart_group
|
||||
task.execution_node = instance.hostname
|
||||
task.controller_node = controller_node
|
||||
logger.debug('Submitting isolated {} to queue {} controlled by {}.'.format(task.log_format, task.execution_node, controller_node))
|
||||
elif rampart_group.is_container_group:
|
||||
# find one real, non-containerized instance with capacity to
|
||||
# act as the controller for k8s API interaction
|
||||
match = None
|
||||
for group in InstanceGroup.objects.all():
|
||||
if group.is_container_group or group.controller_id:
|
||||
continue
|
||||
for group in InstanceGroup.objects.filter(is_container_group=False):
|
||||
match = group.fit_task_to_most_remaining_capacity_instance(task, group.instances.all())
|
||||
if match:
|
||||
break
|
||||
task.instance_group = rampart_group
|
||||
if match is None:
|
||||
logger.warn('No available capacity to run containerized <{}>.'.format(task.log_format))
|
||||
elif task.can_run_containerized and any(ig.is_container_group for ig in task.preferred_instance_groups):
|
||||
task.controller_node = match.hostname
|
||||
else:
|
||||
if task.supports_isolation():
|
||||
task.controller_node = match.hostname
|
||||
else:
|
||||
# project updates and inventory updates don't *actually* run in pods,
|
||||
# so just pick *any* non-isolated, non-containerized host and use it
|
||||
# as the execution node
|
||||
task.execution_node = match.hostname
|
||||
logger.debug('Submitting containerized {} to queue {}.'.format(task.log_format, task.execution_node))
|
||||
# project updates and inventory updates don't *actually* run in pods, so
|
||||
# just pick *any* non-containerized host and use it as the execution node
|
||||
task.execution_node = match.hostname
|
||||
logger.debug('Submitting containerized {} to queue {}.'.format(task.log_format, task.execution_node))
|
||||
else:
|
||||
task.instance_group = rampart_group
|
||||
if instance is not None:
|
||||
|
||||
@ -33,7 +33,7 @@ import subprocess
|
||||
from django.conf import settings
|
||||
from django.db import transaction, DatabaseError, IntegrityError, ProgrammingError, connection
|
||||
from django.db.models.fields.related import ForeignKey
|
||||
from django.utils.timezone import now, timedelta
|
||||
from django.utils.timezone import now
|
||||
from django.utils.encoding import smart_str
|
||||
from django.contrib.auth.models import User
|
||||
from django.utils.translation import ugettext_lazy as _, gettext_noop
|
||||
@ -84,7 +84,6 @@ from awx.main.models import (
|
||||
from awx.main.constants import ACTIVE_STATES
|
||||
from awx.main.exceptions import AwxTaskError, PostRunError
|
||||
from awx.main.queue import CallbackQueueDispatcher
|
||||
from awx.main.isolated import manager as isolated_manager
|
||||
from awx.main.dispatch.publish import task
|
||||
from awx.main.dispatch import get_local_queuename, reaper
|
||||
from awx.main.utils import (
|
||||
@ -170,8 +169,6 @@ def dispatch_startup():
|
||||
#
|
||||
apply_cluster_membership_policies()
|
||||
cluster_node_heartbeat()
|
||||
if Instance.objects.me().is_controller():
|
||||
awx_isolated_heartbeat()
|
||||
Metrics().clear_values()
|
||||
|
||||
# Update Tower's rsyslog.conf file based on loggins settings in the db
|
||||
@ -205,13 +202,8 @@ def apply_cluster_membership_policies():
|
||||
started_compute = time.time()
|
||||
all_instances = list(Instance.objects.order_by('id'))
|
||||
all_groups = list(InstanceGroup.objects.prefetch_related('instances'))
|
||||
iso_hostnames = set([])
|
||||
for ig in all_groups:
|
||||
if ig.controller_id is not None:
|
||||
iso_hostnames.update(ig.policy_instance_list)
|
||||
|
||||
considered_instances = [inst for inst in all_instances if inst.hostname not in iso_hostnames]
|
||||
total_instances = len(considered_instances)
|
||||
total_instances = len(all_instances)
|
||||
actual_groups = []
|
||||
actual_instances = []
|
||||
Group = namedtuple('Group', ['obj', 'instances', 'prior_instances'])
|
||||
@ -232,18 +224,12 @@ def apply_cluster_membership_policies():
|
||||
if group_actual.instances:
|
||||
logger.debug("Policy List, adding Instances {} to Group {}".format(group_actual.instances, ig.name))
|
||||
|
||||
if ig.controller_id is None:
|
||||
actual_groups.append(group_actual)
|
||||
else:
|
||||
# For isolated groups, _only_ apply the policy_instance_list
|
||||
# do not add to in-memory list, so minimum rules not applied
|
||||
logger.debug('Committing instances to isolated group {}'.format(ig.name))
|
||||
ig.instances.set(group_actual.instances)
|
||||
actual_groups.append(group_actual)
|
||||
|
||||
# Process Instance minimum policies next, since it represents a concrete lower bound to the
|
||||
# number of instances to make available to instance groups
|
||||
actual_instances = [Node(obj=i, groups=[]) for i in considered_instances if i.managed_by_policy]
|
||||
logger.debug("Total non-isolated instances:{} available for policy: {}".format(total_instances, len(actual_instances)))
|
||||
actual_instances = [Node(obj=i, groups=[]) for i in all_instances if i.managed_by_policy]
|
||||
logger.debug("Total instances: {}, available for policy: {}".format(total_instances, len(actual_instances)))
|
||||
for g in sorted(actual_groups, key=lambda x: len(x.instances)):
|
||||
policy_min_added = []
|
||||
for i in sorted(actual_instances, key=lambda x: len(x.groups)):
|
||||
@ -285,7 +271,7 @@ def apply_cluster_membership_policies():
|
||||
logger.debug('Cluster policy no-op finished in {} seconds'.format(time.time() - started_compute))
|
||||
return
|
||||
|
||||
# On a differential basis, apply instances to non-isolated groups
|
||||
# On a differential basis, apply instances to groups
|
||||
with transaction.atomic():
|
||||
for g in actual_groups:
|
||||
if g.obj.is_container_group:
|
||||
@ -419,7 +405,7 @@ def cleanup_execution_environment_images():
|
||||
def cluster_node_heartbeat():
|
||||
logger.debug("Cluster node heartbeat task.")
|
||||
nowtime = now()
|
||||
instance_list = list(Instance.objects.all_non_isolated())
|
||||
instance_list = list(Instance.objects.all())
|
||||
this_inst = None
|
||||
lost_instances = []
|
||||
|
||||
@ -503,30 +489,6 @@ def awx_k8s_reaper():
|
||||
logger.exception("Failed to delete orphaned pod {} from {}".format(job.log_format, group))
|
||||
|
||||
|
||||
@task(queue=get_local_queuename)
|
||||
def awx_isolated_heartbeat():
|
||||
local_hostname = settings.CLUSTER_HOST_ID
|
||||
logger.debug("Controlling node checking for any isolated management tasks.")
|
||||
poll_interval = settings.AWX_ISOLATED_PERIODIC_CHECK
|
||||
# Get isolated instances not checked since poll interval - some buffer
|
||||
nowtime = now()
|
||||
accept_before = nowtime - timedelta(seconds=(poll_interval - 10))
|
||||
isolated_instance_qs = Instance.objects.filter(
|
||||
rampart_groups__controller__instances__hostname=local_hostname,
|
||||
)
|
||||
isolated_instance_qs = isolated_instance_qs.filter(last_isolated_check__lt=accept_before) | isolated_instance_qs.filter(last_isolated_check=None)
|
||||
# Fast pass of isolated instances, claiming the nodes to update
|
||||
with transaction.atomic():
|
||||
for isolated_instance in isolated_instance_qs:
|
||||
isolated_instance.last_isolated_check = nowtime
|
||||
# Prevent modified time from being changed, as in normal heartbeat
|
||||
isolated_instance.save(update_fields=['last_isolated_check'])
|
||||
# Slow pass looping over isolated IGs and their isolated instances
|
||||
if len(isolated_instance_qs) > 0:
|
||||
logger.debug("Managing isolated instances {}.".format(','.join([inst.hostname for inst in isolated_instance_qs])))
|
||||
isolated_manager.IsolatedManager(CallbackQueueDispatcher.dispatch).health_check(isolated_instance_qs)
|
||||
|
||||
|
||||
@task(queue=get_local_queuename)
|
||||
def awx_periodic_scheduler():
|
||||
with advisory_lock('awx_periodic_scheduler_lock', wait=False) as acquired:
|
||||
@ -1017,7 +979,7 @@ class BaseTask(object):
|
||||
else:
|
||||
env['PATH'] = os.path.join(settings.AWX_VENV_PATH, "bin")
|
||||
|
||||
def build_env(self, instance, private_data_dir, isolated, private_data_files=None):
|
||||
def build_env(self, instance, private_data_dir, private_data_files=None):
|
||||
"""
|
||||
Build environment dictionary for ansible-playbook.
|
||||
"""
|
||||
@ -1138,7 +1100,7 @@ class BaseTask(object):
|
||||
"""
|
||||
instance.log_lifecycle("post_run")
|
||||
|
||||
def final_run_hook(self, instance, status, private_data_dir, fact_modification_times, isolated_manager_instance=None):
|
||||
def final_run_hook(self, instance, status, private_data_dir, fact_modification_times):
|
||||
"""
|
||||
Hook for any steps to run after job/task is marked as complete.
|
||||
"""
|
||||
@ -1284,16 +1246,6 @@ class BaseTask(object):
|
||||
if result_traceback:
|
||||
self.instance = self.update_model(self.instance.pk, result_traceback=result_traceback)
|
||||
|
||||
def check_handler(self, config):
|
||||
"""
|
||||
IsolatedManager callback triggered by the repeated checks of the isolated node
|
||||
"""
|
||||
job_env = build_safe_env(config['env'])
|
||||
for k, v in self.safe_cred_env.items():
|
||||
if k in job_env:
|
||||
job_env[k] = v
|
||||
self.instance = self.update_model(self.instance.pk, job_args=json.dumps(config['command']), job_cwd=config['cwd'], job_env=job_env)
|
||||
|
||||
@with_path_cleanup
|
||||
def run(self, pk, **kwargs):
|
||||
"""
|
||||
@ -1321,7 +1273,6 @@ class BaseTask(object):
|
||||
self.safe_env = {}
|
||||
self.safe_cred_env = {}
|
||||
private_data_dir = None
|
||||
isolated_manager_instance = None
|
||||
|
||||
# store a reference to the parent workflow job (if any) so we can include
|
||||
# it in event data JSON
|
||||
@ -1329,7 +1280,6 @@ class BaseTask(object):
|
||||
self.parent_workflow_job_id = self.instance.get_workflow_job().id
|
||||
|
||||
try:
|
||||
isolated = self.instance.is_isolated()
|
||||
self.instance.send_notification_templates("running")
|
||||
private_data_dir = self.build_private_data_dir(self.instance)
|
||||
self.pre_run_hook(self.instance, private_data_dir)
|
||||
@ -1363,7 +1313,7 @@ class BaseTask(object):
|
||||
passwords = self.build_passwords(self.instance, kwargs)
|
||||
self.build_extra_vars_file(self.instance, private_data_dir)
|
||||
args = self.build_args(self.instance, private_data_dir, passwords)
|
||||
env = self.build_env(self.instance, private_data_dir, isolated, private_data_files=private_data_files)
|
||||
env = self.build_env(self.instance, private_data_dir, private_data_files=private_data_files)
|
||||
self.safe_env = build_safe_env(env)
|
||||
|
||||
credentials = self.build_credentials_list(self.instance)
|
||||
@ -1464,7 +1414,7 @@ class BaseTask(object):
|
||||
self.instance = self.update_model(pk, status=status, emitted_events=self.event_ct, **extra_update_fields)
|
||||
|
||||
try:
|
||||
self.final_run_hook(self.instance, status, private_data_dir, fact_modification_times, isolated_manager_instance=isolated_manager_instance)
|
||||
self.final_run_hook(self.instance, status, private_data_dir, fact_modification_times)
|
||||
except Exception:
|
||||
logger.exception('{} Final run hook errored.'.format(self.instance.log_format))
|
||||
|
||||
@ -1549,11 +1499,11 @@ class RunJob(BaseTask):
|
||||
|
||||
return passwords
|
||||
|
||||
def build_env(self, job, private_data_dir, isolated=False, private_data_files=None):
|
||||
def build_env(self, job, private_data_dir, private_data_files=None):
|
||||
"""
|
||||
Build environment dictionary for ansible-playbook.
|
||||
"""
|
||||
env = super(RunJob, self).build_env(job, private_data_dir, isolated=isolated, private_data_files=private_data_files)
|
||||
env = super(RunJob, self).build_env(job, private_data_dir, private_data_files=private_data_files)
|
||||
if private_data_files is None:
|
||||
private_data_files = {}
|
||||
# Set environment variables needed for inventory and job event
|
||||
@ -1564,10 +1514,9 @@ class RunJob(BaseTask):
|
||||
env['PROJECT_REVISION'] = job.project.scm_revision
|
||||
env['ANSIBLE_RETRY_FILES_ENABLED'] = "False"
|
||||
env['MAX_EVENT_RES'] = str(settings.MAX_EVENT_RES_DATA)
|
||||
if not isolated:
|
||||
if hasattr(settings, 'AWX_ANSIBLE_CALLBACK_PLUGINS') and settings.AWX_ANSIBLE_CALLBACK_PLUGINS:
|
||||
env['ANSIBLE_CALLBACK_PLUGINS'] = ':'.join(settings.AWX_ANSIBLE_CALLBACK_PLUGINS)
|
||||
env['AWX_HOST'] = settings.TOWER_URL_BASE
|
||||
if hasattr(settings, 'AWX_ANSIBLE_CALLBACK_PLUGINS') and settings.AWX_ANSIBLE_CALLBACK_PLUGINS:
|
||||
env['ANSIBLE_CALLBACK_PLUGINS'] = ':'.join(settings.AWX_ANSIBLE_CALLBACK_PLUGINS)
|
||||
env['AWX_HOST'] = settings.TOWER_URL_BASE
|
||||
|
||||
# Create a directory for ControlPath sockets that is unique to each job
|
||||
cp_dir = os.path.join(private_data_dir, 'cp')
|
||||
@ -1798,9 +1747,6 @@ class RunJob(BaseTask):
|
||||
if sync_needs:
|
||||
pu_ig = job.instance_group
|
||||
pu_en = job.execution_node
|
||||
if job.is_isolated() is True:
|
||||
pu_ig = pu_ig.controller
|
||||
pu_en = settings.CLUSTER_HOST_ID
|
||||
|
||||
sync_metafields = dict(
|
||||
launch_type="sync",
|
||||
@ -1855,7 +1801,7 @@ class RunJob(BaseTask):
|
||||
# ran inside of the event saving code
|
||||
update_smart_memberships_for_inventory(job.inventory)
|
||||
|
||||
def final_run_hook(self, job, status, private_data_dir, fact_modification_times, isolated_manager_instance=None):
|
||||
def final_run_hook(self, job, status, private_data_dir, fact_modification_times):
|
||||
super(RunJob, self).final_run_hook(job, status, private_data_dir, fact_modification_times)
|
||||
if not private_data_dir:
|
||||
# If there's no private data dir, that means we didn't get into the
|
||||
@ -1867,8 +1813,6 @@ class RunJob(BaseTask):
|
||||
os.path.join(private_data_dir, 'artifacts', 'fact_cache'),
|
||||
fact_modification_times,
|
||||
)
|
||||
if isolated_manager_instance and not job.is_container_group_task:
|
||||
isolated_manager_instance.cleanup()
|
||||
|
||||
try:
|
||||
inventory = job.inventory
|
||||
@ -1932,11 +1876,11 @@ class RunProjectUpdate(BaseTask):
|
||||
passwords['scm_password'] = project_update.credential.get_input('password', default='')
|
||||
return passwords
|
||||
|
||||
def build_env(self, project_update, private_data_dir, isolated=False, private_data_files=None):
|
||||
def build_env(self, project_update, private_data_dir, private_data_files=None):
|
||||
"""
|
||||
Build environment dictionary for ansible-playbook.
|
||||
"""
|
||||
env = super(RunProjectUpdate, self).build_env(project_update, private_data_dir, isolated=isolated, private_data_files=private_data_files)
|
||||
env = super(RunProjectUpdate, self).build_env(project_update, private_data_dir, private_data_files=private_data_files)
|
||||
env['ANSIBLE_RETRY_FILES_ENABLED'] = str(False)
|
||||
env['ANSIBLE_ASK_PASS'] = str(False)
|
||||
env['ANSIBLE_BECOME_ASK_PASS'] = str(False)
|
||||
@ -2391,14 +2335,14 @@ class RunInventoryUpdate(BaseTask):
|
||||
injector = InventorySource.injectors[inventory_update.source]()
|
||||
return injector.build_private_data(inventory_update, private_data_dir)
|
||||
|
||||
def build_env(self, inventory_update, private_data_dir, isolated, private_data_files=None):
|
||||
def build_env(self, inventory_update, private_data_dir, private_data_files=None):
|
||||
"""Build environment dictionary for ansible-inventory.
|
||||
|
||||
Most environment variables related to credentials or configuration
|
||||
are accomplished by the inventory source injectors (in this method)
|
||||
or custom credential type injectors (in main run method).
|
||||
"""
|
||||
env = super(RunInventoryUpdate, self).build_env(inventory_update, private_data_dir, isolated, private_data_files=private_data_files)
|
||||
env = super(RunInventoryUpdate, self).build_env(inventory_update, private_data_dir, private_data_files=private_data_files)
|
||||
|
||||
if private_data_files is None:
|
||||
private_data_files = {}
|
||||
@ -2715,11 +2659,11 @@ class RunAdHocCommand(BaseTask):
|
||||
passwords[field] = value
|
||||
return passwords
|
||||
|
||||
def build_env(self, ad_hoc_command, private_data_dir, isolated=False, private_data_files=None):
|
||||
def build_env(self, ad_hoc_command, private_data_dir, private_data_files=None):
|
||||
"""
|
||||
Build environment dictionary for ansible.
|
||||
"""
|
||||
env = super(RunAdHocCommand, self).build_env(ad_hoc_command, private_data_dir, isolated=isolated, private_data_files=private_data_files)
|
||||
env = super(RunAdHocCommand, self).build_env(ad_hoc_command, private_data_dir, private_data_files=private_data_files)
|
||||
# Set environment variables needed for inventory and ad hoc event
|
||||
# callbacks to work.
|
||||
env['AD_HOC_COMMAND_ID'] = str(ad_hoc_command.pk)
|
||||
@ -2825,11 +2769,6 @@ class RunAdHocCommand(BaseTask):
|
||||
d[r'Password:\s*?$'] = 'ssh_password'
|
||||
return d
|
||||
|
||||
def final_run_hook(self, adhoc_job, status, private_data_dir, fact_modification_times, isolated_manager_instance=None):
|
||||
super(RunAdHocCommand, self).final_run_hook(adhoc_job, status, private_data_dir, fact_modification_times)
|
||||
if isolated_manager_instance:
|
||||
isolated_manager_instance.cleanup()
|
||||
|
||||
|
||||
@task(queue=get_local_queuename)
|
||||
class RunSystemJob(BaseTask):
|
||||
@ -2871,8 +2810,8 @@ class RunSystemJob(BaseTask):
|
||||
os.chmod(path, stat.S_IRUSR)
|
||||
return path
|
||||
|
||||
def build_env(self, instance, private_data_dir, isolated=False, private_data_files=None):
|
||||
base_env = super(RunSystemJob, self).build_env(instance, private_data_dir, isolated=isolated, private_data_files=private_data_files)
|
||||
def build_env(self, instance, private_data_dir, private_data_files=None):
|
||||
base_env = super(RunSystemJob, self).build_env(instance, private_data_dir, private_data_files=private_data_files)
|
||||
# TODO: this is able to run by turning off isolation
|
||||
# the goal is to run it in a container instead
|
||||
env = dict(os.environ.items())
|
||||
|
||||
@ -20,13 +20,7 @@ def tower_instance_group():
|
||||
|
||||
@pytest.fixture
|
||||
def instance():
|
||||
instance = Instance.objects.create(hostname='iso')
|
||||
return instance
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def non_iso_instance():
|
||||
return Instance.objects.create(hostname='iamnotanisolatedinstance')
|
||||
return Instance.objects.create(hostname='instance')
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@ -36,15 +30,6 @@ def instance_group(job_factory):
|
||||
return ig
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def isolated_instance_group(instance_group, instance):
|
||||
ig = InstanceGroup(name="iso", controller=instance_group)
|
||||
ig.save()
|
||||
ig.instances.set([instance])
|
||||
ig.save()
|
||||
return ig
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def containerized_instance_group(instance_group, kube_credential):
|
||||
ig = InstanceGroup(name="container")
|
||||
@ -97,26 +82,6 @@ def source_model(request):
|
||||
return request.getfixturevalue(request.param)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_instance_group_is_controller(instance_group, isolated_instance_group, non_iso_instance):
|
||||
assert not isolated_instance_group.is_controller
|
||||
assert instance_group.is_controller
|
||||
|
||||
instance_group.instances.set([non_iso_instance])
|
||||
|
||||
assert instance_group.is_controller
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_instance_group_is_isolated(instance_group, isolated_instance_group):
|
||||
assert not instance_group.is_isolated
|
||||
assert isolated_instance_group.is_isolated
|
||||
|
||||
isolated_instance_group.instances.set([])
|
||||
|
||||
assert isolated_instance_group.is_isolated
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_delete_instance_group_jobs(delete, instance_group_jobs_successful, instance_group, admin):
|
||||
url = reverse("api:instance_group_detail", kwargs={'pk': instance_group.pk})
|
||||
@ -160,61 +125,6 @@ def test_delete_rename_tower_instance_group_prevented(delete, options, tower_ins
|
||||
patch(url, {'name': 'foobar'}, super_user, expect=200)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_prevent_delete_iso_and_control_groups(delete, isolated_instance_group, admin):
|
||||
iso_url = reverse("api:instance_group_detail", kwargs={'pk': isolated_instance_group.pk})
|
||||
controller_url = reverse("api:instance_group_detail", kwargs={'pk': isolated_instance_group.controller.pk})
|
||||
delete(iso_url, None, admin, expect=403)
|
||||
delete(controller_url, None, admin, expect=403)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_prevent_isolated_instance_added_to_non_isolated_instance_group(post, admin, instance, instance_group, isolated_instance_group):
|
||||
url = reverse("api:instance_group_instance_list", kwargs={'pk': instance_group.pk})
|
||||
|
||||
assert True is instance.is_isolated()
|
||||
resp = post(url, {'associate': True, 'id': instance.id}, admin, expect=400)
|
||||
assert u"Isolated instances may not be added or removed from instances groups via the API." == resp.data['error']
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_prevent_isolated_instance_added_to_non_isolated_instance_group_via_policy_list(patch, admin, instance, instance_group, isolated_instance_group):
|
||||
url = reverse("api:instance_group_detail", kwargs={'pk': instance_group.pk})
|
||||
|
||||
assert True is instance.is_isolated()
|
||||
resp = patch(url, {'policy_instance_list': [instance.hostname]}, user=admin, expect=400)
|
||||
assert [u"Isolated instances may not be added or removed from instances groups via the API."] == resp.data['policy_instance_list']
|
||||
assert instance_group.policy_instance_list == []
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_prevent_isolated_instance_removal_from_isolated_instance_group(post, admin, instance, instance_group, isolated_instance_group):
|
||||
url = reverse("api:instance_group_instance_list", kwargs={'pk': isolated_instance_group.pk})
|
||||
|
||||
assert True is instance.is_isolated()
|
||||
resp = post(url, {'disassociate': True, 'id': instance.id}, admin, expect=400)
|
||||
assert u"Isolated instances may not be added or removed from instances groups via the API." == resp.data['error']
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_prevent_non_isolated_instance_added_to_isolated_instance_group(post, admin, non_iso_instance, isolated_instance_group):
|
||||
url = reverse("api:instance_group_instance_list", kwargs={'pk': isolated_instance_group.pk})
|
||||
|
||||
assert False is non_iso_instance.is_isolated()
|
||||
resp = post(url, {'associate': True, 'id': non_iso_instance.id}, admin, expect=400)
|
||||
assert u"Isolated instance group membership may not be managed via the API." == resp.data['error']
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_prevent_non_isolated_instance_added_to_isolated_instance_group_via_policy_list(patch, admin, non_iso_instance, isolated_instance_group):
|
||||
url = reverse("api:instance_group_detail", kwargs={'pk': isolated_instance_group.pk})
|
||||
|
||||
assert False is non_iso_instance.is_isolated()
|
||||
resp = patch(url, {'policy_instance_list': [non_iso_instance.hostname]}, user=admin, expect=400)
|
||||
assert [u"Isolated instance group membership may not be managed via the API."] == resp.data['policy_instance_list']
|
||||
assert isolated_instance_group.policy_instance_list == []
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@pytest.mark.parametrize('source_model', ['job_template', 'inventory', 'organization'], indirect=True)
|
||||
def test_instance_group_order_persistence(get, post, admin, source_model):
|
||||
@ -222,7 +132,7 @@ def test_instance_group_order_persistence(get, post, admin, source_model):
|
||||
total = 5
|
||||
pks = list(range(total))
|
||||
random.shuffle(pks)
|
||||
instances = [InstanceGroup.objects.create(name='iso-%d' % i) for i in pks]
|
||||
instances = [InstanceGroup.objects.create(name='group-%d' % i) for i in pks]
|
||||
view_name = camelcase_to_underscore(source_model.__class__.__name__)
|
||||
url = reverse('api:{}_instance_groups_list'.format(view_name), kwargs={'pk': source_model.pk})
|
||||
|
||||
@ -252,7 +162,6 @@ def test_instance_group_update_fields(patch, instance, instance_group, admin, co
|
||||
# instance group (not containerized)
|
||||
ig_url = reverse("api:instance_group_detail", kwargs={'pk': instance_group.pk})
|
||||
assert not instance_group.is_container_group
|
||||
assert not containerized_instance_group.is_isolated
|
||||
resp = patch(ig_url, {'policy_instance_percentage': 15}, admin, expect=200)
|
||||
assert 15 == resp.data['policy_instance_percentage']
|
||||
resp = patch(ig_url, {'policy_instance_minimum': 15}, admin, expect=200)
|
||||
@ -263,7 +172,6 @@ def test_instance_group_update_fields(patch, instance, instance_group, admin, co
|
||||
# containerized instance group
|
||||
cg_url = reverse("api:instance_group_detail", kwargs={'pk': containerized_instance_group.pk})
|
||||
assert containerized_instance_group.is_container_group
|
||||
assert not containerized_instance_group.is_isolated
|
||||
resp = patch(cg_url, {'policy_instance_percentage': 15}, admin, expect=400)
|
||||
assert ["Containerized instances may not be managed via the API"] == resp.data['policy_instance_percentage']
|
||||
resp = patch(cg_url, {'policy_instance_minimum': 15}, admin, expect=400)
|
||||
|
||||
@ -5,8 +5,6 @@
|
||||
# Python
|
||||
import pytest
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
# AWX
|
||||
from awx.api.versioning import reverse
|
||||
from awx.conf.models import Setting
|
||||
@ -322,60 +320,6 @@ def test_logging_aggregator_connection_test_valid(put, post, admin):
|
||||
post(url, {}, user=admin, expect=202)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@pytest.mark.parametrize(
|
||||
'setting_name',
|
||||
[
|
||||
'AWX_ISOLATED_CHECK_INTERVAL',
|
||||
'AWX_ISOLATED_LAUNCH_TIMEOUT',
|
||||
'AWX_ISOLATED_CONNECTION_TIMEOUT',
|
||||
],
|
||||
)
|
||||
def test_isolated_job_setting_validation(get, patch, admin, setting_name):
|
||||
url = reverse('api:setting_singleton_detail', kwargs={'category_slug': 'jobs'})
|
||||
patch(url, user=admin, data={setting_name: -1}, expect=400)
|
||||
|
||||
data = get(url, user=admin).data
|
||||
assert data[setting_name] != -1
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@pytest.mark.parametrize(
|
||||
'key, expected',
|
||||
[
|
||||
['AWX_ISOLATED_PRIVATE_KEY', '$encrypted$'],
|
||||
['AWX_ISOLATED_PUBLIC_KEY', 'secret'],
|
||||
],
|
||||
)
|
||||
def test_isolated_keys_readonly(get, patch, delete, admin, key, expected):
|
||||
Setting.objects.create(key=key, value='secret').save()
|
||||
assert getattr(settings, key) == 'secret'
|
||||
|
||||
url = reverse('api:setting_singleton_detail', kwargs={'category_slug': 'jobs'})
|
||||
resp = get(url, user=admin)
|
||||
assert resp.data[key] == expected
|
||||
|
||||
patch(url, user=admin, data={key: 'new-secret'})
|
||||
assert getattr(settings, key) == 'secret'
|
||||
|
||||
delete(url, user=admin)
|
||||
assert getattr(settings, key) == 'secret'
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_isolated_key_flag_readonly(get, patch, delete, admin):
|
||||
settings.AWX_ISOLATED_KEY_GENERATION = True
|
||||
url = reverse('api:setting_singleton_detail', kwargs={'category_slug': 'jobs'})
|
||||
resp = get(url, user=admin)
|
||||
assert resp.data['AWX_ISOLATED_KEY_GENERATION'] is True
|
||||
|
||||
patch(url, user=admin, data={'AWX_ISOLATED_KEY_GENERATION': False})
|
||||
assert settings.AWX_ISOLATED_KEY_GENERATION is True
|
||||
|
||||
delete(url, user=admin)
|
||||
assert settings.AWX_ISOLATED_KEY_GENERATION is True
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@pytest.mark.parametrize('headers', [True, False])
|
||||
def test_saml_x509cert_validation(patch, get, admin, headers):
|
||||
|
||||
@ -2,10 +2,8 @@ import pytest
|
||||
from unittest import mock
|
||||
import os
|
||||
|
||||
from django.utils.timezone import now, timedelta
|
||||
|
||||
from awx.main.tasks import RunProjectUpdate, RunInventoryUpdate, awx_isolated_heartbeat, isolated_manager
|
||||
from awx.main.models import ProjectUpdate, InventoryUpdate, InventorySource, Instance, InstanceGroup
|
||||
from awx.main.tasks import RunProjectUpdate, RunInventoryUpdate
|
||||
from awx.main.models import ProjectUpdate, InventoryUpdate, InventorySource
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@ -70,61 +68,3 @@ class TestDependentInventoryUpdate:
|
||||
# Verify that it bails after 1st update, detecting a cancel
|
||||
assert is2.inventory_updates.count() == 0
|
||||
iu_run_mock.assert_called_once()
|
||||
|
||||
|
||||
class MockSettings:
|
||||
AWX_ISOLATED_PERIODIC_CHECK = 60
|
||||
CLUSTER_HOST_ID = 'tower_1'
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
class TestIsolatedManagementTask:
|
||||
@pytest.fixture
|
||||
def control_group(self):
|
||||
return InstanceGroup.objects.create(name='alpha')
|
||||
|
||||
@pytest.fixture
|
||||
def control_instance(self, control_group):
|
||||
return control_group.instances.create(hostname='tower_1')
|
||||
|
||||
@pytest.fixture
|
||||
def needs_updating(self, control_group):
|
||||
ig = InstanceGroup.objects.create(name='thepentagon', controller=control_group)
|
||||
inst = ig.instances.create(hostname='isolated', capacity=103)
|
||||
inst.last_isolated_check = now() - timedelta(seconds=MockSettings.AWX_ISOLATED_PERIODIC_CHECK)
|
||||
inst.save()
|
||||
return ig
|
||||
|
||||
@pytest.fixture
|
||||
def just_updated(self, control_group):
|
||||
ig = InstanceGroup.objects.create(name='thepentagon', controller=control_group)
|
||||
inst = ig.instances.create(hostname='isolated', capacity=103)
|
||||
inst.last_isolated_check = now()
|
||||
inst.save()
|
||||
return inst
|
||||
|
||||
@pytest.fixture
|
||||
def old_version(self, control_group):
|
||||
ig = InstanceGroup.objects.create(name='thepentagon', controller=control_group)
|
||||
inst = ig.instances.create(hostname='isolated-old', capacity=103)
|
||||
inst.save()
|
||||
return inst
|
||||
|
||||
def test_takes_action(self, control_instance, needs_updating):
|
||||
original_isolated_instance = needs_updating.instances.all().first()
|
||||
with mock.patch('awx.main.tasks.settings', MockSettings()):
|
||||
with mock.patch.object(isolated_manager.IsolatedManager, 'health_check') as check_mock:
|
||||
awx_isolated_heartbeat()
|
||||
iso_instance = Instance.objects.get(hostname='isolated')
|
||||
call_args, _ = check_mock.call_args
|
||||
assert call_args[0][0] == iso_instance
|
||||
assert iso_instance.last_isolated_check > original_isolated_instance.last_isolated_check
|
||||
assert iso_instance.modified == original_isolated_instance.modified
|
||||
|
||||
def test_does_not_take_action(self, control_instance, just_updated):
|
||||
with mock.patch('awx.main.tasks.settings', MockSettings()):
|
||||
with mock.patch.object(isolated_manager.IsolatedManager, 'health_check') as check_mock:
|
||||
awx_isolated_heartbeat()
|
||||
iso_instance = Instance.objects.get(hostname='isolated')
|
||||
check_mock.assert_not_called()
|
||||
assert iso_instance.capacity == 103
|
||||
|
||||
@ -645,102 +645,6 @@ class TestAdhocRun(TestJobExecution):
|
||||
assert extra_vars['awx_user_name'] == "angry-spud"
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Isolated code path needs updating after runner integration")
|
||||
class TestIsolatedExecution(TestJobExecution):
|
||||
|
||||
ISOLATED_HOST = 'some-isolated-host'
|
||||
ISOLATED_CONTROLLER_HOST = 'some-isolated-controller-host'
|
||||
|
||||
@pytest.fixture
|
||||
def job(self):
|
||||
job = Job(pk=1, id=1, project=Project(), inventory=Inventory(), job_template=JobTemplate(id=1, name='foo'))
|
||||
job.controller_node = self.ISOLATED_CONTROLLER_HOST
|
||||
job.execution_node = self.ISOLATED_HOST
|
||||
return job
|
||||
|
||||
def test_with_ssh_credentials(self, job):
|
||||
ssh = CredentialType.defaults['ssh']()
|
||||
credential = Credential(pk=1, credential_type=ssh, inputs={'username': 'bob', 'password': 'secret', 'ssh_key_data': self.EXAMPLE_PRIVATE_KEY})
|
||||
credential.inputs['password'] = encrypt_field(credential, 'password')
|
||||
job.credentials.add(credential)
|
||||
|
||||
private_data = tempfile.mkdtemp(prefix='awx_')
|
||||
self.task.build_private_data_dir = mock.Mock(return_value=private_data)
|
||||
|
||||
def _mock_job_artifacts(*args, **kw):
|
||||
artifacts = os.path.join(private_data, 'artifacts')
|
||||
if not os.path.exists(artifacts):
|
||||
os.makedirs(artifacts)
|
||||
if 'run_isolated.yml' in args[0]:
|
||||
for filename, data in (
|
||||
['status', 'successful'],
|
||||
['rc', '0'],
|
||||
['stdout', 'IT WORKED!'],
|
||||
):
|
||||
with open(os.path.join(artifacts, filename), 'w') as f:
|
||||
f.write(data)
|
||||
return ('successful', 0)
|
||||
|
||||
self.run_pexpect.side_effect = _mock_job_artifacts
|
||||
self.task.run(self.pk)
|
||||
|
||||
playbook_run = self.run_pexpect.call_args_list[0][0]
|
||||
assert ' '.join(playbook_run[0]).startswith(
|
||||
' '.join(
|
||||
[
|
||||
'ansible-playbook',
|
||||
'run_isolated.yml',
|
||||
'-u',
|
||||
settings.AWX_ISOLATED_USERNAME,
|
||||
'-T',
|
||||
str(settings.AWX_ISOLATED_CONNECTION_TIMEOUT),
|
||||
'-i',
|
||||
self.ISOLATED_HOST + ',',
|
||||
'-e',
|
||||
]
|
||||
)
|
||||
)
|
||||
extra_vars = playbook_run[0][playbook_run[0].index('-e') + 1]
|
||||
extra_vars = json.loads(extra_vars)
|
||||
assert extra_vars['dest'] == '/tmp'
|
||||
assert extra_vars['src'] == private_data
|
||||
|
||||
def test_systemctl_failure(self):
|
||||
# If systemctl fails, read the contents of `artifacts/systemctl_logs`
|
||||
mock_get = mock.Mock()
|
||||
ssh = CredentialType.defaults['ssh']()
|
||||
credential = Credential(
|
||||
pk=1,
|
||||
credential_type=ssh,
|
||||
inputs={
|
||||
'username': 'bob',
|
||||
},
|
||||
)
|
||||
self.instance.credentials.add(credential)
|
||||
|
||||
private_data = tempfile.mkdtemp(prefix='awx_')
|
||||
self.task.build_private_data_dir = mock.Mock(return_value=private_data)
|
||||
inventory = json.dumps({"all": {"hosts": ["localhost"]}})
|
||||
|
||||
def _mock_job_artifacts(*args, **kw):
|
||||
artifacts = os.path.join(private_data, 'artifacts')
|
||||
if not os.path.exists(artifacts):
|
||||
os.makedirs(artifacts)
|
||||
if 'run_isolated.yml' in args[0]:
|
||||
for filename, data in (['daemon.log', 'ERROR IN RUN.PY'],):
|
||||
with open(os.path.join(artifacts, filename), 'w') as f:
|
||||
f.write(data)
|
||||
return ('successful', 0)
|
||||
|
||||
self.run_pexpect.side_effect = _mock_job_artifacts
|
||||
|
||||
with mock.patch('time.sleep'):
|
||||
with mock.patch('requests.get') as mock_get:
|
||||
mock_get.return_value = mock.Mock(content=inventory)
|
||||
with pytest.raises(Exception):
|
||||
self.task.run(self.pk, self.ISOLATED_HOST)
|
||||
|
||||
|
||||
class TestJobCredentials(TestJobExecution):
|
||||
@pytest.fixture
|
||||
def job(self, execution_environment):
|
||||
@ -1625,7 +1529,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
|
||||
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
|
||||
|
||||
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
|
||||
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
|
||||
env = task.build_env(inventory_update, private_data_dir, private_data_files)
|
||||
|
||||
assert 'AWS_ACCESS_KEY_ID' not in env
|
||||
assert 'AWS_SECRET_ACCESS_KEY' not in env
|
||||
@ -1645,7 +1549,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
|
||||
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
|
||||
|
||||
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
|
||||
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
|
||||
env = task.build_env(inventory_update, private_data_dir, private_data_files)
|
||||
|
||||
safe_env = build_safe_env(env)
|
||||
|
||||
@ -1669,7 +1573,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
|
||||
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
|
||||
|
||||
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
|
||||
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
|
||||
env = task.build_env(inventory_update, private_data_dir, private_data_files)
|
||||
|
||||
safe_env = {}
|
||||
credentials = task.build_credentials_list(inventory_update)
|
||||
@ -1706,7 +1610,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
|
||||
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
|
||||
|
||||
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
|
||||
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
|
||||
env = task.build_env(inventory_update, private_data_dir, private_data_files)
|
||||
|
||||
safe_env = build_safe_env(env)
|
||||
|
||||
@ -1736,7 +1640,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
|
||||
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
|
||||
|
||||
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
|
||||
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
|
||||
env = task.build_env(inventory_update, private_data_dir, private_data_files)
|
||||
|
||||
safe_env = build_safe_env(env)
|
||||
|
||||
@ -1763,7 +1667,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
|
||||
|
||||
def run(expected_gce_zone):
|
||||
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
|
||||
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
|
||||
env = task.build_env(inventory_update, private_data_dir, private_data_files)
|
||||
safe_env = {}
|
||||
credentials = task.build_credentials_list(inventory_update)
|
||||
for credential in credentials:
|
||||
@ -1797,7 +1701,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
|
||||
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
|
||||
|
||||
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
|
||||
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
|
||||
env = task.build_env(inventory_update, private_data_dir, private_data_files)
|
||||
|
||||
path = os.path.join(private_data_dir, os.path.basename(env['OS_CLIENT_CONFIG_FILE']))
|
||||
shade_config = open(path, 'r').read()
|
||||
@ -1832,7 +1736,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
|
||||
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
|
||||
|
||||
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
|
||||
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
|
||||
env = task.build_env(inventory_update, private_data_dir, private_data_files)
|
||||
safe_env = build_safe_env(env)
|
||||
|
||||
assert env["FOREMAN_SERVER"] == "https://example.org"
|
||||
@ -1856,7 +1760,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
|
||||
inventory_update.get_cloud_credential = get_cred
|
||||
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
|
||||
|
||||
env = task.build_env(inventory_update, private_data_dir, False)
|
||||
env = task.build_env(inventory_update, private_data_dir)
|
||||
|
||||
safe_env = build_safe_env(env)
|
||||
|
||||
@ -1888,7 +1792,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
|
||||
inventory_update.get_cloud_credential = get_cred
|
||||
inventory_update.get_extra_credentials = mocker.Mock(return_value=[])
|
||||
|
||||
env = task.build_env(inventory_update, private_data_dir, False)
|
||||
env = task.build_env(inventory_update, private_data_dir)
|
||||
safe_env = {}
|
||||
credentials = task.build_credentials_list(inventory_update)
|
||||
for credential in credentials:
|
||||
@ -1919,7 +1823,7 @@ class TestInventoryUpdateCredentials(TestJobExecution):
|
||||
settings.AWX_TASK_ENV = {'FOO': 'BAR'}
|
||||
|
||||
private_data_files = task.build_private_data_files(inventory_update, private_data_dir)
|
||||
env = task.build_env(inventory_update, private_data_dir, False, private_data_files)
|
||||
env = task.build_env(inventory_update, private_data_dir, private_data_files)
|
||||
|
||||
assert env['FOO'] == 'BAR'
|
||||
|
||||
|
||||
@ -32,13 +32,7 @@ def unwrap_broadcast_msg(payload: dict):
|
||||
|
||||
def get_broadcast_hosts():
|
||||
Instance = apps.get_model('main', 'Instance')
|
||||
instances = (
|
||||
Instance.objects.filter(rampart_groups__controller__isnull=True)
|
||||
.exclude(hostname=Instance.objects.me().hostname)
|
||||
.order_by('hostname')
|
||||
.values('hostname', 'ip_address')
|
||||
.distinct()
|
||||
)
|
||||
instances = Instance.objects.exclude(hostname=Instance.objects.me().hostname).order_by('hostname').values('hostname', 'ip_address').distinct()
|
||||
return {i['hostname']: i['ip_address'] or i['hostname'] for i in instances}
|
||||
|
||||
|
||||
|
||||
@ -1,70 +0,0 @@
|
||||
---
|
||||
# The following variables will be set by the runner of this playbook:
|
||||
# src: /tmp/some/path/private_data_dir/
|
||||
|
||||
- name: Poll for status of active job.
|
||||
hosts: all
|
||||
gather_facts: false
|
||||
collections:
|
||||
- ansible.posix
|
||||
|
||||
tasks:
|
||||
- name: "Output job the playbook is running for"
|
||||
debug:
|
||||
msg: "Checking on job {{ job_id }}"
|
||||
|
||||
- name: Determine if daemon process is alive.
|
||||
shell: "ansible-runner is-alive {{src}}"
|
||||
register: is_alive
|
||||
ignore_errors: true
|
||||
|
||||
- name: Copy artifacts from the isolated host.
|
||||
synchronize:
|
||||
src: "{{src}}/artifacts/"
|
||||
dest: "{{src}}/artifacts/"
|
||||
mode: pull
|
||||
delete: true
|
||||
recursive: true
|
||||
when: ansible_kubectl_config is not defined
|
||||
|
||||
- name: Copy daemon log from the isolated host
|
||||
synchronize:
|
||||
src: "{{src}}/daemon.log"
|
||||
dest: "{{src}}/daemon.log"
|
||||
mode: pull
|
||||
when: ansible_kubectl_config is not defined
|
||||
|
||||
- name: Copy artifacts from pod
|
||||
synchronize:
|
||||
src: "{{src}}/artifacts/"
|
||||
dest: "{{src}}/artifacts/"
|
||||
mode: pull
|
||||
delete: true
|
||||
recursive: true
|
||||
set_remote_user: false
|
||||
rsync_opts:
|
||||
- "--blocking-io"
|
||||
- "--rsh=$RSH"
|
||||
environment:
|
||||
RSH: "oc rsh --config={{ ansible_kubectl_config }}"
|
||||
delegate_to: localhost
|
||||
when: ansible_kubectl_config is defined
|
||||
|
||||
- name: Copy daemon log from pod
|
||||
synchronize:
|
||||
src: "{{src}}/daemon.log"
|
||||
dest: "{{src}}/daemon.log"
|
||||
mode: pull
|
||||
set_remote_user: false
|
||||
rsync_opts:
|
||||
- "--blocking-io"
|
||||
- "--rsh=$RSH"
|
||||
environment:
|
||||
RSH: "oc rsh --config={{ ansible_kubectl_config }}"
|
||||
delegate_to: localhost
|
||||
when: ansible_kubectl_config is defined
|
||||
|
||||
- name: Fail if previous check determined that process is not alive.
|
||||
fail:
|
||||
msg: "isolated task is still running"
|
||||
when: "is_alive.rc == 0"
|
||||
@ -1,31 +0,0 @@
|
||||
---
|
||||
|
||||
# The following variables will be set by the runner of this playbook:
|
||||
# cleanup_dirs: ['/tmp/path/private_data_dir/', '/tmp//path/proot_temp_dir/']
|
||||
# private_data_dir: '/tmp/path/private_data_dir/'
|
||||
|
||||
- name: Clean up from isolated job run.
|
||||
hosts: all
|
||||
gather_facts: false
|
||||
|
||||
tasks:
|
||||
|
||||
- name: cancel the job
|
||||
command: "ansible-runner stop {{private_data_dir}}"
|
||||
ignore_errors: true
|
||||
|
||||
- name: remove build artifacts
|
||||
file:
|
||||
path: '{{item}}'
|
||||
state: absent
|
||||
register: result
|
||||
with_items: "{{cleanup_dirs}}"
|
||||
until: result is succeeded
|
||||
ignore_errors: true
|
||||
retries: 3
|
||||
delay: 5
|
||||
|
||||
- name: fail if build artifacts were not cleaned
|
||||
fail:
|
||||
msg: 'Unable to cleanup build artifacts'
|
||||
when: not result is succeeded
|
||||
@ -1,12 +0,0 @@
|
||||
---
|
||||
- name: Periodic background status check of isolated instances.
|
||||
hosts: all
|
||||
gather_facts: false
|
||||
|
||||
tasks:
|
||||
|
||||
- name: Get capacity of the instance
|
||||
awx_capacity:
|
||||
|
||||
- name: Remove any stale temporary files
|
||||
awx_isolated_cleanup:
|
||||
@ -1,61 +0,0 @@
|
||||
---
|
||||
|
||||
# The following variables will be set by the runner of this playbook:
|
||||
# src: /tmp/some/path/private_data_dir
|
||||
# dest: /tmp/some/path/
|
||||
|
||||
- name: Prepare data, dispatch job in isolated environment.
|
||||
hosts: all
|
||||
gather_facts: false
|
||||
vars:
|
||||
secret: "{{ lookup('pipe', 'cat ' + src + '/env/ssh_key') }}"
|
||||
collections:
|
||||
- ansible.posix
|
||||
|
||||
tasks:
|
||||
- name: "Output job the playbook is running for"
|
||||
debug:
|
||||
msg: "Checking on job {{ job_id }}"
|
||||
|
||||
- name: synchronize job environment with isolated host
|
||||
synchronize:
|
||||
copy_links: true
|
||||
src: "{{ src }}"
|
||||
dest: "{{ dest }}"
|
||||
when: ansible_kubectl_config is not defined
|
||||
|
||||
- name: synchronize job environment with remote job container
|
||||
synchronize:
|
||||
copy_links: true
|
||||
src: "{{ src }}"
|
||||
dest: "{{ dest }}"
|
||||
set_remote_user: false
|
||||
rsync_opts:
|
||||
- "--blocking-io"
|
||||
- "--rsh=$RSH"
|
||||
environment:
|
||||
RSH: "oc rsh --config={{ ansible_kubectl_config }}"
|
||||
delegate_to: localhost
|
||||
when: ansible_kubectl_config is defined
|
||||
|
||||
- local_action: stat path="{{src}}/env/ssh_key"
|
||||
register: key
|
||||
|
||||
- name: create a named pipe for secret environment data
|
||||
command: "mkfifo {{src}}/env/ssh_key"
|
||||
when: key.stat.exists
|
||||
|
||||
- name: spawn the playbook
|
||||
command: "ansible-runner start {{src}} -p '{{playbook}}' -i {{ident}}"
|
||||
when: playbook is defined
|
||||
|
||||
- name: spawn the adhoc command
|
||||
command: "ansible-runner start {{src}} -m {{module}} -a {{module_args}} -i {{ident}}"
|
||||
when: module is defined
|
||||
|
||||
- name: write the secret environment data
|
||||
mkfifo:
|
||||
content: "{{secret}}"
|
||||
path: "{{src}}/env/ssh_key"
|
||||
when: key.stat.exists
|
||||
no_log: true
|
||||
@ -1,73 +0,0 @@
|
||||
# Copyright (c) 2017 Ansible by Red Hat
|
||||
#
|
||||
# This file is part of Ansible Tower, but depends on code imported from Ansible.
|
||||
#
|
||||
# Ansible is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Ansible is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from ansible.module_utils._text import to_text
|
||||
from ansible.module_utils.basic import AnsibleModule
|
||||
|
||||
import subprocess
|
||||
import os
|
||||
import psutil
|
||||
|
||||
|
||||
def get_cpu_capacity():
|
||||
env_forkcpu = os.getenv('SYSTEM_TASK_FORKS_CPU', None)
|
||||
cpu = psutil.cpu_count()
|
||||
|
||||
if env_forkcpu:
|
||||
forkcpu = int(env_forkcpu)
|
||||
else:
|
||||
forkcpu = 4
|
||||
return (cpu, cpu * forkcpu)
|
||||
|
||||
|
||||
def get_mem_capacity():
|
||||
env_forkmem = os.getenv('SYSTEM_TASK_FORKS_MEM', None)
|
||||
if env_forkmem:
|
||||
forkmem = int(env_forkmem)
|
||||
else:
|
||||
forkmem = 100
|
||||
|
||||
mem = psutil.virtual_memory().total
|
||||
return (mem, max(1, ((mem / 1024 / 1024) - 2048) / forkmem))
|
||||
|
||||
|
||||
def main():
|
||||
module = AnsibleModule(argument_spec=dict())
|
||||
|
||||
ar = module.get_bin_path('ansible-runner', required=True)
|
||||
|
||||
try:
|
||||
version = subprocess.check_output([ar, '--version'], stderr=subprocess.STDOUT).strip()
|
||||
except subprocess.CalledProcessError as e:
|
||||
module.fail_json(msg=to_text(e))
|
||||
return
|
||||
# NOTE: Duplicated with awx.main.utils.common capacity utilities
|
||||
cpu, capacity_cpu = get_cpu_capacity()
|
||||
mem, capacity_mem = get_mem_capacity()
|
||||
|
||||
# Module never results in a change
|
||||
module.exit_json(
|
||||
changed=False,
|
||||
capacity_cpu=capacity_cpu,
|
||||
capacity_mem=capacity_mem,
|
||||
version=version,
|
||||
ansible_facts=dict(awx_cpu=cpu, awx_mem=mem, awx_capacity_cpu=capacity_cpu, awx_capacity_mem=capacity_mem, awx_capacity_version=version),
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@ -1,68 +0,0 @@
|
||||
# Copyright (c) 2017 Ansible by Red Hat
|
||||
#
|
||||
# This file is part of Ansible Tower, but depends on code imported from Ansible.
|
||||
#
|
||||
# Ansible is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Ansible is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from ansible.module_utils.basic import AnsibleModule
|
||||
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import datetime
|
||||
import subprocess
|
||||
|
||||
|
||||
def main():
|
||||
module = AnsibleModule(argument_spec=dict())
|
||||
changed = False
|
||||
paths_removed = set([])
|
||||
|
||||
# If a folder was last modified before this datetime, it will always be deleted
|
||||
folder_cutoff = datetime.datetime.now() - datetime.timedelta(days=7)
|
||||
# If a folder does not have an associated job running and is older than
|
||||
# this datetime, then it will be deleted because its job has finished
|
||||
job_cutoff = datetime.datetime.now() - datetime.timedelta(hours=1)
|
||||
|
||||
for search_pattern in ['/tmp/awx_[0-9]*_*', '/tmp/ansible_runner_pi_*']:
|
||||
for path in glob.iglob(search_pattern):
|
||||
st = os.stat(path)
|
||||
modtime = datetime.datetime.fromtimestamp(st.st_mtime)
|
||||
|
||||
if modtime > job_cutoff:
|
||||
continue
|
||||
elif modtime > folder_cutoff:
|
||||
try:
|
||||
re_match = re.match(r'\/tmp\/awx_\d+_.+', path)
|
||||
if re_match is not None:
|
||||
try:
|
||||
if subprocess.check_call(['ansible-runner', 'is-alive', path]) == 0:
|
||||
continue
|
||||
except subprocess.CalledProcessError:
|
||||
# the job isn't running anymore, clean up this path
|
||||
module.debug('Deleting path {} its job has completed.'.format(path))
|
||||
except (ValueError, IndexError):
|
||||
continue
|
||||
else:
|
||||
module.debug('Deleting path {} because modification date is too old.'.format(path))
|
||||
changed = True
|
||||
paths_removed.add(path)
|
||||
shutil.rmtree(path)
|
||||
|
||||
module.exit_json(changed=changed, paths_removed=list(paths_removed))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@ -1,31 +0,0 @@
|
||||
import os
|
||||
import stat
|
||||
|
||||
from ansible.module_utils.basic import AnsibleModule
|
||||
|
||||
|
||||
#
|
||||
# the purpose of this plugin is to call mkfifo and
|
||||
# write raw SSH key data into the fifo created on the remote isolated host
|
||||
#
|
||||
|
||||
|
||||
def main():
|
||||
module = AnsibleModule(argument_spec={'path': {'required': True, 'type': 'str'}, 'content': {'required': True, 'type': 'str'}}, supports_check_mode=False)
|
||||
|
||||
path = module.params['path']
|
||||
os.chmod(path, stat.S_IRUSR | stat.S_IWUSR)
|
||||
with open(path, 'w') as fifo:
|
||||
data = module.params['content']
|
||||
if 'OPENSSH PRIVATE KEY' in data and not data.endswith('\n'):
|
||||
# we use ansible's lookup() to read this file from the disk,
|
||||
# but ansible's lookup() *strips* newlines
|
||||
# OpenSSH wants certain private keys to end with a newline (or it
|
||||
# won't accept them)
|
||||
data += '\n'
|
||||
fifo.write(data)
|
||||
module.exit_json(dest=path, changed=True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@ -126,7 +126,7 @@ LOGIN_URL = '/api/login/'
|
||||
PROJECTS_ROOT = '/var/lib/awx/projects/'
|
||||
|
||||
# Absolute filesystem path to the directory to host collections for
|
||||
# running inventory imports, isolated playbooks
|
||||
# running inventory imports
|
||||
AWX_ANSIBLE_COLLECTIONS_PATHS = os.path.join(BASE_DIR, 'vendor', 'awx_ansible_collections')
|
||||
|
||||
# Absolute filesystem path to the directory for job status stdout (default for
|
||||
@ -408,23 +408,6 @@ AUTH_BASIC_ENABLED = True
|
||||
# when trying to access a UI page that requires authentication.
|
||||
LOGIN_REDIRECT_OVERRIDE = ''
|
||||
|
||||
# Default to skipping isolated host key checking (the initial connection will
|
||||
# hang on an interactive "The authenticity of host example.org can't be
|
||||
# established" message)
|
||||
AWX_ISOLATED_HOST_KEY_CHECKING = False
|
||||
|
||||
# The number of seconds to sleep between status checks for jobs running on isolated nodes
|
||||
AWX_ISOLATED_CHECK_INTERVAL = 30
|
||||
|
||||
# The timeout (in seconds) for launching jobs on isolated nodes
|
||||
AWX_ISOLATED_LAUNCH_TIMEOUT = 600
|
||||
|
||||
# Ansible connection timeout (in seconds) for communicating with isolated instances
|
||||
AWX_ISOLATED_CONNECTION_TIMEOUT = 10
|
||||
|
||||
# The time (in seconds) between the periodic isolated heartbeat status check
|
||||
AWX_ISOLATED_PERIODIC_CHECK = 600
|
||||
|
||||
DEVSERVER_DEFAULT_ADDR = '0.0.0.0'
|
||||
DEVSERVER_DEFAULT_PORT = '8013'
|
||||
|
||||
@ -440,7 +423,6 @@ CELERYBEAT_SCHEDULE = {
|
||||
'k8s_reaper': {'task': 'awx.main.tasks.awx_k8s_reaper', 'schedule': timedelta(seconds=60), 'options': {'expires': 50}},
|
||||
'send_subsystem_metrics': {'task': 'awx.main.analytics.analytics_tasks.send_subsystem_metrics', 'schedule': timedelta(seconds=20)},
|
||||
'cleanup_images': {'task': 'awx.main.tasks.cleanup_execution_environment_images', 'schedule': timedelta(hours=3)},
|
||||
# 'isolated_heartbeat': set up at the end of production.py and development.py
|
||||
}
|
||||
|
||||
# Django Caching Configuration
|
||||
@ -854,12 +836,6 @@ LOGGING = {
|
||||
'filename': os.path.join(LOG_ROOT, 'tower_rbac_migrations.log'),
|
||||
'formatter': 'simple',
|
||||
},
|
||||
'isolated_manager': {
|
||||
'level': 'WARNING',
|
||||
'class': 'logging.handlers.WatchedFileHandler',
|
||||
'filename': os.path.join(LOG_ROOT, 'isolated_manager.log'),
|
||||
'formatter': 'simple',
|
||||
},
|
||||
'job_lifecycle': {
|
||||
'level': 'DEBUG',
|
||||
'class': 'logging.handlers.WatchedFileHandler',
|
||||
@ -881,8 +857,6 @@ LOGGING = {
|
||||
'awx.main.dispatch': {'handlers': ['dispatcher']},
|
||||
'awx.main.consumers': {'handlers': ['console', 'file', 'tower_warnings'], 'level': 'INFO'},
|
||||
'awx.main.wsbroadcast': {'handlers': ['wsbroadcast']},
|
||||
'awx.isolated.manager': {'level': 'WARNING', 'handlers': ['console', 'file', 'isolated_manager'], 'propagate': True},
|
||||
'awx.isolated.manager.playbooks': {'handlers': ['management_playbooks'], 'propagate': False},
|
||||
'awx.main.commands.inventory_import': {'handlers': ['inventory_import'], 'propagate': False},
|
||||
'awx.main.tasks': {'handlers': ['task_system', 'external_logger'], 'propagate': False},
|
||||
'awx.main.analytics': {'handlers': ['task_system', 'external_logger'], 'level': 'INFO', 'propagate': False},
|
||||
|
||||
@ -35,9 +35,6 @@ LOGGING['handlers']['console']['()'] = 'awx.main.utils.handlers.ColorHandler' #
|
||||
LOGGING['handlers']['task_system'] = LOGGING['handlers']['console'].copy() # noqa
|
||||
COLOR_LOGS = True
|
||||
|
||||
# Pipe management playbook output to console
|
||||
LOGGING['loggers']['awx.isolated.manager.playbooks']['propagate'] = True # noqa
|
||||
|
||||
# celery is annoyingly loud when docker containers start
|
||||
LOGGING['loggers'].pop('celery', None) # noqa
|
||||
# avoid awx.main.dispatch WARNING-level scaling worker up/down messages
|
||||
@ -67,10 +64,6 @@ CALLBACK_QUEUE = "callback_tasks"
|
||||
# Note: This setting may be overridden by database settings.
|
||||
AWX_ROLES_ENABLED = True
|
||||
|
||||
AWX_ISOLATED_USERNAME = 'root'
|
||||
AWX_ISOLATED_CHECK_INTERVAL = 1
|
||||
AWX_ISOLATED_PERIODIC_CHECK = 30
|
||||
|
||||
# Disable Pendo on the UI for development/test.
|
||||
# Note: This setting may be overridden by database settings.
|
||||
PENDO_TRACKING_STATE = "off"
|
||||
@ -136,17 +129,6 @@ if "pytest" in sys.modules:
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
CELERYBEAT_SCHEDULE.update(
|
||||
{ # noqa
|
||||
'isolated_heartbeat': {
|
||||
'task': 'awx.main.tasks.awx_isolated_heartbeat',
|
||||
'schedule': timedelta(seconds=AWX_ISOLATED_PERIODIC_CHECK), # noqa
|
||||
'options': {'expires': AWX_ISOLATED_PERIODIC_CHECK * 2}, # noqa
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
CLUSTER_HOST_ID = socket.gethostname()
|
||||
|
||||
AWX_CALLBACK_PROFILE = True
|
||||
|
||||
@ -40,8 +40,6 @@ ANSIBLE_VENV_PATH = os.path.join(BASE_VENV_PATH, "ansible")
|
||||
# Tower base virtualenv paths and enablement
|
||||
AWX_VENV_PATH = os.path.join(BASE_VENV_PATH, "awx")
|
||||
|
||||
AWX_ISOLATED_USERNAME = 'awx'
|
||||
|
||||
# Store a snapshot of default settings at this point before loading any
|
||||
# customizable config files.
|
||||
DEFAULTS_SNAPSHOT = {}
|
||||
@ -91,14 +89,4 @@ except IOError:
|
||||
|
||||
# The below runs AFTER all of the custom settings are imported.
|
||||
|
||||
CELERYBEAT_SCHEDULE.update(
|
||||
{ # noqa
|
||||
'isolated_heartbeat': {
|
||||
'task': 'awx.main.tasks.awx_isolated_heartbeat',
|
||||
'schedule': timedelta(seconds=AWX_ISOLATED_PERIODIC_CHECK), # noqa
|
||||
'options': {'expires': AWX_ISOLATED_PERIODIC_CHECK * 2}, # noqa
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
DATABASES['default'].setdefault('OPTIONS', dict()).setdefault('application_name', f'{CLUSTER_HOST_ID}-{os.getpid()}-{" ".join(sys.argv)}'[:63]) # noqa
|
||||
|
||||
@ -71,62 +71,6 @@ Recommendations and constraints:
- Do not name any instance the same as a group name.


### Security-Isolated Rampart Groups

In Tower versions 3.2+, customers may optionally define isolated groups inside of security-restricted networking zones from which to run jobs and ad hoc commands. Instances in these groups will _not_ have a full install of Tower, but will have a minimal set of utilities used to run jobs. Isolated groups must be specified in the inventory file prefixed with `isolated_group_`. An example inventory file is shown below:
```
[tower]
towerA
towerB
towerC

[instance_group_security]
towerB
towerC

[isolated_group_govcloud]
isolatedA
isolatedB

[isolated_group_govcloud:vars]
controller=security
```

In the isolated rampart model, "controller" instances interact with "isolated" instances via a series of Ansible playbooks over SSH. At installation time, a randomized RSA key is generated and distributed as an authorized key to all "isolated" instances. The private half of the key is encrypted and stored within Tower, and is used to authenticate from "controller" instances to "isolated" instances when jobs are run.

When a job is scheduled to run on an "isolated" instance:

* The "controller" instance compiles metadata required to run the job and copies it to the "isolated" instance via `rsync` (any related project or inventory updates are run on the controller instance). This metadata includes:
|
||||
|
||||
- the entire SCM checkout directory for the project
|
||||
- a static inventory file
|
||||
- pexpect passwords
|
||||
- environment variables
|
||||
- the `ansible`/`ansible-playbook` command invocation, _i.e._, `ansible-playbook -i /path/to/inventory /path/to/playbook.yml -e ...`
|
||||
|
||||
* Once the metadata has been `rsync`ed to the isolated host, the "controller instance" starts a process on the "isolated" instance which consumes the metadata and starts running `ansible`/`ansible-playbook`. As the playbook runs, job artifacts (such as `stdout` and job events) are written to disk on the "isolated" instance.
|
||||
|
||||
* While the job runs on the "isolated" instance, the "controller" instance periodically copies job artifacts (`stdout` and job events) from the "isolated" instance using `rsync`. It consumes these until the job finishes running on the "isolated" instance.
|
||||
|
||||
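In rough terms, the controller-side flow amounts to the shell sketch below. This is illustrative only -- the real implementation drives these steps through management playbooks and `ansible-runner`, and the hostname, user, paths, and poll interval shown here are placeholders:

```
# Push the job's private data directory to the isolated host.
rsync -az /tmp/awx_42_xyz/ awx@isolated-host:/tmp/awx_42_xyz/

# Start the run; ansible-runner daemonizes and executes ansible-playbook remotely.
ssh awx@isolated-host "ansible-runner start /tmp/awx_42_xyz -p run.yml -i 42"

# Poll until the run finishes, pulling back stdout and job events on each pass.
while ssh awx@isolated-host "ansible-runner is-alive /tmp/awx_42_xyz"; do
    rsync -az awx@isolated-host:/tmp/awx_42_xyz/artifacts/ /tmp/awx_42_xyz/artifacts/
    sleep 30
done

# Collect whatever was written after the final poll.
rsync -az awx@isolated-host:/tmp/awx_42_xyz/artifacts/ /tmp/awx_42_xyz/artifacts/
```
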
Isolated groups are architected such that they may exist inside of a VPC with security rules that _only_ permit the instances in its `controller` group to access them; only ingress SSH traffic from "controller" instances to "isolated" instances is required.

Recommendations for system configuration with isolated groups:
- Do not create a group named `isolated_group_tower`.
- Do not put any isolated instances inside the `tower` group or other ordinary instance groups.
- Define the `controller` variable as either a group var or as a hostvar on all the instances in the isolated group. Please _do not_ allow isolated instances in the same group to have a different value for this variable - the behavior in this case cannot be predicted.
- Do not put an isolated instance in more than one isolated group.


Isolated Instance Authentication
--------------------------------
At installation time, by default, a randomized RSA key is generated and distributed as an authorized key to all "isolated" instances. The private half of the key is encrypted and stored within Tower, and is used to authenticate from "controller" instances to "isolated" instances when jobs are run.

For users who wish to manage SSH authentication from controlling instances to isolated instances via some system _outside_ of Tower (such as externally-managed, password-less SSH keys), this behavior can be disabled by unsetting two Tower API settings values:

`HTTP PATCH /api/v2/settings/jobs/ {'AWX_ISOLATED_PRIVATE_KEY': '', 'AWX_ISOLATED_PUBLIC_KEY': ''}`
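
For example, with `curl` this could be issued as follows (the host and admin credentials here are placeholders):

```
curl -k -u admin:password \
     -X PATCH \
     -H "Content-Type: application/json" \
     -d '{"AWX_ISOLATED_PRIVATE_KEY": "", "AWX_ISOLATED_PUBLIC_KEY": ""}' \
     https://awx.example.org/api/v2/settings/jobs/
```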


### Provisioning and Deprovisioning Instances and Groups

* **Provisioning** - Provisioning Instances after installation is supported by updating the `inventory` file and re-running the setup playbook. It's important that this file contain all passwords and information used when installing the cluster, or other instances may be reconfigured (this can be done intentionally).

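For instance, re-running the installer might look like the following; the entry point and flags are illustrative and depend on the installer bundle in use:

```
# Illustrative only: re-run the setup playbook against the same inventory file
# that was used at install time.
./setup.sh -i inventory
```
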
@ -1,13 +1,11 @@

# Container Groups

In a traditional AWX installation, jobs (ansible-playbook runs) are executed
either directly on a member of the cluster or on a pre-provisioned "isolated"
node.

The concept of a Container Group (working name) allows for job environments to
be provisioned on-demand as a Pod that exists only for the duration of the
playbook run. This is known as the ephemeral execution model and ensures a clean
environment for every job run.
In a traditional AWX installation, jobs (ansible-playbook runs) are
executed directly on a member of the cluster. The concept of a
Container Group (working name) allows for job environments to be
provisioned on-demand as a Pod that exists only for the duration of
the playbook run. This is known as the ephemeral execution model and
ensures a clean environment for every job run.

In some cases it is desirable to have the execution environment be "always-on";
this is done by manually creating an instance through the AWX API or UI.
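Purely as a conceptual illustration of the ephemeral execution model (this is not how AWX launches the Pod, which goes through the Kubernetes API; the image and names below are placeholders):

```bash
# A throwaway pod that exists only for the duration of one command, then is removed.
kubectl run awx-job-example --rm -i --restart=Never \
  --image=quay.io/ansible/ansible-runner:latest -- ansible-playbook --version
```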
## Configuration
@ -157,17 +157,6 @@ One of the most important tasks in a clustered AWX installation is the periodic

If a node in an AWX cluster discovers that one of its peers has not updated its heartbeat within a certain grace period, it is assumed to be offline, and its capacity is set to zero to avoid scheduling new tasks on that node. Additionally, jobs allegedly running or scheduled to run on that node are assumed to be lost, and "reaped", or marked as failed.

#### Isolated Tasks and Their Heartbeats
AWX reports as much status as it can via the browsable API at `/api/v2/ping` in order to provide validation of the health of an instance, including the timestamps of the last heartbeat. Since isolated nodes don't have access to the AWX database, their heartbeats are performed by controller nodes instead. A periodic task, `awx_isolated_heartbeat`, is responsible for periodically connecting from a controller to each isolated node and retrieving its capacity (via SSH).
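As a quick check, the ping endpoint can be queried directly; the hostname below is a placeholder:

```bash
# Inspect cluster health, including per-instance heartbeat timestamps; the host is hypothetical.
curl -k https://awx.example.com/api/v2/ping/ | python -m json.tool
```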
When a job is scheduled to run on an isolated instance, the controller instance puts together the metadata required to run the job and then transfers it to the isolated instance. Once the metadata has been synchronized to the isolated host, the controller instance starts a process on the isolated instance, which consumes the metadata and starts running `ansible`/`ansible-playbook`. As the playbook runs, job artifacts (such as `stdout` and job events) are written to disk on the isolated instance.

While the job runs on the isolated instance, the controller instance periodically checks for and copies the job artifacts (_e.g._, `stdout` and job events) that it produces. It processes these until the job finishes running.

To read more about Isolated Instances, refer to the [Isolated Instance Groups](https://docs.ansible.com/ansible-tower/latest/html/administration/clustering.html#isolated-instance-groups) section of the Clustering page in the Ansible Tower Administration guide.

## AWX Jobs

### Unified Jobs

@ -58,7 +58,6 @@ LOGGING['loggers']['django_auth_ldap']['handlers'] = ['console']

LOGGING['loggers']['social']['handlers'] = ['console']
LOGGING['loggers']['system_tracking_migrations']['handlers'] = ['console']
LOGGING['loggers']['rbac_migrations']['handlers'] = ['console']
LOGGING['loggers']['awx.isolated.manager.playbooks']['handlers'] = ['console']
LOGGING['handlers']['callback_receiver'] = {'class': 'logging.NullHandler'}
LOGGING['handlers']['task_system'] = {'class': 'logging.NullHandler'}
LOGGING['handlers']['tower_warnings'] = {'class': 'logging.NullHandler'}

@ -1,17 +0,0 @@

---
version: '2'
services:
  # Primary Tower Development Container link
  awx:
    environment:
      AWX_GROUP_QUEUES: tower,thepentagon
    links:
      - isolated
  # Isolated Rampart Container
  isolated:
    image: ${DEV_DOCKER_TAG_BASE}/awx_isolated:${TAG}
    container_name: tools_isolated_1
    hostname: isolated
    volumes:
      - "../awx/main/isolated:/awx_devel"
    privileged: true
@ -1,20 +0,0 @@

ARG TAG=latest
FROM ansible/awx_devel:${TAG}

RUN dnf install -y gcc python36-devel openssh-server
RUN python3 -m ensurepip && pip3 install "virtualenv < 20" ansible-runner
RUN dnf remove -y gcc python36-devel && rm -rf /var/cache/dnf

RUN rm -f /etc/ssh/ssh_host_ecdsa_key /etc/ssh/ssh_host_rsa_key
RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_ecdsa_key
RUN ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key
RUN sed -i "s/#UsePrivilegeSeparation.*/UsePrivilegeSeparation no/g" /etc/ssh/sshd_config
RUN sed -i "s/UsePAM.*/UsePAM yes/g" /etc/ssh/sshd_config
RUN sed -i "s/#StrictModes.*/StrictModes no/g" /etc/ssh/sshd_config
RUN mkdir -p /root/.ssh
RUN ln -s /awx_devel/authorized_keys /root/.ssh/authorized_keys

ENTRYPOINT ["tini", "--"]
CMD ["/usr/sbin/sshd", "-D"]

EXPOSE 22
@ -1,67 +0,0 @@

## Instructions on using an isolated node

The building of the isolated node is done in the `make docker-compose-build`
target. Its image uses a different tag from the tools_awx container.

Once the images are built, you can run the combined docker-compose target. This uses
the base `docker-compose.yml` with modifications found in `docker-isolated-override.yml`.
You will still need to set `COMPOSE_TAG` to whatever your intended
base branch is. For example:

```bash
make docker-isolated COMPOSE_TAG=devel
```

This will automatically exchange the keys so that the `tools_awx_1`
container can access the `tools_isolated_1` container over SSH.
After that, it brings up all the containers like the normal docker-compose
workflow.

### Running a job on the Isolated Node

Create a job template that runs normally. Add the id of the instance
group named `thepentagon` to the JT's instance groups. To do this, POST
the id (probably id=2) to `/api/v2/job_templates/N/instance_groups/`.
After that, run the job template.
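A `curl` sketch of that association; the instance group id, job template id, host, and credentials are all placeholders:

```bash
# Associate instance group 2 with job template 5; ids, host, and credentials are hypothetical.
curl -k -u admin:password \
     -X POST \
     -H "Content-Type: application/json" \
     -d '{"id": 2}' \
     https://awx.example.com/api/v2/job_templates/5/instance_groups/
```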
The models are automatically created when running the Makefile target,
and they are structured as follows:

    +-------+     +-------------+
    | tower |<----+ thepentagon |
    +-------+     +-------------+
        ^                ^
        |                |
        |                |
    +---+---+      +-----+----+
    | tower |      | isolated |
    +-------+      +----------+

The `controller` for the group "thepentagon" and all hosts therein is
determined by a ForeignKey within the instance group.

### Run a playbook

In order to run an isolated job, associate the instance group `thepentagon` with
a job template, inventory, or organization, then run a job that derives from
that resource. You should be able to confirm success by inspecting the
`instance_group` of the job.
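One hedged way to confirm this from the API (the job id, host, and credentials are placeholders):

```bash
# Show which instance group ran job 29; id, host, and credentials are hypothetical.
curl -k -u admin:password https://awx.example.com/api/v2/jobs/29/ \
  | python -c 'import json,sys; print(json.load(sys.stdin)["summary_fields"]["instance_group"])'
```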
#### Advanced Manual Testing

If you want to run a job manually inside of the isolated container with this
tooling, you need a private data directory. Normal isolated job runs will
clean up their private data directory, but you can temporarily disable this
by disabling some parts of the `cleanup_isolated.yml` playbook.

Example location of a private data directory:

`/tmp/awx_29_OM6Mnx/`

The following command would run the playbook corresponding to that job.

```bash
ansible-runner start /tmp/awx_29_OM6Mnx/ -p some_playbook.yml
```

Other ansible-runner commands include `start`, `is-alive`, and `stop`.
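Against the example private data directory above, those look roughly like:

```bash
# Check whether the run started above is still alive, then stop it.
ansible-runner is-alive /tmp/awx_29_OM6Mnx/
ansible-runner stop /tmp/awx_29_OM6Mnx/
```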