diff --git a/awx/main/isolated/isolated_manager.py b/awx/main/isolated/isolated_manager.py index 2449b02b49..caa2e1db01 100644 --- a/awx/main/isolated/isolated_manager.py +++ b/awx/main/isolated/isolated_manager.py @@ -116,7 +116,6 @@ class IsolatedManager(object): extra_vars = { 'src': self.private_data_dir, 'dest': os.path.split(self.private_data_dir)[0], - 'job_id': str(self.instance.pk) } if self.proot_temp_dir: extra_vars['proot_temp_dir'] = self.proot_temp_dir @@ -197,7 +196,7 @@ class IsolatedManager(object): return True return False - def check(self): + def check(self, interval=None): """ Repeatedly poll the isolated node to determine if the job has run. @@ -208,12 +207,11 @@ class IsolatedManager(object): For a completed job run, this function returns (status, rc), representing the status and return code of the isolated `ansible-playbook` run. - """ - extra_vars = { - 'src': self.private_data_dir, - 'job_id': str(self.instance.pk) - } + :param interval: an interval (in seconds) to wait between status polls + """ + interval = interval if interval is not None else settings.AWX_ISOLATED_CHECK_INTERVAL + extra_vars = {'src': self.private_data_dir} args = ['ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i', '%s,' % self.host, 'check_isolated.yml', '-e', json.dumps(extra_vars)] @@ -222,7 +220,8 @@ class IsolatedManager(object): status = 'failed' rc = None - buff = None + buff = cStringIO.StringIO() + last_check = time.time() seek = 0 job_timeout = remaining = self.job_timeout while status == 'failed': @@ -236,7 +235,11 @@ class IsolatedManager(object): status = 'failed' break - time.sleep(settings.AWX_ISOLATED_CHECK_INTERVAL) + canceled = self.cancelled_callback() if self.cancelled_callback else False + if not canceled and time.time() - last_check < interval: + # If the job isn't cancelled, but we haven't waited `interval` seconds, wait longer + time.sleep(1) + continue buff = cStringIO.StringIO() logger.debug('Checking job on isolated host with `check_isolated.yml` playbook.') @@ -257,6 +260,8 @@ class IsolatedManager(object): self.stdout_handle.write(line) seek += len(line) + last_check = time.time() + if status == 'successful': status_path = self.path_to('artifacts', 'status') rc_path = self.path_to('artifacts', 'rc') @@ -278,12 +283,11 @@ class IsolatedManager(object): def cleanup(self): # If the job failed for any reason, make a last-ditch effort at cleanup extra_vars = { - 'private_dirs': [ - '/tmp/ansible_tower/jobs/%s' % self.instance.pk, + 'private_data_dir': self.private_data_dir, + 'cleanup_dirs': [ self.private_data_dir, self.proot_temp_dir, ], - 'job_id': str(self.instance.pk), } args = ['ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i', '%s,' % self.host, 'clean_isolated.yml', '-e', diff --git a/awx/main/isolated/run.py b/awx/main/isolated/run.py index 95bf4bc8a6..22e7554f78 100755 --- a/awx/main/isolated/run.py +++ b/awx/main/isolated/run.py @@ -258,9 +258,9 @@ def __run__(private_data_dir): if __name__ == '__main__': parser = argparse.ArgumentParser(description='manage a daemonized, isolated ansible playbook') parser.add_argument('command', choices=['start', 'stop', 'is-alive']) - parser.add_argument('job_id') + parser.add_argument('private_data_dir') args = parser.parse_args() - private_data_dir = os.readlink('/tmp/ansible_tower/jobs/%s' % args.job_id) + private_data_dir = args.private_data_dir pidfile = os.path.join(private_data_dir, 'pid') if args.command == 'start': diff --git a/awx/main/tests/unit/isolated/test_expect.py b/awx/main/tests/unit/isolated/test_expect.py index a34d4f3881..a021b8fdc4 100644 --- a/awx/main/tests/unit/isolated/test_expect.py +++ b/awx/main/tests/unit/isolated/test_expect.py @@ -14,6 +14,8 @@ from Crypto import Random from awx.main.isolated import run, isolated_manager +from django.conf import settings + HERE, FILENAME = os.path.split(__file__) @@ -254,7 +256,7 @@ def test_check_isolated_job(private_data_dir, rsa_key): run_pexpect.side_effect = _synchronize_job_artifacts with mock.patch.object(mgr, '_missing_artifacts') as missing_artifacts: missing_artifacts.return_value = False - status, rc = mgr.check() + status, rc = mgr.check(interval=0) assert status == 'failed' assert rc == 1 @@ -262,8 +264,8 @@ def test_check_isolated_job(private_data_dir, rsa_key): run_pexpect.assert_called_with( [ - 'ansible-playbook', '-u', 'root', '-i', 'isolated-host,', - 'check_isolated.yml', '-e', '{"src": "%s", "job_id": "123"}' % private_data_dir, + 'ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i', 'isolated-host,', + 'check_isolated.yml', '-e', '{"src": "%s"}' % private_data_dir, '-vvvvv' ], '/tower_devel/awx/playbooks', mgr.env, mock.ANY, @@ -294,7 +296,7 @@ def test_check_isolated_job_timeout(private_data_dir, rsa_key): return ('failed', 1) run_pexpect.side_effect = _synchronize_job_artifacts - status, rc = mgr.check() + status, rc = mgr.check(interval=0) assert status == 'failed' assert rc == 1 diff --git a/awx/main/tests/unit/test_tasks.py b/awx/main/tests/unit/test_tasks.py index d76802b308..c7abfe52cb 100644 --- a/awx/main/tests/unit/test_tasks.py +++ b/awx/main/tests/unit/test_tasks.py @@ -11,6 +11,7 @@ import fcntl import mock import pytest import yaml +from django.conf import settings from awx.main.models import ( @@ -340,7 +341,7 @@ class TestIsolatedExecution(TestJobExecution): playbook_run = self.run_pexpect.call_args_list[0][0] assert ' '.join(playbook_run[0]).startswith(' '.join([ - 'ansible-playbook', '-u', 'root', '-i', self.REMOTE_HOST + ',', + 'ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i', self.REMOTE_HOST + ',', 'run_isolated.yml', '-e', ])) extra_vars = playbook_run[0][playbook_run[0].index('-e') + 1] @@ -348,7 +349,6 @@ class TestIsolatedExecution(TestJobExecution): assert extra_vars['dest'] == '/tmp' assert extra_vars['src'] == private_data assert extra_vars['proot_temp_dir'].startswith('/tmp/ansible_tower_proot_') - assert extra_vars['job_id'] == '1' def test_systemctl_failure(self): # If systemctl fails, read the contents of `artifacts/systemctl_logs` diff --git a/awx/playbooks/check_isolated.yml b/awx/playbooks/check_isolated.yml index 97a3f6499d..508e3ebdff 100644 --- a/awx/playbooks/check_isolated.yml +++ b/awx/playbooks/check_isolated.yml @@ -2,7 +2,6 @@ # The following variables will be set by the runner of this playbook: # src: /tmp/some/path/private_data_dir/ -# job_id: - hosts: all gather_facts: false @@ -16,6 +15,6 @@ mode: pull recursive: yes - - shell: "tower-expect is-alive {{job_id}}" + - shell: "tower-expect is-alive {{src}}" register: is_alive failed_when: "is_alive.rc == 0" diff --git a/awx/playbooks/clean_isolated.yml b/awx/playbooks/clean_isolated.yml index 4ca68f2eb5..0ab4be6d48 100644 --- a/awx/playbooks/clean_isolated.yml +++ b/awx/playbooks/clean_isolated.yml @@ -1,8 +1,8 @@ --- # The following variables will be set by the runner of this playbook: -# private_dirs: ['/tmp/path/private_data_dir/', '/tmp//path/proot_temp_dir/'] -# job_id: +# cleanup_dirs: ['/tmp/path/private_data_dir/', '/tmp//path/proot_temp_dir/'] +# private_data_dir: '/tmp/path/private_data_dir/' - hosts: all gather_facts: false @@ -10,9 +10,9 @@ tasks: - name: cancel the job - command: "tower-expect stop {{job_id}}" + command: "tower-expect stop {{private_data_dir}}" ignore_errors: yes - name: remove build artifacts file: path="{{item}}" state=absent - with_items: "{{private_dirs}}" + with_items: "{{cleanup_dirs}}" diff --git a/awx/playbooks/run_isolated.yml b/awx/playbooks/run_isolated.yml index f06930737d..c5f20c29c1 100644 --- a/awx/playbooks/run_isolated.yml +++ b/awx/playbooks/run_isolated.yml @@ -4,7 +4,6 @@ # src: /tmp/some/path/private_data_dir # dest: /tmp/some/path/ # proot_temp_dir: /tmp/some/path -# job_id: - hosts: all gather_facts: false @@ -26,20 +25,14 @@ src: "{{src}}" dest: "{{dest}}" - - name: create a directory for running jobs - file: path=/tmp/ansible_tower/jobs state=directory mode=0700 - - - name: create symlink keyed by job ID - file: src="{{src}}" dest="/tmp/ansible_tower/jobs/{{job_id}}" state=link - - name: create a named pipe for secret environment data - command: "mkfifo /tmp/ansible_tower/jobs/{{job_id}}/env" + command: "mkfifo {{src}}/env" - name: spawn the playbook - command: "tower-expect start {{job_id}}" + command: "tower-expect start {{src}}" - name: write the secret environment data mkfifo: content: "{{secret}}" - path: "/tmp/ansible_tower/jobs/{{job_id}}/env" + path: "{{src}}/env" no_log: True diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py index 1297bbcd9f..6530331863 100644 --- a/awx/settings/defaults.py +++ b/awx/settings/defaults.py @@ -605,7 +605,7 @@ AWX_ANSIBLE_CALLBACK_PLUGINS = "" AWX_ACTIVE_NODE_TIME = 7200 # The number of seconds to sleep between status checks for jobs running on isolated nodes -AWX_ISOLATED_CHECK_INTERVAL = 5 +AWX_ISOLATED_CHECK_INTERVAL = 30 # The timeout (in seconds) for launching jobs on isolated nodes AWX_ISOLATED_LAUNCH_TIMEOUT = 600 diff --git a/awx/settings/development.py b/awx/settings/development.py index dbfcfa2c9e..414bcfb48f 100644 --- a/awx/settings/development.py +++ b/awx/settings/development.py @@ -47,6 +47,7 @@ CALLBACK_QUEUE = "callback_tasks" AWX_PROOT_ENABLED = True AWX_ISOLATED_USERNAME = 'root' +AWX_ISOLATED_CHECK_INTERVAL = 1 # Disable Pendo on the UI for development/test. # Note: This setting may be overridden by database settings.