more isolated task execution tweaking

* set a more reasonable default `AWX_ISOLATED_CHECK_INTERVAL`
* make manual cancellation work for high values of
  `AWX_ISOLATED_CHECK_INTERVAL`
* remove the `/tmp/ansible_tower/jobs/` symlink directory

see: #6616
This commit is contained in:
Ryan Petrello 2017-06-16 11:18:01 -04:00
parent 44e0c8621a
commit 1ea03aa4c9
9 changed files with 36 additions and 37 deletions

View File

@ -116,7 +116,6 @@ class IsolatedManager(object):
extra_vars = {
'src': self.private_data_dir,
'dest': os.path.split(self.private_data_dir)[0],
'job_id': str(self.instance.pk)
}
if self.proot_temp_dir:
extra_vars['proot_temp_dir'] = self.proot_temp_dir
@ -197,7 +196,7 @@ class IsolatedManager(object):
return True
return False
def check(self):
def check(self, interval=None):
"""
Repeatedly poll the isolated node to determine if the job has run.
@ -208,12 +207,11 @@ class IsolatedManager(object):
For a completed job run, this function returns (status, rc),
representing the status and return code of the isolated
`ansible-playbook` run.
"""
extra_vars = {
'src': self.private_data_dir,
'job_id': str(self.instance.pk)
}
:param interval: an interval (in seconds) to wait between status polls
"""
interval = interval if interval is not None else settings.AWX_ISOLATED_CHECK_INTERVAL
extra_vars = {'src': self.private_data_dir}
args = ['ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i',
'%s,' % self.host, 'check_isolated.yml', '-e',
json.dumps(extra_vars)]
@ -222,7 +220,8 @@ class IsolatedManager(object):
status = 'failed'
rc = None
buff = None
buff = cStringIO.StringIO()
last_check = time.time()
seek = 0
job_timeout = remaining = self.job_timeout
while status == 'failed':
@ -236,7 +235,11 @@ class IsolatedManager(object):
status = 'failed'
break
time.sleep(settings.AWX_ISOLATED_CHECK_INTERVAL)
canceled = self.cancelled_callback() if self.cancelled_callback else False
if not canceled and time.time() - last_check < interval:
# If the job isn't cancelled, but we haven't waited `interval` seconds, wait longer
time.sleep(1)
continue
buff = cStringIO.StringIO()
logger.debug('Checking job on isolated host with `check_isolated.yml` playbook.')
@ -257,6 +260,8 @@ class IsolatedManager(object):
self.stdout_handle.write(line)
seek += len(line)
last_check = time.time()
if status == 'successful':
status_path = self.path_to('artifacts', 'status')
rc_path = self.path_to('artifacts', 'rc')
@ -278,12 +283,11 @@ class IsolatedManager(object):
def cleanup(self):
# If the job failed for any reason, make a last-ditch effort at cleanup
extra_vars = {
'private_dirs': [
'/tmp/ansible_tower/jobs/%s' % self.instance.pk,
'private_data_dir': self.private_data_dir,
'cleanup_dirs': [
self.private_data_dir,
self.proot_temp_dir,
],
'job_id': str(self.instance.pk),
}
args = ['ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i',
'%s,' % self.host, 'clean_isolated.yml', '-e',

View File

@ -258,9 +258,9 @@ def __run__(private_data_dir):
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='manage a daemonized, isolated ansible playbook')
parser.add_argument('command', choices=['start', 'stop', 'is-alive'])
parser.add_argument('job_id')
parser.add_argument('private_data_dir')
args = parser.parse_args()
private_data_dir = os.readlink('/tmp/ansible_tower/jobs/%s' % args.job_id)
private_data_dir = args.private_data_dir
pidfile = os.path.join(private_data_dir, 'pid')
if args.command == 'start':

View File

@ -14,6 +14,8 @@ from Crypto import Random
from awx.main.isolated import run, isolated_manager
from django.conf import settings
HERE, FILENAME = os.path.split(__file__)
@ -254,7 +256,7 @@ def test_check_isolated_job(private_data_dir, rsa_key):
run_pexpect.side_effect = _synchronize_job_artifacts
with mock.patch.object(mgr, '_missing_artifacts') as missing_artifacts:
missing_artifacts.return_value = False
status, rc = mgr.check()
status, rc = mgr.check(interval=0)
assert status == 'failed'
assert rc == 1
@ -262,8 +264,8 @@ def test_check_isolated_job(private_data_dir, rsa_key):
run_pexpect.assert_called_with(
[
'ansible-playbook', '-u', 'root', '-i', 'isolated-host,',
'check_isolated.yml', '-e', '{"src": "%s", "job_id": "123"}' % private_data_dir,
'ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i', 'isolated-host,',
'check_isolated.yml', '-e', '{"src": "%s"}' % private_data_dir,
'-vvvvv'
],
'/tower_devel/awx/playbooks', mgr.env, mock.ANY,
@ -294,7 +296,7 @@ def test_check_isolated_job_timeout(private_data_dir, rsa_key):
return ('failed', 1)
run_pexpect.side_effect = _synchronize_job_artifacts
status, rc = mgr.check()
status, rc = mgr.check(interval=0)
assert status == 'failed'
assert rc == 1

View File

@ -11,6 +11,7 @@ import fcntl
import mock
import pytest
import yaml
from django.conf import settings
from awx.main.models import (
@ -340,7 +341,7 @@ class TestIsolatedExecution(TestJobExecution):
playbook_run = self.run_pexpect.call_args_list[0][0]
assert ' '.join(playbook_run[0]).startswith(' '.join([
'ansible-playbook', '-u', 'root', '-i', self.REMOTE_HOST + ',',
'ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i', self.REMOTE_HOST + ',',
'run_isolated.yml', '-e',
]))
extra_vars = playbook_run[0][playbook_run[0].index('-e') + 1]
@ -348,7 +349,6 @@ class TestIsolatedExecution(TestJobExecution):
assert extra_vars['dest'] == '/tmp'
assert extra_vars['src'] == private_data
assert extra_vars['proot_temp_dir'].startswith('/tmp/ansible_tower_proot_')
assert extra_vars['job_id'] == '1'
def test_systemctl_failure(self):
# If systemctl fails, read the contents of `artifacts/systemctl_logs`

View File

@ -2,7 +2,6 @@
# The following variables will be set by the runner of this playbook:
# src: /tmp/some/path/private_data_dir/
# job_id: <pk>
- hosts: all
gather_facts: false
@ -16,6 +15,6 @@
mode: pull
recursive: yes
- shell: "tower-expect is-alive {{job_id}}"
- shell: "tower-expect is-alive {{src}}"
register: is_alive
failed_when: "is_alive.rc == 0"

View File

@ -1,8 +1,8 @@
---
# The following variables will be set by the runner of this playbook:
# private_dirs: ['/tmp/path/private_data_dir/', '/tmp//path/proot_temp_dir/']
# job_id: <pk>
# cleanup_dirs: ['/tmp/path/private_data_dir/', '/tmp/path/proot_temp_dir/']
# private_data_dir: '/tmp/path/private_data_dir/'
- hosts: all
gather_facts: false
@ -10,9 +10,9 @@
tasks:
- name: cancel the job
command: "tower-expect stop {{job_id}}"
command: "tower-expect stop {{private_data_dir}}"
ignore_errors: yes
- name: remove build artifacts
file: path="{{item}}" state=absent
with_items: "{{private_dirs}}"
with_items: "{{cleanup_dirs}}"

View File

@ -4,7 +4,6 @@
# src: /tmp/some/path/private_data_dir
# dest: /tmp/some/path/
# proot_temp_dir: /tmp/some/path
# job_id: <pk>
- hosts: all
gather_facts: false
@ -26,20 +25,14 @@
src: "{{src}}"
dest: "{{dest}}"
- name: create a directory for running jobs
file: path=/tmp/ansible_tower/jobs state=directory mode=0700
- name: create symlink keyed by job ID
file: src="{{src}}" dest="/tmp/ansible_tower/jobs/{{job_id}}" state=link
- name: create a named pipe for secret environment data
command: "mkfifo /tmp/ansible_tower/jobs/{{job_id}}/env"
command: "mkfifo {{src}}/env"
- name: spawn the playbook
command: "tower-expect start {{job_id}}"
command: "tower-expect start {{src}}"
- name: write the secret environment data
mkfifo:
content: "{{secret}}"
path: "/tmp/ansible_tower/jobs/{{job_id}}/env"
path: "{{src}}/env"
no_log: True

View File

@ -605,7 +605,7 @@ AWX_ANSIBLE_CALLBACK_PLUGINS = ""
AWX_ACTIVE_NODE_TIME = 7200
# The number of seconds to sleep between status checks for jobs running on isolated nodes
AWX_ISOLATED_CHECK_INTERVAL = 5
AWX_ISOLATED_CHECK_INTERVAL = 30
# The timeout (in seconds) for launching jobs on isolated nodes
AWX_ISOLATED_LAUNCH_TIMEOUT = 600

View File

@ -47,6 +47,7 @@ CALLBACK_QUEUE = "callback_tasks"
AWX_PROOT_ENABLED = True
AWX_ISOLATED_USERNAME = 'root'
AWX_ISOLATED_CHECK_INTERVAL = 1
# Disable Pendo on the UI for development/test.
# Note: This setting may be overridden by database settings.