mirror of
https://github.com/ansible/awx.git
synced 2026-01-11 18:09:57 -03:30
more isolated task execution tweaking
* set a more reasonable default `AWX_ISOLATED_CHECK_INTERVAL`
* make manual cancellation work for high values of `AWX_ISOLATED_CHECK_INTERVAL`
* remove the `/tmp/ansible_tower/jobs/` symlink directory

see: #6616
This commit is contained in:
parent
44e0c8621a
commit
1ea03aa4c9
@ -116,7 +116,6 @@ class IsolatedManager(object):
|
||||
extra_vars = {
|
||||
'src': self.private_data_dir,
|
||||
'dest': os.path.split(self.private_data_dir)[0],
|
||||
'job_id': str(self.instance.pk)
|
||||
}
|
||||
if self.proot_temp_dir:
|
||||
extra_vars['proot_temp_dir'] = self.proot_temp_dir
|
||||
@ -197,7 +196,7 @@ class IsolatedManager(object):
|
||||
return True
|
||||
return False
|
||||
|
||||
def check(self):
|
||||
def check(self, interval=None):
|
||||
"""
|
||||
Repeatedly poll the isolated node to determine if the job has run.
|
||||
|
||||
@ -208,12 +207,11 @@ class IsolatedManager(object):
|
||||
For a completed job run, this function returns (status, rc),
|
||||
representing the status and return code of the isolated
|
||||
`ansible-playbook` run.
|
||||
"""
|
||||
|
||||
extra_vars = {
|
||||
'src': self.private_data_dir,
|
||||
'job_id': str(self.instance.pk)
|
||||
}
|
||||
:param interval: an interval (in seconds) to wait between status polls
|
||||
"""
|
||||
interval = interval if interval is not None else settings.AWX_ISOLATED_CHECK_INTERVAL
|
||||
extra_vars = {'src': self.private_data_dir}
|
||||
args = ['ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i',
|
||||
'%s,' % self.host, 'check_isolated.yml', '-e',
|
||||
json.dumps(extra_vars)]
|
||||
@ -222,7 +220,8 @@ class IsolatedManager(object):
|
||||
|
||||
status = 'failed'
|
||||
rc = None
|
||||
buff = None
|
||||
buff = cStringIO.StringIO()
|
||||
last_check = time.time()
|
||||
seek = 0
|
||||
job_timeout = remaining = self.job_timeout
|
||||
while status == 'failed':
|
||||
@ -236,7 +235,11 @@ class IsolatedManager(object):
|
||||
status = 'failed'
|
||||
break
|
||||
|
||||
time.sleep(settings.AWX_ISOLATED_CHECK_INTERVAL)
|
||||
canceled = self.cancelled_callback() if self.cancelled_callback else False
|
||||
if not canceled and time.time() - last_check < interval:
|
||||
# If the job isn't cancelled, but we haven't waited `interval` seconds, wait longer
|
||||
time.sleep(1)
|
||||
continue
|
||||
|
||||
buff = cStringIO.StringIO()
|
||||
logger.debug('Checking job on isolated host with `check_isolated.yml` playbook.')
|
||||
@ -257,6 +260,8 @@ class IsolatedManager(object):
|
||||
self.stdout_handle.write(line)
|
||||
seek += len(line)
|
||||
|
||||
last_check = time.time()
|
||||
|
||||
if status == 'successful':
|
||||
status_path = self.path_to('artifacts', 'status')
|
||||
rc_path = self.path_to('artifacts', 'rc')
|
||||
@ -278,12 +283,11 @@ class IsolatedManager(object):
|
||||
def cleanup(self):
|
||||
# If the job failed for any reason, make a last-ditch effort at cleanup
|
||||
extra_vars = {
|
||||
'private_dirs': [
|
||||
'/tmp/ansible_tower/jobs/%s' % self.instance.pk,
|
||||
'private_data_dir': self.private_data_dir,
|
||||
'cleanup_dirs': [
|
||||
self.private_data_dir,
|
||||
self.proot_temp_dir,
|
||||
],
|
||||
'job_id': str(self.instance.pk),
|
||||
}
|
||||
args = ['ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i',
|
||||
'%s,' % self.host, 'clean_isolated.yml', '-e',
|
||||
|
||||
@ -258,9 +258,9 @@ def __run__(private_data_dir):
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='manage a daemonized, isolated ansible playbook')
|
||||
parser.add_argument('command', choices=['start', 'stop', 'is-alive'])
|
||||
parser.add_argument('job_id')
|
||||
parser.add_argument('private_data_dir')
|
||||
args = parser.parse_args()
|
||||
private_data_dir = os.readlink('/tmp/ansible_tower/jobs/%s' % args.job_id)
|
||||
private_data_dir = args.private_data_dir
|
||||
pidfile = os.path.join(private_data_dir, 'pid')
|
||||
|
||||
if args.command == 'start':
|
||||
|
||||
@ -14,6 +14,8 @@ from Crypto import Random
|
||||
|
||||
from awx.main.isolated import run, isolated_manager
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
HERE, FILENAME = os.path.split(__file__)
|
||||
|
||||
|
||||
@ -254,7 +256,7 @@ def test_check_isolated_job(private_data_dir, rsa_key):
|
||||
run_pexpect.side_effect = _synchronize_job_artifacts
|
||||
with mock.patch.object(mgr, '_missing_artifacts') as missing_artifacts:
|
||||
missing_artifacts.return_value = False
|
||||
status, rc = mgr.check()
|
||||
status, rc = mgr.check(interval=0)
|
||||
|
||||
assert status == 'failed'
|
||||
assert rc == 1
|
||||
@ -262,8 +264,8 @@ def test_check_isolated_job(private_data_dir, rsa_key):
|
||||
|
||||
run_pexpect.assert_called_with(
|
||||
[
|
||||
'ansible-playbook', '-u', 'root', '-i', 'isolated-host,',
|
||||
'check_isolated.yml', '-e', '{"src": "%s", "job_id": "123"}' % private_data_dir,
|
||||
'ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i', 'isolated-host,',
|
||||
'check_isolated.yml', '-e', '{"src": "%s"}' % private_data_dir,
|
||||
'-vvvvv'
|
||||
],
|
||||
'/tower_devel/awx/playbooks', mgr.env, mock.ANY,
|
||||
@ -294,7 +296,7 @@ def test_check_isolated_job_timeout(private_data_dir, rsa_key):
|
||||
return ('failed', 1)
|
||||
|
||||
run_pexpect.side_effect = _synchronize_job_artifacts
|
||||
status, rc = mgr.check()
|
||||
status, rc = mgr.check(interval=0)
|
||||
|
||||
assert status == 'failed'
|
||||
assert rc == 1
|
||||
|
||||
@ -11,6 +11,7 @@ import fcntl
|
||||
import mock
|
||||
import pytest
|
||||
import yaml
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
from awx.main.models import (
|
||||
@ -340,7 +341,7 @@ class TestIsolatedExecution(TestJobExecution):
|
||||
|
||||
playbook_run = self.run_pexpect.call_args_list[0][0]
|
||||
assert ' '.join(playbook_run[0]).startswith(' '.join([
|
||||
'ansible-playbook', '-u', 'root', '-i', self.REMOTE_HOST + ',',
|
||||
'ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i', self.REMOTE_HOST + ',',
|
||||
'run_isolated.yml', '-e',
|
||||
]))
|
||||
extra_vars = playbook_run[0][playbook_run[0].index('-e') + 1]
|
||||
@ -348,7 +349,6 @@ class TestIsolatedExecution(TestJobExecution):
|
||||
assert extra_vars['dest'] == '/tmp'
|
||||
assert extra_vars['src'] == private_data
|
||||
assert extra_vars['proot_temp_dir'].startswith('/tmp/ansible_tower_proot_')
|
||||
assert extra_vars['job_id'] == '1'
|
||||
|
||||
def test_systemctl_failure(self):
|
||||
# If systemctl fails, read the contents of `artifacts/systemctl_logs`
|
||||
|
||||
@ -2,7 +2,6 @@
|
||||
|
||||
# The following variables will be set by the runner of this playbook:
|
||||
# src: /tmp/some/path/private_data_dir/
|
||||
# job_id: <pk>
|
||||
|
||||
- hosts: all
|
||||
gather_facts: false
|
||||
@ -16,6 +15,6 @@
|
||||
mode: pull
|
||||
recursive: yes
|
||||
|
||||
- shell: "tower-expect is-alive {{job_id}}"
|
||||
- shell: "tower-expect is-alive {{src}}"
|
||||
register: is_alive
|
||||
failed_when: "is_alive.rc == 0"
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
---
|
||||
|
||||
# The following variables will be set by the runner of this playbook:
|
||||
# private_dirs: ['/tmp/path/private_data_dir/', '/tmp//path/proot_temp_dir/']
|
||||
# job_id: <pk>
|
||||
# cleanup_dirs: ['/tmp/path/private_data_dir/', '/tmp//path/proot_temp_dir/']
|
||||
# private_data_dir: '/tmp/path/private_data_dir/'
|
||||
|
||||
- hosts: all
|
||||
gather_facts: false
|
||||
@ -10,9 +10,9 @@
|
||||
tasks:
|
||||
|
||||
- name: cancel the job
|
||||
command: "tower-expect stop {{job_id}}"
|
||||
command: "tower-expect stop {{private_data_dir}}"
|
||||
ignore_errors: yes
|
||||
|
||||
- name: remove build artifacts
|
||||
file: path="{{item}}" state=absent
|
||||
with_items: "{{private_dirs}}"
|
||||
with_items: "{{cleanup_dirs}}"
|
||||
|
||||
@ -4,7 +4,6 @@
|
||||
# src: /tmp/some/path/private_data_dir
|
||||
# dest: /tmp/some/path/
|
||||
# proot_temp_dir: /tmp/some/path
|
||||
# job_id: <pk>
|
||||
|
||||
- hosts: all
|
||||
gather_facts: false
|
||||
@ -26,20 +25,14 @@
|
||||
src: "{{src}}"
|
||||
dest: "{{dest}}"
|
||||
|
||||
- name: create a directory for running jobs
|
||||
file: path=/tmp/ansible_tower/jobs state=directory mode=0700
|
||||
|
||||
- name: create symlink keyed by job ID
|
||||
file: src="{{src}}" dest="/tmp/ansible_tower/jobs/{{job_id}}" state=link
|
||||
|
||||
- name: create a named pipe for secret environment data
|
||||
command: "mkfifo /tmp/ansible_tower/jobs/{{job_id}}/env"
|
||||
command: "mkfifo {{src}}/env"
|
||||
|
||||
- name: spawn the playbook
|
||||
command: "tower-expect start {{job_id}}"
|
||||
command: "tower-expect start {{src}}"
|
||||
|
||||
- name: write the secret environment data
|
||||
mkfifo:
|
||||
content: "{{secret}}"
|
||||
path: "/tmp/ansible_tower/jobs/{{job_id}}/env"
|
||||
path: "{{src}}/env"
|
||||
no_log: True
|
||||
|
||||
@ -605,7 +605,7 @@ AWX_ANSIBLE_CALLBACK_PLUGINS = ""
|
||||
AWX_ACTIVE_NODE_TIME = 7200
|
||||
|
||||
# The number of seconds to sleep between status checks for jobs running on isolated nodes
|
||||
AWX_ISOLATED_CHECK_INTERVAL = 5
|
||||
AWX_ISOLATED_CHECK_INTERVAL = 30
|
||||
|
||||
# The timeout (in seconds) for launching jobs on isolated nodes
|
||||
AWX_ISOLATED_LAUNCH_TIMEOUT = 600
|
||||
|
||||
@ -47,6 +47,7 @@ CALLBACK_QUEUE = "callback_tasks"
|
||||
AWX_PROOT_ENABLED = True
|
||||
|
||||
AWX_ISOLATED_USERNAME = 'root'
|
||||
AWX_ISOLATED_CHECK_INTERVAL = 1
|
||||
|
||||
# Disable Pendo on the UI for development/test.
|
||||
# Note: This setting may be overridden by database settings.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user