mirror of
https://github.com/ansible/awx.git
synced 2026-03-01 08:48:46 -03:30
more isolated task execution tweaking
* set a more reasonable default `AWX_ISOLATED_CHECK_INTERVAL`
* make manual cancellation work for high values of `AWX_ISOLATED_CHECK_INTERVAL`
* remove the `/tmp/ansible_tower/jobs/` symlink directory

see: #6616
This commit is contained in:
@@ -116,7 +116,6 @@ class IsolatedManager(object):
|
|||||||
extra_vars = {
|
extra_vars = {
|
||||||
'src': self.private_data_dir,
|
'src': self.private_data_dir,
|
||||||
'dest': os.path.split(self.private_data_dir)[0],
|
'dest': os.path.split(self.private_data_dir)[0],
|
||||||
'job_id': str(self.instance.pk)
|
|
||||||
}
|
}
|
||||||
if self.proot_temp_dir:
|
if self.proot_temp_dir:
|
||||||
extra_vars['proot_temp_dir'] = self.proot_temp_dir
|
extra_vars['proot_temp_dir'] = self.proot_temp_dir
|
||||||
@@ -197,7 +196,7 @@ class IsolatedManager(object):
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def check(self):
|
def check(self, interval=None):
|
||||||
"""
|
"""
|
||||||
Repeatedly poll the isolated node to determine if the job has run.
|
Repeatedly poll the isolated node to determine if the job has run.
|
||||||
|
|
||||||
@@ -208,12 +207,11 @@ class IsolatedManager(object):
|
|||||||
For a completed job run, this function returns (status, rc),
|
For a completed job run, this function returns (status, rc),
|
||||||
representing the status and return code of the isolated
|
representing the status and return code of the isolated
|
||||||
`ansible-playbook` run.
|
`ansible-playbook` run.
|
||||||
"""
|
|
||||||
|
|
||||||
extra_vars = {
|
:param interval: an interval (in seconds) to wait between status polls
|
||||||
'src': self.private_data_dir,
|
"""
|
||||||
'job_id': str(self.instance.pk)
|
interval = interval if interval is not None else settings.AWX_ISOLATED_CHECK_INTERVAL
|
||||||
}
|
extra_vars = {'src': self.private_data_dir}
|
||||||
args = ['ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i',
|
args = ['ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i',
|
||||||
'%s,' % self.host, 'check_isolated.yml', '-e',
|
'%s,' % self.host, 'check_isolated.yml', '-e',
|
||||||
json.dumps(extra_vars)]
|
json.dumps(extra_vars)]
|
||||||
@@ -222,7 +220,8 @@ class IsolatedManager(object):
|
|||||||
|
|
||||||
status = 'failed'
|
status = 'failed'
|
||||||
rc = None
|
rc = None
|
||||||
buff = None
|
buff = cStringIO.StringIO()
|
||||||
|
last_check = time.time()
|
||||||
seek = 0
|
seek = 0
|
||||||
job_timeout = remaining = self.job_timeout
|
job_timeout = remaining = self.job_timeout
|
||||||
while status == 'failed':
|
while status == 'failed':
|
||||||
@@ -236,7 +235,11 @@ class IsolatedManager(object):
|
|||||||
status = 'failed'
|
status = 'failed'
|
||||||
break
|
break
|
||||||
|
|
||||||
time.sleep(settings.AWX_ISOLATED_CHECK_INTERVAL)
|
canceled = self.cancelled_callback() if self.cancelled_callback else False
|
||||||
|
if not canceled and time.time() - last_check < interval:
|
||||||
|
# If the job isn't cancelled, but we haven't waited `interval` seconds, wait longer
|
||||||
|
time.sleep(1)
|
||||||
|
continue
|
||||||
|
|
||||||
buff = cStringIO.StringIO()
|
buff = cStringIO.StringIO()
|
||||||
logger.debug('Checking job on isolated host with `check_isolated.yml` playbook.')
|
logger.debug('Checking job on isolated host with `check_isolated.yml` playbook.')
|
||||||
@@ -257,6 +260,8 @@ class IsolatedManager(object):
|
|||||||
self.stdout_handle.write(line)
|
self.stdout_handle.write(line)
|
||||||
seek += len(line)
|
seek += len(line)
|
||||||
|
|
||||||
|
last_check = time.time()
|
||||||
|
|
||||||
if status == 'successful':
|
if status == 'successful':
|
||||||
status_path = self.path_to('artifacts', 'status')
|
status_path = self.path_to('artifacts', 'status')
|
||||||
rc_path = self.path_to('artifacts', 'rc')
|
rc_path = self.path_to('artifacts', 'rc')
|
||||||
@@ -278,12 +283,11 @@ class IsolatedManager(object):
|
|||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
# If the job failed for any reason, make a last-ditch effort at cleanup
|
# If the job failed for any reason, make a last-ditch effort at cleanup
|
||||||
extra_vars = {
|
extra_vars = {
|
||||||
'private_dirs': [
|
'private_data_dir': self.private_data_dir,
|
||||||
'/tmp/ansible_tower/jobs/%s' % self.instance.pk,
|
'cleanup_dirs': [
|
||||||
self.private_data_dir,
|
self.private_data_dir,
|
||||||
self.proot_temp_dir,
|
self.proot_temp_dir,
|
||||||
],
|
],
|
||||||
'job_id': str(self.instance.pk),
|
|
||||||
}
|
}
|
||||||
args = ['ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i',
|
args = ['ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i',
|
||||||
'%s,' % self.host, 'clean_isolated.yml', '-e',
|
'%s,' % self.host, 'clean_isolated.yml', '-e',
|
||||||
|
|||||||
@@ -258,9 +258,9 @@ def __run__(private_data_dir):
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
parser = argparse.ArgumentParser(description='manage a daemonized, isolated ansible playbook')
|
parser = argparse.ArgumentParser(description='manage a daemonized, isolated ansible playbook')
|
||||||
parser.add_argument('command', choices=['start', 'stop', 'is-alive'])
|
parser.add_argument('command', choices=['start', 'stop', 'is-alive'])
|
||||||
parser.add_argument('job_id')
|
parser.add_argument('private_data_dir')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
private_data_dir = os.readlink('/tmp/ansible_tower/jobs/%s' % args.job_id)
|
private_data_dir = args.private_data_dir
|
||||||
pidfile = os.path.join(private_data_dir, 'pid')
|
pidfile = os.path.join(private_data_dir, 'pid')
|
||||||
|
|
||||||
if args.command == 'start':
|
if args.command == 'start':
|
||||||
|
|||||||
@@ -14,6 +14,8 @@ from Crypto import Random
|
|||||||
|
|
||||||
from awx.main.isolated import run, isolated_manager
|
from awx.main.isolated import run, isolated_manager
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
HERE, FILENAME = os.path.split(__file__)
|
HERE, FILENAME = os.path.split(__file__)
|
||||||
|
|
||||||
|
|
||||||
@@ -254,7 +256,7 @@ def test_check_isolated_job(private_data_dir, rsa_key):
|
|||||||
run_pexpect.side_effect = _synchronize_job_artifacts
|
run_pexpect.side_effect = _synchronize_job_artifacts
|
||||||
with mock.patch.object(mgr, '_missing_artifacts') as missing_artifacts:
|
with mock.patch.object(mgr, '_missing_artifacts') as missing_artifacts:
|
||||||
missing_artifacts.return_value = False
|
missing_artifacts.return_value = False
|
||||||
status, rc = mgr.check()
|
status, rc = mgr.check(interval=0)
|
||||||
|
|
||||||
assert status == 'failed'
|
assert status == 'failed'
|
||||||
assert rc == 1
|
assert rc == 1
|
||||||
@@ -262,8 +264,8 @@ def test_check_isolated_job(private_data_dir, rsa_key):
|
|||||||
|
|
||||||
run_pexpect.assert_called_with(
|
run_pexpect.assert_called_with(
|
||||||
[
|
[
|
||||||
'ansible-playbook', '-u', 'root', '-i', 'isolated-host,',
|
'ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i', 'isolated-host,',
|
||||||
'check_isolated.yml', '-e', '{"src": "%s", "job_id": "123"}' % private_data_dir,
|
'check_isolated.yml', '-e', '{"src": "%s"}' % private_data_dir,
|
||||||
'-vvvvv'
|
'-vvvvv'
|
||||||
],
|
],
|
||||||
'/tower_devel/awx/playbooks', mgr.env, mock.ANY,
|
'/tower_devel/awx/playbooks', mgr.env, mock.ANY,
|
||||||
@@ -294,7 +296,7 @@ def test_check_isolated_job_timeout(private_data_dir, rsa_key):
|
|||||||
return ('failed', 1)
|
return ('failed', 1)
|
||||||
|
|
||||||
run_pexpect.side_effect = _synchronize_job_artifacts
|
run_pexpect.side_effect = _synchronize_job_artifacts
|
||||||
status, rc = mgr.check()
|
status, rc = mgr.check(interval=0)
|
||||||
|
|
||||||
assert status == 'failed'
|
assert status == 'failed'
|
||||||
assert rc == 1
|
assert rc == 1
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import fcntl
|
|||||||
import mock
|
import mock
|
||||||
import pytest
|
import pytest
|
||||||
import yaml
|
import yaml
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
|
||||||
from awx.main.models import (
|
from awx.main.models import (
|
||||||
@@ -340,7 +341,7 @@ class TestIsolatedExecution(TestJobExecution):
|
|||||||
|
|
||||||
playbook_run = self.run_pexpect.call_args_list[0][0]
|
playbook_run = self.run_pexpect.call_args_list[0][0]
|
||||||
assert ' '.join(playbook_run[0]).startswith(' '.join([
|
assert ' '.join(playbook_run[0]).startswith(' '.join([
|
||||||
'ansible-playbook', '-u', 'root', '-i', self.REMOTE_HOST + ',',
|
'ansible-playbook', '-u', settings.AWX_ISOLATED_USERNAME, '-i', self.REMOTE_HOST + ',',
|
||||||
'run_isolated.yml', '-e',
|
'run_isolated.yml', '-e',
|
||||||
]))
|
]))
|
||||||
extra_vars = playbook_run[0][playbook_run[0].index('-e') + 1]
|
extra_vars = playbook_run[0][playbook_run[0].index('-e') + 1]
|
||||||
@@ -348,7 +349,6 @@ class TestIsolatedExecution(TestJobExecution):
|
|||||||
assert extra_vars['dest'] == '/tmp'
|
assert extra_vars['dest'] == '/tmp'
|
||||||
assert extra_vars['src'] == private_data
|
assert extra_vars['src'] == private_data
|
||||||
assert extra_vars['proot_temp_dir'].startswith('/tmp/ansible_tower_proot_')
|
assert extra_vars['proot_temp_dir'].startswith('/tmp/ansible_tower_proot_')
|
||||||
assert extra_vars['job_id'] == '1'
|
|
||||||
|
|
||||||
def test_systemctl_failure(self):
|
def test_systemctl_failure(self):
|
||||||
# If systemctl fails, read the contents of `artifacts/systemctl_logs`
|
# If systemctl fails, read the contents of `artifacts/systemctl_logs`
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
# The following variables will be set by the runner of this playbook:
|
# The following variables will be set by the runner of this playbook:
|
||||||
# src: /tmp/some/path/private_data_dir/
|
# src: /tmp/some/path/private_data_dir/
|
||||||
# job_id: <pk>
|
|
||||||
|
|
||||||
- hosts: all
|
- hosts: all
|
||||||
gather_facts: false
|
gather_facts: false
|
||||||
@@ -16,6 +15,6 @@
|
|||||||
mode: pull
|
mode: pull
|
||||||
recursive: yes
|
recursive: yes
|
||||||
|
|
||||||
- shell: "tower-expect is-alive {{job_id}}"
|
- shell: "tower-expect is-alive {{src}}"
|
||||||
register: is_alive
|
register: is_alive
|
||||||
failed_when: "is_alive.rc == 0"
|
failed_when: "is_alive.rc == 0"
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
---
|
---
|
||||||
|
|
||||||
# The following variables will be set by the runner of this playbook:
|
# The following variables will be set by the runner of this playbook:
|
||||||
# private_dirs: ['/tmp/path/private_data_dir/', '/tmp//path/proot_temp_dir/']
|
# cleanup_dirs: ['/tmp/path/private_data_dir/', '/tmp//path/proot_temp_dir/']
|
||||||
# job_id: <pk>
|
# private_data_dir: '/tmp/path/private_data_dir/'
|
||||||
|
|
||||||
- hosts: all
|
- hosts: all
|
||||||
gather_facts: false
|
gather_facts: false
|
||||||
@@ -10,9 +10,9 @@
|
|||||||
tasks:
|
tasks:
|
||||||
|
|
||||||
- name: cancel the job
|
- name: cancel the job
|
||||||
command: "tower-expect stop {{job_id}}"
|
command: "tower-expect stop {{private_data_dir}}"
|
||||||
ignore_errors: yes
|
ignore_errors: yes
|
||||||
|
|
||||||
- name: remove build artifacts
|
- name: remove build artifacts
|
||||||
file: path="{{item}}" state=absent
|
file: path="{{item}}" state=absent
|
||||||
with_items: "{{private_dirs}}"
|
with_items: "{{cleanup_dirs}}"
|
||||||
|
|||||||
@@ -4,7 +4,6 @@
|
|||||||
# src: /tmp/some/path/private_data_dir
|
# src: /tmp/some/path/private_data_dir
|
||||||
# dest: /tmp/some/path/
|
# dest: /tmp/some/path/
|
||||||
# proot_temp_dir: /tmp/some/path
|
# proot_temp_dir: /tmp/some/path
|
||||||
# job_id: <pk>
|
|
||||||
|
|
||||||
- hosts: all
|
- hosts: all
|
||||||
gather_facts: false
|
gather_facts: false
|
||||||
@@ -26,20 +25,14 @@
|
|||||||
src: "{{src}}"
|
src: "{{src}}"
|
||||||
dest: "{{dest}}"
|
dest: "{{dest}}"
|
||||||
|
|
||||||
- name: create a directory for running jobs
|
|
||||||
file: path=/tmp/ansible_tower/jobs state=directory mode=0700
|
|
||||||
|
|
||||||
- name: create symlink keyed by job ID
|
|
||||||
file: src="{{src}}" dest="/tmp/ansible_tower/jobs/{{job_id}}" state=link
|
|
||||||
|
|
||||||
- name: create a named pipe for secret environment data
|
- name: create a named pipe for secret environment data
|
||||||
command: "mkfifo /tmp/ansible_tower/jobs/{{job_id}}/env"
|
command: "mkfifo {{src}}/env"
|
||||||
|
|
||||||
- name: spawn the playbook
|
- name: spawn the playbook
|
||||||
command: "tower-expect start {{job_id}}"
|
command: "tower-expect start {{src}}"
|
||||||
|
|
||||||
- name: write the secret environment data
|
- name: write the secret environment data
|
||||||
mkfifo:
|
mkfifo:
|
||||||
content: "{{secret}}"
|
content: "{{secret}}"
|
||||||
path: "/tmp/ansible_tower/jobs/{{job_id}}/env"
|
path: "{{src}}/env"
|
||||||
no_log: True
|
no_log: True
|
||||||
|
|||||||
@@ -605,7 +605,7 @@ AWX_ANSIBLE_CALLBACK_PLUGINS = ""
|
|||||||
AWX_ACTIVE_NODE_TIME = 7200
|
AWX_ACTIVE_NODE_TIME = 7200
|
||||||
|
|
||||||
# The number of seconds to sleep between status checks for jobs running on isolated nodes
|
# The number of seconds to sleep between status checks for jobs running on isolated nodes
|
||||||
AWX_ISOLATED_CHECK_INTERVAL = 5
|
AWX_ISOLATED_CHECK_INTERVAL = 30
|
||||||
|
|
||||||
# The timeout (in seconds) for launching jobs on isolated nodes
|
# The timeout (in seconds) for launching jobs on isolated nodes
|
||||||
AWX_ISOLATED_LAUNCH_TIMEOUT = 600
|
AWX_ISOLATED_LAUNCH_TIMEOUT = 600
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ CALLBACK_QUEUE = "callback_tasks"
|
|||||||
AWX_PROOT_ENABLED = True
|
AWX_PROOT_ENABLED = True
|
||||||
|
|
||||||
AWX_ISOLATED_USERNAME = 'root'
|
AWX_ISOLATED_USERNAME = 'root'
|
||||||
|
AWX_ISOLATED_CHECK_INTERVAL = 1
|
||||||
|
|
||||||
# Disable Pendo on the UI for development/test.
|
# Disable Pendo on the UI for development/test.
|
||||||
# Note: This setting may be overridden by database settings.
|
# Note: This setting may be overridden by database settings.
|
||||||
|
|||||||
Reference in New Issue
Block a user