From 4dff25884fcaec5b10702724753155bfb5053d40 Mon Sep 17 00:00:00 2001 From: Chris Church Date: Wed, 14 Jan 2015 17:17:00 -0500 Subject: [PATCH] Re-enable proot by default, add python-psutil as a dependency, implement support for terminating SSH control master processes so that playbook runs exit cleanly. --- awx/main/tasks.py | 7 +++ awx/plugins/callback/job_event_callback.py | 67 ++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/awx/main/tasks.py b/awx/main/tasks.py index e1903d81d4..54e21e0f48 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -558,6 +558,13 @@ class RunJob(BaseTask): elif settings.DEBUG: env['JOB_CALLBACK_DEBUG'] = '1' + # Create a directory for ControlPath sockets that is unique to each + # job and visible inside the proot environment (when enabled). + cp_dir = os.path.join(kwargs['private_data_dir'], 'cp') + if not os.path.exists(cp_dir): + os.mkdir(cp_dir, 0700) + env['ANSIBLE_SSH_CONTROL_PATH'] = os.path.join(cp_dir, 'ansible-ssh-%%h-%%p-%%r') + # When using Ansible >= 1.3, allow the inventory script to include host # variables inline via ['_meta']['hostvars']. try: diff --git a/awx/plugins/callback/job_event_callback.py b/awx/plugins/callback/job_event_callback.py index 555e80181a..a9783eb28c 100644 --- a/awx/plugins/callback/job_event_callback.py +++ b/awx/plugins/callback/job_event_callback.py @@ -31,9 +31,11 @@ # Python import datetime +import glob import json import logging import os +import pwd import sys import urllib import urlparse @@ -43,8 +45,16 @@ from contextlib import closing # Requests import requests +# ZeroMQ import zmq +# PSUtil +try: + import psutil +except ImportError: + psutil = None + + class TokenAuth(requests.auth.AuthBase): def __init__(self, token): @@ -271,3 +281,60 @@ class CallbackModule(object): for attr in ('changed', 'dark', 'failures', 'ok', 'processed', 'skipped'): d[attr] = getattr(stats, attr) self._log_event('playbook_on_stats', **d) + self._terminate_ssh_control_masters() + + def _terminate_ssh_control_masters(self): + # Determine if control persist is being used and if any open sockets + # exist after running the playbook. + cp_path = os.environ.get('ANSIBLE_SSH_CONTROL_PATH', '') + if not cp_path: + return + cp_dir = os.path.dirname(cp_path) + if not os.path.exists(cp_dir): + return + cp_pattern = os.path.join(cp_dir, 'ansible-ssh-*') + cp_files = glob.glob(cp_pattern) + if not cp_files: + return + + # HACK: If psutil isn't available, sleep and allow the control master + # processes to timeout and die. + if not psutil: + time.sleep(60) + + # Attempt to find any running control master processes. + username = pwd.getpwuid(os.getuid())[0] + ssh_cm_procs = [] + for proc in psutil.process_iter(): + try: + pinfo = proc.as_dict(attrs=['pid', 'name', 'cmdline', 'username']) + except psutil.NoSuchProcess: + continue + if pinfo['username'] != username: + continue + if pinfo['name'] != 'ssh': + continue + for cp_file in cp_files: + if pinfo['cmdline'] and cp_file in pinfo['cmdline'][0]: + ssh_cm_procs.append(proc) + break + + # Terminate then kill control master processes. Workaround older + # version of psutil that may not have wait_procs implemented. + for proc in ssh_cm_procs: + proc.terminate() + if hasattr(psutil, 'wait_procs'): + procs_gone, procs_alive = psutil.wait_procs(ssh_cm_procs, timeout=5) + else: + procs_gone = [] + procs_alive = ssh_cm_procs[:] + for x in xrange(5): + for proc in procs_alive[:]: + if not proc.is_running(): + procs_alive.remove(proc) + procs_gone.append(proc) + if not procs_alive: + break + time.sleep(1) + for proc in procs_alive: + proc.kill()