From c819560d39ed101517eb3d8138c33b8d0d8f8547 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Thu, 12 Oct 2017 14:14:30 -0400 Subject: [PATCH] Add automatic deprovisioning support, only enabled for openshift * Implement a config watcher for service restarts * If the configmap bind point changes then restart all services --- awx/main/tasks.py | 4 ++ awx/settings/defaults.py | 3 + installer/image_build/files/supervisor.conf | 9 +++ .../image_build/files/supervisor_task.conf | 9 +++ installer/image_build/tasks/main.yml | 6 ++ installer/image_build/templates/Dockerfile.j2 | 3 +- .../openshift/templates/configmap.yml.j2 | 5 +- tools/scripts/config-watcher | 58 +++++++++++++++++++ 8 files changed, 95 insertions(+), 2 deletions(-) create mode 100755 tools/scripts/config-watcher diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 0b52f2e93e..0e9c8bcea5 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -292,6 +292,10 @@ def cluster_node_heartbeat(self): other_inst.save(update_fields=['capacity']) logger.error("Host {} last checked in at {}, marked as lost.".format( other_inst.hostname, other_inst.modified)) + if settings.AWX_AUTO_DEPROVISION_INSTANCES: + deprovision_hostname = other_inst.hostname + other_inst.delete() + logger.info("Host {} Automatically Deprovisioned.".format(deprovision_hostname)) except DatabaseError as e: if 'did not affect any rows' in str(e): logger.debug('Another instance has marked {} as lost'.format(other_inst.hostname)) diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py index c97348e45e..db3dec23fb 100644 --- a/awx/settings/defaults.py +++ b/awx/settings/defaults.py @@ -641,6 +641,9 @@ AWX_ANSIBLE_CALLBACK_PLUGINS = "" # Time at which an HA node is considered active AWX_ACTIVE_NODE_TIME = 7200 +# Automatically remove nodes that have missed their heartbeats after some time +AWX_AUTO_DEPROVISION_INSTANCES = False + # Enable Pendo on the UI, possible values are 'off', 'anonymous', and 'detailed' # Note: This setting may 
be overridden by database settings. PENDO_TRACKING_STATE = "off" diff --git a/installer/image_build/files/supervisor.conf b/installer/image_build/files/supervisor.conf index ec0acac101..cfcaf5ebe9 100644 --- a/installer/image_build/files/supervisor.conf +++ b/installer/image_build/files/supervisor.conf @@ -41,6 +41,15 @@ priority=5 # TODO: Exit Handler +[eventlistener:awx-config-watcher] +command=/usr/bin/config-watcher +stderr_logfile=/dev/stdout +stderr_logfile_maxbytes=0 +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +events=TICK_60 +priority=0 + [unix_http_server] file=/tmp/supervisor.sock diff --git a/installer/image_build/files/supervisor_task.conf b/installer/image_build/files/supervisor_task.conf index 3bc71cf75b..1a4e613925 100644 --- a/installer/image_build/files/supervisor_task.conf +++ b/installer/image_build/files/supervisor_task.conf @@ -43,6 +43,15 @@ priority=5 # TODO: Exit Handler +[eventlistener:awx-config-watcher] +command=/usr/bin/config-watcher +stderr_logfile=/dev/stdout +stderr_logfile_maxbytes=0 +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +events=TICK_60 +priority=0 + [unix_http_server] file=/tmp/supervisor.sock diff --git a/installer/image_build/tasks/main.yml b/installer/image_build/tasks/main.yml index d3dd66207f..f67e3abf7e 100644 --- a/installer/image_build/tasks/main.yml +++ b/installer/image_build/tasks/main.yml @@ -163,6 +163,12 @@ dest: "{{ docker_base_path }}/requirements" delegate_to: localhost +- name: Stage config watcher + copy: + src: ../tools/scripts/config-watcher + dest: "{{ docker_base_path }}/config-watcher" + delegate_to: localhost + - name: Stage Makefile copy: src: ../Makefile diff --git a/installer/image_build/templates/Dockerfile.j2 b/installer/image_build/templates/Dockerfile.j2 index 16c118b1fe..ab3db53490 100644 --- a/installer/image_build/templates/Dockerfile.j2 +++ b/installer/image_build/templates/Dockerfile.j2 @@ -22,6 +22,7 @@ ADD requirements/requirements_ansible.txt \ 
requirements/requirements_git.txt \ /tmp/requirements/ ADD ansible.repo /etc/yum.repos.d/ansible.repo +ADD config-watcher /usr/bin/config-watcher ADD RPM-GPG-KEY-ansible-release /etc/pki/rpm-gpg/RPM-GPG-KEY-ansible-release # OS Dependencies WORKDIR /tmp @@ -50,7 +51,7 @@ ADD supervisor.conf /supervisor.conf ADD supervisor_task.conf /supervisor_task.conf ADD launch_awx.sh /usr/bin/launch_awx.sh ADD launch_awx_task.sh /usr/bin/launch_awx_task.sh -RUN chmod +rx /usr/bin/launch_awx.sh && chmod +rx /usr/bin/launch_awx_task.sh +RUN chmod +rx /usr/bin/launch_awx.sh && chmod +rx /usr/bin/launch_awx_task.sh && chmod +rx /usr/bin/config-watcher ADD settings.py /etc/tower/settings.py RUN chmod g+w /etc/passwd RUN chmod -R 777 /var/log/nginx && chmod -R 777 /var/lib/nginx diff --git a/installer/openshift/templates/configmap.yml.j2 b/installer/openshift/templates/configmap.yml.j2 index 79c14fefee..8fb1e2b4bf 100644 --- a/installer/openshift/templates/configmap.yml.j2 +++ b/installer/openshift/templates/configmap.yml.j2 @@ -12,7 +12,10 @@ data: # Container environments don't like chroots AWX_PROOT_ENABLED = False - + + # Automatically deprovision pods that go offline + AWX_AUTO_DEPROVISION_INSTANCES = True + #Autoprovisioning should replace this CLUSTER_HOST_ID = socket.gethostname() SYSTEM_UUID = '00000000-0000-0000-0000-000000000000'
diff --git a/tools/scripts/config-watcher b/tools/scripts/config-watcher
new file mode 100755
index 0000000000..ffa2e56a1f
--- /dev/null
+++ b/tools/scripts/config-watcher
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import hashlib
+from supervisor import childutils
+
+
+def hash(f):
+    """Return the SHA-1 hex digest of the contents of file ``f``."""
+    s = hashlib.sha1()
+    with open(f, "rb") as fd:
+        for chunk in iter(lambda: fd.read(4096), b""):
+            s.update(chunk)
+    return s.hexdigest()
+
+
+def last_hash(f):
+    """Return the contents of file ``f`` stripped of surrounding whitespace."""
+    with open(f, "r") as fd:
+        return fd.read().strip()
+
+
+def write_hash(f, h):
+    """Overwrite file ``f`` with the string ``h``."""
+    with open(f, "w") as fd:
+        fd.write(h)
+
+
+def main():
+    """Restart the "tower-processes" group when settings.py changes.
+
+    Loops forever handling supervisor events. On each TICK event the
+    settings file is hashed and compared with the digest saved earlier;
+    on a mismatch the group's processes are restarted. Unless the saved
+    digest already matched, it is then rewritten with the new value.
+    """
+    while 1:
+        rpc = childutils.getRPCInterface(os.environ)
+        headers, payload = childutils.listener.wait(sys.stdin, sys.stdout)
+        if not headers['eventname'].startswith('TICK'):
+            childutils.listener.ok(sys.stdout)
+            continue
+        try:
+            current_hash = hash("/etc/tower/settings.py")
+        except Exception:
+            sys.stderr.write("Could not open settings.py, skipping config watcher\n")
+            childutils.listener.ok(sys.stdout)
+            continue
+        try:
+            if current_hash == last_hash("/var/lib/awx/.configsha"):
+                childutils.listener.ok(sys.stdout)
+                continue
+            else:
+                sys.stderr.write("Config changed, reloading services\n")
+                for proc in rpc.supervisor.getAllProcessInfo():
+                    group = proc['group']
+                    name = proc['name']
+                    program = "{}:{}".format(group, name)
+                    if group == "tower-processes":
+                        sys.stderr.write('Restarting %s\n' % program)
+                        rpc.supervisor.stopProcess(program)
+                        rpc.supervisor.startProcess(program)
+
+        except Exception:
+            sys.stderr.write("No previous hash found\n")
+        # Persist the digest just computed; the next tick compares against it.
+        write_hash("/var/lib/awx/.configsha", current_hash)
+        childutils.listener.ok(sys.stdout)
+
+
+if __name__ == '__main__':
+    main()