Add automatic deprovisioning support, enabled only for OpenShift

* Implement a config watcher for service restarts
* If the configmap bind point changes then restart all services
This commit is contained in:
Matthew Jones 2017-10-12 14:14:30 -04:00
parent 0e97dc4b84
commit c819560d39
No known key found for this signature in database
GPG Key ID: 76A4C17A97590C1C
8 changed files with 95 additions and 2 deletions

View File

@ -292,6 +292,10 @@ def cluster_node_heartbeat(self):
other_inst.save(update_fields=['capacity'])
logger.error("Host {} last checked in at {}, marked as lost.".format(
other_inst.hostname, other_inst.modified))
if settings.AWX_AUTO_DEPROVISION_INSTANCES:
deprovision_hostname = other_inst.hostname
other_inst.delete()
logger.info("Host {} Automatically Deprovisioned.".format(deprovision_hostname))
except DatabaseError as e:
if 'did not affect any rows' in str(e):
logger.debug('Another instance has marked {} as lost'.format(other_inst.hostname))

View File

@ -641,6 +641,9 @@ AWX_ANSIBLE_CALLBACK_PLUGINS = ""
# Time at which an HA node is considered active
AWX_ACTIVE_NODE_TIME = 7200
# Automatically remove nodes that have missed their heartbeats after some time
AWX_AUTO_DEPROVISION_INSTANCES = False
# Enable Pendo on the UI, possible values are 'off', 'anonymous', and 'detailed'
# Note: This setting may be overridden by database settings.
PENDO_TRACKING_STATE = "off"

View File

@ -41,6 +41,15 @@ priority=5
# TODO: Exit Handler
[eventlistener:awx-config-watcher]
command=/usr/bin/config-watcher
stderr_logfile=/dev/stdout
stderr_logfile_maxbytes=0
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
events=TICK_60
priority=0
[unix_http_server]
file=/tmp/supervisor.sock

View File

@ -43,6 +43,15 @@ priority=5
# TODO: Exit Handler
[eventlistener:awx-config-watcher]
command=/usr/bin/config-watcher
stderr_logfile=/dev/stdout
stderr_logfile_maxbytes=0
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
events=TICK_60
priority=0
[unix_http_server]
file=/tmp/supervisor.sock

View File

@ -163,6 +163,12 @@
dest: "{{ docker_base_path }}/requirements"
delegate_to: localhost
- name: Stage config watcher
copy:
src: ../tools/scripts/config-watcher
dest: "{{ docker_base_path }}/config-watcher"
delegate_to: localhost
- name: Stage Makefile
copy:
src: ../Makefile

View File

@ -22,6 +22,7 @@ ADD requirements/requirements_ansible.txt \
requirements/requirements_git.txt \
/tmp/requirements/
ADD ansible.repo /etc/yum.repos.d/ansible.repo
ADD config-watcher /usr/bin/config-watcher
ADD RPM-GPG-KEY-ansible-release /etc/pki/rpm-gpg/RPM-GPG-KEY-ansible-release
# OS Dependencies
WORKDIR /tmp
@ -50,7 +51,7 @@ ADD supervisor.conf /supervisor.conf
ADD supervisor_task.conf /supervisor_task.conf
ADD launch_awx.sh /usr/bin/launch_awx.sh
ADD launch_awx_task.sh /usr/bin/launch_awx_task.sh
RUN chmod +rx /usr/bin/launch_awx.sh && chmod +rx /usr/bin/launch_awx_task.sh
RUN chmod +rx /usr/bin/launch_awx.sh && chmod +rx /usr/bin/launch_awx_task.sh && chmod +rx /usr/bin/config-watcher
ADD settings.py /etc/tower/settings.py
RUN chmod g+w /etc/passwd
RUN chmod -R 777 /var/log/nginx && chmod -R 777 /var/lib/nginx

View File

@ -12,7 +12,10 @@ data:
# Container environments don't like chroots
AWX_PROOT_ENABLED = False
# Automatically deprovision pods that go offline
AWX_AUTO_DEPROVISION_INSTANCES = True
# Autoprovisioning should replace this
CLUSTER_HOST_ID = socket.gethostname()
SYSTEM_UUID = '00000000-0000-0000-0000-000000000000'

58
tools/scripts/config-watcher Executable file
View File

@ -0,0 +1,58 @@
#!/usr/bin/env python
import os
import sys
import hashlib
from supervisor import childutils
def hash(f):
    """Return the hexadecimal SHA-1 digest of the file at path ``f``.

    The file is opened in binary mode and consumed in 4096-byte reads so the
    whole file never has to be held in memory at once.
    """
    digest = hashlib.sha1()
    with open(f, "rb") as stream:
        while True:
            block = stream.read(4096)
            # An empty read means end of file.
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest()
def last_hash(f):
    """Read the text file at path ``f`` and return its contents stripped of
    leading and trailing whitespace.
    """
    with open(f, "r") as stream:
        stored = stream.read()
    return stored.strip()
def write_hash(f, h):
    """Overwrite the file at path ``f`` so that it contains exactly the
    string ``h``.
    """
    with open(f, "w") as sink:
        sink.writelines([h])
def main():
    """Run the supervisor event-listener loop that restarts services when
    the settings file changes.

    For every event whose name starts with ``TICK`` the loop:

    1. hashes ``/etc/tower/settings.py``;
    2. compares it with the hash stored in ``/var/lib/awx/.configsha``;
    3. if a stored hash exists and differs, stops and starts every
       supervisor process in the ``tower-processes`` group;
    4. stores the current hash so the next tick compares against it.

    Every event is acknowledged with ``listener.ok`` exactly once, whatever
    the outcome. The function never returns.
    """
    settings_path = "/etc/tower/settings.py"
    hash_path = "/var/lib/awx/.configsha"
    while 1:
        rpc = childutils.getRPCInterface(os.environ)
        headers, _payload = childutils.listener.wait(sys.stdin, sys.stdout)
        if not headers['eventname'].startswith('TICK'):
            childutils.listener.ok(sys.stdout)
            continue

        try:
            current_hash = hash(settings_path)
        except (IOError, OSError):
            sys.stderr.write("Could not open settings.py, skipping config watcher\n")
            childutils.listener.ok(sys.stdout)
            continue

        # A missing/unreadable hash file means this is the first run: record
        # the current hash below but do not restart anything.
        try:
            previous_hash = last_hash(hash_path)
        except (IOError, OSError):
            previous_hash = None
            sys.stderr.write("No previous hash found\n")

        if previous_hash == current_hash:
            childutils.listener.ok(sys.stdout)
            continue

        if previous_hash is not None:
            sys.stderr.write("Config changed, reloading services\n")
            # Best effort: a failing restart is logged rather than allowed
            # to kill the listener, and the new hash is still recorded.
            try:
                for proc in rpc.supervisor.getAllProcessInfo():
                    group = proc['group']
                    name = proc['name']
                    program = "{}:{}".format(group, name)
                    if group == "tower-processes":
                        sys.stderr.write('Restarting %s\n' % program)
                        rpc.supervisor.stopProcess(program)
                        rpc.supervisor.startProcess(program)
            except Exception as exc:
                sys.stderr.write("Failed to restart services: {}\n".format(exc))

        # Fix: the hash value must be passed along with the path; the call
        # previously omitted it and raised TypeError.
        try:
            write_hash(hash_path, current_hash)
        except (IOError, OSError) as exc:
            sys.stderr.write("Could not record config hash: {}\n".format(exc))
        childutils.listener.ok(sys.stdout)