mirror of
https://github.com/ansible/awx.git
synced 2026-01-18 05:01:19 -03:30
Merge pull request #11955 from shanemcd/fail-better
Increase resiliency when application crashes
This commit is contained in:
commit
ef0f6ca248
@ -35,9 +35,11 @@ def reap(instance=None, status='failed', excluded_uuids=[]):
|
||||
"""
|
||||
me = instance
|
||||
if me is None:
|
||||
(changed, me) = Instance.objects.get_or_register()
|
||||
if changed:
|
||||
logger.info("Registered node '{}'".format(me.hostname))
|
||||
try:
|
||||
me = Instance.objects.me()
|
||||
except RuntimeError as e:
|
||||
logger.warning(f'Local instance is not registered, not running reaper: {e}')
|
||||
return
|
||||
now = tz_now()
|
||||
workflow_ctype_id = ContentType.objects.get_for_model(WorkflowJob).id
|
||||
jobs = UnifiedJob.objects.filter(
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from django.db import transaction
|
||||
from django.conf import settings
|
||||
|
||||
from awx.main.models import Instance
|
||||
|
||||
@ -13,7 +14,7 @@ class Command(BaseCommand):
|
||||
Register this instance with the database for HA tracking.
|
||||
"""
|
||||
|
||||
help = "Add instance to the database. Specify `--hostname` to use this command."
|
||||
help = "Add instance to the database. When no options are provided, the hostname of the current system will be used. Override with `--hostname`."
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument('--hostname', dest='hostname', type=str, help="Hostname used during provisioning")
|
||||
@ -22,8 +23,11 @@ class Command(BaseCommand):
|
||||
|
||||
def _register_hostname(self, hostname, node_type, uuid):
|
||||
if not hostname:
|
||||
return
|
||||
(changed, instance) = Instance.objects.register(hostname=hostname, node_type=node_type, uuid=uuid)
|
||||
if not settings.AWX_AUTO_DEPROVISION_INSTANCES:
|
||||
raise CommandError('Registering with values from settings only intended for use in K8s installs')
|
||||
(changed, instance) = Instance.objects.get_or_register()
|
||||
else:
|
||||
(changed, instance) = Instance.objects.register(hostname=hostname, node_type=node_type, uuid=uuid)
|
||||
if changed:
|
||||
print("Successfully registered instance {}".format(hostname))
|
||||
else:
|
||||
@ -32,8 +36,6 @@ class Command(BaseCommand):
|
||||
|
||||
@transaction.atomic
|
||||
def handle(self, **options):
|
||||
if not options.get('hostname'):
|
||||
raise CommandError("Specify `--hostname` to use this command.")
|
||||
self.changed = False
|
||||
self._register_hostname(options.get('hostname'), options.get('node_type'), options.get('uuid'))
|
||||
if self.changed:
|
||||
|
||||
@ -490,10 +490,6 @@ def cluster_node_heartbeat():
|
||||
if inst.hostname == settings.CLUSTER_HOST_ID:
|
||||
this_inst = inst
|
||||
break
|
||||
else:
|
||||
(changed, this_inst) = Instance.objects.get_or_register()
|
||||
if changed:
|
||||
logger.info("Registered tower control node '{}'".format(this_inst.hostname))
|
||||
|
||||
inspect_execution_nodes(instance_list)
|
||||
|
||||
|
||||
@ -17,4 +17,11 @@ set -e
|
||||
|
||||
wait-for-migrations
|
||||
|
||||
supervisord -c /etc/supervisord.conf
|
||||
# This file will be re-written when the dispatcher calls reconfigure_rsyslog(),
|
||||
# but it needs to exist when supervisor initially starts rsyslog to prevent the
|
||||
# container from crashing. This was the most minimal config I could get working.
|
||||
cat << EOF > /var/lib/awx/rsyslog/rsyslog.conf
|
||||
action(type="omfile" file="/dev/null")
|
||||
EOF
|
||||
|
||||
exec supervisord -c /etc/supervisord.conf
|
||||
|
||||
@ -17,4 +17,6 @@ set -e
|
||||
|
||||
wait-for-migrations
|
||||
|
||||
supervisord -c /etc/supervisord_task.conf
|
||||
awx-manage provision_instance
|
||||
|
||||
exec supervisord -c /etc/supervisord_task.conf
|
||||
|
||||
8
tools/ansible/roles/dockerfile/files/stop-supervisor
Executable file
8
tools/ansible/roles/dockerfile/files/stop-supervisor
Executable file
@ -0,0 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
printf "READY\n";
|
||||
|
||||
while read line; do
|
||||
echo "Processing Event: $line" >&2;
|
||||
kill -SIGQUIT $PPID
|
||||
done < /dev/stdin
|
||||
@ -120,11 +120,7 @@ RUN curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master
|
||||
chmod 700 get_helm.sh && \
|
||||
./get_helm.sh
|
||||
|
||||
# Install tini
|
||||
RUN curl -L -o /usr/bin/tini https://github.com/krallin/tini/releases/download/v0.19.0/tini-{{ tini_architecture | default('amd64') }} && \
|
||||
chmod +x /usr/bin/tini
|
||||
|
||||
RUN pip3 install virtualenv supervisor
|
||||
RUN pip3 install virtualenv supervisor dumb-init
|
||||
|
||||
RUN rm -rf /root/.cache && rm -rf /tmp/*
|
||||
|
||||
@ -194,6 +190,7 @@ RUN mkdir -p /etc/containers/registries.conf.d/ && echo "unqualified-search-regi
|
||||
# Create default awx rsyslog config
|
||||
ADD tools/ansible/roles/dockerfile/files/rsyslog.conf /var/lib/awx/rsyslog/rsyslog.conf
|
||||
ADD tools/ansible/roles/dockerfile/files/wait-for-migrations /usr/local/bin/wait-for-migrations
|
||||
ADD tools/ansible/roles/dockerfile/files/stop-supervisor /usr/local/bin/stop-supervisor
|
||||
|
||||
## File mappings
|
||||
{% if build_dev|bool %}
|
||||
@ -264,9 +261,9 @@ RUN for dir in \
|
||||
for file in \
|
||||
/etc/containers/containers.conf \
|
||||
/var/lib/awx/.config/containers/containers.conf \
|
||||
/var/lib/shared/overlay-images/images.lock \
|
||||
/var/lib/shared/overlay-images/images.lock \
|
||||
/var/lib/shared/overlay-layers/layers.lock \
|
||||
/var/lib/shared/vfs-images/images.lock \
|
||||
/var/lib/shared/vfs-images/images.lock \
|
||||
/var/lib/shared/vfs-layers/layers.lock \
|
||||
/var/run/nginx.pid \
|
||||
/var/lib/awx/venv/awx/lib/python3.9/site-packages/awx.egg-link ; \
|
||||
@ -292,7 +289,7 @@ CMD ["/bin/bash"]
|
||||
USER 1000
|
||||
EXPOSE 8052
|
||||
|
||||
ENTRYPOINT ["/usr/bin/tini", "--"]
|
||||
ENTRYPOINT ["dumb-init", "--"]
|
||||
CMD /usr/bin/launch_awx.sh
|
||||
VOLUME /var/lib/nginx
|
||||
VOLUME /var/lib/awx/.local/share/containers
|
||||
|
||||
@ -12,9 +12,10 @@ directory = /awx_devel
|
||||
{% else %}
|
||||
command = nginx -g "daemon off;"
|
||||
{% endif %}
|
||||
autostart = true
|
||||
autorestart = true
|
||||
stopwaitsecs = 5
|
||||
startsecs = 30
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
@ -31,12 +32,10 @@ environment =
|
||||
command = /var/lib/awx/venv/awx/bin/uwsgi /etc/tower/uwsgi.ini
|
||||
directory = /var/lib/awx
|
||||
{% endif %}
|
||||
autostart = true
|
||||
autorestart = true
|
||||
stopwaitsecs = 15
|
||||
startsecs = 30
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
stopsignal=KILL
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
@ -50,10 +49,8 @@ directory = /awx_devel
|
||||
command = /var/lib/awx/venv/awx/bin/daphne -b 127.0.0.1 -p 8051 --websocket_timeout -1 awx.asgi:channel_layer
|
||||
directory = /var/lib/awx
|
||||
{% endif %}
|
||||
autostart = true
|
||||
stopsignal=KILL
|
||||
autorestart = true
|
||||
stopwaitsecs = 5
|
||||
startsecs = 30
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
stdout_logfile=/dev/stdout
|
||||
@ -69,9 +66,8 @@ directory = /awx_devel
|
||||
command = awx-manage run_wsbroadcast
|
||||
directory = /var/lib/awx
|
||||
{% endif %}
|
||||
autostart = true
|
||||
autorestart = true
|
||||
stopwaitsecs = 5
|
||||
startsecs = 30
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
stdout_logfile=/dev/stdout
|
||||
@ -81,33 +77,26 @@ stderr_logfile_maxbytes=0
|
||||
|
||||
[program:awx-rsyslogd]
|
||||
command = rsyslogd -n -i /var/run/awx-rsyslog/rsyslog.pid -f /var/lib/awx/rsyslog/rsyslog.conf
|
||||
autostart = true
|
||||
autorestart = true
|
||||
startretries = 10
|
||||
stopwaitsecs = 5
|
||||
stopsignal=TERM
|
||||
startsecs = 30
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
redirect_stderr=true
|
||||
stdout_logfile=/dev/stderr
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[group:tower-processes]
|
||||
programs=nginx,uwsgi,daphne,wsbroadcast,awx-rsyslogd
|
||||
priority=5
|
||||
|
||||
# TODO: Exit Handler
|
||||
|
||||
{% if kube_dev | bool %}
|
||||
[eventlistener:awx-config-watcher]
|
||||
command=/usr/bin/config-watcher
|
||||
stderr_logfile=/dev/stdout
|
||||
stderr_logfile_maxbytes=0
|
||||
[eventlistener:superwatcher]
|
||||
command=stop-supervisor
|
||||
events=PROCESS_STATE_FATAL
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
events=TICK_60
|
||||
priority=0
|
||||
{% endif %}
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[unix_http_server]
|
||||
file=/var/run/supervisor/supervisor.web.sock
|
||||
|
||||
@ -13,9 +13,8 @@ directory = /awx_devel
|
||||
command = awx-manage run_dispatcher
|
||||
directory = /var/lib/awx
|
||||
{% endif %}
|
||||
autostart = true
|
||||
autorestart = true
|
||||
stopwaitsecs = 5
|
||||
startsecs = 30
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
stdout_logfile=/dev/stdout
|
||||
@ -31,9 +30,8 @@ directory = /awx_devel
|
||||
command = awx-manage run_callback_receiver
|
||||
directory = /var/lib/awx
|
||||
{% endif %}
|
||||
autostart = true
|
||||
autorestart = true
|
||||
stopwaitsecs = 5
|
||||
startsecs = 30
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
stdout_logfile=/dev/stdout
|
||||
@ -45,18 +43,14 @@ stderr_logfile_maxbytes=0
|
||||
programs=dispatcher,callback-receiver
|
||||
priority=5
|
||||
|
||||
# TODO: Exit Handler
|
||||
|
||||
{% if kube_dev | bool %}
|
||||
[eventlistener:awx-config-watcher]
|
||||
command=/usr/bin/config-watcher
|
||||
stderr_logfile=/dev/stdout
|
||||
stderr_logfile_maxbytes=0
|
||||
[eventlistener:superwatcher]
|
||||
command=stop-supervisor
|
||||
events=PROCESS_STATE_FATAL
|
||||
autorestart = true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
events=TICK_60
|
||||
priority=0
|
||||
{% endif %}
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[unix_http_server]
|
||||
file=/var/run/supervisor/supervisor.sock
|
||||
|
||||
@ -5,4 +5,4 @@ bootstrap_development.sh
|
||||
|
||||
cd /awx_devel
|
||||
# Start the services
|
||||
exec tini -- make supervisor
|
||||
exec make supervisor
|
||||
|
||||
@ -5,79 +5,75 @@ nodaemon=true
|
||||
|
||||
[program:awx-dispatcher]
|
||||
command = make dispatcher
|
||||
autostart = true
|
||||
autorestart = true
|
||||
stopwaitsecs = 1
|
||||
stopsignal=KILL
|
||||
startsecs = 30
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
redirect_stderr=true
|
||||
stdout_events_enabled = true
|
||||
stderr_events_enabled = true
|
||||
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:awx-receiver]
|
||||
command = make receiver
|
||||
autostart = true
|
||||
autorestart = true
|
||||
stopwaitsecs = 1
|
||||
stopsignal=KILL
|
||||
startsecs = 30
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
redirect_stderr=true
|
||||
stdout_events_enabled = true
|
||||
stderr_events_enabled = true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:awx-wsbroadcast]
|
||||
command = make wsbroadcast
|
||||
autostart = true
|
||||
autorestart = true
|
||||
stopwaitsecs = 1
|
||||
stopsignal=KILL
|
||||
startsecs = 30
|
||||
autorestart = true
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
redirect_stderr=true
|
||||
stdout_events_enabled = true
|
||||
stderr_events_enabled = true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:awx-uwsgi]
|
||||
command = make uwsgi
|
||||
autostart = true
|
||||
autorestart = true
|
||||
redirect_stderr=true
|
||||
stopwaitsecs = 1
|
||||
stopsignal=KILL
|
||||
startsecs = 30
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
stdout_events_enabled = true
|
||||
stderr_events_enabled = true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:awx-daphne]
|
||||
command = make daphne
|
||||
autostart = true
|
||||
autorestart = true
|
||||
redirect_stderr=true
|
||||
stopwaitsecs = 1
|
||||
stopsignal=KILL
|
||||
startsecs = 30
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
stdout_events_enabled = true
|
||||
stderr_events_enabled = true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:awx-nginx]
|
||||
command = make nginx
|
||||
autostart = true
|
||||
autorestart = true
|
||||
redirect_stderr=true
|
||||
stdout_events_enabled = true
|
||||
stderr_events_enabled = true
|
||||
startsecs = 30
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:awx-rsyslogd]
|
||||
command = rsyslogd -n -i /var/run/awx-rsyslog/rsyslog.pid -f /var/lib/awx/rsyslog/rsyslog.conf
|
||||
autostart = true
|
||||
autorestart = true
|
||||
stopwaitsecs = 5
|
||||
stopsignal=TERM
|
||||
startsecs = 30
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
redirect_stderr=true
|
||||
@ -86,19 +82,28 @@ stderr_events_enabled = true
|
||||
|
||||
[program:awx-receptor]
|
||||
command = receptor --config /etc/receptor/receptor.conf
|
||||
autostart = true
|
||||
autorestart = true
|
||||
stopsignal = KILL
|
||||
stopasgroup = true
|
||||
killasgroup = true
|
||||
redirect_stderr=true
|
||||
stdout_events_enabled = true
|
||||
stderr_events_enabled = true
|
||||
startsecs = 30
|
||||
stopasgroup=true
|
||||
killasgroup=true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[group:tower-processes]
|
||||
programs=awx-dispatcher,awx-receiver,awx-uwsgi,awx-daphne,awx-nginx,awx-wsbroadcast,awx-rsyslogd
|
||||
priority=5
|
||||
|
||||
[eventlistener:superwatcher]
|
||||
command=stop-supervisor
|
||||
events=PROCESS_STATE_FATAL
|
||||
autorestart = true
|
||||
stderr_logfile=/dev/stdout
|
||||
stderr_logfile_maxbytes=0
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
|
||||
[unix_http_server]
|
||||
file=/var/run/supervisor/supervisor.sock
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user