diff --git a/awx/main/dispatch/reaper.py b/awx/main/dispatch/reaper.py index d256c7206f..1a1fb2a40e 100644 --- a/awx/main/dispatch/reaper.py +++ b/awx/main/dispatch/reaper.py @@ -35,9 +35,11 @@ def reap(instance=None, status='failed', excluded_uuids=[]): """ me = instance if me is None: - (changed, me) = Instance.objects.get_or_register() - if changed: - logger.info("Registered node '{}'".format(me.hostname)) + try: + me = Instance.objects.me() + except RuntimeError as e: + logger.warning(f'Local instance is not registered, not running reaper: {e}') + return now = tz_now() workflow_ctype_id = ContentType.objects.get_for_model(WorkflowJob).id jobs = UnifiedJob.objects.filter( diff --git a/awx/main/management/commands/provision_instance.py b/awx/main/management/commands/provision_instance.py index c3c8f188ab..aeae496306 100644 --- a/awx/main/management/commands/provision_instance.py +++ b/awx/main/management/commands/provision_instance.py @@ -3,6 +3,7 @@ from django.core.management.base import BaseCommand, CommandError from django.db import transaction +from django.conf import settings from awx.main.models import Instance @@ -13,7 +14,7 @@ class Command(BaseCommand): Register this instance with the database for HA tracking. """ - help = "Add instance to the database. Specify `--hostname` to use this command." + help = "Add instance to the database. When no options are provided, the hostname of the current system will be used. Override with `--hostname`." def add_arguments(self, parser): parser.add_argument('--hostname', dest='hostname', type=str, help="Hostname used during provisioning") @@ -22,8 +23,11 @@ class Command(BaseCommand): def _register_hostname(self, hostname, node_type, uuid): if not hostname: - return - (changed, instance) = Instance.objects.register(hostname=hostname, node_type=node_type, uuid=uuid) + if not settings.AWX_AUTO_DEPROVISION_INSTANCES: + raise CommandError('Registering with values from settings only intended for use in K8s installs') + (changed, instance) = Instance.objects.get_or_register() + else: + (changed, instance) = Instance.objects.register(hostname=hostname, node_type=node_type, uuid=uuid) if changed: print("Successfully registered instance {}".format(hostname)) else: @@ -32,8 +36,6 @@ class Command(BaseCommand): @transaction.atomic def handle(self, **options): - if not options.get('hostname'): - raise CommandError("Specify `--hostname` to use this command.") self.changed = False self._register_hostname(options.get('hostname'), options.get('node_type'), options.get('uuid')) if self.changed: diff --git a/awx/main/tasks/system.py b/awx/main/tasks/system.py index 7ee3849b36..9ab3010919 100644 --- a/awx/main/tasks/system.py +++ b/awx/main/tasks/system.py @@ -490,10 +490,6 @@ def cluster_node_heartbeat(): if inst.hostname == settings.CLUSTER_HOST_ID: this_inst = inst break - else: - (changed, this_inst) = Instance.objects.get_or_register() - if changed: - logger.info("Registered tower control node '{}'".format(this_inst.hostname)) inspect_execution_nodes(instance_list) diff --git a/tools/ansible/roles/dockerfile/files/launch_awx.sh b/tools/ansible/roles/dockerfile/files/launch_awx.sh index 8a7af3c4de..edababb445 100755 --- a/tools/ansible/roles/dockerfile/files/launch_awx.sh +++ b/tools/ansible/roles/dockerfile/files/launch_awx.sh @@ -17,4 +17,11 @@ set -e wait-for-migrations -supervisord -c /etc/supervisord.conf +# This file will be re-written when the dispatcher calls reconfigure_rsyslog(), +# but it needs to exist when supervisor initially starts rsyslog to prevent the +# container from crashing. This was the most minimal config I could get working. +cat << EOF > /var/lib/awx/rsyslog/rsyslog.conf +action(type="omfile" file="/dev/null") +EOF + +exec supervisord -c /etc/supervisord.conf diff --git a/tools/ansible/roles/dockerfile/files/launch_awx_task.sh b/tools/ansible/roles/dockerfile/files/launch_awx_task.sh index ae1a87a6b0..fb59f6431c 100755 --- a/tools/ansible/roles/dockerfile/files/launch_awx_task.sh +++ b/tools/ansible/roles/dockerfile/files/launch_awx_task.sh @@ -17,4 +17,6 @@ set -e wait-for-migrations -supervisord -c /etc/supervisord_task.conf +awx-manage provision_instance + +exec supervisord -c /etc/supervisord_task.conf diff --git a/tools/ansible/roles/dockerfile/files/stop-supervisor b/tools/ansible/roles/dockerfile/files/stop-supervisor new file mode 100755 index 0000000000..1c27714aaf --- /dev/null +++ b/tools/ansible/roles/dockerfile/files/stop-supervisor @@ -0,0 +1,8 @@ +#!/bin/bash + +printf "READY\n"; + +while read line; do + echo "Processing Event: $line" >&2; + kill -SIGQUIT $PPID +done < /dev/stdin diff --git a/tools/ansible/roles/dockerfile/templates/Dockerfile.j2 b/tools/ansible/roles/dockerfile/templates/Dockerfile.j2 index c75616540d..263c73d0ba 100644 --- a/tools/ansible/roles/dockerfile/templates/Dockerfile.j2 +++ b/tools/ansible/roles/dockerfile/templates/Dockerfile.j2 @@ -120,11 +120,7 @@ RUN curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master chmod 700 get_helm.sh && \ ./get_helm.sh -# Install tini -RUN curl -L -o /usr/bin/tini https://github.com/krallin/tini/releases/download/v0.19.0/tini-{{ tini_architecture | default('amd64') }} && \ - chmod +x /usr/bin/tini - -RUN pip3 install virtualenv supervisor +RUN pip3 install virtualenv supervisor dumb-init RUN rm -rf /root/.cache && rm -rf /tmp/* @@ -194,6 +190,7 @@ RUN mkdir -p /etc/containers/registries.conf.d/ && echo "unqualified-search-regi # Create default awx rsyslog config ADD tools/ansible/roles/dockerfile/files/rsyslog.conf /var/lib/awx/rsyslog/rsyslog.conf ADD tools/ansible/roles/dockerfile/files/wait-for-migrations /usr/local/bin/wait-for-migrations +ADD tools/ansible/roles/dockerfile/files/stop-supervisor /usr/local/bin/stop-supervisor ## File mappings {% if build_dev|bool %} @@ -264,9 +261,9 @@ RUN for dir in \ for file in \ /etc/containers/containers.conf \ /var/lib/awx/.config/containers/containers.conf \ - /var/lib/shared/overlay-images/images.lock \ + /var/lib/shared/overlay-images/images.lock \ /var/lib/shared/overlay-layers/layers.lock \ - /var/lib/shared/vfs-images/images.lock \ + /var/lib/shared/vfs-images/images.lock \ /var/lib/shared/vfs-layers/layers.lock \ /var/run/nginx.pid \ /var/lib/awx/venv/awx/lib/python3.9/site-packages/awx.egg-link ; \ @@ -292,7 +289,7 @@ CMD ["/bin/bash"] USER 1000 EXPOSE 8052 -ENTRYPOINT ["/usr/bin/tini", "--"] +ENTRYPOINT ["dumb-init", "--"] CMD /usr/bin/launch_awx.sh VOLUME /var/lib/nginx VOLUME /var/lib/awx/.local/share/containers diff --git a/tools/ansible/roles/dockerfile/templates/supervisor.conf.j2 b/tools/ansible/roles/dockerfile/templates/supervisor.conf.j2 index cb93755e44..95a25971aa 100644 --- a/tools/ansible/roles/dockerfile/templates/supervisor.conf.j2 +++ b/tools/ansible/roles/dockerfile/templates/supervisor.conf.j2 @@ -12,9 +12,10 @@ directory = /awx_devel {% else %} command = nginx -g "daemon off;" {% endif %} -autostart = true autorestart = true -stopwaitsecs = 5 +startsecs = 30 +stopasgroup=true +killasgroup=true stdout_logfile=/dev/stdout stdout_logfile_maxbytes=0 stderr_logfile=/dev/stderr @@ -31,12 +32,10 @@ environment = command = /var/lib/awx/venv/awx/bin/uwsgi /etc/tower/uwsgi.ini directory = /var/lib/awx {% endif %} -autostart = true autorestart = true -stopwaitsecs = 15 +startsecs = 30 stopasgroup=true killasgroup=true -stopsignal=KILL stdout_logfile=/dev/stdout stdout_logfile_maxbytes=0 stderr_logfile=/dev/stderr @@ -50,10 +49,8 @@ directory = /awx_devel command = /var/lib/awx/venv/awx/bin/daphne -b 127.0.0.1 -p 8051 --websocket_timeout -1 awx.asgi:channel_layer directory = /var/lib/awx {% endif %} -autostart = true -stopsignal=KILL autorestart = true -stopwaitsecs = 5 +startsecs = 30 stopasgroup=true killasgroup=true stdout_logfile=/dev/stdout @@ -69,9 +66,8 @@ directory = /awx_devel command = awx-manage run_wsbroadcast directory = /var/lib/awx {% endif %} -autostart = true autorestart = true -stopwaitsecs = 5 +startsecs = 30 stopasgroup=true killasgroup=true stdout_logfile=/dev/stdout @@ -81,33 +77,26 @@ stderr_logfile_maxbytes=0 [program:awx-rsyslogd] command = rsyslogd -n -i /var/run/awx-rsyslog/rsyslog.pid -f /var/lib/awx/rsyslog/rsyslog.conf -autostart = true autorestart = true -startretries = 10 -stopwaitsecs = 5 -stopsignal=TERM +startsecs = 30 stopasgroup=true killasgroup=true -redirect_stderr=true -stdout_logfile=/dev/stderr +stdout_logfile=/dev/stdout stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 [group:tower-processes] programs=nginx,uwsgi,daphne,wsbroadcast,awx-rsyslogd priority=5 -# TODO: Exit Handler - -{% if kube_dev | bool %} -[eventlistener:awx-config-watcher] -command=/usr/bin/config-watcher -stderr_logfile=/dev/stdout -stderr_logfile_maxbytes=0 +[eventlistener:superwatcher] +command=stop-supervisor +events=PROCESS_STATE_FATAL stdout_logfile=/dev/stdout stdout_logfile_maxbytes=0 -events=TICK_60 -priority=0 -{% endif %} +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 [unix_http_server] file=/var/run/supervisor/supervisor.web.sock diff --git a/tools/ansible/roles/dockerfile/templates/supervisor_task.conf.j2 b/tools/ansible/roles/dockerfile/templates/supervisor_task.conf.j2 index 1e2d025e43..a2f2bd5298 100644 --- a/tools/ansible/roles/dockerfile/templates/supervisor_task.conf.j2 +++ b/tools/ansible/roles/dockerfile/templates/supervisor_task.conf.j2 @@ -13,9 +13,8 @@ directory = /awx_devel command = awx-manage run_dispatcher directory = /var/lib/awx {% endif %} -autostart = true autorestart = true -stopwaitsecs = 5 +startsecs = 30 stopasgroup=true killasgroup=true stdout_logfile=/dev/stdout @@ -31,9 +30,8 @@ directory = /awx_devel command = awx-manage run_callback_receiver directory = /var/lib/awx {% endif %} -autostart = true autorestart = true -stopwaitsecs = 5 +startsecs = 30 stopasgroup=true killasgroup=true stdout_logfile=/dev/stdout @@ -45,18 +43,14 @@ stderr_logfile_maxbytes=0 programs=dispatcher,callback-receiver priority=5 -# TODO: Exit Handler - -{% if kube_dev | bool %} -[eventlistener:awx-config-watcher] -command=/usr/bin/config-watcher -stderr_logfile=/dev/stdout -stderr_logfile_maxbytes=0 +[eventlistener:superwatcher] +command=stop-supervisor +events=PROCESS_STATE_FATAL +autorestart = true stdout_logfile=/dev/stdout stdout_logfile_maxbytes=0 -events=TICK_60 -priority=0 -{% endif %} +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 [unix_http_server] file=/var/run/supervisor/supervisor.sock diff --git a/tools/docker-compose/launch_awx.sh b/tools/docker-compose/launch_awx.sh index 486c652cf3..d270cdd3fe 100755 --- a/tools/docker-compose/launch_awx.sh +++ b/tools/docker-compose/launch_awx.sh @@ -5,4 +5,4 @@ bootstrap_development.sh cd /awx_devel # Start the services -exec tini -- make supervisor +exec make supervisor diff --git a/tools/docker-compose/supervisor.conf b/tools/docker-compose/supervisor.conf index 9d0cd4ded3..0726c083fa 100644 --- a/tools/docker-compose/supervisor.conf +++ b/tools/docker-compose/supervisor.conf @@ -5,79 +5,75 @@ nodaemon=true [program:awx-dispatcher] command = make dispatcher -autostart = true autorestart = true -stopwaitsecs = 1 -stopsignal=KILL +startsecs = 30 stopasgroup=true killasgroup=true -redirect_stderr=true -stdout_events_enabled = true -stderr_events_enabled = true - +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 [program:awx-receiver] command = make receiver -autostart = true autorestart = true -stopwaitsecs = 1 -stopsignal=KILL +startsecs = 30 stopasgroup=true killasgroup=true -redirect_stderr=true -stdout_events_enabled = true -stderr_events_enabled = true +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 [program:awx-wsbroadcast] command = make wsbroadcast -autostart = true autorestart = true -stopwaitsecs = 1 -stopsignal=KILL +startsecs = 30 +autorestart = true stopasgroup=true killasgroup=true -redirect_stderr=true -stdout_events_enabled = true -stderr_events_enabled = true +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 [program:awx-uwsgi] command = make uwsgi -autostart = true autorestart = true -redirect_stderr=true -stopwaitsecs = 1 -stopsignal=KILL +startsecs = 30 stopasgroup=true killasgroup=true -stdout_events_enabled = true -stderr_events_enabled = true +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 [program:awx-daphne] command = make daphne -autostart = true autorestart = true -redirect_stderr=true -stopwaitsecs = 1 -stopsignal=KILL +startsecs = 30 stopasgroup=true killasgroup=true -stdout_events_enabled = true -stderr_events_enabled = true +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 [program:awx-nginx] command = make nginx -autostart = true autorestart = true -redirect_stderr=true -stdout_events_enabled = true -stderr_events_enabled = true +startsecs = 30 +stopasgroup=true +killasgroup=true +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 [program:awx-rsyslogd] command = rsyslogd -n -i /var/run/awx-rsyslog/rsyslog.pid -f /var/lib/awx/rsyslog/rsyslog.conf -autostart = true autorestart = true -stopwaitsecs = 5 -stopsignal=TERM +startsecs = 30 stopasgroup=true killasgroup=true redirect_stderr=true @@ -86,19 +82,28 @@ stderr_events_enabled = true [program:awx-receptor] command = receptor --config /etc/receptor/receptor.conf -autostart = true autorestart = true -stopsignal = KILL -stopasgroup = true -killasgroup = true -redirect_stderr=true -stdout_events_enabled = true -stderr_events_enabled = true +startsecs = 30 +stopasgroup=true +killasgroup=true +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 [group:tower-processes] programs=awx-dispatcher,awx-receiver,awx-uwsgi,awx-daphne,awx-nginx,awx-wsbroadcast,awx-rsyslogd priority=5 +[eventlistener:superwatcher] +command=stop-supervisor +events=PROCESS_STATE_FATAL +autorestart = true +stderr_logfile=/dev/stdout +stderr_logfile_maxbytes=0 +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 + [unix_http_server] file=/var/run/supervisor/supervisor.sock