From 6e5e1c8fff4dcbd81fbcc964b4583bb390496840 Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Mon, 11 Dec 2023 16:12:43 -0500 Subject: [PATCH] Recover rsyslog from 4xx error Due to https://github.com/ansible/awx/issues/7560, the 'omhttp' module for rsyslog will completely stop forwarding messages to the external log aggregator after receiving a 4xx error from the external log aggregator. This PR is a "workaround" for this problem: it restarts rsyslogd after detecting that rsyslog received a 4xx error. --- MANIFEST.in | 2 +- .../roles/dockerfile/templates/Dockerfile.j2 | 5 ++++- .../templates/supervisor_rsyslog.conf.j2 | 11 ++++++++++- tools/docker-compose/supervisor.conf | 11 ++++++++++- ...ure-event-handler => rsyslog-4xx-recovery} | 19 ------------------- 5 files changed, 25 insertions(+), 23 deletions(-) rename tools/scripts/{failure-event-handler => rsyslog-4xx-recovery} (72%) diff --git a/MANIFEST.in b/MANIFEST.in index 09a5392c50..3db512ee13 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -22,7 +22,7 @@ recursive-exclude awx/settings local_settings.py* include tools/scripts/request_tower_configuration.sh include tools/scripts/request_tower_configuration.ps1 include tools/scripts/automation-controller-service -include tools/scripts/failure-event-handler +include tools/scripts/rsyslog-4xx-recovery include tools/scripts/awx-python include awx/playbooks/library/mkfifo.py include tools/sosreport/* diff --git a/tools/ansible/roles/dockerfile/templates/Dockerfile.j2 b/tools/ansible/roles/dockerfile/templates/Dockerfile.j2 index cd7e61961d..a4254ed177 100644 --- a/tools/ansible/roles/dockerfile/templates/Dockerfile.j2 +++ b/tools/ansible/roles/dockerfile/templates/Dockerfile.j2 @@ -226,15 +226,18 @@ RUN ln -sf /awx_devel/{{ template_dest }}/supervisor_rsyslog.conf /etc/superviso ADD tools/ansible/roles/dockerfile/files/launch_awx_web.sh /usr/bin/launch_awx_web.sh ADD tools/ansible/roles/dockerfile/files/launch_awx_task.sh /usr/bin/launch_awx_task.sh ADD 
tools/ansible/roles/dockerfile/files/launch_awx_rsyslog.sh /usr/bin/launch_awx_rsyslog.sh +ADD tools/scripts/rsyslog-4xx-recovery /usr/bin/rsyslog-4xx-recovery ADD {{ template_dest }}/supervisor_web.conf /etc/supervisord_web.conf ADD {{ template_dest }}/supervisor_task.conf /etc/supervisord_task.conf ADD {{ template_dest }}/supervisor_rsyslog.conf /etc/supervisord_rsyslog.conf +ADD tools/scripts/awx-python /usr/bin/awx-python {% endif %} {% if (build_dev|bool) or (kube_dev|bool) %} RUN echo /awx_devel > /var/lib/awx/venv/awx/lib/python3.9/site-packages/awx.egg-link ADD tools/docker-compose/awx-manage /usr/local/bin/awx-manage -ADD tools/scripts/awx-python /usr/bin/awx-python +RUN ln -sf /awx_devel/tools/scripts/awx-python /usr/bin/awx-python +RUN ln -sf /awx_devel/tools/scripts/rsyslog-4xx-recovery /usr/bin/rsyslog-4xx-recovery {% endif %} # Pre-create things we need to access diff --git a/tools/ansible/roles/dockerfile/templates/supervisor_rsyslog.conf.j2 b/tools/ansible/roles/dockerfile/templates/supervisor_rsyslog.conf.j2 index 265b84dca4..0f6c3df426 100644 --- a/tools/ansible/roles/dockerfile/templates/supervisor_rsyslog.conf.j2 +++ b/tools/ansible/roles/dockerfile/templates/supervisor_rsyslog.conf.j2 @@ -8,7 +8,7 @@ pidfile = /var/run/supervisor/supervisor.rsyslog.pid [program:awx-rsyslogd] command = rsyslogd -n -i /var/run/awx-rsyslog/rsyslog.pid -f /var/lib/awx/rsyslog/rsyslog.conf autorestart = true -startsecs = 30 +startsecs = 0 stopasgroup=true killasgroup=true stdout_logfile=/dev/stdout @@ -59,6 +59,15 @@ stdout_logfile_maxbytes=0 stderr_logfile=/dev/stderr stderr_logfile_maxbytes=0 +[eventlistener:rsyslog-4xx-recovery] +command=rsyslog-4xx-recovery +buffer_size = 100 +events=PROCESS_LOG_STDERR +priority=0 +autorestart=true +stdout_events_enabled = true +stderr_events_enabled = true + [unix_http_server] file=/var/run/supervisor/supervisor.rsyslog.sock diff --git a/tools/docker-compose/supervisor.conf b/tools/docker-compose/supervisor.conf index 
0efd97d09e..8ae0ddaf59 100644 --- a/tools/docker-compose/supervisor.conf +++ b/tools/docker-compose/supervisor.conf @@ -86,9 +86,9 @@ stderr_events_enabled = true [program:awx-rsyslogd] command = rsyslogd -n -i /var/run/awx-rsyslog/rsyslog.pid -f /var/lib/awx/rsyslog/rsyslog.conf autorestart = true +startsecs=0 stopasgroup=true killasgroup=true -redirect_stderr=true stdout_events_enabled = true stderr_events_enabled = true @@ -113,6 +113,15 @@ killasgroup=true stdout_events_enabled = true stderr_events_enabled = true +[eventlistener:rsyslog-4xx-recovery] +command=/awx_devel/tools/scripts/rsyslog-4xx-recovery +buffer_size = 100 +events=PROCESS_LOG_STDERR +priority=0 +autorestart=true +stdout_events_enabled = true +stderr_events_enabled = true + [unix_http_server] file=/var/run/supervisor/supervisor.sock diff --git a/tools/scripts/failure-event-handler b/tools/scripts/rsyslog-4xx-recovery similarity index 72% rename from tools/scripts/failure-event-handler rename to tools/scripts/rsyslog-4xx-recovery index eef297fd4d..b2ac2946f7 100755 --- a/tools/scripts/failure-event-handler +++ b/tools/scripts/rsyslog-4xx-recovery @@ -11,12 +11,10 @@ def write_stdout(s): sys.stdout.write(s) sys.stdout.flush() - def write_stderr(s): sys.stderr.write(s) sys.stderr.flush() - def main(): while 1: write_stdout("READY\n") @@ -31,23 +29,6 @@ def main(): except ValueError as e: write_stderr(str(e)) - # now decide what do to based on eventnames - if headers["eventname"] == "PROCESS_STATE_FATAL": - headers.update( - dict( - [x.split(":") for x in sys.stdin.read(int(headers["len"])).split()] - ) - ) - - try: - # incoming event that produced PROCESS_STATE_FATAL will have a PID. SIGTERM it! 
- write_stderr( - f"{datetime.datetime.now(timezone.utc)} - sending SIGTERM to proc={headers} with data={headers}\n" - ) - os.kill(headers["pid"], signal.SIGTERM) - except Exception as e: - write_stderr(str(e)) - # awx-rsyslog PROCESS_LOG_STDERR handler if headers["eventname"] == "PROCESS_LOG_STDERR": # pertinent data to process that produced PROCES_LOG_STDERR is in the first line of the data payload; so lets extract it