mirror of
https://github.com/ansible/awx.git
synced 2026-05-17 06:17:36 -02:30
Recover rsyslog from 4xx error
Due to https://github.com/ansible/awx/issues/7560, the 'omhttp' module for rsyslog completely stops forwarding messages to the external log aggregator after receiving a 4xx error from it. This PR is a "workaround" for this problem: it restarts rsyslogd after detecting that rsyslog received a 4xx error.
This commit is contained in:
54
tools/scripts/rsyslog-4xx-recovery
Executable file
54
tools/scripts/rsyslog-4xx-recovery
Executable file
@@ -0,0 +1,54 @@
|
||||
#!/usr/bin/awx-python
|
||||
import sys
|
||||
import os
|
||||
import signal
|
||||
import datetime
|
||||
|
||||
from datetime import timezone
|
||||
|
||||
|
||||
def write_stdout(s):
    """Write the string *s* to standard output and flush it immediately."""
    out = sys.stdout
    out.write(s)
    # Flush right away so the reader on the other end of the pipe is not
    # left waiting on buffered output.
    out.flush()
|
||||
|
||||
def write_stderr(s):
    """Write the string *s* to standard error and flush it immediately."""
    err = sys.stderr
    err.write(s)
    # Flush right away so diagnostics show up as soon as they are produced.
    err.flush()
|
||||
|
||||
# String forms of the HTTP status codes that trigger a restart (400..419),
# built once so each event only needs set-membership tests.
_HTTP_4XX_CODES = frozenset(str(code) for code in range(400, 420))


def _parse_fields(text):
    """Parse whitespace-separated ``key:value`` tokens of *text* into a dict.

    Only the first ``:`` of a token separates key from value, so values that
    themselves contain colons are kept intact.

    Raises:
        ValueError: if a token contains no ``:`` at all.
    """
    fields = {}
    for token in text.split():
        key, sep, value = token.partition(":")
        if not sep:
            raise ValueError(f"malformed token (expected key:value): {token!r}")
        fields[key] = value
    return fields


def _has_4xx_code(message):
    """Return True if any whitespace-separated token of *message* is 400..419."""
    return any(token in _HTTP_4XX_CODES for token in message.split())


def _restart_rsyslogd_on_4xx(data):
    """Send SIGTERM to awx-rsyslogd if the stderr event payload mentions a 4xx code.

    The first line of *data* carries ``key:value`` details about the process
    that produced the output (``processname``, ``pid``, ...); the remaining
    lines are the log text itself.
    """
    first_line, _, body = data.partition("\n")
    try:
        proc_details = _parse_fields(first_line)
    except ValueError as e:
        # A malformed payload must not take the listener down.
        write_stderr(f"unable to parse process details: {e}\n")
        return

    if proc_details.get("processname") != "awx-rsyslogd":
        return

    # Join the log lines with a space (not ""), so a status code at the end
    # of one line cannot fuse with the first word of the next and be missed.
    log_message = body.replace("\n", " ").strip()
    if not _has_4xx_code(log_message):
        return

    try:
        pid = int(proc_details["pid"])
        if pid <= 0:
            # os.kill() treats pid 0 / negative pids as "process group" or
            # "every process"; never signal those from here.
            raise ValueError(f"refusing to signal non-positive pid {pid}")
        write_stderr(
            f"{datetime.datetime.now(timezone.utc)} - sending SIGTERM to proc=[{proc_details['processname']}] with pid=[{pid}] due to log_message=[{log_message}]\n"
        )
        os.kill(pid, signal.SIGTERM)
    except (KeyError, ValueError, OverflowError, OSError) as e:
        # Best effort: report the failure and keep listening.
        write_stderr(f"{e}\n")


def main():
    """Run the event-listener loop that restarts awx-rsyslogd after a 4xx error.

    Each iteration announces readiness with ``READY``, reads one header line
    of ``key:value`` tokens from stdin, reads ``len`` characters of payload,
    and acknowledges with ``RESULT 2\\nOK``.  For ``PROCESS_LOG_STDERR``
    events produced by ``awx-rsyslogd`` whose log text contains an HTTP
    status code in the range 400-419, SIGTERM is sent to the reported pid.

    The READY/RESULT handshake follows the supervisord event-listener
    protocol.  The function returns when stdin reaches end-of-file.
    """
    while True:
        write_stdout("READY\n")

        # Read the header line of the next event.
        line = sys.stdin.readline()
        if not line:
            # EOF: the parent closed the pipe, no further events will arrive.
            return

        try:
            headers = _parse_fields(line)
            data = sys.stdin.read(int(headers["len"]))
        except (KeyError, ValueError) as e:
            # Skip processing entirely rather than acting on missing or
            # stale header/payload values from a previous iteration.
            write_stderr(f"unable to parse event header {line!r}: {e}\n")
        else:
            # awx-rsyslog PROCESS_LOG_STDERR handler
            if headers.get("eventname") == "PROCESS_LOG_STDERR":
                _restart_rsyslogd_on_4xx(data)

        # Always acknowledge so the sender is never left waiting on us.
        write_stdout("RESULT 2\nOK")
|
||||
|
||||
|
||||
# Start the listener loop only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user