mirror of
https://github.com/ansible/awx.git
synced 2026-05-08 01:47:35 -02:30
Merge pull request #2102 from ryanpetrello/fix-2085
WIP: add a background process to spot celery hangs and reload the worker pool
This commit is contained in:
61
awx/main/management/commands/watch_celery.py
Normal file
61
awx/main/management/commands/watch_celery.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import socket
|
||||||
|
import time
|
||||||
|
|
||||||
|
from celery import Celery
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Watch local celery workers"""

    help = ("Sends a periodic ping to the local celery process over AMQP to ensure "
            "it's responsive; this command is only intended to run in an environment "
            "where celeryd is running")

    #
    # Just because celery is _running_ doesn't mean it's _working_; it's
    # imperative that celery workers are _actually_ handling AMQP messages on
    # their appropriate queues for awx to function.  Unfortunately, we've been
    # plagued by a variety of bugs in celery that cause it to hang and become
    # an unresponsive zombie, such as:
    #
    # https://github.com/celery/celery/issues/4185
    # https://github.com/celery/celery/issues/4457
    #
    # The goal of this code is to periodically send a broadcast AMQP message
    # to the celery process on the local host via celery.app.control.ping;
    # if that _fails_, we attempt to determine the pid of the celery process
    # and send SIGHUP (which tends to resolve these sorts of issues for us).
    #

    # Seconds to wait between consecutive pings.
    INTERVAL = 60

    def handle(self, **options):
        """Ping the local celery worker forever, sending SIGHUP when it is silent.

        Every ``INTERVAL`` seconds a ping is sent to the worker named
        ``celery@<hostname>``.  A non-empty reply is written to stderr; an
        empty reply (or a failed ping) triggers a pid lookup via ``getpid``
        and a SIGHUP to that pid.  This method never returns.
        """
        app = Celery('awx')
        app.config_from_object('django.conf:settings')
        destination = ['celery@{}'.format(socket.gethostname())]
        while True:
            try:
                pongs = app.control.ping(destination)
            except Exception as exc:
                # A failed ping counts as "unresponsive".  Catch Exception
                # rather than using a bare except so KeyboardInterrupt and
                # SystemExit can still stop the watcher.
                sys.stderr.write('celery ping failed: {}\n'.format(exc))
                pongs = []
            if pongs:
                sys.stderr.write(str(pongs) + '\n')
            else:
                sys.stderr.write('celery is not responsive to ping over local AMQP\n')
                pid = self.getpid()
                # A pid of 0 is also skipped here: os.kill(0, ...) would
                # signal our own process group rather than celery.
                if pid:
                    sys.stderr.write('sending SIGHUP to {}\n'.format(pid))
                    try:
                        os.kill(pid, signal.SIGHUP)
                    except OSError as exc:
                        # The pid may have gone away (or not be ours to
                        # signal); keep watching rather than crashing.
                        sys.stderr.write(
                            'could not send SIGHUP to {}: {}\n'.format(pid, exc)
                        )
            time.sleep(self.INTERVAL)

    def getpid(self):
        """Return the celery pid reported by ``supervisorctl``, or ``None``.

        When ``/supervisor_task.conf`` exists, that config file and the
        ``tower-processes:celery`` program name are used; otherwise the
        default supervisor config and ``tower-processes:awx-celeryd``.
        Any failure (supervisorctl missing, non-zero exit, non-integer
        output) is reported on stderr and yields ``None``.
        """
        cmd = ['supervisorctl', 'pid', 'tower-processes:awx-celeryd']
        if os.path.exists('/supervisor_task.conf'):
            cmd = ['supervisorctl', '-c', '/supervisor_task.conf',
                   'pid', 'tower-processes:celery']
        try:
            # An argument list (no shell=True) avoids spawning a shell;
            # int() tolerates the trailing newline in the output.
            return int(subprocess.check_output(cmd))
        except Exception as exc:
            sys.stderr.write('could not detect celery pid: {}\n'.format(exc))
            return None
|
||||||
@@ -15,6 +15,18 @@ stdout_logfile_maxbytes=0
|
|||||||
stderr_logfile=/dev/stderr
|
stderr_logfile=/dev/stderr
|
||||||
stderr_logfile_maxbytes=0
|
stderr_logfile_maxbytes=0
|
||||||
|
|
||||||
|
# Runs the celery watchdog management command.  The command name must match
# the module file name (awx/main/management/commands/watch_celery.py).
[program:celery-watcher]
command = /usr/bin/awx-manage watch_celery
directory = /var/lib/awx
environment = LANGUAGE="en_US.UTF-8",LANG="en_US.UTF-8",LC_ALL="en_US.UTF-8",LC_CTYPE="en_US.UTF-8"
autostart = true
autorestart = true
stopwaitsecs = 5
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
|
||||||
|
|
||||||
[program:callback-receiver]
|
[program:callback-receiver]
|
||||||
command = awx-manage run_callback_receiver
|
command = awx-manage run_callback_receiver
|
||||||
directory = /var/lib/awx
|
directory = /var/lib/awx
|
||||||
@@ -38,7 +50,7 @@ stderr_logfile=/dev/stderr
|
|||||||
stderr_logfile_maxbytes=0
|
stderr_logfile_maxbytes=0
|
||||||
|
|
||||||
[group:tower-processes]
|
[group:tower-processes]
|
||||||
programs=celery,callback-receiver,channels-worker
|
programs=celery,celery-watcher,callback-receiver,channels-worker
|
||||||
priority=5
|
priority=5
|
||||||
|
|
||||||
# TODO: Exit Handler
|
# TODO: Exit Handler
|
||||||
|
|||||||
Reference in New Issue
Block a user