make --status more robust for dispatcher, and add support for receiver

make the --status flag work by fetching a periodically recorded snapshot
of internal process state; additionally, update the callback receiver to
*also* record these statistics so we can gain more insight into any
performance issues
This commit is contained in:
Ryan Petrello
2020-09-17 14:25:00 -04:00
parent 0df6409244
commit 57f8e48894
5 changed files with 43 additions and 8 deletions

View File

@@ -2,6 +2,9 @@ import logging
import uuid
import json
from django.conf import settings
import redis
from awx.main.dispatch import get_local_queuename
from . import pg_bus_conn
@@ -21,7 +24,9 @@ class Control(object):
self.queuename = host or get_local_queuename()
def status(self, *args, **kwargs):
return self.control_with_reply('status', *args, **kwargs)
r = redis.Redis.from_url(settings.BROKER_URL)
stats = r.get(f'awx_{self.service}_statistics') or b''
return stats.decode('utf-8')
def running(self, *args, **kwargs):
return self.control_with_reply('running', *args, **kwargs)

View File

@@ -5,6 +5,7 @@ import signal
import sys
import time
import traceback
from datetime import datetime
from uuid import uuid4
import collections
@@ -257,13 +258,13 @@ class WorkerPool(object):
return idx, worker
def debug(self, *args, **kwargs):
self.cleanup()
tmpl = Template(
'Recorded at: {{ dt }} \n'
'{{ pool.name }}[pid:{{ pool.pid }}] workers total={{ workers|length }} {{ meta }} \n'
'{% for w in workers %}'
'. worker[pid:{{ w.pid }}]{% if not w.alive %} GONE exit={{ w.exitcode }}{% endif %}'
' sent={{ w.messages_sent }}'
' finished={{ w.messages_finished }}'
'{% if w.messages_finished %} finished={{ w.messages_finished }}{% endif %}'
' qsize={{ w.managed_tasks|length }}'
' rss={{ w.mb }}MB'
'{% for task in w.managed_tasks.values() %}'
@@ -281,7 +282,11 @@ class WorkerPool(object):
'\n'
'{% endfor %}'
)
return tmpl.render(pool=self, workers=self.workers, meta=self.debug_meta)
now = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC')
return tmpl.render(
pool=self, workers=self.workers, meta=self.debug_meta,
dt=now
)
def write(self, preferred_queue, body):
queue_order = sorted(range(len(self.workers)), key=lambda x: -1 if x==preferred_queue else x)
@@ -332,6 +337,10 @@ class AutoscalePool(WorkerPool):
# max workers can't be less than min_workers
self.max_workers = max(self.min_workers, self.max_workers)
def debug(self, *args, **kwargs):
self.cleanup()
return super(AutoscalePool, self).debug(*args, **kwargs)
@property
def should_grow(self):
if len(self.workers) < self.min_workers:

View File

@@ -43,6 +43,9 @@ class WorkerSignalHandler:
class AWXConsumerBase(object):
last_stats = time.time()
def __init__(self, name, worker, queues=[], pool=None):
self.should_stop = False
@@ -54,6 +57,7 @@ class AWXConsumerBase(object):
if pool is None:
self.pool = WorkerPool()
self.pool.init_workers(self.worker.work_loop)
self.redis = redis.Redis.from_url(settings.BROKER_URL)
@property
def listening_on(self):
@@ -99,6 +103,16 @@ class AWXConsumerBase(object):
queue = 0
self.pool.write(queue, body)
self.total_messages += 1
self.record_statistics()
def record_statistics(self):
if time.time() - self.last_stats > 1: # buffer stat recording to once per second
try:
self.redis.set(f'awx_{self.name}_statistics', self.pool.debug())
self.last_stats = time.time()
except Exception:
logger.exception(f"encountered an error communicating with redis to store {self.name} statistics")
self.last_stats = time.time()
def run(self, *args, **kwargs):
signal.signal(signal.SIGINT, self.stop)
@@ -120,10 +134,9 @@ class AWXConsumerRedis(AWXConsumerBase):
time_to_sleep = 1
while True:
queue = redis.Redis.from_url(settings.BROKER_URL)
while True:
try:
res = queue.blpop(self.queues)
res = self.redis.blpop(self.queues)
time_to_sleep = 1
res = json.loads(res[1])
self.process_task(res)