AAP-57817 Add Redis connection retry using redis-py 7.0+ built-in (#16176)

* AAP-57817 Add Redis connection retry using redis-py 7.0+ built-in mechanism

* Refactor Redis client helpers to use settings and eliminate code duplication

* Create awx/main/utils/redis.py and move Redis client functions to avoid circular imports

* Fix subsystem_metrics to share Redis connection pool between
  client and pipeline

* Cache Redis clients in RelayConsumer and RelayWebsocketStatsManager to avoid creating new connection pools on every call

* Add cap and base config

* Add Redis retry logic with exponential backoff to handle connection failures during long-running operations

* Add REDIS_BACKOFF_CAP and REDIS_BACKOFF_BASE settings to allow
  adjustment of retry timing in worst-case scenarios without code changes

* Simplify Redis retry tests by removing unnecessary reload logic
This commit is contained in:
Lila Yasin
2025-12-01 09:08:47 -05:00
committed by GitHub
parent 0d86874d5d
commit 4f41b50a09
17 changed files with 264 additions and 24 deletions

View File

@@ -2,11 +2,10 @@ import logging
import uuid
import json
from django.conf import settings
from django.db import connection
import redis
from awx.main.dispatch import get_task_queuename
from awx.main.utils.redis import get_redis_client
from . import pg_bus_conn
@@ -24,7 +23,7 @@ class Control(object):
self.queuename = host or get_task_queuename()
def status(self, *args, **kwargs):
r = redis.Redis.from_url(settings.BROKER_URL)
r = get_redis_client()
if self.service == 'dispatcher':
stats = r.get(f'awx_{self.service}_statistics') or b''
return stats.decode('utf-8')

View File

@@ -19,6 +19,7 @@ import redis.exceptions
from ansible_base.lib.logging.runtime import log_excess_runtime
from awx.main.utils.redis import get_redis_client
from awx.main.dispatch.pool import WorkerPool
from awx.main.dispatch.periodic import Scheduler
from awx.main.dispatch import pg_bus_conn
@@ -59,7 +60,7 @@ class AWXConsumerBase(object):
if pool is None:
self.pool = WorkerPool()
self.pool.init_workers(self.worker.work_loop)
self.redis = redis.Redis.from_url(settings.BROKER_URL)
self.redis = get_redis_client()
@property
def listening_on(self):

View File

@@ -15,6 +15,7 @@ import psutil
import redis
from awx.main.utils.redis import get_redis_client
from awx.main.consumers import emit_channel_notification
from awx.main.models import JobEvent, AdHocCommandEvent, ProjectUpdateEvent, InventoryUpdateEvent, SystemJobEvent, UnifiedJob
from awx.main.constants import ACTIVE_STATES
@@ -72,7 +73,7 @@ class CallbackBrokerWorker(BaseWorker):
def __init__(self):
self.buff = {}
self.redis = redis.Redis.from_url(settings.BROKER_URL)
self.redis = get_redis_client()
self.subsystem_metrics = s_metrics.CallbackReceiverMetrics(auto_pipe_execute=False)
self.queue_pop = 0
self.queue_name = settings.CALLBACK_QUEUE