mirror of
https://github.com/ansible/awx.git
synced 2026-04-14 06:29:25 -02:30
Sensible log behavior when redis is unavailable (#15466)
* Sensible log behavior when redis is unavailable * Consistent behavior with dispatcher and callback
This commit is contained in:
@@ -15,6 +15,7 @@ from datetime import timedelta
|
||||
|
||||
from django import db
|
||||
from django.conf import settings
|
||||
import redis.exceptions
|
||||
|
||||
from ansible_base.lib.logging.runtime import log_excess_runtime
|
||||
|
||||
@@ -130,10 +131,13 @@ class AWXConsumerBase(object):
|
||||
@log_excess_runtime(logger, debug_cutoff=0.05, cutoff=0.2)
|
||||
def record_statistics(self):
|
||||
if time.time() - self.last_stats > 1: # buffer stat recording to once per second
|
||||
save_data = self.pool.debug()
|
||||
try:
|
||||
self.redis.set(f'awx_{self.name}_statistics', self.pool.debug())
|
||||
self.redis.set(f'awx_{self.name}_statistics', save_data)
|
||||
except redis.exceptions.ConnectionError as exc:
|
||||
logger.warning(f'Redis connection error saving {self.name} status data:\n{exc}\nmissed data:\n{save_data}')
|
||||
except Exception:
|
||||
logger.exception(f"encountered an error communicating with redis to store {self.name} statistics")
|
||||
logger.exception(f"Unknown redis error saving {self.name} status data:\nmissed data:\n{save_data}")
|
||||
self.last_stats = time.time()
|
||||
|
||||
def run(self, *args, **kwargs):
|
||||
@@ -189,7 +193,10 @@ class AWXConsumerPG(AWXConsumerBase):
|
||||
current_time = time.time()
|
||||
self.pool.produce_subsystem_metrics(self.subsystem_metrics)
|
||||
self.subsystem_metrics.set('dispatcher_availability', self.listen_cumulative_time / (current_time - self.last_metrics_gather))
|
||||
self.subsystem_metrics.pipe_execute()
|
||||
try:
|
||||
self.subsystem_metrics.pipe_execute()
|
||||
except redis.exceptions.ConnectionError as exc:
|
||||
logger.warning(f'Redis connection error saving dispatcher metrics, error:\n{exc}')
|
||||
self.listen_cumulative_time = 0.0
|
||||
self.last_metrics_gather = current_time
|
||||
|
||||
|
||||
@@ -86,6 +86,7 @@ class CallbackBrokerWorker(BaseWorker):
|
||||
return os.getpid()
|
||||
|
||||
def read(self, queue):
|
||||
has_redis_error = False
|
||||
try:
|
||||
res = self.redis.blpop(self.queue_name, timeout=1)
|
||||
if res is None:
|
||||
@@ -95,14 +96,21 @@ class CallbackBrokerWorker(BaseWorker):
|
||||
self.subsystem_metrics.inc('callback_receiver_events_popped_redis', 1)
|
||||
self.subsystem_metrics.inc('callback_receiver_events_in_memory', 1)
|
||||
return json.loads(res[1])
|
||||
except redis.exceptions.ConnectionError as exc:
|
||||
# Low noise log, because very common and many workers will write this
|
||||
logger.error(f"redis connection error: {exc}")
|
||||
has_redis_error = True
|
||||
time.sleep(5)
|
||||
except redis.exceptions.RedisError:
|
||||
logger.exception("encountered an error communicating with redis")
|
||||
has_redis_error = True
|
||||
time.sleep(1)
|
||||
except (json.JSONDecodeError, KeyError):
|
||||
logger.exception("failed to decode JSON message from redis")
|
||||
finally:
|
||||
self.record_statistics()
|
||||
self.record_read_metrics()
|
||||
if not has_redis_error:
|
||||
self.record_statistics()
|
||||
self.record_read_metrics()
|
||||
|
||||
return {'event': 'FLUSH'}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user