From ef29589940bb071b802671f090158f0916f9c260 Mon Sep 17 00:00:00 2001
From: Rick Elrod
Date: Thu, 8 Dec 2022 05:57:47 -0600
Subject: [PATCH] Fix duped stats name and Redis for wsbroadcast

This fixes several things related to our wsbroadcast stats handling.
This was found during the ongoing wsrelay work.

There are really three fixes here:

- Logging was not actually enabled for the
  analytics.broadcast_websocket module, so that has been added to our
  loggers config.

- analytics.broadcast_websocket was not actually able to connect to
  Redis due to 68614b83c00982259be18b0e6e22550f8dd0449a as part of the
  work in #13187. But there was no easy way to know this because the
  logging issue meant no exceptions showed up anywhere reasonable.

- Relatedly, and also as part of #13187, we jumped from
  `prometheus-client` 0.7.1 up to 0.15.0. This included a breaking
  change where a `Counter` ending with `_total` will clash with a
  `Gauge` of the same name but without `_total`. I am not 100% sure of
  the reasoning here, other than "OpenMetrics compatibility".

Refs #13301 Refs #13187 Signed-off-by: Rick Elrod --- awx/main/analytics/broadcast_websocket.py | 11 ++++++----- awx/main/wsbroadcast.py | 2 +- awx/settings/defaults.py | 1 + 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/awx/main/analytics/broadcast_websocket.py b/awx/main/analytics/broadcast_websocket.py index 72b6c8513b..37b64361d3 100644 --- a/awx/main/analytics/broadcast_websocket.py +++ b/awx/main/analytics/broadcast_websocket.py @@ -2,6 +2,7 @@ import datetime import asyncio import logging import redis +import redis.asyncio import re from prometheus_client import ( @@ -81,7 +82,7 @@ class BroadcastWebsocketStatsManager: async def run_loop(self): try: - redis_conn = await redis.asyncio.create_redis_pool(settings.BROKER_URL) + redis_conn = await redis.asyncio.Redis.from_url(settings.BROKER_URL) while True: stats_data_str = ''.join(stat.serialize() for stat in self._stats.values()) await redis_conn.set(self._redis_key, stats_data_str) @@ -121,8 +122,8 @@ class BroadcastWebsocketStats: 'Number of messages received, to be forwarded, by the broadcast websocket system', registry=self._registry, ) - self._messages_received = Gauge( - f'awx_{self.remote_name}_messages_received', + self._messages_received_current_conn = Gauge( + f'awx_{self.remote_name}_messages_received_currrent_conn', 'Number forwarded messages received by the broadcast websocket system, for the duration of the current connection', registry=self._registry, ) @@ -143,13 +144,13 @@ class BroadcastWebsocketStats: def record_message_received(self): self._internal_messages_received_per_minute.record() - self._messages_received.inc() + self._messages_received_current_conn.inc() self._messages_received_total.inc() def record_connection_established(self): self._connection.state('connected') self._connection_start.set_to_current_time() - self._messages_received.set(0) + self._messages_received_current_conn.set(0) def record_connection_lost(self): self._connection.state('disconnected') diff 
--git a/awx/main/wsbroadcast.py b/awx/main/wsbroadcast.py index 5b7172cbfe..c4ed0fc21b 100644 --- a/awx/main/wsbroadcast.py +++ b/awx/main/wsbroadcast.py @@ -118,7 +118,7 @@ class WebsocketTask: logger.warning(f"Connection from {self.name} to {self.remote_host} timed out.") except Exception as e: # Early on, this is our canary. I'm not sure what exceptions we can really encounter. - logger.warning(f"Connection from {self.name} to {self.remote_host} failed for unknown reason: '{e}'.") + logger.exception(f"Connection from {self.name} to {self.remote_host} failed for unknown reason: '{e}'.") else: logger.warning(f"Connection from {self.name} to {self.remote_host} list.") diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py index 9003f7fb2f..5488f50412 100644 --- a/awx/settings/defaults.py +++ b/awx/settings/defaults.py @@ -853,6 +853,7 @@ LOGGING = { 'awx.main.signals': {'level': 'INFO'}, # very verbose debug-level logs 'awx.api.permissions': {'level': 'INFO'}, # very verbose debug-level logs 'awx.analytics': {'handlers': ['external_logger'], 'level': 'INFO', 'propagate': False}, + 'awx.analytics.broadcast_websocket': {'handlers': ['console', 'file', 'wsbroadcast', 'external_logger'], 'level': 'INFO', 'propagate': False}, 'awx.analytics.performance': {'handlers': ['console', 'file', 'tower_warnings', 'external_logger'], 'level': 'DEBUG', 'propagate': False}, 'awx.analytics.job_lifecycle': {'handlers': ['console', 'job_lifecycle'], 'level': 'DEBUG', 'propagate': False}, 'django_auth_ldap': {'handlers': ['console', 'file', 'tower_warnings'], 'level': 'DEBUG'},