Fix duped stats name and Redis for wsbroadcast

This fixes several things related to our wsbroadcast stats handling.
This was found during the ongoing wsrelay work.

There are really three fixes here:

- Logging was not actually enabled for the analytics.broadcast_websocket
  module, so that has been added to our loggers config.

- analytics.broadcast_websocket was not actually able to connect to
  Redis due to 68614b83c00982259be18b0e6e22550f8dd0449a as part of
  the work in #13187. But there was no easy way to know this because the
  logging issue meant no exceptions showed up anywhere reasonable.

- Relatedly, and also as part of #13187, we jumped from
  `prometheus-client` 0.7.1 up to 0.15.0. This included a breaking
  change where a `Counter` ending with `_total` will clash with a
  `Gauge` of the same name but without `_total`. I am not 100% sure of
  the reasoning here, other than "OpenMetrics compatibility".

Refs #13301
Refs #13187

Signed-off-by: Rick Elrod <rick@elrod.me>
This commit is contained in:
Rick Elrod 2022-12-08 05:57:47 -06:00
parent a0b8215c06
commit ef29589940
3 changed files with 8 additions and 6 deletions

View File

@ -2,6 +2,7 @@ import datetime
import asyncio
import logging
import redis
import redis.asyncio
import re
from prometheus_client import (
@ -81,7 +82,7 @@ class BroadcastWebsocketStatsManager:
async def run_loop(self):
try:
redis_conn = await redis.asyncio.create_redis_pool(settings.BROKER_URL)
redis_conn = await redis.asyncio.Redis.from_url(settings.BROKER_URL)
while True:
stats_data_str = ''.join(stat.serialize() for stat in self._stats.values())
await redis_conn.set(self._redis_key, stats_data_str)
@ -121,8 +122,8 @@ class BroadcastWebsocketStats:
'Number of messages received, to be forwarded, by the broadcast websocket system',
registry=self._registry,
)
self._messages_received = Gauge(
f'awx_{self.remote_name}_messages_received',
self._messages_received_current_conn = Gauge(
f'awx_{self.remote_name}_messages_received_currrent_conn',
'Number forwarded messages received by the broadcast websocket system, for the duration of the current connection',
registry=self._registry,
)
@ -143,13 +144,13 @@ class BroadcastWebsocketStats:
def record_message_received(self):
self._internal_messages_received_per_minute.record()
self._messages_received.inc()
self._messages_received_current_conn.inc()
self._messages_received_total.inc()
def record_connection_established(self):
self._connection.state('connected')
self._connection_start.set_to_current_time()
self._messages_received.set(0)
self._messages_received_current_conn.set(0)
def record_connection_lost(self):
self._connection.state('disconnected')

View File

@ -118,7 +118,7 @@ class WebsocketTask:
logger.warning(f"Connection from {self.name} to {self.remote_host} timed out.")
except Exception as e:
# Early on, this is our canary. I'm not sure what exceptions we can really encounter.
logger.warning(f"Connection from {self.name} to {self.remote_host} failed for unknown reason: '{e}'.")
logger.exception(f"Connection from {self.name} to {self.remote_host} failed for unknown reason: '{e}'.")
else:
logger.warning(f"Connection from {self.name} to {self.remote_host} list.")

View File

@ -853,6 +853,7 @@ LOGGING = {
'awx.main.signals': {'level': 'INFO'}, # very verbose debug-level logs
'awx.api.permissions': {'level': 'INFO'}, # very verbose debug-level logs
'awx.analytics': {'handlers': ['external_logger'], 'level': 'INFO', 'propagate': False},
'awx.analytics.broadcast_websocket': {'handlers': ['console', 'file', 'wsbroadcast', 'external_logger'], 'level': 'INFO', 'propagate': False},
'awx.analytics.performance': {'handlers': ['console', 'file', 'tower_warnings', 'external_logger'], 'level': 'DEBUG', 'propagate': False},
'awx.analytics.job_lifecycle': {'handlers': ['console', 'job_lifecycle'], 'level': 'DEBUG', 'propagate': False},
'django_auth_ldap': {'handlers': ['console', 'file', 'tower_warnings'], 'level': 'DEBUG'},