mirror of
https://github.com/ansible/awx.git
synced 2026-05-25 01:27:45 -02:30
Fix wsrelay connection leak (#15113)
- when re-establishing the connection to the db, close the old connection
- re-initialize WebSocketRelayManager when restarting asyncio.run
- log and ignore errors in cleanup_offline_host (this might come back to bite us)
- clean up the connection when WebSocketRelayManager crashes
This commit is contained in:
@@ -165,11 +165,10 @@ class Command(BaseCommand):
|
||||
return
|
||||
|
||||
WebsocketsMetricsServer().start()
|
||||
websocket_relay_manager = WebSocketRelayManager()
|
||||
|
||||
while True:
|
||||
try:
|
||||
asyncio.run(websocket_relay_manager.run())
|
||||
asyncio.run(WebSocketRelayManager().run())
|
||||
except KeyboardInterrupt:
|
||||
logger.info('Shutting down Websocket Relayer')
|
||||
break
|
||||
|
||||
@@ -285,6 +285,8 @@ class WebSocketRelayManager(object):
|
||||
except asyncio.CancelledError:
|
||||
# Handle the case where the task was already cancelled by the time we got here.
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to cancel relay connection for {hostname}: {e}")
|
||||
|
||||
del self.relay_connections[hostname]
|
||||
|
||||
@@ -295,6 +297,8 @@ class WebSocketRelayManager(object):
|
||||
self.stats_mgr.delete_remote_host_stats(hostname)
|
||||
except KeyError:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to delete stats for {hostname}: {e}")
|
||||
|
||||
async def run(self):
|
||||
event_loop = asyncio.get_running_loop()
|
||||
@@ -316,10 +320,22 @@ class WebSocketRelayManager(object):
|
||||
|
||||
task = None
|
||||
|
||||
# Managing the async_conn here so that we can close it if we need to restart the connection
|
||||
async_conn = None
|
||||
|
||||
# Establishes a websocket connection to /websocket/relay on all API servers
|
||||
try:
|
||||
while True:
|
||||
if not task or task.done():
|
||||
try:
|
||||
# Try to close the connection if it's open
|
||||
if async_conn:
|
||||
try:
|
||||
await async_conn.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to close connection to database for pg_notify: {e}")
|
||||
|
||||
# and re-establish the connection
|
||||
async_conn = await psycopg.AsyncConnection.connect(
|
||||
dbname=database_conf['NAME'],
|
||||
host=database_conf['HOST'],
|
||||
@@ -329,6 +345,7 @@ class WebSocketRelayManager(object):
|
||||
)
|
||||
await async_conn.set_autocommit(True)
|
||||
|
||||
# before creating the task that uses the connection
|
||||
task = event_loop.create_task(self.on_ws_heartbeat(async_conn), name="on_ws_heartbeat")
|
||||
logger.info("Creating `on_ws_heartbeat` task in event loop.")
|
||||
|
||||
@@ -370,3 +387,10 @@ class WebSocketRelayManager(object):
|
||||
self.relay_connections[h] = relay_connection
|
||||
|
||||
await asyncio.sleep(settings.BROADCAST_WEBSOCKET_NEW_INSTANCE_POLL_RATE_SECONDS)
|
||||
finally:
|
||||
if async_conn:
|
||||
logger.info("Shutting down db connection for wsrelay.")
|
||||
try:
|
||||
await async_conn.close()
|
||||
except Exception as e:
|
||||
logger.info(f"Failed to close connection to database for pg_notify: {e}")
|
||||
|
||||
Reference in New Issue
Block a user