mirror of
https://github.com/ansible/awx.git
synced 2026-05-20 15:27:47 -02:30
AAP-58539 Move to dispatcherd (#16209)
* WIP First pass * started removing feature flags and adjusting logic * Add decorator * moved to dispatcher decorator * updated as many as I could find * Keep callback receiver working * remove any code that is not used by the callback receiver * add back auto_max_workers * added back get_auto_max_workers into common utils * Remove control and hazmat (squash this not done) * moved status out and deleted control as no longer needed * removed unused imports * adjusted test import to pull correct method * fixed imports and addressed clusternode heartbeat test * Update function comments * Add back hazmat for config and remove baseworker * added back hazmat per @alancoding feedback around config * removed baseworker completely and refactored it into the callback worker * Fix dispatcher run call and remove dispatch setting * remove dispatcher mock publish setting * Adjust heartbeat arg and more formatting * fixed the call to cluster_node_heartbeat missing binder * Fix attribute error in server logs
This commit is contained in:
@@ -4,7 +4,7 @@ import json
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from awx.main.dispatch import pg_bus_conn
|
||||
from awx.main.dispatch.worker.task import TaskWorker
|
||||
from awx.main.dispatch.worker.task import run_callable
|
||||
|
||||
logger = logging.getLogger('awx.main.cache_clear')
|
||||
|
||||
@@ -21,11 +21,11 @@ class Command(BaseCommand):
|
||||
try:
|
||||
with pg_bus_conn() as conn:
|
||||
conn.listen("tower_settings_change")
|
||||
for e in conn.events(yield_timeouts=True):
|
||||
for e in conn.events():
|
||||
if e is not None:
|
||||
body = json.loads(e.payload)
|
||||
logger.info(f"Cache clear request received. Clearing now, payload: {e.payload}")
|
||||
TaskWorker.run_callable(body)
|
||||
run_callable(body)
|
||||
|
||||
except Exception:
|
||||
# Log unanticipated exception in addition to writing to stderr to get timestamps and other metadata
|
||||
|
||||
@@ -8,8 +8,8 @@ from django.core.management.base import BaseCommand, CommandError
|
||||
import redis.exceptions
|
||||
|
||||
from awx.main.analytics.subsystem_metrics import CallbackReceiverMetricsServer
|
||||
from awx.main.dispatch.control import Control
|
||||
from awx.main.dispatch.worker import AWXConsumerRedis, CallbackBrokerWorker
|
||||
from awx.main.utils.redis import get_redis_client
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
@@ -26,7 +26,7 @@ class Command(BaseCommand):
|
||||
|
||||
def handle(self, *arg, **options):
|
||||
if options.get('status'):
|
||||
print(Control('callback_receiver').status())
|
||||
print(self.status())
|
||||
return
|
||||
consumer = None
|
||||
|
||||
@@ -46,3 +46,10 @@ class Command(BaseCommand):
|
||||
print('Terminating Callback Receiver')
|
||||
if consumer:
|
||||
consumer.stop()
|
||||
|
||||
def status(self, *args, **kwargs):
    """Return the values stored under the callback receiver statistics keys.

    Looks up every redis key matching ``awx_callback_receiver_statistics_*``,
    decodes each stored value as UTF-8 and joins them with newlines.
    Returns an empty string when no key matches.  ``*args`` and ``**kwargs``
    are accepted but not used.
    """
    r = get_redis_client()
    workers = []
    for key in r.keys('awx_callback_receiver_statistics_*'):
        value = r.get(key)
        # GET yields None when the key disappeared after KEYS listed it;
        # skip it instead of failing on None.decode().
        if value is None:
            continue
        workers.append(value.decode('utf-8'))
    return '\n'.join(workers)
|
||||
|
||||
@@ -5,25 +5,16 @@ import logging.config
|
||||
import yaml
|
||||
import copy
|
||||
|
||||
import redis
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import connection
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from django.core.cache import cache as django_cache
|
||||
|
||||
from flags.state import flag_enabled
|
||||
from django.db import connection
|
||||
|
||||
from dispatcherd.factories import get_control_from_settings
|
||||
from dispatcherd import run_service
|
||||
from dispatcherd.config import setup as dispatcher_setup
|
||||
|
||||
from awx.main.dispatch import get_task_queuename
|
||||
from awx.main.dispatch.config import get_dispatcherd_config
|
||||
from awx.main.dispatch.control import Control
|
||||
from awx.main.dispatch.pool import AutoscalePool
|
||||
from awx.main.dispatch.worker import AWXConsumerPG, TaskWorker
|
||||
from awx.main.analytics.subsystem_metrics import DispatcherMetricsServer
|
||||
|
||||
logger = logging.getLogger('awx.main.dispatch')
|
||||
|
||||
@@ -33,14 +24,7 @@ class Command(BaseCommand):
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument('--status', dest='status', action='store_true', help='print the internal state of any running dispatchers')
|
||||
parser.add_argument('--schedule', dest='schedule', action='store_true', help='print the current status of schedules being ran by dispatcher')
|
||||
parser.add_argument('--running', dest='running', action='store_true', help='print the UUIDs of any tasked managed by this dispatcher')
|
||||
parser.add_argument(
|
||||
'--reload',
|
||||
dest='reload',
|
||||
action='store_true',
|
||||
help=('cause the dispatcher to recycle all of its worker processes; running jobs will run to completion first'),
|
||||
)
|
||||
parser.add_argument(
|
||||
'--cancel',
|
||||
dest='cancel',
|
||||
@@ -53,38 +37,17 @@ class Command(BaseCommand):
|
||||
|
||||
def handle(self, *arg, **options):
|
||||
if options.get('status'):
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
ctl = get_control_from_settings()
|
||||
running_data = ctl.control_with_reply('status')
|
||||
if len(running_data) != 1:
|
||||
raise CommandError('Did not receive expected number of replies')
|
||||
print(yaml.dump(running_data[0], default_flow_style=False))
|
||||
return
|
||||
else:
|
||||
print(Control('dispatcher').status())
|
||||
return
|
||||
if options.get('schedule'):
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
print('NOT YET IMPLEMENTED')
|
||||
return
|
||||
else:
|
||||
print(Control('dispatcher').schedule())
|
||||
ctl = get_control_from_settings()
|
||||
running_data = ctl.control_with_reply('status')
|
||||
if len(running_data) != 1:
|
||||
raise CommandError('Did not receive expected number of replies')
|
||||
print(yaml.dump(running_data[0], default_flow_style=False))
|
||||
return
|
||||
if options.get('running'):
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
ctl = get_control_from_settings()
|
||||
running_data = ctl.control_with_reply('running')
|
||||
print(yaml.dump(running_data, default_flow_style=False))
|
||||
return
|
||||
else:
|
||||
print(Control('dispatcher').running())
|
||||
return
|
||||
if options.get('reload'):
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
print('NOT YET IMPLEMENTED')
|
||||
return
|
||||
else:
|
||||
return Control('dispatcher').control({'control': 'reload'})
|
||||
ctl = get_control_from_settings()
|
||||
running_data = ctl.control_with_reply('running')
|
||||
print(yaml.dump(running_data, default_flow_style=False))
|
||||
return
|
||||
if options.get('cancel'):
|
||||
cancel_str = options.get('cancel')
|
||||
try:
|
||||
@@ -94,44 +57,24 @@ class Command(BaseCommand):
|
||||
if not isinstance(cancel_data, list):
|
||||
cancel_data = [cancel_str]
|
||||
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
ctl = get_control_from_settings()
|
||||
results = []
|
||||
for task_id in cancel_data:
|
||||
# For each task UUID, send an individual cancel command
|
||||
result = ctl.control_with_reply('cancel', data={'uuid': task_id})
|
||||
results.append(result)
|
||||
print(yaml.dump(results, default_flow_style=False))
|
||||
return
|
||||
else:
|
||||
print(Control('dispatcher').cancel(cancel_data))
|
||||
return
|
||||
ctl = get_control_from_settings()
|
||||
results = []
|
||||
for task_id in cancel_data:
|
||||
# For each task UUID, send an individual cancel command
|
||||
result = ctl.control_with_reply('cancel', data={'uuid': task_id})
|
||||
results.append(result)
|
||||
print(yaml.dump(results, default_flow_style=False))
|
||||
return
|
||||
|
||||
if flag_enabled('FEATURE_DISPATCHERD_ENABLED'):
|
||||
self.configure_dispatcher_logging()
|
||||
self.configure_dispatcher_logging()
|
||||
# Close the connection, because the pg_notify broker will create new async connection
|
||||
connection.close()
|
||||
django_cache.close()
|
||||
dispatcher_setup(get_dispatcherd_config(for_service=True))
|
||||
run_service()
|
||||
|
||||
# Close the connection, because the pg_notify broker will create new async connection
|
||||
connection.close()
|
||||
django_cache.close()
|
||||
|
||||
dispatcher_setup(get_dispatcherd_config(for_service=True))
|
||||
run_service()
|
||||
else:
|
||||
consumer = None
|
||||
|
||||
try:
|
||||
DispatcherMetricsServer().start()
|
||||
except redis.exceptions.ConnectionError as exc:
|
||||
raise CommandError(f'Dispatcher could not connect to redis, error: {exc}')
|
||||
|
||||
try:
|
||||
queues = ['tower_broadcast_all', 'tower_settings_change', get_task_queuename()]
|
||||
consumer = AWXConsumerPG('dispatcher', TaskWorker(), queues, AutoscalePool(min_workers=4), schedule=settings.CELERYBEAT_SCHEDULE)
|
||||
consumer.run()
|
||||
except KeyboardInterrupt:
|
||||
logger.debug('Terminating Task Dispatcher')
|
||||
if consumer:
|
||||
consumer.stop()
|
||||
dispatcher_setup(get_dispatcherd_config(for_service=True))
|
||||
run_service()
|
||||
|
||||
def configure_dispatcher_logging(self):
|
||||
# Apply special log rule for the parent process
|
||||
|
||||
@@ -5,7 +5,7 @@ from django.core.management.base import BaseCommand
|
||||
from django.conf import settings
|
||||
from django.core.cache import cache
|
||||
from awx.main.dispatch import pg_bus_conn
|
||||
from awx.main.dispatch.worker.task import TaskWorker
|
||||
from awx.main.dispatch.worker.task import run_callable
|
||||
from awx.main.utils.external_logging import reconfigure_rsyslog
|
||||
|
||||
logger = logging.getLogger('awx.main.rsyslog_configurer')
|
||||
@@ -26,7 +26,7 @@ class Command(BaseCommand):
|
||||
conn.listen("rsyslog_configurer")
|
||||
# reconfigure rsyslog on start up
|
||||
reconfigure_rsyslog()
|
||||
for e in conn.events(yield_timeouts=True):
|
||||
for e in conn.events():
|
||||
if e is not None:
|
||||
logger.info("Change in logging settings found. Restarting rsyslogd")
|
||||
# clear the cache of relevant settings then restart
|
||||
@@ -34,7 +34,7 @@ class Command(BaseCommand):
|
||||
cache.delete_many(setting_keys)
|
||||
settings._awx_conf_memoizedcache.clear()
|
||||
body = json.loads(e.payload)
|
||||
TaskWorker.run_callable(body)
|
||||
run_callable(body)
|
||||
except Exception:
|
||||
# Log unanticipated exception in addition to writing to stderr to get timestamps and other metadata
|
||||
logger.exception('Encountered unhandled error in rsyslog_configurer main loop')
|
||||
|
||||
Reference in New Issue
Block a user