mirror of
https://github.com/ansible/awx.git
synced 2026-02-16 10:40:01 -03:30
fix a bug that breaks job cancel on single node jobs
1. Install awx w/ a single node.
2. Start a long-running job.
3. Forcibly kill the `awx-manage run_dispatcher` process (e.g.,
SIGKILL) and do not start it again.
4. The job remains in running - without a second cluster to discover
the job, it is never reaped.
5. This PR allows you to cancel the job from the UI+API.
This commit is contained in:
@@ -14,18 +14,18 @@ class Control(object):
|
||||
services = ('dispatcher', 'callback_receiver')
|
||||
result = None
|
||||
|
||||
def __init__(self, service):
|
||||
def __init__(self, service, host=None):
|
||||
if service not in self.services:
|
||||
raise RuntimeError('{} must be in {}'.format(service, self.services))
|
||||
self.service = service
|
||||
queuename = get_local_queuename()
|
||||
self.queue = Queue(queuename, Exchange(queuename), routing_key=queuename)
|
||||
self.queuename = host or get_local_queuename()
|
||||
self.queue = Queue(self.queuename, Exchange(self.queuename), routing_key=self.queuename)
|
||||
|
||||
def publish(self, msg, conn, host, **kwargs):
|
||||
def publish(self, msg, conn, **kwargs):
|
||||
producer = Producer(
|
||||
exchange=self.queue.exchange,
|
||||
channel=conn,
|
||||
routing_key=get_local_queuename()
|
||||
routing_key=self.queuename
|
||||
)
|
||||
producer.publish(msg, expiration=5, **kwargs)
|
||||
|
||||
@@ -35,14 +35,13 @@ class Control(object):
|
||||
def running(self, *args, **kwargs):
|
||||
return self.control_with_reply('running', *args, **kwargs)
|
||||
|
||||
def control_with_reply(self, command, host=None, timeout=5):
|
||||
host = host or settings.CLUSTER_HOST_ID
|
||||
logger.warn('checking {} {} for {}'.format(self.service, command, host))
|
||||
def control_with_reply(self, command, timeout=5):
|
||||
logger.warn('checking {} {} for {}'.format(self.service, command, self.queuename))
|
||||
reply_queue = Queue(name="amq.rabbitmq.reply-to")
|
||||
self.result = None
|
||||
with Connection(settings.BROKER_URL) as conn:
|
||||
with Consumer(conn, reply_queue, callbacks=[self.process_message], no_ack=True):
|
||||
self.publish({'control': command}, conn, host, reply_to='amq.rabbitmq.reply-to')
|
||||
self.publish({'control': command}, conn, reply_to='amq.rabbitmq.reply-to')
|
||||
try:
|
||||
conn.drain_events(timeout=timeout)
|
||||
except socket.timeout:
|
||||
@@ -50,10 +49,9 @@ class Control(object):
|
||||
raise
|
||||
return self.result
|
||||
|
||||
def control(self, msg, host=None, **kwargs):
|
||||
host = host or settings.CLUSTER_HOST_ID
|
||||
def control(self, msg, **kwargs):
|
||||
with Connection(settings.BROKER_URL) as conn:
|
||||
self.publish(msg, conn, host)
|
||||
self.publish(msg, conn)
|
||||
|
||||
def process_message(self, body, message):
|
||||
self.result = body
|
||||
|
||||
Reference in New Issue
Block a user