From b4803ca894e8d0016a54641f18f4cf2918937a86 Mon Sep 17 00:00:00 2001 From: Stanislav Zaprudskiy Date: Fri, 13 Jan 2023 10:22:53 +0100 Subject: [PATCH 1/5] Add disable_instance management command Signed-off-by: Stanislav Zaprudskiy --- .../management/commands/disable_instance.py | 153 ++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 awx/main/management/commands/disable_instance.py diff --git a/awx/main/management/commands/disable_instance.py b/awx/main/management/commands/disable_instance.py new file mode 100644 index 0000000000..3689154229 --- /dev/null +++ b/awx/main/management/commands/disable_instance.py @@ -0,0 +1,153 @@ +import socket +import time +from urllib.parse import urljoin + +from argparse import ArgumentTypeError + +from django.conf import settings +from django.core.management.base import BaseCommand, CommandError +from django.db.models import Q +from django.utils.timezone import now + +from awx.main.models import Instance, UnifiedJob + + +class AWXInstance(): + def __init__(self, **filter): + self.filter = filter + self.get_instance() + + def get_instance(self): + filter = self.filter if self.filter is not None else dict(hostname=socket.gethostname()) + qs = Instance.objects.filter(**filter) + if not qs.exists(): + raise ValueError(f"No AWX instance found with {filter} "\ + "parameters") + self.instance = qs.first() + + def disable(self): + if self.instance.enabled: + self.instance.enabled = False + self.instance.save() + return True + + def enable(self): + if not self.instance.enabled: + self.instance.enabled = True + self.instance.save() + return True + + def jobs(self): + return UnifiedJob.objects.filter( + Q(controller_node=self.instance.hostname) | Q(execution_node=self.instance.hostname), + status__in=("running", "waiting") + ) + + def jobs_pretty(self): + jobs = [] + for j in self.jobs(): + # similar calculation of `elapsed` as the corresponding serializer + # does + td = now() - j.started + elapsed = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / (10**6 * 1.0) + elapsed = float(elapsed) + details = dict( + name = j.name, + url = j.get_ui_url(), + elapsed = elapsed, + ) + jobs.append(details) + + jobs = sorted(jobs, reverse=True, key=lambda j: j["elapsed"]) + + return ", ".join( + [f"[\"{j['name']}\"]({j['url']})" for j in jobs] + ) + + def instance_pretty(self): + instance = ( + self.instance.hostname, + urljoin(settings.TOWER_URL_BASE, f"/#/instances/{self.instance.pk}/details"), + ) + return f"[\"{instance[0]}\"]({instance[1]})" + + +class Command(BaseCommand): + help = "Disable instance, optionally waiting for all its managed jobs " \ + "to finish." + + @staticmethod + def int_positive(arg): + int_arg = int(arg) + if int_arg < 1: + raise ArgumentTypeError(f"The value must be a positive number >= 1. Provided: \"{arg}\"") + return int_arg + + def add_arguments(self, parser): + filter_group = parser.add_mutually_exclusive_group() + + filter_group.add_argument("--hostname", type=str, + default=socket.gethostname(), + help=f"{Instance.hostname.field.help_text} Defaults to the " \ + "hostname of the machine where the Python interpreter is " \ + "currently executing".strip() + ) + filter_group.add_argument("--id", type=self.int_positive, + help=Instance.id.field.help_text + ) + + parser.add_argument("--wait", action="store_true", + help="Wait for jobs managed by the instance to finish. With " \ + "default retry arguments waits for about 3h", + ) + + parser.add_argument("--retry", type=self.int_positive, default=360, + help="Number of retries when waiting for jobs to finish. " \ + "Default: 360", + ) + + parser.add_argument("--retry_sleep", type=self.int_positive, default=30, + help="Number of seconds to sleep before consequtive retries " \ + "when waiting. Default: 30", + ) + + def handle(self, *args, **options): + try: + filter = dict(id=options["id"]) if options["id"] is not None else dict(hostname=options["hostname"]) + instance = AWXInstance(**filter) + except ValueError as e: + raise CommandError(e) + + if instance.disable(): + self.stdout.write(self.style.SUCCESS( + f"Instance {instance.instance_pretty()} has been disabled" + )) + else: + self.stdout.write( + f"Instance {instance.instance_pretty()} has already been disabled" + ) + + if not options["wait"]: + return + + rc = 1 + while instance.jobs().count() > 0: + if rc < options["retry"]: + self.stdout.write( + f"{rc}/{options['retry']}: " \ + f"Waiting {options['retry_sleep']}s before the next " \ + "attempt to see if the following instance' managed jobs " \ + f"have finished: {instance.jobs_pretty()}" + ) + rc += 1 + time.sleep(options["retry_sleep"]) + else: + raise CommandError( + f"{rc}/{options['retry']}: " \ + "No more retry attempts left, but the instance still " \ + f"has associated managed jobs: {instance.jobs_pretty()}" + ) + else: + self.stdout.write(self.style.SUCCESS( + "Done waiting for instance' managed jobs to finish!" + )) From d1c608a2819fd322bbc87e3a6d673e6ab5f6b48c Mon Sep 17 00:00:00 2001 From: Stanislav Zaprudskiy Date: Fri, 13 Jan 2023 11:38:01 +0100 Subject: [PATCH 2/5] Reformat with black Signed-off-by: Stanislav Zaprudskiy --- .../management/commands/disable_instance.py | 79 ++++++++----------- 1 file changed, 33 insertions(+), 46 deletions(-) diff --git a/awx/main/management/commands/disable_instance.py b/awx/main/management/commands/disable_instance.py index 3689154229..f17c72446c 100644 --- a/awx/main/management/commands/disable_instance.py +++ b/awx/main/management/commands/disable_instance.py @@ -12,7 +12,7 @@ from django.utils.timezone import now from awx.main.models import Instance, UnifiedJob -class AWXInstance(): +class AWXInstance: def __init__(self, **filter): self.filter = filter self.get_instance() @@ -21,8 +21,7 @@ class AWXInstance(): filter = self.filter if self.filter is not None else dict(hostname=socket.gethostname()) qs = Instance.objects.filter(**filter) if not qs.exists(): - raise ValueError(f"No AWX instance found with {filter} "\ - "parameters") + raise ValueError(f"No AWX instance found with {filter} parameters") self.instance = qs.first() def disable(self): @@ -39,8 +38,7 @@ class AWXInstance(): def jobs(self): return UnifiedJob.objects.filter( - Q(controller_node=self.instance.hostname) | Q(execution_node=self.instance.hostname), - status__in=("running", "waiting") + Q(controller_node=self.instance.hostname) | Q(execution_node=self.instance.hostname), status__in=("running", "waiting") ) def jobs_pretty(self): @@ -52,17 +50,15 @@ class AWXInstance(): elapsed = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / (10**6 * 1.0) elapsed = float(elapsed) details = dict( - name = j.name, - url = j.get_ui_url(), - elapsed = elapsed, + name=j.name, + url=j.get_ui_url(), + elapsed=elapsed, ) jobs.append(details) jobs = sorted(jobs, reverse=True, key=lambda j: j["elapsed"]) - return ", ".join( - [f"[\"{j['name']}\"]({j['url']})" for j in jobs] - ) + return ", ".join([f"[\"{j['name']}\"]({j['url']})" for j in jobs]) def instance_pretty(self): instance = ( @@ -73,8 +69,7 @@ class AWXInstance(): class Command(BaseCommand): - help = "Disable instance, optionally waiting for all its managed jobs " \ - "to finish." + help = "Disable instance, optionally waiting for all its managed jobs to finish." @staticmethod def int_positive(arg): @@ -86,29 +81,32 @@ class Command(BaseCommand): def add_arguments(self, parser): filter_group = parser.add_mutually_exclusive_group() - filter_group.add_argument("--hostname", type=str, + filter_group.add_argument( + "--hostname", + type=str, default=socket.gethostname(), - help=f"{Instance.hostname.field.help_text} Defaults to the " \ - "hostname of the machine where the Python interpreter is " \ - "currently executing".strip() + help=f"{Instance.hostname.field.help_text} Defaults to the hostname of the machine where the Python interpreter is currently executing".strip(), ) - filter_group.add_argument("--id", type=self.int_positive, - help=Instance.id.field.help_text + filter_group.add_argument("--id", type=self.int_positive, help=Instance.id.field.help_text) + + parser.add_argument( + "--wait", + action="store_true", + help="Wait for jobs managed by the instance to finish. With default retry arguments waits for about 3h", ) - parser.add_argument("--wait", action="store_true", - help="Wait for jobs managed by the instance to finish. With " \ - "default retry arguments waits for about 3h", + parser.add_argument( + "--retry", + type=self.int_positive, + default=360, + help="Number of retries when waiting for jobs to finish. Default: 360", ) - parser.add_argument("--retry", type=self.int_positive, default=360, - help="Number of retries when waiting for jobs to finish. " \ - "Default: 360", - ) - - parser.add_argument("--retry_sleep", type=self.int_positive, default=30, - help="Number of seconds to sleep before consequtive retries " \ - "when waiting. Default: 30", + parser.add_argument( + "--retry_sleep", + type=self.int_positive, + default=30, + help="Number of seconds to sleep before consequtive retries when waiting. Default: 30", ) def handle(self, *args, **options): @@ -119,13 +117,9 @@ class Command(BaseCommand): raise CommandError(e) if instance.disable(): - self.stdout.write(self.style.SUCCESS( - f"Instance {instance.instance_pretty()} has been disabled" - )) + self.stdout.write(self.style.SUCCESS(f"Instance {instance.instance_pretty()} has been disabled")) else: - self.stdout.write( - f"Instance {instance.instance_pretty()} has already been disabled" - ) + self.stdout.write(f"Instance {instance.instance_pretty()} has already been disabled") if not options["wait"]: return @@ -134,20 +128,13 @@ class Command(BaseCommand): while instance.jobs().count() > 0: if rc < options["retry"]: self.stdout.write( - f"{rc}/{options['retry']}: " \ - f"Waiting {options['retry_sleep']}s before the next " \ - "attempt to see if the following instance' managed jobs " \ - f"have finished: {instance.jobs_pretty()}" + f"{rc}/{options['retry']}: Waiting {options['retry_sleep']}s before the next attempt to see if the following instance' managed jobs have finished: {instance.jobs_pretty()}" ) rc += 1 time.sleep(options["retry_sleep"]) else: raise CommandError( - f"{rc}/{options['retry']}: " \ - "No more retry attempts left, but the instance still " \ - f"has associated managed jobs: {instance.jobs_pretty()}" + f"{rc}/{options['retry']}: No more retry attempts left, but the instance still has associated managed jobs: {instance.jobs_pretty()}" ) else: - self.stdout.write(self.style.SUCCESS( - "Done waiting for instance' managed jobs to finish!" - )) + self.stdout.write(self.style.SUCCESS("Done waiting for instance' managed jobs to finish!")) From 166b586591fc1e3b5247a94d7900b1a1145189c4 Mon Sep 17 00:00:00 2001 From: Stanislav Zaprudskiy Date: Fri, 13 Jan 2023 19:42:59 +0100 Subject: [PATCH 3/5] Support indefinitely waiting for jobs to finish Signed-off-by: Stanislav Zaprudskiy --- .../management/commands/disable_instance.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/awx/main/management/commands/disable_instance.py b/awx/main/management/commands/disable_instance.py index f17c72446c..6e54912928 100644 --- a/awx/main/management/commands/disable_instance.py +++ b/awx/main/management/commands/disable_instance.py @@ -44,6 +44,8 @@ class AWXInstance: def jobs_pretty(self): jobs = [] for j in self.jobs(): + if not j.started: + continue # similar calculation of `elapsed` as the corresponding serializer # does td = now() - j.started @@ -72,7 +74,10 @@ class Command(BaseCommand): help = "Disable instance, optionally waiting for all its managed jobs to finish." @staticmethod - def int_positive(arg): + def ge_1(arg): + if arg == "inf": + return float(arg) + int_arg = int(arg) if int_arg < 1: raise ArgumentTypeError(f"The value must be a positive number >= 1. Provided: \"{arg}\"") @@ -87,24 +92,24 @@ class Command(BaseCommand): default=socket.gethostname(), help=f"{Instance.hostname.field.help_text} Defaults to the hostname of the machine where the Python interpreter is currently executing".strip(), ) - filter_group.add_argument("--id", type=self.int_positive, help=Instance.id.field.help_text) + filter_group.add_argument("--id", type=self.ge_1, help=Instance.id.field.help_text) parser.add_argument( "--wait", action="store_true", - help="Wait for jobs managed by the instance to finish. With default retry arguments waits for about 3h", + help="Wait for jobs managed by the instance to finish. With default retry arguments waits ~1h", ) parser.add_argument( "--retry", - type=self.int_positive, - default=360, - help="Number of retries when waiting for jobs to finish. Default: 360", + type=self.ge_1, + default=120, + help="Number of retries when waiting for jobs to finish. Default: 120. Also accepts \"inf\" to wait indefinitely", ) parser.add_argument( "--retry_sleep", - type=self.int_positive, + type=self.ge_1, default=30, help="Number of seconds to sleep before consequtive retries when waiting. Default: 30", ) From f2ab8d637cd1921c7d383032b36a89043de8ed1e Mon Sep 17 00:00:00 2001 From: Stanislav Zaprudskiy Date: Tue, 17 Jan 2023 13:28:34 +0100 Subject: [PATCH 4/5] Do not discard jobs w/ .started=None --- awx/main/management/commands/disable_instance.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/awx/main/management/commands/disable_instance.py b/awx/main/management/commands/disable_instance.py index 6e54912928..a0f7bfef10 100644 --- a/awx/main/management/commands/disable_instance.py +++ b/awx/main/management/commands/disable_instance.py @@ -44,11 +44,10 @@ class AWXInstance: def jobs_pretty(self): jobs = [] for j in self.jobs(): - if not j.started: - continue + job_started = j.started if j.started else now() # similar calculation of `elapsed` as the corresponding serializer # does - td = now() - j.started + td = now() - job_started elapsed = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / (10**6 * 1.0) elapsed = float(elapsed) details = dict( @@ -76,7 +75,7 @@ class Command(BaseCommand): @staticmethod def ge_1(arg): if arg == "inf": - return float(arg) + return float("inf") int_arg = int(arg) if int_arg < 1: From 35fbb94aa67e7358584756d68d6de92ef9577884 Mon Sep 17 00:00:00 2001 From: Stanislav Zaprudskiy Date: Fri, 17 Feb 2023 17:33:45 +0100 Subject: [PATCH 5/5] Use `CLUSTER_HOST_ID` as default hostname argument value Incorporates feedback from https://github.com/ansible/awx/pull/13445/files#r1106012308 Signed-off-by: Stanislav Zaprudskiy --- awx/main/management/commands/disable_instance.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/awx/main/management/commands/disable_instance.py b/awx/main/management/commands/disable_instance.py index a0f7bfef10..054e47974b 100644 --- a/awx/main/management/commands/disable_instance.py +++ b/awx/main/management/commands/disable_instance.py @@ -1,4 +1,3 @@ -import socket import time from urllib.parse import urljoin @@ -18,7 +17,7 @@ class AWXInstance: self.get_instance() def get_instance(self): - filter = self.filter if self.filter is not None else dict(hostname=socket.gethostname()) + filter = self.filter if self.filter is not None else dict(hostname=settings.CLUSTER_HOST_ID) qs = Instance.objects.filter(**filter) if not qs.exists(): raise ValueError(f"No AWX instance found with {filter} parameters") @@ -88,7 +87,7 @@ class Command(BaseCommand): filter_group.add_argument( "--hostname", type=str, - default=socket.gethostname(), + default=settings.CLUSTER_HOST_ID, help=f"{Instance.hostname.field.help_text} Defaults to the hostname of the machine where the Python interpreter is currently executing".strip(), ) filter_group.add_argument("--id", type=self.ge_1, help=Instance.id.field.help_text)