From 878008a9c559fc8dadd0edaf053f4d492214dab9 Mon Sep 17 00:00:00 2001
From: Aparna Karve <akarve@redhat.com>
Date: Fri, 3 Mar 2023 14:20:22 -0800
Subject: [PATCH] Make `rows_per_file` an optional parameter

Removed the two SQL statements that fetched the row counts,
which in turn required many other changes.
---
 awx/main/management/commands/host_metric.py | 145 ++++++++------------
 1 file changed, 58 insertions(+), 87 deletions(-)

diff --git a/awx/main/management/commands/host_metric.py b/awx/main/management/commands/host_metric.py
index 5cf2aef18b..5b38cb5fd5 100644
--- a/awx/main/management/commands/host_metric.py
+++ b/awx/main/management/commands/host_metric.py
@@ -49,36 +49,41 @@ class Command(BaseCommand):
 
         return list_of_queryset
 
-    def paginated_db_retrieval(self, type, filter_kwargs, offset=0, limit=PREFERRED_ROW_COUNT):
+    def paginated_db_retrieval(self, type, filter_kwargs, rows_per_file):
+        offset = 0
         list_of_queryset = []
-        if type == 'host_metric':
-            result = HostMetric.objects.filter(**filter_kwargs)
-            list_of_queryset = self.host_metric_queryset(result, offset, limit)
-        elif type == 'host_metric_summary_monthly':
-            result = HostMetricSummaryMonthly.objects.filter(**filter_kwargs)
-            list_of_queryset = self.host_metric_summary_monthly_queryset(result, offset, limit)
+        while True:
+            if type == 'host_metric':
+                result = HostMetric.objects.filter(**filter_kwargs)
+                list_of_queryset = self.host_metric_queryset(result, offset, rows_per_file)
+            elif type == 'host_metric_summary_monthly':
+                result = HostMetricSummaryMonthly.objects.filter(**filter_kwargs)
+                list_of_queryset = self.host_metric_summary_monthly_queryset(result, offset, rows_per_file)
 
-        return list_of_queryset
+            if not list_of_queryset:
+                break
+            else:
+                yield list_of_queryset
 
-    def whole_page_count(self, row_count, rows_per_file):
-        whole_pages = int(row_count / rows_per_file)
-        partial_page = row_count % rows_per_file
-        if partial_page:
-            whole_pages += 1
-        return whole_pages
+            offset += len(list_of_queryset)
 
-    def csv_for_tar(self, options, temp_dir, type, filter_kwargs, index=1, offset=0, rows_per_file=PREFERRED_ROW_COUNT):
-        list_of_queryset = self.paginated_db_retrieval(type, filter_kwargs, offset, rows_per_file)
-        csv_file = f'{temp_dir}/{type}{index}.csv'
-        arcname_file = f'{type}{index}.csv'
+    def csv_for_tar(self, temp_dir, type, filter_kwargs, single_header=False, rows_per_file=PREFERRED_ROW_COUNT):
+        for index, list_of_queryset in enumerate(self.paginated_db_retrieval(type, filter_kwargs, rows_per_file)):
+            csv_file = f'{temp_dir}/{type}{index+1}.csv'
+            arcname_file = f'{type}{index+1}.csv'
 
-        with open(csv_file, 'w', newline='') as output_file:
-            keys = list_of_queryset[0].keys() if list_of_queryset else []
-            dict_writer = csv.DictWriter(output_file, keys)
-            dict_writer.writeheader()
-            dict_writer.writerows(list_of_queryset)
+            with open(csv_file, 'w', newline='') as output_file:
+                try:
+                    keys = list_of_queryset[0].keys() if list_of_queryset else []
+                    dict_writer = csv.DictWriter(output_file, keys)
+                    if not single_header or index == 0:
+                        dict_writer.writeheader()
+                    dict_writer.writerows(list_of_queryset)
 
-        return csv_file, arcname_file
+                except Exception as e:
+                    print(e)
+
+            yield csv_file, arcname_file
 
     def config_for_tar(self, options, temp_dir):
         config_json = json.dumps(config(options.get('since')))
@@ -89,61 +94,37 @@ class Command(BaseCommand):
         return config_file, arcname_file
 
     def output_json(self, options, filter_kwargs):
-        if not options.get('json') or options.get('json') == 'host_metric':
-            result = HostMetric.objects.filter(**filter_kwargs)
-            list_of_queryset = self.host_metric_queryset(result)
-        elif options.get('json') == 'host_metric_summary_monthly':
-            result = HostMetricSummaryMonthly.objects.filter(**filter_kwargs)
-            list_of_queryset = self.host_metric_summary_monthly_queryset(result)
+        rows_per_file = options['rows_per_file'] or PREFERRED_ROW_COUNT
+        with tempfile.TemporaryDirectory() as temp_dir:
+            for csv_detail in self.csv_for_tar(temp_dir, options.get('json', 'host_metric'), filter_kwargs, False, rows_per_file):
+                csv_file = csv_detail[0]
 
-        json_result = json.dumps(list_of_queryset, cls=DjangoJSONEncoder)
-        print(json_result)
+                with open(csv_file) as f:
+                    reader = csv.DictReader(f)
+                    rows = list(reader)
+                    json_result = json.dumps(rows, cls=DjangoJSONEncoder)
+                    print(json_result)
 
     def output_csv(self, options, filter_kwargs):
+        rows_per_file = options['rows_per_file'] or PREFERRED_ROW_COUNT
         with tempfile.TemporaryDirectory() as temp_dir:
-            if not options.get('csv') or options.get('csv') == 'host_metric':
-                csv_file, _arcname_file = self.csv_for_tar(options, temp_dir, 'host_metric', filter_kwargs)
-            elif options.get('csv') == 'host_metric_summary_monthly':
-                csv_file, _arcname_file = self.csv_for_tar(options, temp_dir, 'host_metric_summary_monthly', filter_kwargs)
-            with open(csv_file) as f:
-                sys.stdout.write(f.read())
+            for csv_detail in self.csv_for_tar(temp_dir, options.get('csv', 'host_metric'), filter_kwargs, True, rows_per_file):
+                csv_file = csv_detail[0]
+                with open(csv_file) as f:
+                    sys.stdout.write(f.read())
+
+    def output_tarball(self, options, filter_kwargs):
+        single_header = False if options['rows_per_file'] else True
+        rows_per_file = options['rows_per_file'] or PREFERRED_ROW_COUNT
 
-    def output_tarball(self, options, filter_kwargs, host_metric_row_count, host_metric_summary_monthly_row_count):
         tar = tarfile.open("./host_metrics.tar.gz", "w:gz")
 
         with tempfile.TemporaryDirectory() as temp_dir:
-            if host_metric_row_count:
-                csv_file, arcname_file = self.csv_for_tar(options, temp_dir, 'host_metric', filter_kwargs)
-                tar.add(csv_file, arcname=arcname_file)
+            for csv_detail in self.csv_for_tar(temp_dir, 'host_metric', filter_kwargs, single_header, rows_per_file):
+                tar.add(csv_detail[0], arcname=csv_detail[1])
 
-            if host_metric_summary_monthly_row_count:
-                csv_file, arcname_file = self.csv_for_tar(options, temp_dir, 'host_metric_summary_monthly', filter_kwargs)
-                tar.add(csv_file, arcname=arcname_file)
-
-            config_file, arcname_file = self.config_for_tar(options, temp_dir)
-            tar.add(config_file, arcname=arcname_file)
-
-        tar.close()
-
-    def output_rows_per_file(self, options, filter_kwargs, host_metric_row_count, host_metric_summary_monthly_row_count):
-        rows_per_file = options.get('rows_per_file', PREFERRED_ROW_COUNT)
-        tar = tarfile.open("./host_metrics.tar.gz", "w:gz")
-
-        host_metric_whole_pages = self.whole_page_count(host_metric_row_count, rows_per_file)
-        host_metric_summary_monthly_whole_pages = self.whole_page_count(host_metric_summary_monthly_row_count, rows_per_file)
-
-        with tempfile.TemporaryDirectory() as temp_dir:
-            for index in range(host_metric_whole_pages):
-                offset = index * rows_per_file
-
-                csv_file, arcname_file = self.csv_for_tar(options, temp_dir, 'host_metric', filter_kwargs, index + 1, offset, rows_per_file)
-                tar.add(csv_file, arcname=arcname_file)
-
-            for index in range(host_metric_summary_monthly_whole_pages):
-                offset = index * rows_per_file
-
-                csv_file, arcname_file = self.csv_for_tar(options, temp_dir, 'host_metric_summary_monthly', filter_kwargs, index + 1, offset, rows_per_file)
-                tar.add(csv_file, arcname=arcname_file)
+            for csv_detail in self.csv_for_tar(temp_dir, 'host_metric_summary_monthly', filter_kwargs, single_header, rows_per_file):
+                tar.add(csv_detail[0], arcname=csv_detail[1])
 
             config_file, arcname_file = self.config_for_tar(options, temp_dir)
             tar.add(config_file, arcname=arcname_file)
@@ -180,17 +161,8 @@ class Command(BaseCommand):
         if until is not None:
             filter_kwargs_host_metrics_summary['date__lte'] = until
 
-        host_metric_row_count = HostMetric.objects.filter(**filter_kwargs).count()
-        host_metric_summary_monthly_row_count = HostMetricSummaryMonthly.objects.filter(**filter_kwargs_host_metrics_summary).count()
-
-        if (host_metric_row_count > PREFERRED_ROW_COUNT or host_metric_summary_monthly_row_count > PREFERRED_ROW_COUNT) and (
-            not options.get('rows_per_file') or options.get('rows_per_file') > PREFERRED_ROW_COUNT
-        ):
-            print(
-                f"HostMetric / HostMetricSummaryMonthly rows exceed the allowable limit of {PREFERRED_ROW_COUNT}. "
-                f"Set --rows_per_file {PREFERRED_ROW_COUNT} "
-                f"to split the rows in chunks of {PREFERRED_ROW_COUNT}"
-            )
+        if options['rows_per_file'] and options.get('rows_per_file') > PREFERRED_ROW_COUNT:
+            print(f"rows_per_file exceeds the allowable limit of {PREFERRED_ROW_COUNT}.")
             return
 
         # if --json flag is set, output the result in json format
@@ -199,18 +171,17 @@ class Command(BaseCommand):
         elif options['csv']:
             self.output_csv(options, filter_kwargs)
         elif options['tarball']:
-            self.output_tarball(options, filter_kwargs, host_metric_row_count, host_metric_summary_monthly_row_count)
-        elif options['rows_per_file']:
-            self.output_rows_per_file(options, filter_kwargs, host_metric_row_count, host_metric_summary_monthly_row_count)
+            self.output_tarball(options, filter_kwargs)
 
         # --json flag is not set, output in plain text
         else:
-            print(f"Total Number of hosts automated: {host_metric_row_count}")
+            print(f"Printing up to {PREFERRED_ROW_COUNT } automated hosts:")
             result = HostMetric.objects.filter(**filter_kwargs)
-            for item in result:
+            list_of_queryset = self.host_metric_queryset(result, 0, PREFERRED_ROW_COUNT)
+            for item in list_of_queryset:
                 print(
                     "Hostname : {hostname} | first_automation : {first_automation} | last_automation : {last_automation}".format(
-                        hostname=item.hostname, first_automation=item.first_automation, last_automation=item.last_automation
+                        hostname=item['hostname'], first_automation=item['first_automation'], last_automation=item['last_automation']
                     )
                 )
         return