CLI for host usage collection

2026-03-01 08:48:46 -03:30 · 2023-02-14 16:53:56 -08:00
parent 88bb6e5a6a
commit 311cea5a4a
1 changed files with 203 additions and 11 deletions
--- a/awx/main/management/commands/host_metric.py
+++ b/awx/main/management/commands/host_metric.py
@@ -1,26 +1,196 @@
 from django.core.management.base import BaseCommand
 import datetime
 from django.core.serializers.json import DjangoJSONEncoder
-from awx.main.models.inventory import HostMetric
+from awx.main.models.inventory import HostMetric, HostMetricSummaryMonthly
+from awx.main.analytics.collectors import config
+from awx.main.utils.encryption import get_encryption_key, Fernet256
+from django.utils.encoding import smart_str, smart_bytes
+import base64
 import json
+import sys
+import tempfile
+import tarfile
+import pandas as pd
+
+# Default page size: used as the default row limit of the queryset helpers
+# and as the default number of rows written to a single CSV file.
+PREFERRED_ROW_COUNT = 500000


 class Command(BaseCommand):
    help = 'This is for offline licensing usage'

+    def host_metric_queryset(self, result, offset=0, limit=PREFERRED_ROW_COUNT):
+        """Return one page of HostMetric rows as a list of dicts.
+
+        Args:
+            result: queryset to read from; only ``values()``, ``order_by()``
+                and slicing are used on it.
+            offset: index of the first row of the page.
+            limit: maximum number of rows in the page.
+
+        Returns:
+            A list with one dict per row, holding the columns named below.
+        """
+        columns = (
+            'id',
+            'hostname',
+            'first_automation',
+            'last_automation',
+            'last_deleted',
+            'automated_counter',
+            'deleted_counter',
+            'deleted',
+            'used_in_inventories',
+        )
+        # first_automation is not guaranteed to be unique, so ordering by it
+        # alone lets rows that share a timestamp move between pages from one
+        # query to the next. 'id' breaks ties so that offset/limit paging
+        # neither repeats nor drops rows.
+        page = result.values(*columns).order_by('first_automation', 'id')[offset : offset + limit]
+        return list(page)
+
+    def host_metric_summary_monthly_queryset(self, result, offset=0, limit=PREFERRED_ROW_COUNT):
+        """Return one page of HostMetricSummaryMonthly rows as a list of dicts.
+
+        Args:
+            result: queryset to read from; only ``values()``, ``order_by()``
+                and slicing are used on it.
+            offset: index of the first row of the page.
+            limit: maximum number of rows in the page.
+
+        Returns:
+            A list with one dict per row, holding the columns named below.
+        """
+        columns = (
+            'id',
+            'date',
+            'license_consumed',
+            'license_capacity',
+            'hosts_added',
+            'hosts_deleted',
+            'indirectly_managed_hosts',
+        )
+        # 'id' is a tie-breaker: should two rows ever share a date, the page
+        # boundaries stay stable across the separate offset/limit queries.
+        page = result.values(*columns).order_by('date', 'id')[offset : offset + limit]
+        return list(page)
+
+    def paginated_df(self, options, type, filter_kwargs, offset=0, limit=PREFERRED_ROW_COUNT):
+        """Load one page of the requested table into a pandas DataFrame.
+
+        Args:
+            options: parsed command options; only ``anonymized`` is read here.
+            type: ``'host_metric'`` or ``'host_metric_summary_monthly'``. Any
+                other value yields an empty DataFrame.
+            filter_kwargs: keyword arguments passed to ``objects.filter()``.
+            offset: index of the first row of the page.
+            limit: maximum number of rows in the page.
+
+        Returns:
+            A DataFrame with one row per record. When the ``anonymized``
+            option is set and a ``hostname`` column is present, every hostname
+            is replaced by the output of ``obfuscated_hostname``.
+        """
+        rows = []
+        if type == 'host_metric':
+            rows = self.host_metric_queryset(HostMetric.objects.filter(**filter_kwargs), offset, limit)
+        elif type == 'host_metric_summary_monthly':
+            rows = self.host_metric_summary_monthly_queryset(HostMetricSummaryMonthly.objects.filter(**filter_kwargs), offset, limit)
+
+        df = pd.DataFrame(rows)
+
+        salt = options.get('anonymized')
+        if salt and 'hostname' in df.columns:
+            key = get_encryption_key('hostname', salt)
+            # Map over the single column rather than DataFrame.apply(axis=1),
+            # which builds a Series for every row just to read one field.
+            df['hostname'] = df['hostname'].map(lambda hostname: self.obfuscated_hostname(key, hostname))
+
+        return df
+
+    def obfuscated_hostname(self, secret_sauce, hostname):
+        """Return ``hostname`` encrypted with the key ``secret_sauce`` via ``encrypt_name``."""
+        obfuscated = self.encrypt_name(secret_sauce, hostname)
+        return obfuscated
+
+    def whole_page_count(self, row_count, rows_per_file):
+        """Return the number of files needed to hold ``row_count`` rows.
+
+        A trailing partial page counts as one more file, so the result is the
+        ceiling of ``row_count / rows_per_file``.
+
+        Args:
+            row_count: total number of rows (non-negative integer).
+            rows_per_file: maximum number of rows per file (positive integer).
+
+        Raises:
+            ZeroDivisionError: if ``rows_per_file`` is 0.
+        """
+        # divmod keeps the arithmetic in integers; going through float
+        # division loses precision once the counts exceed 2**53.
+        whole_pages, partial_page = divmod(row_count, rows_per_file)
+        return whole_pages + 1 if partial_page else whole_pages
+
+    def csv_for_tar(self, options, temp_dir, type, filter_kwargs, index=1, offset=0, rows_per_file=PREFERRED_ROW_COUNT):
+        """Write one page of a table to ``<temp_dir>/<type><index>.csv``.
+
+        The page is loaded through ``paginated_df`` and written without the
+        DataFrame index column.
+
+        Returns:
+            A ``(path_on_disk, archive_member_name)`` tuple.
+        """
+        page = self.paginated_df(options, type, filter_kwargs, offset, rows_per_file)
+        arcname_file = f'{type}{index}.csv'
+        csv_file = f'{temp_dir}/{arcname_file}'
+        page.to_csv(csv_file, index=False)
+        return csv_file, arcname_file
+
+    def config_for_tar(self, options, temp_dir):
+        """Dump the analytics ``config`` collector output to ``<temp_dir>/config.json``.
+
+        The collector is called with the ``since`` option as its only argument.
+
+        Returns:
+            A ``(path_on_disk, archive_member_name)`` tuple.
+        """
+        # Serialize before opening the file so a failure leaves no empty file behind.
+        payload = json.dumps(config(options.get('since')))
+        arcname_file = 'config.json'
+        config_file = f'{temp_dir}/{arcname_file}'
+        with open(config_file, 'w') as handle:
+            handle.write(payload)
+        return config_file, arcname_file
+
+    def encrypt_name(self, key, value):
+        """Encrypt ``value`` with ``key`` and return it as a tagged string.
+
+        The result has the form ``$encrypted$UTF8$AESCBC$<base64 token>``,
+        which is the layout ``decrypt_name`` parses.
+        """
+        plaintext = smart_bytes(smart_str(value))
+        token = Fernet256(key).encrypt(plaintext)
+        b64data = smart_str(base64.b64encode(token))
+        return '$'.join(('$encrypted', 'UTF8', 'AESCBC', b64data))
+
+    def decrypt_name(self, encryption_key, value):
+        """Decrypt a string produced by ``encrypt_name`` and return the plain text.
+
+        The leading ``$encrypted$`` tag is stripped by length (it is not
+        verified), an optional ``UTF8$`` marker is skipped, and the remainder
+        must be ``<algorithm>$<base64 token>``.
+
+        Raises:
+            ValueError: if the algorithm is not ``AESCBC``, or if the
+                remainder contains no ``$`` separator.
+        """
+        prefix = '$encrypted$'
+        utf8_marker = 'UTF8$'
+
+        payload = value[len(prefix) :]
+        # The UTF8 marker carries no information needed for decryption.
+        if payload.startswith(utf8_marker):
+            payload = payload[len(utf8_marker) :]
+
+        algo, b64data = payload.split('$', 1)
+        if algo != 'AESCBC':
+            raise ValueError('unsupported algorithm: %s' % algo)
+
+        encrypted = base64.b64decode(b64data)
+        return smart_str(Fernet256(encryption_key).decrypt(encrypted))
+
+    def output_json(self, options, filter_kwargs):
+        """Print the first page of the selected table as a JSON array on stdout.
+
+        Args:
+            options: parsed command options; ``json`` names the table and
+                falls back to ``'host_metric'`` when empty.
+            filter_kwargs: keyword arguments passed to ``objects.filter()``.
+
+        Raises:
+            ValueError: if ``json`` names an unknown table (previously this
+                surfaced as an UnboundLocalError).
+        """
+        table = options.get('json') or 'host_metric'
+        if table == 'host_metric':
+            rows = self.host_metric_queryset(HostMetric.objects.filter(**filter_kwargs))
+        elif table == 'host_metric_summary_monthly':
+            # NOTE(review): handle() passes the HostMetric filters here (keys
+            # such as last_automation__lte); presumably the summary model has
+            # no such field, so --since/--until may fail for this table.
+            # handle() also builds date-based filters, but only uses them for
+            # the row count -- confirm which set is intended.
+            rows = self.host_metric_summary_monthly_queryset(HostMetricSummaryMonthly.objects.filter(**filter_kwargs))
+        else:
+            raise ValueError(f"unsupported --json value {table!r}; expected 'host_metric' or 'host_metric_summary_monthly'")
+
+        print(json.dumps(rows, cls=DjangoJSONEncoder))
+
+    def output_csv(self, options, filter_kwargs):
+        """Write the first page of the selected table to stdout as CSV.
+
+        Args:
+            options: parsed command options; ``csv`` names the table and
+                falls back to ``'host_metric'`` when empty.
+            filter_kwargs: keyword arguments passed to ``objects.filter()``.
+
+        Raises:
+            ValueError: if ``csv`` names an unknown table (previously this
+                surfaced as an UnboundLocalError).
+        """
+        table = options.get('csv') or 'host_metric'
+        # Validate before creating the temporary directory.
+        if table not in ('host_metric', 'host_metric_summary_monthly'):
+            raise ValueError(f"unsupported --csv value {table!r}; expected 'host_metric' or 'host_metric_summary_monthly'")
+
+        # NOTE(review): for the summary table, filter_kwargs still holds the
+        # HostMetric filters built in handle() -- confirm that is intended.
+        with tempfile.TemporaryDirectory() as temp_dir:
+            csv_file, _arcname_file = self.csv_for_tar(options, temp_dir, table, filter_kwargs)
+            with open(csv_file) as f:
+                # Stream line by line instead of loading the whole file.
+                sys.stdout.writelines(f)
+
+    def output_tarball(self, options, filter_kwargs, host_metric_row_count, host_metric_summary_monthly_row_count):
+        """Write ``./host_metrics.tar.gz`` with one CSV per non-empty table plus ``config.json``.
+
+        Args:
+            options: parsed command options, forwarded to the CSV/config writers.
+            filter_kwargs: keyword arguments passed to ``objects.filter()``.
+            host_metric_row_count: a table is skipped when its count is 0.
+            host_metric_summary_monthly_row_count: same, for the summary table.
+        """
+        tables = (
+            ('host_metric', host_metric_row_count),
+            ('host_metric_summary_monthly', host_metric_summary_monthly_row_count),
+        )
+
+        # The context manager closes the archive even when a query or a write
+        # fails; the previous explicit tar.close() was skipped on any error.
+        with tarfile.open("./host_metrics.tar.gz", "w:gz") as tar:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                for table, row_count in tables:
+                    if not row_count:
+                        continue
+                    # NOTE(review): the summary row count was computed in
+                    # handle() with date-based filters, while the rows are
+                    # fetched with filter_kwargs -- confirm that is intended.
+                    csv_file, arcname_file = self.csv_for_tar(options, temp_dir, table, filter_kwargs)
+                    tar.add(csv_file, arcname=arcname_file)
+
+                config_file, arcname_file = self.config_for_tar(options, temp_dir)
+                tar.add(config_file, arcname=arcname_file)
+
+    def output_rows_per_file(self, options, filter_kwargs, host_metric_row_count, host_metric_summary_monthly_row_count):
+        """Write ``./host_metrics.tar.gz`` with each table split into numbered CSV files.
+
+        Every table is cut into pages of ``rows_per_file`` rows, written as
+        ``<table>1.csv``, ``<table>2.csv`` and so on, followed by ``config.json``.
+
+        Args:
+            options: parsed command options; ``rows_per_file`` sets the page
+                size and falls back to ``PREFERRED_ROW_COUNT`` when unset.
+            filter_kwargs: keyword arguments passed to ``objects.filter()``.
+            host_metric_row_count: total HostMetric rows to export.
+            host_metric_summary_monthly_row_count: total summary rows to export.
+        """
+        # The key is present with value None when the flag is omitted, so a
+        # .get() default would never apply; fall back explicitly instead.
+        rows_per_file = options.get('rows_per_file') or PREFERRED_ROW_COUNT
+        tables = (
+            ('host_metric', host_metric_row_count),
+            ('host_metric_summary_monthly', host_metric_summary_monthly_row_count),
+        )
+
+        # The context manager closes the archive even when a query or a write
+        # fails; the previous explicit tar.close() was skipped on any error.
+        with tarfile.open("./host_metrics.tar.gz", "w:gz") as tar:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                for table, row_count in tables:
+                    # NOTE(review): the summary row count was computed in
+                    # handle() with date-based filters, while the rows are
+                    # fetched with filter_kwargs -- confirm that is intended.
+                    for page in range(self.whole_page_count(row_count, rows_per_file)):
+                        offset = page * rows_per_file
+                        csv_file, arcname_file = self.csv_for_tar(options, temp_dir, table, filter_kwargs, page + 1, offset, rows_per_file)
+                        tar.add(csv_file, arcname=arcname_file)
+
+                config_file, arcname_file = self.config_for_tar(options, temp_dir)
+                tar.add(config_file, arcname=arcname_file)
+
    def add_arguments(self, parser):
+        """Register the date-range, output-format and anonymization options."""
        parser.add_argument('--since', type=datetime.datetime.fromisoformat, help='Start Date in ISO format YYYY-MM-DD')
        parser.add_argument('--until', type=datetime.datetime.fromisoformat, help='End Date in ISO format YYYY-MM-DD')
-        parser.add_argument('--json', action='store_true', help='Select output as JSON')
+        # Restricting the table name at parse time gives a usage error
+        # instead of a failure later in the output code.
+        tables = ('host_metric', 'host_metric_summary_monthly')
+        parser.add_argument(
+            '--json', type=str, const='host_metric', nargs='?', choices=tables, help='Select output as JSON for host_metric or host_metric_summary_monthly'
+        )
+        parser.add_argument(
+            '--csv', type=str, const='host_metric', nargs='?', choices=tables, help='Select output as CSV for host_metric or host_metric_summary_monthly'
+        )
+        parser.add_argument('--tarball', action='store_true', help=f'Package CSV files into a tar with up to {PREFERRED_ROW_COUNT} rows')
+        parser.add_argument('--anonymized', type=str, help='Anonymize hostnames with provided salt')
+        parser.add_argument('--rows_per_file', type=int, help='Package CSV files into a tar, splitting each table into files of this many rows')

    def handle(self, *args, **options):
        since = options.get('since')
        until = options.get('until')

-        if since is None and until is None:
-            print("No Arguments received")
-            return None
-
        if since is not None and since.tzinfo is None:
            since = since.replace(tzinfo=datetime.timezone.utc)

@@ -33,17 +203,39 @@ class Command(BaseCommand):
        if until is not None:
            filter_kwargs['last_automation__lte'] = until

-        result = HostMetric.objects.filter(**filter_kwargs)
+        filter_kwargs_host_metrics_summary = {}
+        if since is not None:
+            filter_kwargs_host_metrics_summary['date__gte'] = since
+        if until is not None:
+            filter_kwargs_host_metrics_summary['date__lte'] = until
+
+        host_metric_row_count = HostMetric.objects.filter(**filter_kwargs).count()
+        host_metric_summary_monthly_row_count = HostMetricSummaryMonthly.objects.filter(**filter_kwargs_host_metrics_summary).count()
+
+        if (host_metric_row_count > PREFERRED_ROW_COUNT or host_metric_summary_monthly_row_count > PREFERRED_ROW_COUNT) and (
+            not options.get('rows_per_file') or options.get('rows_per_file') > PREFERRED_ROW_COUNT
+        ):
+            print(
+                f"HostMetric / HostMetricSummaryMonthly rows exceed the allowable limit of {PREFERRED_ROW_COUNT}. "
+                f"Set --rows_per_file {PREFERRED_ROW_COUNT} "
+                f"to split the rows in chunks of {PREFERRED_ROW_COUNT}"
+            )
+            return

        # if --json flag is set, output the result in json format
        if options['json']:
-            list_of_queryset = list(result.values('hostname', 'first_automation', 'last_automation'))
-            json_result = json.dumps(list_of_queryset, cls=DjangoJSONEncoder)
-            print(json_result)
+            self.output_json(options, filter_kwargs)
+        elif options['csv']:
+            self.output_csv(options, filter_kwargs)
+        elif options['tarball']:
+            self.output_tarball(options, filter_kwargs, host_metric_row_count, host_metric_summary_monthly_row_count)
+        elif options['rows_per_file']:
+            self.output_rows_per_file(options, filter_kwargs, host_metric_row_count, host_metric_summary_monthly_row_count)

        # --json flag is not set, output in plain text
        else:
-            print(f"Total Number of hosts automated: {len(result)}")
+            print(f"Total Number of hosts automated: {host_metric_row_count}")
+            result = HostMetric.objects.filter(**filter_kwargs)
            for item in result:
                print(
                    "Hostname : {hostname} | first_automation : {first_automation} | last_automation : {last_automation}".format(