# awx/awx/main/management/commands/host_metric.py

from django.core.management.base import BaseCommand
import datetime
from django.core.serializers.json import DjangoJSONEncoder
from awx.main.models.inventory import HostMetric, HostMetricSummaryMonthly
from awx.main.analytics.collectors import config
import json
import sys
import tempfile
import tarfile
import csv

# Default number of rows per CSV file in the tarball export; also the largest
# value accepted for --rows_per_file.
CSV_PREFERRED_ROW_COUNT = 500_000
# Default number of rows fetched from the database per query.
BATCHED_FETCH_COUNT = 10_000


class Command(BaseCommand):
    """Export host metric data for offline licensing usage.

    Depending on the flags given, rows from ``HostMetric`` and/or
    ``HostMetricSummaryMonthly`` are printed as plain text, JSON or CSV, or
    packaged as CSV files (plus a ``config.json``) into
    ``./host_metrics.tar.gz``.
    """

    help = 'This is for offline licensing usage'

    @staticmethod
    def _normalized_since(options):
        """Return the ``since`` option as an aware datetime (UTC if naive), or None."""
        since = options.get('since')
        if since is not None and since.tzinfo is None:
            since = since.replace(tzinfo=datetime.timezone.utc)
        return since

    @staticmethod
    def _filter_kwargs_for(type, since):
        """Build the queryset filter matching the export ``type`` for a ``since`` cutoff.

        ``host_metric_summary_monthly`` rows are filtered on ``date``; anything
        else is filtered on ``last_automation``. No cutoff means no filter.
        """
        if since is None:
            return {}
        if type == 'host_metric_summary_monthly':
            return {'date__gte': since}
        return {'last_automation__gte': since}

    def host_metric_queryset(self, result, offset=0, limit=BATCHED_FETCH_COUNT):
        """Return one page of host metric rows as a list of dicts.

        :param result: queryset to page through.
        :param offset: index of the first row of the page.
        :param limit: maximum number of rows in the page.
        """
        page = result.values(
            'id',
            'hostname',
            'first_automation',
            'last_automation',
            'last_deleted',
            'automated_counter',
            'deleted_counter',
            'deleted',
            'used_in_inventories',
            # 'id' breaks ties so consecutive offset-based pages neither skip
            # nor repeat rows sharing the same first_automation value.
        ).order_by('first_automation', 'id')[offset : offset + limit]

        return list(page)

    def host_metric_summary_monthly_queryset(self, result, offset=0, limit=BATCHED_FETCH_COUNT):
        """Return one page of monthly summary rows as a list of dicts.

        :param result: queryset to page through.
        :param offset: index of the first row of the page.
        :param limit: maximum number of rows in the page.
        """
        page = result.values(
            'id',
            'date',
            'license_consumed',
            'license_capacity',
            'hosts_added',
            'hosts_deleted',
            'indirectly_managed_hosts',
            # 'id' keeps the ordering deterministic for offset-based paging.
        ).order_by('date', 'id')[offset : offset + limit]

        return list(page)

    def paginated_db_retrieval(self, type, filter_kwargs, rows_per_file):
        """Yield successive non-empty pages of ``rows_per_file`` rows for ``type``.

        Iteration stops at the first empty page; an unknown ``type`` yields
        nothing.
        """
        offset = 0
        while True:
            rows = self.controlled_db_retrieval(type, filter_kwargs, offset, rows_per_file)
            if not rows:
                break
            yield rows
            offset += len(rows)

    def controlled_db_retrieval(self, type, filter_kwargs, offset=0, fetch_count=BATCHED_FETCH_COUNT):
        """Fetch a single page of rows for ``type``.

        :param type: ``'host_metric'`` or ``'host_metric_summary_monthly'``.
        :param filter_kwargs: keyword filters applied to the model's queryset.
        :param offset: index of the first row to fetch.
        :param fetch_count: maximum number of rows to fetch.
        :returns: list of row dicts, or ``None`` for an unknown ``type``.
        """
        if type == 'host_metric':
            return self.host_metric_queryset(HostMetric.objects.filter(**filter_kwargs), offset, fetch_count)
        if type == 'host_metric_summary_monthly':
            return self.host_metric_summary_monthly_queryset(HostMetricSummaryMonthly.objects.filter(**filter_kwargs), offset, fetch_count)
        return None

    def write_to_csv(self, csv_file, list_of_queryset, always_header, first_write=False, mode='a'):
        """Write row dicts to ``csv_file``.

        :param csv_file: path of the file to write.
        :param list_of_queryset: list of dicts; the first dict's keys define the columns.
        :param always_header: write the header row unconditionally.
        :param first_write: write the header row even if ``always_header`` is false.
        :param mode: file mode passed to ``open`` (append by default).
        """
        fieldnames = list(list_of_queryset[0].keys()) if list_of_queryset else []
        # Explicit UTF-8 so the export does not depend on the process locale.
        with open(csv_file, mode, newline='', encoding='utf-8') as output_file:
            try:
                writer = csv.DictWriter(output_file, fieldnames)
                if always_header or first_write:
                    writer.writeheader()
                writer.writerows(list_of_queryset)
            except Exception as e:
                # Best effort: report and continue. The message goes to stderr
                # so it cannot end up inside CSV/JSON written to stdout.
                print(e, file=sys.stderr)

    def csv_for_tar(self, temp_dir, type, filter_kwargs, rows_per_file, always_header=True):
        """Write one CSV file per database page and yield ``(path, arcname)`` for each.

        Files are named ``<type><n>.csv`` with ``n`` starting at 1. The first
        file always gets a header; later files only when ``always_header`` is true.
        """
        pages = self.paginated_db_retrieval(type, filter_kwargs, rows_per_file)
        for file_number, rows in enumerate(pages, start=1):
            arcname_file = f'{type}{file_number}.csv'
            csv_file = f'{temp_dir}/{arcname_file}'

            self.write_to_csv(csv_file, rows, always_header, file_number == 1, 'w')
            yield csv_file, arcname_file

    def csv_for_tar_batched_fetch(self, temp_dir, type, filter_kwargs, rows_per_file, always_header=True):
        """Build CSV files of ``rows_per_file`` rows using smaller database fetches.

        Rows are fetched at most ``BATCHED_FETCH_COUNT`` at a time and appended
        to the current file until it holds ``rows_per_file`` rows, at which
        point ``(path, arcname)`` is yielded and the next file is started. A
        final, partially filled file is yielded as well. When ``always_header``
        is true every file starts with a header row.
        """
        csv_iteration = 1
        offset = 0
        rows_in_file = 0
        csv_file = arcname_file = None

        while True:
            # Never fetch more than what still fits into the current file.
            to_fetch = min(BATCHED_FETCH_COUNT, rows_per_file - rows_in_file)
            rows = self.controlled_db_retrieval(type, filter_kwargs, offset, to_fetch)
            if not rows:
                break

            arcname_file = f'{type}{csv_iteration}.csv'
            csv_file = f'{temp_dir}/{arcname_file}'
            starting_file = rows_in_file == 0
            self.write_to_csv(csv_file, rows, always_header and starting_file, mode='w' if starting_file else 'a')

            # Advance by what was actually returned, not by what was requested.
            offset += len(rows)
            rows_in_file += len(rows)

            if rows_in_file >= rows_per_file:
                yield csv_file, arcname_file
                rows_in_file = 0
                csv_iteration += 1

        if rows_in_file:
            yield csv_file, arcname_file

    def config_for_tar(self, options, temp_dir):
        """Dump the analytics ``config`` data to ``config.json`` in ``temp_dir``.

        :returns: ``(path, arcname)`` of the written file.
        """
        arcname_file = 'config.json'
        config_file = f'{temp_dir}/{arcname_file}'
        with open(config_file, 'w', encoding='utf-8') as f:
            f.write(json.dumps(config(options.get('since'))))
        return config_file, arcname_file

    def output_json(self, options, filter_kwargs):
        """Print the rows of the type selected by ``--json`` as JSON.

        NOTE(review): one JSON array is printed per batch of
        ``BATCHED_FETCH_COUNT`` rows, and values are strings because the rows
        take a round trip through CSV; kept as is for compatibility.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            for csv_file, _ in self.csv_for_tar(temp_dir, options.get('json', 'host_metric'), filter_kwargs, BATCHED_FETCH_COUNT, True):
                with open(csv_file, newline='', encoding='utf-8') as f:
                    rows = list(csv.DictReader(f))
                print(json.dumps(rows, cls=DjangoJSONEncoder))

    def output_csv(self, options, filter_kwargs):
        """Write the rows of the type selected by ``--csv`` to stdout as CSV with a single header."""
        with tempfile.TemporaryDirectory() as temp_dir:
            for csv_file, _ in self.csv_for_tar(temp_dir, options.get('csv', 'host_metric'), filter_kwargs, BATCHED_FETCH_COUNT, False):
                # newline='' passes the csv module's line endings through unchanged.
                with open(csv_file, newline='', encoding='utf-8') as f:
                    sys.stdout.write(f.read())

    def output_tarball(self, options, filter_kwargs, summary_filter_kwargs=None):
        """Package host metric and monthly summary CSVs plus ``config.json`` into ``./host_metrics.tar.gz``.

        :param options: parsed command options (``rows_per_file``, ``since``).
        :param filter_kwargs: filters for the ``host_metric`` export.
        :param summary_filter_kwargs: filters for the
            ``host_metric_summary_monthly`` export. When omitted they are
            derived from ``options['since']``; the host metric filters cannot
            be reused because they reference ``last_automation``.
        """
        rows_per_file = options['rows_per_file'] or CSV_PREFERRED_ROW_COUNT
        if summary_filter_kwargs is None:
            summary_filter_kwargs = self._filter_kwargs_for('host_metric_summary_monthly', self._normalized_since(options))

        # Files larger than a single fetch are assembled from several fetches.
        if rows_per_file <= BATCHED_FETCH_COUNT:
            csv_function = self.csv_for_tar
        else:
            csv_function = self.csv_for_tar_batched_fetch

        exports = (
            ('host_metric', filter_kwargs),
            ('host_metric_summary_monthly', summary_filter_kwargs),
        )

        # The context managers close the archive and remove the temporary
        # directory even when an export step raises.
        with tarfile.open("./host_metrics.tar.gz", "w:gz") as tar, tempfile.TemporaryDirectory() as temp_dir:
            for export_type, export_filter in exports:
                for csv_file, arcname_file in csv_function(temp_dir, export_type, export_filter, rows_per_file, True):
                    tar.add(csv_file, arcname=arcname_file)

            config_file, arcname_file = self.config_for_tar(options, temp_dir)
            tar.add(config_file, arcname=arcname_file)

    def add_arguments(self, parser):
        """Register the command line options of the command."""
        parser.add_argument('--since', type=datetime.datetime.fromisoformat, help='Start Date in ISO format YYYY-MM-DD')
        parser.add_argument('--json', type=str, const='host_metric', nargs='?', help='Select output as JSON for host_metric or host_metric_summary_monthly')
        parser.add_argument('--csv', type=str, const='host_metric', nargs='?', help='Select output as CSV for host_metric or host_metric_summary_monthly')
        parser.add_argument('--tarball', action='store_true', help=f'Package CSV files into a tar with upto {CSV_PREFERRED_ROW_COUNT} rows')
        parser.add_argument('--rows_per_file', type=int, help=f'Split rows in chunks of {CSV_PREFERRED_ROW_COUNT}')

    def handle(self, *args, **options):
        """Validate the options and dispatch to the selected output format.

        Precedence is ``--json``, ``--csv``, ``--tarball``; with none of them
        set, up to ``BATCHED_FETCH_COUNT`` hosts are printed as plain text.
        """
        since = self._normalized_since(options)
        filter_kwargs = self._filter_kwargs_for('host_metric', since)

        rows_per_file = options['rows_per_file']
        if rows_per_file and rows_per_file > CSV_PREFERRED_ROW_COUNT:
            print(f"rows_per_file exceeds the allowable limit of {CSV_PREFERRED_ROW_COUNT}.")
            return
        if rows_per_file and rows_per_file < 0:
            # A negative page size would otherwise reach queryset slicing.
            print("rows_per_file must not be negative.")
            return

        if options['json']:
            # Filter on the field that exists on the selected model.
            self.output_json(options, self._filter_kwargs_for(options['json'], since))
        elif options['csv']:
            self.output_csv(options, self._filter_kwargs_for(options['csv'], since))
        elif options['tarball']:
            self.output_tarball(options, filter_kwargs, self._filter_kwargs_for('host_metric_summary_monthly', since))
        else:
            # No output format selected: print a plain text listing.
            print(f"Printing up to {BATCHED_FETCH_COUNT} automated hosts:")
            result = HostMetric.objects.filter(**filter_kwargs)
            for item in self.host_metric_queryset(result, 0, BATCHED_FETCH_COUNT):
                print(
                    "Hostname : {hostname} | first_automation : {first_automation} | last_automation : {last_automation}".format(
                        hostname=item['hostname'], first_automation=item['first_automation'], last_automation=item['last_automation']
                    )
                )