CLI for host usage collection

This commit is contained in:
Aparna Karve 2023-02-14 16:53:56 -08:00 committed by John Westcott IV
parent 88bb6e5a6a
commit 311cea5a4a

View File

@ -1,26 +1,196 @@
from django.core.management.base import BaseCommand
import datetime
from django.core.serializers.json import DjangoJSONEncoder
from awx.main.models.inventory import HostMetric
from awx.main.models.inventory import HostMetric, HostMetricSummaryMonthly
from awx.main.analytics.collectors import config
from awx.main.utils.encryption import get_encryption_key, Fernet256
from django.utils.encoding import smart_str, smart_bytes
import base64
import json
import sys
import tempfile
import tarfile
import pandas as pd
PREFERRED_ROW_COUNT = 500000
class Command(BaseCommand):
help = 'This is for offline licensing usage'
def host_metric_queryset(self, result, offset=0, limit=PREFERRED_ROW_COUNT):
list_of_queryset = list(
result.values(
'id',
'hostname',
'first_automation',
'last_automation',
'last_deleted',
'automated_counter',
'deleted_counter',
'deleted',
'used_in_inventories',
).order_by('first_automation')[offset : offset + limit]
)
return list_of_queryset
def host_metric_summary_monthly_queryset(self, result, offset=0, limit=PREFERRED_ROW_COUNT):
list_of_queryset = list(
result.values(
'id',
'date',
'license_consumed',
'license_capacity',
'hosts_added',
'hosts_deleted',
'indirectly_managed_hosts',
).order_by(
'date'
)[offset : offset + limit]
)
return list_of_queryset
def paginated_df(self, options, type, filter_kwargs, offset=0, limit=PREFERRED_ROW_COUNT):
list_of_queryset = []
if type == 'host_metric':
result = HostMetric.objects.filter(**filter_kwargs)
list_of_queryset = self.host_metric_queryset(result, offset, limit)
elif type == 'host_metric_summary_monthly':
result = HostMetricSummaryMonthly.objects.filter(**filter_kwargs)
list_of_queryset = self.host_metric_summary_monthly_queryset(result, offset, limit)
df = pd.DataFrame(list_of_queryset)
if options['anonymized'] and 'hostname' in df.columns:
key = get_encryption_key('hostname', options.get('anonymized'))
df['hostname'] = df.apply(lambda x: self.obfuscated_hostname(key, x['hostname']), axis=1)
return df
def obfuscated_hostname(self, secret_sauce, hostname):
return self.encrypt_name(secret_sauce, hostname)
def whole_page_count(self, row_count, rows_per_file):
whole_pages = int(row_count / rows_per_file)
partial_page = row_count % rows_per_file
if partial_page:
whole_pages += 1
return whole_pages
def csv_for_tar(self, options, temp_dir, type, filter_kwargs, index=1, offset=0, rows_per_file=PREFERRED_ROW_COUNT):
df = self.paginated_df(options, type, filter_kwargs, offset, rows_per_file)
csv_file = f'{temp_dir}/{type}{index}.csv'
arcname_file = f'{type}{index}.csv'
df.to_csv(csv_file, index=False)
return csv_file, arcname_file
def config_for_tar(self, options, temp_dir):
config_json = json.dumps(config(options.get('since')))
config_file = f'{temp_dir}/config.json'
arcname_file = 'config.json'
with open(config_file, 'w') as f:
f.write(config_json)
return config_file, arcname_file
def encrypt_name(self, key, value):
value = smart_str(value)
f = Fernet256(key)
encrypted = f.encrypt(smart_bytes(value))
b64data = smart_str(base64.b64encode(encrypted))
tokens = ['$encrypted', 'UTF8', 'AESCBC', b64data]
return '$'.join(tokens)
def decrypt_name(self, encryption_key, value):
raw_data = value[len('$encrypted$') :]
# If the encrypted string contains a UTF8 marker, discard it
utf8 = raw_data.startswith('UTF8$')
if utf8:
raw_data = raw_data[len('UTF8$') :]
algo, b64data = raw_data.split('$', 1)
if algo != 'AESCBC':
raise ValueError('unsupported algorithm: %s' % algo)
encrypted = base64.b64decode(b64data)
f = Fernet256(encryption_key)
value = f.decrypt(encrypted)
return smart_str(value)
def output_json(self, options, filter_kwargs):
if not options.get('json') or options.get('json') == 'host_metric':
result = HostMetric.objects.filter(**filter_kwargs)
list_of_queryset = self.host_metric_queryset(result)
elif options.get('json') == 'host_metric_summary_monthly':
result = HostMetricSummaryMonthly.objects.filter(**filter_kwargs)
list_of_queryset = self.host_metric_summary_monthly_queryset(result)
json_result = json.dumps(list_of_queryset, cls=DjangoJSONEncoder)
print(json_result)
def output_csv(self, options, filter_kwargs):
with tempfile.TemporaryDirectory() as temp_dir:
if not options.get('csv') or options.get('csv') == 'host_metric':
csv_file, _arcname_file = self.csv_for_tar(options, temp_dir, 'host_metric', filter_kwargs)
elif options.get('csv') == 'host_metric_summary_monthly':
csv_file, _arcname_file = self.csv_for_tar(options, temp_dir, 'host_metric_summary_monthly', filter_kwargs)
with open(csv_file) as f:
sys.stdout.write(f.read())
def output_tarball(self, options, filter_kwargs, host_metric_row_count, host_metric_summary_monthly_row_count):
tar = tarfile.open("./host_metrics.tar.gz", "w:gz")
with tempfile.TemporaryDirectory() as temp_dir:
if host_metric_row_count:
csv_file, arcname_file = self.csv_for_tar(options, temp_dir, 'host_metric', filter_kwargs)
tar.add(csv_file, arcname=arcname_file)
if host_metric_summary_monthly_row_count:
csv_file, arcname_file = self.csv_for_tar(options, temp_dir, 'host_metric_summary_monthly', filter_kwargs)
tar.add(csv_file, arcname=arcname_file)
config_file, arcname_file = self.config_for_tar(options, temp_dir)
tar.add(config_file, arcname=arcname_file)
tar.close()
def output_rows_per_file(self, options, filter_kwargs, host_metric_row_count, host_metric_summary_monthly_row_count):
rows_per_file = options.get('rows_per_file', PREFERRED_ROW_COUNT)
tar = tarfile.open("./host_metrics.tar.gz", "w:gz")
host_metric_whole_pages = self.whole_page_count(host_metric_row_count, rows_per_file)
host_metric_summary_monthly_whole_pages = self.whole_page_count(host_metric_summary_monthly_row_count, rows_per_file)
with tempfile.TemporaryDirectory() as temp_dir:
for index in range(host_metric_whole_pages):
offset = index * rows_per_file
csv_file, arcname_file = self.csv_for_tar(options, temp_dir, 'host_metric', filter_kwargs, index + 1, offset, rows_per_file)
tar.add(csv_file, arcname=arcname_file)
for index in range(host_metric_summary_monthly_whole_pages):
offset = index * rows_per_file
csv_file, arcname_file = self.csv_for_tar(options, temp_dir, 'host_metric_summary_monthly', filter_kwargs, index + 1, offset, rows_per_file)
tar.add(csv_file, arcname=arcname_file)
config_file, arcname_file = self.config_for_tar(options, temp_dir)
tar.add(config_file, arcname=arcname_file)
tar.close()
def add_arguments(self, parser):
parser.add_argument('--since', type=datetime.datetime.fromisoformat, help='Start Date in ISO format YYYY-MM-DD')
parser.add_argument('--until', type=datetime.datetime.fromisoformat, help='End Date in ISO format YYYY-MM-DD')
parser.add_argument('--json', action='store_true', help='Select output as JSON')
parser.add_argument('--json', type=str, const='host_metric', nargs='?', help='Select output as JSON for host_metric or host_metric_summary_monthly')
parser.add_argument('--csv', type=str, const='host_metric', nargs='?', help='Select output as CSV for host_metric or host_metric_summary_monthly')
parser.add_argument('--tarball', action='store_true', help=f'Package CSV files into a tar with upto {PREFERRED_ROW_COUNT} rows')
parser.add_argument('--anonymized', type=str, help='Anonymize hostnames with provided salt')
parser.add_argument('--rows_per_file', type=int, help=f'Split rows in chunks of {PREFERRED_ROW_COUNT}')
def handle(self, *args, **options):
since = options.get('since')
until = options.get('until')
if since is None and until is None:
print("No Arguments received")
return None
if since is not None and since.tzinfo is None:
since = since.replace(tzinfo=datetime.timezone.utc)
@ -33,17 +203,39 @@ class Command(BaseCommand):
if until is not None:
filter_kwargs['last_automation__lte'] = until
result = HostMetric.objects.filter(**filter_kwargs)
filter_kwargs_host_metrics_summary = {}
if since is not None:
filter_kwargs_host_metrics_summary['date__gte'] = since
if until is not None:
filter_kwargs_host_metrics_summary['date__lte'] = until
host_metric_row_count = HostMetric.objects.filter(**filter_kwargs).count()
host_metric_summary_monthly_row_count = HostMetricSummaryMonthly.objects.filter(**filter_kwargs_host_metrics_summary).count()
if (host_metric_row_count > PREFERRED_ROW_COUNT or host_metric_summary_monthly_row_count > PREFERRED_ROW_COUNT) and (
not options.get('rows_per_file') or options.get('rows_per_file') > PREFERRED_ROW_COUNT
):
print(
f"HostMetric / HostMetricSummaryMonthly rows exceed the allowable limit of {PREFERRED_ROW_COUNT}. "
f"Set --rows_per_file {PREFERRED_ROW_COUNT} "
f"to split the rows in chunks of {PREFERRED_ROW_COUNT}"
)
return
# if --json flag is set, output the result in json format
if options['json']:
list_of_queryset = list(result.values('hostname', 'first_automation', 'last_automation'))
json_result = json.dumps(list_of_queryset, cls=DjangoJSONEncoder)
print(json_result)
self.output_json(options, filter_kwargs)
elif options['csv']:
self.output_csv(options, filter_kwargs)
elif options['tarball']:
self.output_tarball(options, filter_kwargs, host_metric_row_count, host_metric_summary_monthly_row_count)
elif options['rows_per_file']:
self.output_rows_per_file(options, filter_kwargs, host_metric_row_count, host_metric_summary_monthly_row_count)
# --json flag is not set, output in plain text
else:
print(f"Total Number of hosts automated: {len(result)}")
print(f"Total Number of hosts automated: {host_metric_row_count}")
result = HostMetric.objects.filter(**filter_kwargs)
for item in result:
print(
"Hostname : {hostname} | first_automation : {first_automation} | last_automation : {last_automation}".format(