mirror of
https://github.com/ansible/awx.git
synced 2026-03-01 08:48:46 -03:30
Move back to less frequent collections, and split large event tables
This should ensure we stay under 100MB at all times.
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
import io
|
||||||
import os
|
import os
|
||||||
import os.path
|
import os.path
|
||||||
import platform
|
import platform
|
||||||
@@ -227,21 +228,58 @@ def query_info(since, collection_type, **kwargs):
|
|||||||
return query_info
|
return query_info
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
The event table can be *very* large, and we have a 100MB upload limit.
|
||||||
|
|
||||||
|
Split large table dumps at dump time into a series of files.
|
||||||
|
'''
|
||||||
|
MAX_TABLE_SIZE = 200 * 1048576
|
||||||
|
|
||||||
|
|
||||||
|
class FileSplitter(io.StringIO):
|
||||||
|
def __init__(self, filespec=None, *args, **kwargs):
|
||||||
|
self.filespec = filespec
|
||||||
|
self.files = []
|
||||||
|
self.currentfile = None
|
||||||
|
self.header = None
|
||||||
|
self.counter = 0
|
||||||
|
self.cycle_file()
|
||||||
|
|
||||||
|
def cycle_file(self):
|
||||||
|
if self.currentfile:
|
||||||
|
self.currentfile.close()
|
||||||
|
self.counter = 0
|
||||||
|
fname = '{}_split{}'.format(self.filespec, len(self.files))
|
||||||
|
self.currentfile = open(fname, 'w', encoding='utf-8')
|
||||||
|
self.files.append(fname)
|
||||||
|
if self.header:
|
||||||
|
self.currentfile.write('{}\n'.format(self.header))
|
||||||
|
|
||||||
|
def file_list(self):
|
||||||
|
self.currentfile.close()
|
||||||
|
# Check for an empty dump
|
||||||
|
if len(self.header) + 1 == self.counter:
|
||||||
|
os.remove(self.files[-1])
|
||||||
|
self.files = self.files[:-1]
|
||||||
|
# If we only have one file, remove the suffix
|
||||||
|
if len(self.files) == 1:
|
||||||
|
os.rename(self.files[0],self.files[0].replace('_split0',''))
|
||||||
|
return self.files
|
||||||
|
|
||||||
|
def write(self, s):
|
||||||
|
if not self.header:
|
||||||
|
self.header = s[0:s.index('\n')]
|
||||||
|
self.counter += self.currentfile.write(s)
|
||||||
|
if self.counter >= MAX_TABLE_SIZE:
|
||||||
|
self.cycle_file()
|
||||||
|
|
||||||
|
|
||||||
def _copy_table(table, query, path):
|
def _copy_table(table, query, path):
|
||||||
file_path = os.path.join(path, table + '_table.csv')
|
file_path = os.path.join(path, table + '_table.csv')
|
||||||
file = open(file_path, 'w', encoding='utf-8')
|
file = FileSplitter(filespec=file_path)
|
||||||
with connection.cursor() as cursor:
|
with connection.cursor() as cursor:
|
||||||
cursor.copy_expert(query, file)
|
cursor.copy_expert(query, file)
|
||||||
file.close()
|
return file.file_list()
|
||||||
# Ensure we actually dumped data, and not just headers
|
|
||||||
file = open(file_path, 'r', encoding='utf-8')
|
|
||||||
file.readline()
|
|
||||||
data = file.readline()
|
|
||||||
file.close()
|
|
||||||
if not data:
|
|
||||||
os.remove(file_path)
|
|
||||||
return None
|
|
||||||
return file_path
|
|
||||||
|
|
||||||
|
|
||||||
@register('events_table', '1.1', format='csv', description=_('Automation task records'), expensive=True)
|
@register('events_table', '1.1', format='csv', description=_('Automation task records'), expensive=True)
|
||||||
|
|||||||
@@ -90,6 +90,15 @@ def gather(dest=None, module=None, subset = None, since = None, until = now(), c
|
|||||||
:param module: the module to search for registered analytic collector
|
:param module: the module to search for registered analytic collector
|
||||||
functions; defaults to awx.main.analytics.collectors
|
functions; defaults to awx.main.analytics.collectors
|
||||||
"""
|
"""
|
||||||
|
def _write_manifest(destdir, manifest):
|
||||||
|
path = os.path.join(destdir, 'manifest.json')
|
||||||
|
with open(path, 'w', encoding='utf-8') as f:
|
||||||
|
try:
|
||||||
|
json.dump(manifest, f)
|
||||||
|
except Exception:
|
||||||
|
f.close()
|
||||||
|
os.remove(f.name)
|
||||||
|
logger.exception("Could not generate manifest.json")
|
||||||
|
|
||||||
last_run = since or settings.AUTOMATION_ANALYTICS_LAST_GATHER or (now() - timedelta(weeks=4))
|
last_run = since or settings.AUTOMATION_ANALYTICS_LAST_GATHER or (now() - timedelta(weeks=4))
|
||||||
logger.debug("Last analytics run was: {}".format(settings.AUTOMATION_ANALYTICS_LAST_GATHER))
|
logger.debug("Last analytics run was: {}".format(settings.AUTOMATION_ANALYTICS_LAST_GATHER))
|
||||||
@@ -103,10 +112,12 @@ def gather(dest=None, module=None, subset = None, since = None, until = now(), c
|
|||||||
return
|
return
|
||||||
|
|
||||||
collector_list = []
|
collector_list = []
|
||||||
if module is None:
|
if module:
|
||||||
|
collector_module = module
|
||||||
|
else:
|
||||||
from awx.main.analytics import collectors
|
from awx.main.analytics import collectors
|
||||||
module = collectors
|
collector_module = collectors
|
||||||
for name, func in inspect.getmembers(module):
|
for name, func in inspect.getmembers(collector_module):
|
||||||
if (
|
if (
|
||||||
inspect.isfunction(func) and
|
inspect.isfunction(func) and
|
||||||
hasattr(func, '__awx_analytics_key__') and
|
hasattr(func, '__awx_analytics_key__') and
|
||||||
@@ -116,10 +127,13 @@ def gather(dest=None, module=None, subset = None, since = None, until = now(), c
|
|||||||
|
|
||||||
manifest = dict()
|
manifest = dict()
|
||||||
dest = dest or tempfile.mkdtemp(prefix='awx_analytics')
|
dest = dest or tempfile.mkdtemp(prefix='awx_analytics')
|
||||||
|
gather_dir = os.path.join(dest, 'stage')
|
||||||
|
os.mkdir(gather_dir, 0o700)
|
||||||
|
num_splits = 1
|
||||||
for name, func in collector_list:
|
for name, func in collector_list:
|
||||||
if func.__awx_analytics_type__ == 'json':
|
if func.__awx_analytics_type__ == 'json':
|
||||||
key = func.__awx_analytics_key__
|
key = func.__awx_analytics_key__
|
||||||
path = '{}.json'.format(os.path.join(dest, key))
|
path = '{}.json'.format(os.path.join(gather_dir, key))
|
||||||
with open(path, 'w', encoding='utf-8') as f:
|
with open(path, 'w', encoding='utf-8') as f:
|
||||||
try:
|
try:
|
||||||
json.dump(func(last_run, collection_type=collection_type, until=until), f)
|
json.dump(func(last_run, collection_type=collection_type, until=until), f)
|
||||||
@@ -131,8 +145,11 @@ def gather(dest=None, module=None, subset = None, since = None, until = now(), c
|
|||||||
elif func.__awx_analytics_type__ == 'csv':
|
elif func.__awx_analytics_type__ == 'csv':
|
||||||
key = func.__awx_analytics_key__
|
key = func.__awx_analytics_key__
|
||||||
try:
|
try:
|
||||||
if func(last_run, full_path=dest, until=until):
|
files = func(last_run, full_path=gather_dir, until=until)
|
||||||
|
if files:
|
||||||
manifest['{}.csv'.format(key)] = func.__awx_analytics_version__
|
manifest['{}.csv'.format(key)] = func.__awx_analytics_version__
|
||||||
|
if len(files) > num_splits:
|
||||||
|
num_splits = len(files)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Could not generate metric {}.csv".format(key))
|
logger.exception("Could not generate metric {}.csv".format(key))
|
||||||
|
|
||||||
@@ -145,11 +162,12 @@ def gather(dest=None, module=None, subset = None, since = None, until = now(), c
|
|||||||
# Always include config.json if we're using our collectors
|
# Always include config.json if we're using our collectors
|
||||||
if 'config.json' not in manifest.keys() and not module:
|
if 'config.json' not in manifest.keys() and not module:
|
||||||
from awx.main.analytics import collectors
|
from awx.main.analytics import collectors
|
||||||
path = '{}.json'.format(os.path.join(dest, key))
|
config = collectors.config
|
||||||
|
path = '{}.json'.format(os.path.join(gather_dir, config.__awx_analytics_key__))
|
||||||
with open(path, 'w', encoding='utf-8') as f:
|
with open(path, 'w', encoding='utf-8') as f:
|
||||||
try:
|
try:
|
||||||
json.dump(collectors.config(last_run), f)
|
json.dump(collectors.config(last_run), f)
|
||||||
manifest['config.json'] = collectors.config.__awx_analytics_version__
|
manifest['config.json'] = config.__awx_analytics_version__
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Could not generate metric {}.json".format(key))
|
logger.exception("Could not generate metric {}.json".format(key))
|
||||||
f.close()
|
f.close()
|
||||||
@@ -157,31 +175,52 @@ def gather(dest=None, module=None, subset = None, since = None, until = now(), c
|
|||||||
shutil.rmtree(dest)
|
shutil.rmtree(dest)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
path = os.path.join(dest, 'manifest.json')
|
stage_dirs = [gather_dir]
|
||||||
with open(path, 'w', encoding='utf-8') as f:
|
if num_splits > 1:
|
||||||
try:
|
for i in range(0, num_splits):
|
||||||
json.dump(manifest, f)
|
split_path = os.path.join(dest, 'split{}'.format(i))
|
||||||
except Exception:
|
os.mkdir(split_path, 0o700)
|
||||||
logger.exception("Could not generate manifest.json")
|
filtered_manifest = {}
|
||||||
f.close()
|
shutil.copy(os.path.join(gather_dir, 'config.json'), split_path)
|
||||||
os.remove(f.name)
|
filtered_manifest['config.json'] = manifest['config.json']
|
||||||
|
suffix = '_split{}'.format(i)
|
||||||
|
for file in os.listdir(gather_dir):
|
||||||
|
if file.endswith(suffix):
|
||||||
|
old_file = os.path.join(gather_dir, file)
|
||||||
|
new_filename = file.replace(suffix, '')
|
||||||
|
new_file = os.path.join(split_path, new_filename)
|
||||||
|
shutil.move(old_file, new_file)
|
||||||
|
filtered_manifest[new_filename] = manifest[new_filename]
|
||||||
|
_write_manifest(split_path, filtered_manifest)
|
||||||
|
stage_dirs.append(split_path)
|
||||||
|
|
||||||
# can't use isoformat() since it has colons, which GNU tar doesn't like
|
for item in list(manifest.keys()):
|
||||||
tarname = '_'.join([
|
if not os.path.exists(os.path.join(gather_dir, item)):
|
||||||
settings.SYSTEM_UUID,
|
manifest.pop(item)
|
||||||
until.strftime('%Y-%m-%d-%H%M%S%z')
|
_write_manifest(gather_dir, manifest)
|
||||||
])
|
|
||||||
|
tarfiles = []
|
||||||
try:
|
try:
|
||||||
tgz = shutil.make_archive(
|
for i in range(0, len(stage_dirs)):
|
||||||
os.path.join(os.path.dirname(dest), tarname),
|
stage_dir = stage_dirs[i]
|
||||||
'gztar',
|
# can't use isoformat() since it has colons, which GNU tar doesn't like
|
||||||
dest
|
tarname = '_'.join([
|
||||||
)
|
settings.SYSTEM_UUID,
|
||||||
return tgz
|
until.strftime('%Y-%m-%d-%H%M%S%z'),
|
||||||
|
str(i)
|
||||||
|
])
|
||||||
|
tgz = shutil.make_archive(
|
||||||
|
os.path.join(os.path.dirname(dest), tarname),
|
||||||
|
'gztar',
|
||||||
|
stage_dir
|
||||||
|
)
|
||||||
|
tarfiles.append(tgz)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
shutil.rmtree(stage_dir, ignore_errors = True)
|
||||||
logger.exception("Failed to write analytics archive file")
|
logger.exception("Failed to write analytics archive file")
|
||||||
finally:
|
finally:
|
||||||
shutil.rmtree(dest)
|
shutil.rmtree(dest, ignore_errors = True)
|
||||||
|
return tarfiles
|
||||||
|
|
||||||
|
|
||||||
def ship(path):
|
def ship(path):
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
import logging
|
import logging
|
||||||
|
|
||||||
from awx.main.analytics import gather, ship
|
from awx.main.analytics import gather, ship
|
||||||
|
from dateutil import parser
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
from django.utils.timezone import now
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
@@ -15,6 +18,10 @@ class Command(BaseCommand):
|
|||||||
help='Gather analytics without shipping. Works even if analytics are disabled in settings.')
|
help='Gather analytics without shipping. Works even if analytics are disabled in settings.')
|
||||||
parser.add_argument('--ship', dest='ship', action='store_true',
|
parser.add_argument('--ship', dest='ship', action='store_true',
|
||||||
help='Enable to ship metrics to the Red Hat Cloud')
|
help='Enable to ship metrics to the Red Hat Cloud')
|
||||||
|
parser.add_argument('--since', dest='since', action='store',
|
||||||
|
help='Start date for collection')
|
||||||
|
parser.add_argument('--until', dest='until', action='store',
|
||||||
|
help='End date for collection')
|
||||||
|
|
||||||
def init_logging(self):
|
def init_logging(self):
|
||||||
self.logger = logging.getLogger('awx.main.analytics')
|
self.logger = logging.getLogger('awx.main.analytics')
|
||||||
@@ -28,11 +35,27 @@ class Command(BaseCommand):
|
|||||||
self.init_logging()
|
self.init_logging()
|
||||||
opt_ship = options.get('ship')
|
opt_ship = options.get('ship')
|
||||||
opt_dry_run = options.get('dry-run')
|
opt_dry_run = options.get('dry-run')
|
||||||
|
opt_since = options.get('since') or None
|
||||||
|
opt_until = options.get('until') or None
|
||||||
|
|
||||||
|
if opt_since:
|
||||||
|
since = parser.parse(opt_since)
|
||||||
|
else:
|
||||||
|
since = None
|
||||||
|
if opt_until:
|
||||||
|
until = parser.parse(opt_until)
|
||||||
|
else:
|
||||||
|
until = now()
|
||||||
|
|
||||||
if opt_ship and opt_dry_run:
|
if opt_ship and opt_dry_run:
|
||||||
self.logger.error('Both --ship and --dry-run cannot be processed at the same time.')
|
self.logger.error('Both --ship and --dry-run cannot be processed at the same time.')
|
||||||
return
|
return
|
||||||
tgz = gather(collection_type='manual' if not opt_dry_run else 'dry-run')
|
tgzfiles = gather(collection_type='manual' if not opt_dry_run else 'dry-run', since = since, until = until)
|
||||||
if tgz:
|
if tgzfiles:
|
||||||
self.logger.debug(tgz)
|
self.logger.debug(tgzfiles)
|
||||||
|
else:
|
||||||
|
self.logger.error('No analytics collected')
|
||||||
if opt_ship:
|
if opt_ship:
|
||||||
ship(tgz)
|
if tgzfiles:
|
||||||
|
for tgz in tgzfiles:
|
||||||
|
ship(tgz)
|
||||||
|
|||||||
@@ -357,19 +357,23 @@ def send_notifications(notification_list, job_id=None):
|
|||||||
@task(queue=get_local_queuename)
|
@task(queue=get_local_queuename)
|
||||||
def gather_analytics():
|
def gather_analytics():
|
||||||
def _gather_and_ship(subset, since, until):
|
def _gather_and_ship(subset, since, until):
|
||||||
|
tgzfiles = []
|
||||||
try:
|
try:
|
||||||
tgz = analytics.gather(subset=subset, since=since, until=until)
|
tgzfiles = analytics.gather(subset=subset, since=since, until=until)
|
||||||
# empty analytics without raising an exception is not an error
|
# empty analytics without raising an exception is not an error
|
||||||
if not tgz:
|
if not tgzfiles:
|
||||||
return True
|
return True
|
||||||
logger.info('gathered analytics: {}'.format(tgz))
|
logger.info('Gathered analytics from {} to {}: {}'.format(since, until, tgzfiles))
|
||||||
analytics.ship(tgz)
|
for tgz in tgzfiles:
|
||||||
|
analytics.ship(tgz)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception('Error gathering and sending analytics for {} to {}.'.format(since,until))
|
logger.exception('Error gathering and sending analytics for {} to {}.'.format(since,until))
|
||||||
return False
|
return False
|
||||||
finally:
|
finally:
|
||||||
if os.path.exists(tgz):
|
if tgzfiles:
|
||||||
os.remove(tgz)
|
for tgz in tgzfiles:
|
||||||
|
if os.path.exists(tgz):
|
||||||
|
os.remove(tgz)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
from awx.conf.models import Setting
|
from awx.conf.models import Setting
|
||||||
@@ -404,7 +408,7 @@ def gather_analytics():
|
|||||||
start = since
|
start = since
|
||||||
until = None
|
until = None
|
||||||
while start < gather_time:
|
while start < gather_time:
|
||||||
until = start + timedelta(minutes=20)
|
until = start + timedelta(hours = 4)
|
||||||
if (until > gather_time):
|
if (until > gather_time):
|
||||||
until = gather_time
|
until = gather_time
|
||||||
if not _gather_and_ship(incremental_collectors, since=start, until=until):
|
if not _gather_and_ship(incremental_collectors, since=start, until=until):
|
||||||
|
|||||||
@@ -39,9 +39,9 @@ def mock_valid_license():
|
|||||||
def test_gather(mock_valid_license):
|
def test_gather(mock_valid_license):
|
||||||
settings.INSIGHTS_TRACKING_STATE = True
|
settings.INSIGHTS_TRACKING_STATE = True
|
||||||
|
|
||||||
tgz = gather(module=importlib.import_module(__name__))
|
tgzfiles = gather(module=importlib.import_module(__name__))
|
||||||
files = {}
|
files = {}
|
||||||
with tarfile.open(tgz, "r:gz") as archive:
|
with tarfile.open(tgzfiles[0], "r:gz") as archive:
|
||||||
for member in archive.getmembers():
|
for member in archive.getmembers():
|
||||||
files[member.name] = archive.extractfile(member)
|
files[member.name] = archive.extractfile(member)
|
||||||
|
|
||||||
@@ -53,7 +53,8 @@ def test_gather(mock_valid_license):
|
|||||||
assert './bad_json.json' not in files.keys()
|
assert './bad_json.json' not in files.keys()
|
||||||
assert './throws_error.json' not in files.keys()
|
assert './throws_error.json' not in files.keys()
|
||||||
try:
|
try:
|
||||||
os.remove(tgz)
|
for tgz in tgzfiles:
|
||||||
|
os.remove(tgz)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user