From b1d75327e3b6bb146da8761f285f6b45608505ad Mon Sep 17 00:00:00 2001
From: Ryan Petrello
Date: Mon, 13 May 2019 15:35:53 -0400
Subject: [PATCH] add the ability to toggle DEBUG logging on dynamically

---
 awx/conf/settings.py                       | 74 +++++++++++------------
 awx/main/constants.py                      | 11 ++++
 awx/main/scheduler/task_manager.py         | 24 ++++----
 awx/main/tasks.py                          | 36 +++++------
 awx/main/tests/unit/utils/test_filters.py  | 14 -----
 awx/main/utils/db.py                       | 19 ------
 awx/main/utils/filters.py                  | 21 +------
 awx/main/utils/handlers.py                 | 27 ++++++++-
 awx/settings/defaults.py                   | 16 ++---
 9 files changed, 112 insertions(+), 130 deletions(-)

diff --git a/awx/conf/settings.py b/awx/conf/settings.py
index b301681613..350a4df049 100644
--- a/awx/conf/settings.py
+++ b/awx/conf/settings.py
@@ -24,7 +24,6 @@ from rest_framework.fields import empty, SkipField
 
 # Tower
 from awx.main.utils import encrypt_field, decrypt_field
-from awx.main.utils.db import get_tower_migration_version
 from awx.conf import settings_registry
 from awx.conf.models import Setting
 from awx.conf.migrations._reencrypt import decrypt_field as old_decrypt_field
@@ -90,45 +89,42 @@ def _ctit_db_wrapper(trans_safe=False):
                 transaction.set_rollback(False)
         yield
     except DBError:
-        if 'migrate' in sys.argv and get_tower_migration_version() < '310':
-            logger.info('Using default settings until version 3.1 migration.')
-        else:
-            # We want the _full_ traceback with the context
-            # First we get the current call stack, which constitutes the "top",
-            # it has the context up to the point where the context manager is used
-            top_stack = StringIO()
-            traceback.print_stack(file=top_stack)
-            top_lines = top_stack.getvalue().strip('\n').split('\n')
-            top_stack.close()
-            # Get "bottom" stack from the local error that happened
-            # inside of the "with" block this wraps
-            exc_type, exc_value, exc_traceback = sys.exc_info()
-            bottom_stack = StringIO()
-            traceback.print_tb(exc_traceback, file=bottom_stack)
-            bottom_lines = bottom_stack.getvalue().strip('\n').split('\n')
-            # Glue together top and bottom where overlap is found
-            bottom_cutoff = 0
-            for i, line in enumerate(bottom_lines):
-                if line in top_lines:
-                    # start of overlapping section, take overlap from bottom
-                    top_lines = top_lines[:top_lines.index(line)]
-                    bottom_cutoff = i
-                    break
-            bottom_lines = bottom_lines[bottom_cutoff:]
-            tb_lines = top_lines + bottom_lines
+        # We want the _full_ traceback with the context
+        # First we get the current call stack, which constitutes the "top",
+        # it has the context up to the point where the context manager is used
+        top_stack = StringIO()
+        traceback.print_stack(file=top_stack)
+        top_lines = top_stack.getvalue().strip('\n').split('\n')
+        top_stack.close()
+        # Get "bottom" stack from the local error that happened
+        # inside of the "with" block this wraps
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        bottom_stack = StringIO()
+        traceback.print_tb(exc_traceback, file=bottom_stack)
+        bottom_lines = bottom_stack.getvalue().strip('\n').split('\n')
+        # Glue together top and bottom where overlap is found
+        bottom_cutoff = 0
+        for i, line in enumerate(bottom_lines):
+            if line in top_lines:
+                # start of overlapping section, take overlap from bottom
+                top_lines = top_lines[:top_lines.index(line)]
+                bottom_cutoff = i
+                break
+        bottom_lines = bottom_lines[bottom_cutoff:]
+        tb_lines = top_lines + bottom_lines
 
-            tb_string = '\n'.join(
-                ['Traceback (most recent call last):'] +
-                tb_lines +
-                ['{}: {}'.format(exc_type.__name__, str(exc_value))]
-            )
-            bottom_stack.close()
-            # Log the combined stack
-            if trans_safe:
-                if 'check_migrations' not in sys.argv:
-                    logger.warning('Database settings are not available, using defaults, error:\n{}'.format(tb_string))
-            else:
-                logger.error('Error modifying something related to database settings.\n{}'.format(tb_string))
+        tb_string = '\n'.join(
+            ['Traceback (most recent call last):'] +
+            tb_lines +
+            ['{}: {}'.format(exc_type.__name__, str(exc_value))]
+        )
+        bottom_stack.close()
+        # Log the combined stack
+        if trans_safe:
+            if 'check_migrations' not in sys.argv:
+                logger.debug('Database settings are not available, using defaults, error:\n{}'.format(tb_string))
+        else:
+            logger.debug('Error modifying something related to database settings.\n{}'.format(tb_string))
     finally:
         if trans_safe and is_atomic and rollback_set:
            transaction.set_rollback(rollback_set)
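With the pre-3.1 migration special case gone, every swallowed DBError now takes the traceback-gluing path above: traceback.print_stack() supplies the frames above the context manager (the "top"), traceback.print_tb() supplies the frames from the failed with-body (the "bottom"), and the first line the two stacks share marks where to splice so no frame appears twice. A standalone sketch of the same technique; the function name and the KeyError driver are illustrative only, not part of this patch:

```python
import sys
import traceback
from io import StringIO


def glued_traceback():
    # "Top": the current call stack, from interpreter entry down to here
    # (this sketch's own frame included; AWX avoids that by capturing
    # inside the context manager instead).
    top_stack = StringIO()
    traceback.print_stack(file=top_stack)
    top_lines = top_stack.getvalue().strip('\n').split('\n')
    # "Bottom": the frames of the exception currently being handled.
    exc_type, exc_value, exc_traceback = sys.exc_info()
    bottom_stack = StringIO()
    traceback.print_tb(exc_traceback, file=bottom_stack)
    bottom_lines = bottom_stack.getvalue().strip('\n').split('\n')
    # Splice at the first shared line so overlapping frames appear once,
    # preferring the bottom copy (it ends at the failing statement).
    for i, line in enumerate(bottom_lines):
        if line in top_lines:
            top_lines = top_lines[:top_lines.index(line)]
            bottom_lines = bottom_lines[i:]
            break
    return '\n'.join(
        ['Traceback (most recent call last):'] + top_lines + bottom_lines +
        ['{}: {}'.format(exc_type.__name__, exc_value)]
    )


try:
    {}['missing']
except KeyError:
    print(glued_traceback())
```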
diff --git a/awx/main/constants.py b/awx/main/constants.py
index 58bda8710b..eea1a55820 100644
--- a/awx/main/constants.py
+++ b/awx/main/constants.py
@@ -40,3 +40,14 @@ ENV_BLACKLIST = frozenset((
     'JOB_CALLBACK_DEBUG', 'INVENTORY_HOSTVARS',
     'AWX_HOST', 'PROJECT_REVISION'
 ))
+
+# loggers that may be called in the process of emitting a log
+LOGGER_BLACKLIST = (
+    'awx.main.utils.handlers',
+    'awx.main.utils.formatters',
+    'awx.main.utils.filters',
+    'awx.main.utils.encryption',
+    'awx.main.utils.log',
+    # loggers that may be called getting logging settings
+    'awx.conf'
+)
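Hoisting the blacklist into awx.main.constants lets the external-log filter and the new handler mixin below share one list. The list exists to break recursion: if a logger that participates in emitting a record (handler, formatter, or the settings lookup) logs anything itself, every emission would trigger another one. A toy version of the guard, with a shortened list local to this sketch:

```python
import logging

# Shortened stand-in for awx.main.constants.LOGGER_BLACKLIST
LOGGER_BLACKLIST = ('awx.main.utils.handlers', 'awx.conf')


class NoRecursionFilter(logging.Filter):
    """Drop records from loggers that can fire while a handler is emitting."""

    def filter(self, record):
        # str.startswith accepts a tuple of prefixes
        return not record.name.startswith(LOGGER_BLACKLIST)


handler = logging.StreamHandler()
handler.addFilter(NoRecursionFilter())
```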
diff --git a/awx/main/scheduler/task_manager.py b/awx/main/scheduler/task_manager.py
index 6cab324b8e..e0d595559f 100644
--- a/awx/main/scheduler/task_manager.py
+++ b/awx/main/scheduler/task_manager.py
@@ -108,7 +108,7 @@ class TaskManager():
         dag = WorkflowDAG(workflow_job)
         spawn_nodes = dag.bfs_nodes_to_run()
         if spawn_nodes:
-            logger.info('Spawning jobs for %s', workflow_job.log_format)
+            logger.debug('Spawning jobs for %s', workflow_job.log_format)
         else:
             logger.debug('No nodes to spawn for %s', workflow_job.log_format)
         for spawn_node in spawn_nodes:
@@ -118,7 +118,7 @@ class TaskManager():
             job = spawn_node.unified_job_template.create_unified_job(**kv)
             spawn_node.job = job
             spawn_node.save()
-            logger.info('Spawned %s in %s for node %s', job.log_format, workflow_job.log_format, spawn_node.pk)
+            logger.debug('Spawned %s in %s for node %s', job.log_format, workflow_job.log_format, spawn_node.pk)
             can_start = True
             if isinstance(spawn_node.unified_job_template, WorkflowJobTemplate):
                 workflow_ancestors = job.get_ancestor_workflows()
@@ -178,7 +178,7 @@ class TaskManager():
             if not is_done:
                 continue
             has_failed, reason = dag.has_workflow_failed()
-            logger.info('Marking %s as %s.', workflow_job.log_format, 'failed' if has_failed else 'successful')
+            logger.debug('Marking %s as %s.', workflow_job.log_format, 'failed' if has_failed else 'successful')
             result.append(workflow_job.id)
             new_status = 'failed' if has_failed else 'successful'
             logger.debug("Transitioning {} to {} status.".format(workflow_job.log_format, new_status))
@@ -233,26 +233,26 @@ class TaskManager():
             else:
                 if type(task) is WorkflowJob:
                     task.status = 'running'
-                    logger.info('Transitioning %s to running status.', task.log_format)
+                    logger.debug('Transitioning %s to running status.', task.log_format)
                     schedule_task_manager()
                 elif not task.supports_isolation() and rampart_group.controller_id:
                     # non-Ansible jobs on isolated instances run on controller
                     task.instance_group = rampart_group.controller
                     task.execution_node = random.choice(list(rampart_group.controller.instances.all().values_list('hostname', flat=True)))
-                    logger.info('Submitting isolated {} to queue {}.'.format(
-                        task.log_format, task.instance_group.name, task.execution_node))
+                    logger.debug('Submitting isolated {} to queue {}.'.format(
+                        task.log_format, task.instance_group.name))
                 elif controller_node:
                     task.instance_group = rampart_group
                     task.execution_node = instance.hostname
                     task.controller_node = controller_node
-                    logger.info('Submitting isolated {} to queue {} controlled by {}.'.format(
-                        task.log_format, task.execution_node, controller_node))
+                    logger.debug('Submitting isolated {} to queue {} controlled by {}.'.format(
+                        task.log_format, task.execution_node, controller_node))
                 else:
                     task.instance_group = rampart_group
                     if instance is not None:
                         task.execution_node = instance.hostname
-                    logger.info('Submitting {} to <{},{}>.'.format(
-                        task.log_format, task.instance_group_id, task.execution_node))
+                    logger.debug('Submitting {} to <{},{}>.'.format(
+                        task.log_format, task.instance_group_id, task.execution_node))
                 with disable_activity_stream():
                     task.celery_task_id = str(uuid.uuid4())
                     task.save()
@@ -295,7 +295,7 @@ class TaskManager():
                 project_task.created = task.created - timedelta(seconds=1)
                 project_task.status = 'pending'
                 project_task.save()
-                logger.info(
+                logger.debug(
                     'Spawned {} as dependency of {}'.format(
                         project_task.log_format, task.log_format
                     )
@@ -309,7 +309,7 @@ class TaskManager():
                 inventory_task.created = task.created - timedelta(seconds=2)
                 inventory_task.status = 'pending'
                 inventory_task.save()
-                logger.info(
+                logger.debug(
                     'Spawned {} as dependency of {}'.format(
                         inventory_task.log_format, task.log_format
                     )
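These scheduler messages move from INFO to DEBUG so the task-manager loop stays quiet by default, which makes the cost of a suppressed call worth noticing. The %s-style calls above defer interpolation until a handler actually accepts the record, while the .format() calls build the string up front even if every handler discards it. A quick contrast, with an illustrative logger name and argument:

```python
import logging

logger = logging.getLogger('awx.main.scheduler')

# Lazy: the message is only interpolated if some handler accepts the record.
logger.debug('Spawning jobs for %s', 'workflow_job-42')

# Eager: str.format() runs before logging can decide to drop the record.
logger.debug('Spawning jobs for {}'.format('workflow_job-42'))
```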
diff --git a/awx/main/tasks.py b/awx/main/tasks.py
index d0caa8d527..e1f0f77964 100644
--- a/awx/main/tasks.py
+++ b/awx/main/tasks.py
@@ -103,7 +103,7 @@ class InvalidVirtualenvError(Exception):
 
 def dispatch_startup():
     startup_logger = logging.getLogger('awx.main.tasks')
-    startup_logger.info("Syncing Schedules")
+    startup_logger.debug("Syncing Schedules")
     for sch in Schedule.objects.all():
         try:
             sch.update_computed_fields()
@@ -189,20 +189,20 @@ def apply_cluster_membership_policies():
             # NOTE: arguable behavior: policy-list-group is not added to
             # instance's group count for consideration in minimum-policy rules
             if group_actual.instances:
-                logger.info("Policy List, adding Instances {} to Group {}".format(group_actual.instances, ig.name))
+                logger.debug("Policy List, adding Instances {} to Group {}".format(group_actual.instances, ig.name))
 
             if ig.controller_id is None:
                 actual_groups.append(group_actual)
             else:
                 # For isolated groups, _only_ apply the policy_instance_list
                 # do not add to in-memory list, so minimum rules not applied
-                logger.info('Committing instances to isolated group {}'.format(ig.name))
+                logger.debug('Committing instances to isolated group {}'.format(ig.name))
                 ig.instances.set(group_actual.instances)
 
     # Process Instance minimum policies next, since it represents a concrete lower bound to the
     # number of instances to make available to instance groups
     actual_instances = [Node(obj=i, groups=[]) for i in considered_instances if i.managed_by_policy]
-    logger.info("Total non-isolated instances:{} available for policy: {}".format(
+    logger.debug("Total non-isolated instances: {} available for policy: {}".format(
         total_instances, len(actual_instances)))
     for g in sorted(actual_groups, key=lambda x: len(x.instances)):
         policy_min_added = []
@@ -217,7 +217,7 @@ def apply_cluster_membership_policies():
                 i.groups.append(g.obj.id)
                 policy_min_added.append(i.obj.id)
         if policy_min_added:
-            logger.info("Policy minimum, adding Instances {} to Group {}".format(policy_min_added, g.obj.name))
+            logger.debug("Policy minimum, adding Instances {} to Group {}".format(policy_min_added, g.obj.name))
 
     # Finally, process instance policy percentages
     for g in sorted(actual_groups, key=lambda x: len(x.instances)):
@@ -233,7 +233,7 @@ def apply_cluster_membership_policies():
                 i.groups.append(g.obj.id)
                 policy_per_added.append(i.obj.id)
         if policy_per_added:
-            logger.info("Policy percentage, adding Instances {} to Group {}".format(policy_per_added, g.obj.name))
+            logger.debug("Policy percentage, adding Instances {} to Group {}".format(policy_per_added, g.obj.name))
 
     # Determine if any changes need to be made
     needs_change = False
@@ -242,7 +242,7 @@ def apply_cluster_membership_policies():
             needs_change = True
             break
     if not needs_change:
-        logger.info('Cluster policy no-op finished in {} seconds'.format(time.time() - started_compute))
+        logger.debug('Cluster policy no-op finished in {} seconds'.format(time.time() - started_compute))
         return
 
     # On a differential basis, apply instances to non-isolated groups
@@ -251,12 +251,12 @@ def apply_cluster_membership_policies():
             instances_to_add = set(g.instances) - set(g.prior_instances)
             instances_to_remove = set(g.prior_instances) - set(g.instances)
             if instances_to_add:
-                logger.info('Adding instances {} to group {}'.format(list(instances_to_add), g.obj.name))
+                logger.debug('Adding instances {} to group {}'.format(list(instances_to_add), g.obj.name))
                 g.obj.instances.add(*instances_to_add)
             if instances_to_remove:
-                logger.info('Removing instances {} from group {}'.format(list(instances_to_remove), g.obj.name))
+                logger.debug('Removing instances {} from group {}'.format(list(instances_to_remove), g.obj.name))
                 g.obj.instances.remove(*instances_to_remove)
-    logger.info('Cluster policy computation finished in {} seconds'.format(time.time() - started_compute))
+    logger.debug('Cluster policy computation finished in {} seconds'.format(time.time() - started_compute))
 
 
 @task(queue='tower_broadcast_all', exchange_type='fanout')
@@ -277,7 +277,7 @@ def delete_project_files(project_path):
     if os.path.exists(project_path):
         try:
             shutil.rmtree(project_path)
-            logger.info('Success removing project files {}'.format(project_path))
+            logger.debug('Success removing project files {}'.format(project_path))
         except Exception:
             logger.exception('Could not remove project directory {}'.format(project_path))
     if os.path.exists(lock_file):
@@ -372,7 +372,7 @@ def purge_old_stdout_files():
     for f in os.listdir(settings.JOBOUTPUT_ROOT):
         if os.path.getctime(os.path.join(settings.JOBOUTPUT_ROOT,f)) < nowtime - settings.LOCAL_STDOUT_EXPIRE_TIME:
             os.unlink(os.path.join(settings.JOBOUTPUT_ROOT,f))
-            logger.info("Removing {}".format(os.path.join(settings.JOBOUTPUT_ROOT,f)))
+            logger.debug("Removing {}".format(os.path.join(settings.JOBOUTPUT_ROOT,f)))
 
 
 @task(queue=get_local_queuename)
@@ -514,7 +514,7 @@ def awx_periodic_scheduler():
         try:
             job_kwargs = schedule.get_job_kwargs()
             new_unified_job = schedule.unified_job_template.create_unified_job(**job_kwargs)
-            logger.info('Spawned {} from schedule {}-{}.'.format(
+            logger.debug('Spawned {} from schedule {}-{}.'.format(
                 new_unified_job.log_format, schedule.name, schedule.pk))
 
             if invalid_license:
@@ -1264,7 +1264,7 @@ class BaseTask(object):
                 extra_update_fields['result_traceback'] = traceback.format_exc()
                 logger.exception('%s Exception occurred while running task', self.instance.log_format)
         finally:
-            logger.info('%s finished running, producing %s events.', self.instance.log_format, self.event_ct)
+            logger.debug('%s finished running, producing %s events.', self.instance.log_format, self.event_ct)
 
         try:
             self.post_run_hook(self.instance, status)
@@ -1795,8 +1795,8 @@ class RunProjectUpdate(BaseTask):
             with transaction.atomic():
                 if InventoryUpdate.objects.filter(inventory_source=inv_src,
                                                   status__in=ACTIVE_STATES).exists():
-                    logger.info('Skipping SCM inventory update for `{}` because '
-                                'another update is already active.'.format(inv_src.name))
+                    logger.debug('Skipping SCM inventory update for `{}` because '
+                                 'another update is already active.'.format(inv_src.name))
                     continue
                 local_inv_update = inv_src.create_inventory_update(
                     _eager_fields=dict(
@@ -1862,7 +1862,7 @@ class RunProjectUpdate(BaseTask):
             try:
                 instance.refresh_from_db(fields=['cancel_flag'])
                 if instance.cancel_flag:
-                    logger.info("ProjectUpdate({0}) was cancelled".format(instance.pk))
+                    logger.debug("ProjectUpdate({0}) was cancelled".format(instance.pk))
                     return
                 fcntl.flock(self.lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
                 break
@@ -2415,7 +2415,7 @@ def deep_copy_model_obj(
     model_module, model_name, obj_pk, new_obj_pk, user_pk, sub_obj_list,
     permission_check_func=None
 ):
-    logger.info('Deep copy {} from {} to {}.'.format(model_name, obj_pk, new_obj_pk))
+    logger.debug('Deep copy {} from {} to {}.'.format(model_name, obj_pk, new_obj_pk))
     from awx.api.generics import CopyAPIView
     from awx.main.signals import disable_activity_stream
     model = getattr(importlib.import_module(model_module), model_name, None)
diff --git a/awx/main/tests/unit/utils/test_filters.py b/awx/main/tests/unit/utils/test_filters.py
index 0e267047cd..54ae9c9691 100644
--- a/awx/main/tests/unit/utils/test_filters.py
+++ b/awx/main/tests/unit/utils/test_filters.py
@@ -29,20 +29,6 @@ def test_base_logging_handler_skip_log(params, logger_name, expected, dummy_log_record):
     assert filter.filter(dummy_log_record) is expected, (params, logger_name)
 
 
-@pytest.mark.parametrize('level, expect', [
-    (30, True), # warning
-    (20, False) # info
-])
-def test_log_configurable_severity(level, expect, dummy_log_record):
-    dummy_log_record.levelno = level
-    filter = ExternalLoggerEnabled(
-        enabled_flag=True,
-        enabled_loggers=['awx', 'activity_stream', 'job_events', 'system_tracking'],
-        lvl='WARNING'
-    )
-    assert filter.filter(dummy_log_record) is expect
-
-
 class Field(object):
 
     def __init__(self, name, related_model=None, __prevent_search__=None):

diff --git a/awx/main/utils/db.py b/awx/main/utils/db.py
index bd2f5db69b..f91f2d7b65 100644
--- a/awx/main/utils/db.py
+++ b/awx/main/utils/db.py
@@ -1,26 +1,7 @@
 # Copyright (c) 2017 Ansible by Red Hat
 # All Rights Reserved.
 
-# Django database
-from django.db.migrations.loader import MigrationLoader
-from django.db import connection
-
-# Python
 from itertools import chain
-import re
-
-
-def get_tower_migration_version():
-    loader = MigrationLoader(connection, ignore_no_migrations=True)
-    v = '000'
-    for app_name, migration_name in loader.applied_migrations:
-        if app_name == 'main':
-            version_captures = re.findall('^[0-9]{4}_v([0-9]{3})_', migration_name)
-            if len(version_captures) == 1:
-                migration_version = version_captures[0]
-                if migration_version > v:
-                    v = migration_version
-    return v
 
 
 def get_all_field_names(model):
diff --git a/awx/main/utils/filters.py b/awx/main/utils/filters.py
index 7cb3b58bb4..015bfd8901 100644
--- a/awx/main/utils/filters.py
+++ b/awx/main/utils/filters.py
@@ -9,12 +9,13 @@ from pyparsing import (
     ParseException,
 )
 import logging
-from logging import Filter, _nameToLevel
+from logging import Filter
 
 from django.apps import apps
 from django.db import models
 from django.conf import settings
 
+from awx.main.constants import LOGGER_BLACKLIST
 from awx.main.utils.common import get_search_fields
 
 __all__ = ['SmartFilter', 'ExternalLoggerEnabled']
@@ -49,18 +50,6 @@ class FieldFromSettings(object):
 
 class ExternalLoggerEnabled(Filter):
 
-    # Prevents recursive logging loops from swamping the server
-    LOGGER_BLACKLIST = (
-        # loggers that may be called in process of emitting a log
-        'awx.main.utils.handlers',
-        'awx.main.utils.formatters',
-        'awx.main.utils.filters',
-        'awx.main.utils.encryption',
-        'awx.main.utils.log',
-        # loggers that may be called getting logging settings
-        'awx.conf'
-    )
-
     lvl = FieldFromSettings('LOG_AGGREGATOR_LEVEL')
     enabled_loggers = FieldFromSettings('LOG_AGGREGATOR_LOGGERS')
     enabled_flag = FieldFromSettings('LOG_AGGREGATOR_ENABLED')
@@ -84,17 +73,13 @@
         True - should be logged
         """
         # Logger exceptions
-        for logger_name in self.LOGGER_BLACKLIST:
+        for logger_name in LOGGER_BLACKLIST:
             if record.name.startswith(logger_name):
                 return False
         # General enablement
         if not self.enabled_flag:
             return False
 
-        # Level enablement
-        if record.levelno < _nameToLevel[self.lvl]:
-            return False
-
         # Logger type enablement
         loggers = self.enabled_loggers
         if not loggers:

diff --git a/awx/main/utils/handlers.py b/awx/main/utils/handlers.py
index aac46966b5..17f94f799d 100644
--- a/awx/main/utils/handlers.py
+++ b/awx/main/utils/handlers.py
@@ -19,11 +19,12 @@ from django.conf import settings
 from requests_futures.sessions import FuturesSession
 
 # AWX
+from awx.main.constants import LOGGER_BLACKLIST
 from awx.main.utils.formatters import LogstashFormatter
 
 
 __all__ = ['BaseHTTPSHandler', 'TCPHandler', 'UDPHandler',
-           'AWXProxyHandler']
+           'AWXProxyHandler', 'RotatingProductionLogHandler']
 
 logger = logging.getLogger('awx.main.utils.handlers')
@@ -91,6 +92,28 @@ class SocketResult:
         return self
 
 
+class DynamicLevelMixin(object):
+
+    @property
+    def level(self):
+        from django.conf import settings
+        for logger_name in LOGGER_BLACKLIST:
+            if self.name and self.name.startswith(logger_name):
+                return 30  # WARNING
+        try:
+            return logging._nameToLevel[settings.LOG_AGGREGATOR_LEVEL]
+        except Exception:
+            return 30  # WARNING
+
+    @level.setter
+    def level(self, level):
+        pass  # no-op, this value comes from the database
+
+
+class RotatingProductionLogHandler(logging.handlers.RotatingFileHandler, DynamicLevelMixin):
+    pass
+
+
 class BaseHandler(logging.Handler):
     def __init__(self, host=None, port=None, indv_facts=None, **kwargs):
         super(BaseHandler, self).__init__()
@@ -272,7 +295,7 @@ HANDLER_MAPPING = {
 }
 
 
-class AWXProxyHandler(logging.Handler):
+class AWXProxyHandler(logging.Handler, DynamicLevelMixin):
     '''
     Handler specific to the AWX external logging feature
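DynamicLevelMixin is the mechanism that makes the toggle dynamic: level becomes a read-only property, so the record.levelno >= handler.level comparison that the logging machinery performs for every record re-reads LOG_AGGREGATOR_LEVEL each time, and any setLevel() call (including the one dictConfig makes at startup) is absorbed by the no-op setter. A self-contained sketch of the same idea, with a module-level variable standing in for the database-backed setting:

```python
import logging

EFFECTIVE_LEVEL = 'WARNING'  # stand-in for the LOG_AGGREGATOR_LEVEL setting


class DynamicLevelHandler(logging.StreamHandler):

    @property
    def level(self):
        # Re-evaluated every time the logging machinery compares
        # record.levelno against this handler's level.
        return logging._nameToLevel.get(EFFECTIVE_LEVEL, logging.WARNING)

    @level.setter
    def level(self, value):
        pass  # swallow setLevel(); the stored setting is authoritative


log = logging.getLogger('demo')
log.setLevel(logging.DEBUG)        # the logger passes everything through
log.addHandler(DynamicLevelHandler())

log.debug('hidden')                # dropped: effective handler level is WARNING
EFFECTIVE_LEVEL = 'DEBUG'
log.debug('visible')               # emitted, with no logging reconfiguration
```

Because the gate moves from the configuration to the handler, the defaults.py hunks below delete the hard-coded 'level' keys rather than lowering them; the effective level now comes from the property.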
diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py
index 011b6d086d..5b520d2973 100644
--- a/awx/settings/defaults.py
+++ b/awx/settings/defaults.py
@@ -1013,8 +1013,8 @@ LOGGING = {
         'class': 'django.utils.log.AdminEmailHandler',
     },
     'tower_warnings': {
-        'level': 'WARNING',
-        'class':'logging.handlers.RotatingFileHandler',
+        # don't define a level here, it's set by settings.LOG_AGGREGATOR_LEVEL
+        'class':'awx.main.utils.handlers.RotatingProductionLogHandler',
         'filters': ['require_debug_false'],
         'filename': os.path.join(LOG_ROOT, 'tower.log'),
         'maxBytes': 1024 * 1024 * 5, # 5 MB
@@ -1022,8 +1022,8 @@ LOGGING = {
         'formatter':'simple',
     },
     'callback_receiver': {
-        'level': 'WARNING',
-        'class':'logging.handlers.RotatingFileHandler',
+        # don't define a level here, it's set by settings.LOG_AGGREGATOR_LEVEL
+        'class':'awx.main.utils.handlers.RotatingProductionLogHandler',
         'filters': ['require_debug_false'],
         'filename': os.path.join(LOG_ROOT, 'callback_receiver.log'),
         'maxBytes': 1024 * 1024 * 5, # 5 MB
@@ -1031,8 +1031,8 @@ LOGGING = {
         'formatter':'simple',
     },
     'dispatcher': {
-        'level': 'WARNING',
-        'class':'logging.handlers.RotatingFileHandler',
+        # don't define a level here, it's set by settings.LOG_AGGREGATOR_LEVEL
+        'class':'awx.main.utils.handlers.RotatingProductionLogHandler',
         'filters': ['require_debug_false'],
         'filename': os.path.join(LOG_ROOT, 'dispatcher.log'),
         'maxBytes': 1024 * 1024 * 5, # 5 MB
@@ -1049,8 +1049,8 @@ LOGGING = {
         'formatter': 'timed_import',
     },
     'task_system': {
-        'level': 'INFO',
-        'class':'logging.handlers.RotatingFileHandler',
+        # don't define a level here, it's set by settings.LOG_AGGREGATOR_LEVEL
+        'class':'awx.main.utils.handlers.RotatingProductionLogHandler',
         'filters': ['require_debug_false'],
         'filename': os.path.join(LOG_ROOT, 'task_system.log'),
         'maxBytes': 1024 * 1024 * 5, # 5 MB
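Taken together, the handler changes make turning on DEBUG logging a settings write rather than an edit to defaults.py plus a service restart. A sketch of the toggle from a Django shell, assuming LOG_AGGREGATOR_LEVEL is registered with awx.conf so that attribute assignment persists it (treat the persistence detail as an assumption, not part of this patch):

```python
# e.g. from `awx-manage shell`
import logging

from django.conf import settings

settings.LOG_AGGREGATOR_LEVEL = 'DEBUG'  # persisted by the awx.conf settings registry (assumed)

# Handlers built on DynamicLevelMixin pick the new level up on the next
# record they consider; no dictConfig() rerun or process restart is involved.
logging.getLogger('awx.main.tasks').debug('now visible in tower.log')
```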