Fix api/v2/metrics displaying incorrect values

- Use a locally defined Prometheus registry (created inside metrics() on each call) instead of the global registry
This commit is contained in:
Seth Foster
2021-03-23 17:38:48 -04:00
parent f46c968742
commit 3f342feadd

View File

@@ -1,5 +1,5 @@
from django.conf import settings from django.conf import settings
from prometheus_client import REGISTRY, PROCESS_COLLECTOR, PLATFORM_COLLECTOR, GC_COLLECTOR, Gauge, Info, generate_latest from prometheus_client import PROCESS_COLLECTOR, PLATFORM_COLLECTOR, GC_COLLECTOR, CollectorRegistry, Gauge, Info, generate_latest
from awx.conf.license import get_license from awx.conf.license import get_license
from awx.main.utils import get_awx_version, get_ansible_version from awx.main.utils import get_awx_version, get_ansible_version
@@ -11,115 +11,123 @@ from awx.main.analytics.collectors import (
) )
REGISTRY.unregister(PROCESS_COLLECTOR) def metrics():
REGISTRY.unregister(PLATFORM_COLLECTOR) REGISTRY = CollectorRegistry()
REGISTRY.unregister(GC_COLLECTOR)
SYSTEM_INFO = Info('awx_system', 'AWX System Information') SYSTEM_INFO = Info('awx_system', 'AWX System Information', registry=REGISTRY)
ORG_COUNT = Gauge('awx_organizations_total', 'Number of organizations') ORG_COUNT = Gauge('awx_organizations_total', 'Number of organizations', registry=REGISTRY)
USER_COUNT = Gauge('awx_users_total', 'Number of users') USER_COUNT = Gauge('awx_users_total', 'Number of users', registry=REGISTRY)
TEAM_COUNT = Gauge('awx_teams_total', 'Number of teams') TEAM_COUNT = Gauge('awx_teams_total', 'Number of teams', registry=REGISTRY)
INV_COUNT = Gauge('awx_inventories_total', 'Number of inventories') INV_COUNT = Gauge('awx_inventories_total', 'Number of inventories', registry=REGISTRY)
PROJ_COUNT = Gauge('awx_projects_total', 'Number of projects') PROJ_COUNT = Gauge('awx_projects_total', 'Number of projects', registry=REGISTRY)
JT_COUNT = Gauge('awx_job_templates_total', 'Number of job templates') JT_COUNT = Gauge('awx_job_templates_total', 'Number of job templates', registry=REGISTRY)
WFJT_COUNT = Gauge('awx_workflow_job_templates_total', 'Number of workflow job templates') WFJT_COUNT = Gauge('awx_workflow_job_templates_total', 'Number of workflow job templates', registry=REGISTRY)
HOST_COUNT = Gauge( HOST_COUNT = Gauge(
'awx_hosts_total', 'awx_hosts_total',
'Number of hosts', 'Number of hosts',
[ [
'type', 'type',
], ],
) registry=REGISTRY,
SCHEDULE_COUNT = Gauge('awx_schedules_total', 'Number of schedules') )
INV_SCRIPT_COUNT = Gauge('awx_inventory_scripts_total', 'Number of invetory scripts') SCHEDULE_COUNT = Gauge('awx_schedules_total', 'Number of schedules', registry=REGISTRY)
USER_SESSIONS = Gauge( INV_SCRIPT_COUNT = Gauge('awx_inventory_scripts_total', 'Number of invetory scripts', registry=REGISTRY)
USER_SESSIONS = Gauge(
'awx_sessions_total', 'awx_sessions_total',
'Number of sessions', 'Number of sessions',
[ [
'type', 'type',
], ],
) registry=REGISTRY,
CUSTOM_VENVS = Gauge('awx_custom_virtualenvs_total', 'Number of virtualenvs') )
RUNNING_JOBS = Gauge('awx_running_jobs_total', 'Number of running jobs on the Tower system') CUSTOM_VENVS = Gauge('awx_custom_virtualenvs_total', 'Number of virtualenvs', registry=REGISTRY)
PENDING_JOBS = Gauge('awx_pending_jobs_total', 'Number of pending jobs on the Tower system') RUNNING_JOBS = Gauge('awx_running_jobs_total', 'Number of running jobs on the Tower system', registry=REGISTRY)
STATUS = Gauge( PENDING_JOBS = Gauge('awx_pending_jobs_total', 'Number of pending jobs on the Tower system', registry=REGISTRY)
STATUS = Gauge(
'awx_status_total', 'awx_status_total',
'Status of Job launched', 'Status of Job launched',
[ [
'status', 'status',
], ],
) registry=REGISTRY,
)
INSTANCE_CAPACITY = Gauge( INSTANCE_CAPACITY = Gauge(
'awx_instance_capacity', 'awx_instance_capacity',
'Capacity of each node in a Tower system', 'Capacity of each node in a Tower system',
[ [
'hostname', 'hostname',
'instance_uuid', 'instance_uuid',
], ],
) registry=REGISTRY,
INSTANCE_CPU = Gauge( )
INSTANCE_CPU = Gauge(
'awx_instance_cpu', 'awx_instance_cpu',
'CPU cores on each node in a Tower system', 'CPU cores on each node in a Tower system',
[ [
'hostname', 'hostname',
'instance_uuid', 'instance_uuid',
], ],
) registry=REGISTRY,
INSTANCE_MEMORY = Gauge( )
INSTANCE_MEMORY = Gauge(
'awx_instance_memory', 'awx_instance_memory',
'RAM (Kb) on each node in a Tower system', 'RAM (Kb) on each node in a Tower system',
[ [
'hostname', 'hostname',
'instance_uuid', 'instance_uuid',
], ],
) registry=REGISTRY,
INSTANCE_INFO = Info( )
INSTANCE_INFO = Info(
'awx_instance', 'awx_instance',
'Info about each node in a Tower system', 'Info about each node in a Tower system',
[ [
'hostname', 'hostname',
'instance_uuid', 'instance_uuid',
], ],
) registry=REGISTRY,
INSTANCE_LAUNCH_TYPE = Gauge( )
INSTANCE_LAUNCH_TYPE = Gauge(
'awx_instance_launch_type_total', 'awx_instance_launch_type_total',
'Type of Job launched', 'Type of Job launched',
[ [
'node', 'node',
'launch_type', 'launch_type',
], ],
) registry=REGISTRY,
INSTANCE_STATUS = Gauge( )
INSTANCE_STATUS = Gauge(
'awx_instance_status_total', 'awx_instance_status_total',
'Status of Job launched', 'Status of Job launched',
[ [
'node', 'node',
'status', 'status',
], ],
) registry=REGISTRY,
INSTANCE_CONSUMED_CAPACITY = Gauge( )
INSTANCE_CONSUMED_CAPACITY = Gauge(
'awx_instance_consumed_capacity', 'awx_instance_consumed_capacity',
'Consumed capacity of each node in a Tower system', 'Consumed capacity of each node in a Tower system',
[ [
'hostname', 'hostname',
'instance_uuid', 'instance_uuid',
], ],
) registry=REGISTRY,
INSTANCE_REMAINING_CAPACITY = Gauge( )
INSTANCE_REMAINING_CAPACITY = Gauge(
'awx_instance_remaining_capacity', 'awx_instance_remaining_capacity',
'Remaining capacity of each node in a Tower system', 'Remaining capacity of each node in a Tower system',
[ [
'hostname', 'hostname',
'instance_uuid', 'instance_uuid',
], ],
) registry=REGISTRY,
)
LICENSE_INSTANCE_TOTAL = Gauge('awx_license_instance_total', 'Total number of managed hosts provided by your license') LICENSE_INSTANCE_TOTAL = Gauge('awx_license_instance_total', 'Total number of managed hosts provided by your license', registry=REGISTRY)
LICENSE_INSTANCE_FREE = Gauge('awx_license_instance_free', 'Number of remaining managed hosts provided by your license') LICENSE_INSTANCE_FREE = Gauge('awx_license_instance_free', 'Number of remaining managed hosts provided by your license', registry=REGISTRY)
def metrics():
license_info = get_license() license_info = get_license()
SYSTEM_INFO.info( SYSTEM_INFO.info(
{ {
@@ -197,7 +205,7 @@ def metrics():
for status, value in statuses.items(): for status, value in statuses.items():
INSTANCE_STATUS.labels(node=node, status=status).set(value) INSTANCE_STATUS.labels(node=node, status=status).set(value)
return generate_latest() return generate_latest(registry=REGISTRY)
__all__ = ['metrics'] __all__ = ['metrics']