Fix api/v2/metrics displaying incorrect values

- Use a locally defined Prometheus registry (created inside metrics()) instead of the global registry
This commit is contained in:
Seth Foster
2021-03-23 17:38:48 -04:00
parent f46c968742
commit 3f342feadd

View File

@@ -1,5 +1,5 @@
from django.conf import settings from django.conf import settings
from prometheus_client import REGISTRY, PROCESS_COLLECTOR, PLATFORM_COLLECTOR, GC_COLLECTOR, Gauge, Info, generate_latest from prometheus_client import PROCESS_COLLECTOR, PLATFORM_COLLECTOR, GC_COLLECTOR, CollectorRegistry, Gauge, Info, generate_latest
from awx.conf.license import get_license from awx.conf.license import get_license
from awx.main.utils import get_awx_version, get_ansible_version from awx.main.utils import get_awx_version, get_ansible_version
@@ -11,43 +11,45 @@ from awx.main.analytics.collectors import (
) )
REGISTRY.unregister(PROCESS_COLLECTOR) def metrics():
REGISTRY.unregister(PLATFORM_COLLECTOR) REGISTRY = CollectorRegistry()
REGISTRY.unregister(GC_COLLECTOR)
SYSTEM_INFO = Info('awx_system', 'AWX System Information') SYSTEM_INFO = Info('awx_system', 'AWX System Information', registry=REGISTRY)
ORG_COUNT = Gauge('awx_organizations_total', 'Number of organizations') ORG_COUNT = Gauge('awx_organizations_total', 'Number of organizations', registry=REGISTRY)
USER_COUNT = Gauge('awx_users_total', 'Number of users') USER_COUNT = Gauge('awx_users_total', 'Number of users', registry=REGISTRY)
TEAM_COUNT = Gauge('awx_teams_total', 'Number of teams') TEAM_COUNT = Gauge('awx_teams_total', 'Number of teams', registry=REGISTRY)
INV_COUNT = Gauge('awx_inventories_total', 'Number of inventories') INV_COUNT = Gauge('awx_inventories_total', 'Number of inventories', registry=REGISTRY)
PROJ_COUNT = Gauge('awx_projects_total', 'Number of projects') PROJ_COUNT = Gauge('awx_projects_total', 'Number of projects', registry=REGISTRY)
JT_COUNT = Gauge('awx_job_templates_total', 'Number of job templates') JT_COUNT = Gauge('awx_job_templates_total', 'Number of job templates', registry=REGISTRY)
WFJT_COUNT = Gauge('awx_workflow_job_templates_total', 'Number of workflow job templates') WFJT_COUNT = Gauge('awx_workflow_job_templates_total', 'Number of workflow job templates', registry=REGISTRY)
HOST_COUNT = Gauge( HOST_COUNT = Gauge(
'awx_hosts_total', 'awx_hosts_total',
'Number of hosts', 'Number of hosts',
[ [
'type', 'type',
], ],
registry=REGISTRY,
) )
SCHEDULE_COUNT = Gauge('awx_schedules_total', 'Number of schedules') SCHEDULE_COUNT = Gauge('awx_schedules_total', 'Number of schedules', registry=REGISTRY)
INV_SCRIPT_COUNT = Gauge('awx_inventory_scripts_total', 'Number of invetory scripts') INV_SCRIPT_COUNT = Gauge('awx_inventory_scripts_total', 'Number of invetory scripts', registry=REGISTRY)
USER_SESSIONS = Gauge( USER_SESSIONS = Gauge(
'awx_sessions_total', 'awx_sessions_total',
'Number of sessions', 'Number of sessions',
[ [
'type', 'type',
], ],
registry=REGISTRY,
) )
CUSTOM_VENVS = Gauge('awx_custom_virtualenvs_total', 'Number of virtualenvs') CUSTOM_VENVS = Gauge('awx_custom_virtualenvs_total', 'Number of virtualenvs', registry=REGISTRY)
RUNNING_JOBS = Gauge('awx_running_jobs_total', 'Number of running jobs on the Tower system') RUNNING_JOBS = Gauge('awx_running_jobs_total', 'Number of running jobs on the Tower system', registry=REGISTRY)
PENDING_JOBS = Gauge('awx_pending_jobs_total', 'Number of pending jobs on the Tower system') PENDING_JOBS = Gauge('awx_pending_jobs_total', 'Number of pending jobs on the Tower system', registry=REGISTRY)
STATUS = Gauge( STATUS = Gauge(
'awx_status_total', 'awx_status_total',
'Status of Job launched', 'Status of Job launched',
[ [
'status', 'status',
], ],
registry=REGISTRY,
) )
INSTANCE_CAPACITY = Gauge( INSTANCE_CAPACITY = Gauge(
@@ -57,6 +59,7 @@ INSTANCE_CAPACITY = Gauge(
'hostname', 'hostname',
'instance_uuid', 'instance_uuid',
], ],
registry=REGISTRY,
) )
INSTANCE_CPU = Gauge( INSTANCE_CPU = Gauge(
'awx_instance_cpu', 'awx_instance_cpu',
@@ -65,6 +68,7 @@ INSTANCE_CPU = Gauge(
'hostname', 'hostname',
'instance_uuid', 'instance_uuid',
], ],
registry=REGISTRY,
) )
INSTANCE_MEMORY = Gauge( INSTANCE_MEMORY = Gauge(
'awx_instance_memory', 'awx_instance_memory',
@@ -73,6 +77,7 @@ INSTANCE_MEMORY = Gauge(
'hostname', 'hostname',
'instance_uuid', 'instance_uuid',
], ],
registry=REGISTRY,
) )
INSTANCE_INFO = Info( INSTANCE_INFO = Info(
'awx_instance', 'awx_instance',
@@ -81,6 +86,7 @@ INSTANCE_INFO = Info(
'hostname', 'hostname',
'instance_uuid', 'instance_uuid',
], ],
registry=REGISTRY,
) )
INSTANCE_LAUNCH_TYPE = Gauge( INSTANCE_LAUNCH_TYPE = Gauge(
'awx_instance_launch_type_total', 'awx_instance_launch_type_total',
@@ -89,6 +95,7 @@ INSTANCE_LAUNCH_TYPE = Gauge(
'node', 'node',
'launch_type', 'launch_type',
], ],
registry=REGISTRY,
) )
INSTANCE_STATUS = Gauge( INSTANCE_STATUS = Gauge(
'awx_instance_status_total', 'awx_instance_status_total',
@@ -97,6 +104,7 @@ INSTANCE_STATUS = Gauge(
'node', 'node',
'status', 'status',
], ],
registry=REGISTRY,
) )
INSTANCE_CONSUMED_CAPACITY = Gauge( INSTANCE_CONSUMED_CAPACITY = Gauge(
'awx_instance_consumed_capacity', 'awx_instance_consumed_capacity',
@@ -105,6 +113,7 @@ INSTANCE_CONSUMED_CAPACITY = Gauge(
'hostname', 'hostname',
'instance_uuid', 'instance_uuid',
], ],
registry=REGISTRY,
) )
INSTANCE_REMAINING_CAPACITY = Gauge( INSTANCE_REMAINING_CAPACITY = Gauge(
'awx_instance_remaining_capacity', 'awx_instance_remaining_capacity',
@@ -113,13 +122,12 @@ INSTANCE_REMAINING_CAPACITY = Gauge(
'hostname', 'hostname',
'instance_uuid', 'instance_uuid',
], ],
registry=REGISTRY,
) )
LICENSE_INSTANCE_TOTAL = Gauge('awx_license_instance_total', 'Total number of managed hosts provided by your license') LICENSE_INSTANCE_TOTAL = Gauge('awx_license_instance_total', 'Total number of managed hosts provided by your license', registry=REGISTRY)
LICENSE_INSTANCE_FREE = Gauge('awx_license_instance_free', 'Number of remaining managed hosts provided by your license') LICENSE_INSTANCE_FREE = Gauge('awx_license_instance_free', 'Number of remaining managed hosts provided by your license', registry=REGISTRY)
def metrics():
license_info = get_license() license_info = get_license()
SYSTEM_INFO.info( SYSTEM_INFO.info(
{ {
@@ -197,7 +205,7 @@ def metrics():
for status, value in statuses.items(): for status, value in statuses.items():
INSTANCE_STATUS.labels(node=node, status=status).set(value) INSTANCE_STATUS.labels(node=node, status=status).set(value)
return generate_latest() return generate_latest(registry=REGISTRY)
__all__ = ['metrics'] __all__ = ['metrics']