Mirror of https://github.com/ansible/awx.git
commit c3812de3d6 (parent e9f2fddc7f)

initial prometheus commit

Co-authored-by: Wayne Witzel III <wayne@riotousliving.com>
Co-authored-by: Christian Adams <rooftopcellist@gmail.com>
awx/api/metrics.py (new file, +15)
@@ -0,0 +1,15 @@
# Copyright (c) 2017 Ansible, Inc.
# All Rights Reserved.

from django.conf.urls import url

from awx.api.views import (
    MetricsView
)


urls = [
    url(r'^$', MetricsView.as_view(), name='metrics_view'),
]

__all__ = ['urls']
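This module only exposes a `urls` list; the hunk that follows registers MetricsView directly on `v2_urls`, but a urls module like this one is normally mounted with Django's `include()`. A minimal sketch of that pattern (the `metrics/` mount point below is illustrative, not taken from this commit):

    from django.conf.urls import include, url

    from awx.api.metrics import urls as metrics_urls

    urlpatterns = [
        # 'metrics/' is an illustrative prefix; include() accepts the plain
        # pattern list the module exports.
        url(r'^metrics/', include(metrics_urls)),
    ]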
@@ -34,6 +34,8 @@ from awx.api.views import (
    OAuth2ApplicationDetail,
)

+from awx.api.views.metrics import MetricsView
+
from .organization import urls as organization_urls
from .user import urls as user_urls
from .project import urls as project_urls
@@ -133,6 +135,7 @@ v2_urls = [
    url(r'^applications/(?P<pk>[0-9]+)/tokens/$', ApplicationOAuth2TokenList.as_view(), name='application_o_auth2_token_list'),
    url(r'^tokens/$', OAuth2TokenList.as_view(), name='o_auth2_token_list'),
    url(r'^', include(oauth2_urls)),
+    url(r'^metrics/$', MetricsView.as_view(), name='metrics_view'),
]

app_name = 'api'
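With this route in place, a quick development check might look like the following; the base URL and credentials are assumptions about a local install, not part of this commit:

    import requests

    # Assumed local development URL and credentials; adjust for your environment.
    AWX_URL = 'https://localhost:8043'
    AUTH = ('admin', 'password')

    resp = requests.get(AWX_URL + '/api/v2/metrics/?format=txt', auth=AUTH, verify=False)
    resp.raise_for_status()
    # Prometheus exposition text, e.g. lines beginning with awx_...
    print(resp.text)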
awx/api/views/metrics.py (new file, +46)
@@ -0,0 +1,46 @@
# Copyright (c) 2018 Red Hat, Inc.
# All Rights Reserved.

# Python
import logging

# Django
from django.conf import settings
from django.utils.translation import ugettext_lazy as _
from django.utils.timezone import now

# Django REST Framework
from rest_framework.exceptions import PermissionDenied
from rest_framework.response import Response
from rest_framework.renderers import JSONRenderer, StaticHTMLRenderer

# AWX
# from awx.main.analytics import collectors
from awx.main.analytics.metrics import metrics
from awx.api import renderers

from awx.api.generics import (
    APIView,
)

from awx.api.serializers import (
    InventorySerializer,
    ActivityStreamSerializer,
)

logger = logging.getLogger('awx.main.analytics')


class MetricsView(APIView):

    view_name = _('Metrics')
    swagger_topic = 'Metrics'

    renderer_classes = [renderers.PlainTextRenderer,
                        renderers.BrowsableAPIRenderer,
                        JSONRenderer,]

    def get(self, request, format='txt'):
        ''' Show Metrics Details '''
        return Response(metrics().decode('UTF-8'))
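The view depends on `renderers.PlainTextRenderer` from `awx.api.renderers`, which is not part of this diff. As a rough sketch of what such a renderer typically looks like in Django REST Framework (an assumption about its shape, not AWX's actual implementation):

    from rest_framework import renderers


    class PlainTextRenderer(renderers.BaseRenderer):
        # Serve the already-formatted metrics string as text/plain.
        media_type = 'text/plain'
        format = 'txt'

        def render(self, data, accepted_media_type=None, renderer_context=None):
            if not isinstance(data, str):
                data = str(data)
            return data.encode(self.charset or 'utf-8')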
@@ -104,6 +104,7 @@ class ApiVersionRootView(APIView):
        data['credential_input_sources'] = reverse('api:credential_input_source_list', request=request)
        data['applications'] = reverse('api:o_auth2_application_list', request=request)
        data['tokens'] = reverse('api:o_auth2_token_list', request=request)
+        data['metrics'] = reverse('api:metrics_view', request=request)
        data['inventory'] = reverse('api:inventory_list', request=request)
        data['inventory_scripts'] = reverse('api:inventory_script_list', request=request)
        data['inventory_sources'] = reverse('api:inventory_source_list', request=request)
@@ -278,6 +279,3 @@ class ApiV1ConfigView(APIView):
        except Exception:
            # FIX: Log
            return Response({"error": _("Failed to remove license.")}, status=status.HTTP_400_BAD_REQUEST)
-
-
-
@@ -158,7 +158,7 @@ def instance_info(since):
    instances = models.Instance.objects.values_list('hostname').annotate().values(
        'uuid', 'version', 'capacity', 'cpu', 'memory', 'managed_by_policy', 'hostname', 'last_isolated_check', 'enabled')
    for instance in instances:
-        info = {'uuid': instance['uuid'],
+        instance_info = {'uuid': instance['uuid'],
                         'version': instance['version'],
                         'capacity': instance['capacity'],
                         'cpu': instance['cpu'],
@@ -167,6 +167,7 @@ def instance_info(since):
                         'last_isolated_check': instance['last_isolated_check'],
                         'enabled': instance['enabled']
                         }
+        info[instance['uuid']] = instance_info
    return info


@@ -186,12 +187,12 @@ def job_instance_counts(since):
    job_types = models.UnifiedJob.objects.exclude(launch_type='sync').values_list(
        'execution_node', 'launch_type').annotate(job_launch_type=Count('launch_type'))
    for job in job_types:
-        counts.setdefault(job[0], {}).setdefault('status', {})[job[1]] = job[2]
+        counts.setdefault(job[0], {}).setdefault('launch_type', {})[job[1]] = job[2]

    job_statuses = models.UnifiedJob.objects.exclude(launch_type='sync').values_list(
        'execution_node', 'status').annotate(job_status=Count('status'))
    for job in job_statuses:
-        counts.setdefault(job[0], {}).setdefault('launch_type', {})[job[1]] = job[2]
+        counts.setdefault(job[0], {}).setdefault('status', {})[job[1]] = job[2]
    return counts
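For reference, the shapes these collectors now return, and that metrics() below consumes, look roughly like the following; the keys come from the hunks above, the values are made up:

    # instance_info(...) is keyed by instance uuid.
    example_instance_info = {
        '00000000-0000-0000-0000-000000000000': {
            'uuid': '00000000-0000-0000-0000-000000000000',
            'version': '4.0.0',
            'capacity': 100,
            'cpu': 4,
            'memory': 8589934592,
            'managed_by_policy': True,
            'last_isolated_check': None,
            'enabled': True,
        },
    }

    # job_instance_counts(...) is keyed by execution node, then by dimension.
    example_job_instance_counts = {
        'awx-node-1': {
            'launch_type': {'manual': 10, 'scheduled': 3},
            'status': {'successful': 11, 'failed': 2},
        },
    }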
awx/main/analytics/metrics.py (new file, +127)
@@ -0,0 +1,127 @@
import os
from datetime import datetime

from prometheus_client import (
    REGISTRY,
    PROCESS_COLLECTOR,
    PLATFORM_COLLECTOR,
    GC_COLLECTOR,
    Gauge,
    Info,
    generate_latest
)

from django.contrib.sessions.models import Session

# Temporary Imports
from django.db import connection
from django.db.models import Count
from django.conf import settings

from awx.conf.license import get_license
from awx.main.utils import (get_awx_version, get_ansible_version,
                            get_custom_venv_choices)
from awx.main import models
from awx.main.analytics.collectors import (
    counts,
    instance_info,
    job_instance_counts
)
from django.contrib.sessions.models import Session
from awx.main.analytics import register


REGISTRY.unregister(PROCESS_COLLECTOR)
REGISTRY.unregister(PLATFORM_COLLECTOR)
REGISTRY.unregister(GC_COLLECTOR)


SYSTEM_INFO = Info('awx_system', 'AWX System Information')
ORG_COUNT = Gauge('awx_organizations_total', 'Number of organizations')
USER_COUNT = Gauge('awx_users_total', 'Number of users')
TEAM_COUNT = Gauge('awx_teams_total', 'Number of teams')
INV_COUNT = Gauge('awx_inventories_total', 'Number of inventories')
PROJ_COUNT = Gauge('awx_projects_total', 'Number of projects')
JT_COUNT = Gauge('awx_job_templates_total', 'Number of job templates')
WFJT_COUNT = Gauge('awx_workflow_job_templates_total', 'Number of workflow job templates')
HOST_COUNT = Gauge('awx_hosts_total', 'Number of hosts', ['type',])
SCHEDULE_COUNT = Gauge('awx_schedules_total', 'Number of schedules')
INV_SCRIPT_COUNT = Gauge('awx_inventory_scripts_total', 'Number of inventory scripts')
USER_SESSIONS = Gauge('awx_sessions_total', 'Number of sessions', ['type',])
CUSTOM_VENVS = Gauge('awx_custom_virtualenvs_total', 'Number of virtualenvs')
RUNNING_JOBS = Gauge('awx_running_jobs_total', 'Number of running jobs on the Tower system')

INSTANCE_CAPACITY = Gauge('awx_instance_capacity', 'Capacity of each node in a Tower system', ['type',])
INSTANCE_CPU = Gauge('awx_instance_cpu', 'CPU cores on each node in a Tower system', ['type',])
INSTANCE_MEMORY = Gauge('awx_instance_memory', 'RAM (Kb) on each node in a Tower system', ['type',])
INSTANCE_INFO = Info('awx_instance', 'Info about each node in a Tower system', ['type',])
INSTANCE_LAUNCH_TYPE = Gauge('awx_instance_launch_type_total', 'Type of Job launched', ['node', 'launch_type',])
INSTANCE_STATUS = Gauge('awx_instance_status_total', 'Status of Job launched', ['node', 'status',])


def metrics():
    license_info = get_license(show_key=False)
    SYSTEM_INFO.info({'system_uuid': settings.SYSTEM_UUID,
                      'tower_url_base': settings.TOWER_URL_BASE,
                      'tower_version': get_awx_version(),
                      'ansible_version': get_ansible_version(),
                      'license_type': license_info.get('license_type', 'UNLICENSED'),
                      'free_instances': str(license_info.get('free instances', 0)),
                      'license_expiry': str(license_info.get('time_remaining', 0)),
                      'pendo_tracking': settings.PENDO_TRACKING_STATE,
                      'external_logger_enabled': str(settings.LOG_AGGREGATOR_ENABLED),
                      'external_logger_type': getattr(settings, 'LOG_AGGREGATOR_TYPE', 'None')})

    current_counts = counts(datetime.now())

    ORG_COUNT.set(current_counts['organization'])
    USER_COUNT.set(current_counts['user'])
    TEAM_COUNT.set(current_counts['team'])
    INV_COUNT.set(current_counts['inventory'])
    PROJ_COUNT.set(current_counts['project'])
    JT_COUNT.set(current_counts['job_template'])
    WFJT_COUNT.set(current_counts['workflow_job_template'])

    HOST_COUNT.labels(type='all').set(current_counts['host'])
    HOST_COUNT.labels(type='active').set(current_counts['active_host_count'])

    SCHEDULE_COUNT.set(current_counts['schedule'])
    INV_SCRIPT_COUNT.set(current_counts['custom_inventory_script'])
    CUSTOM_VENVS.set(current_counts['custom_virtualenvs'])

    USER_SESSIONS.labels(type='all').set(current_counts['active_sessions'])
    USER_SESSIONS.labels(type='user').set(current_counts['active_user_sessions'])
    USER_SESSIONS.labels(type='anonymous').set(current_counts['active_anonymous_sessions'])

    RUNNING_JOBS.set(current_counts['running_jobs'])


    instance_data = instance_info(datetime.now())
    for uuid in instance_data:
        INSTANCE_CAPACITY.labels(type=uuid).set(instance_data[uuid]['capacity'])
        INSTANCE_CPU.labels(type=uuid).set(instance_data[uuid]['cpu'])
        INSTANCE_MEMORY.labels(type=uuid).set(instance_data[uuid]['memory'])
        INSTANCE_INFO.labels(type=uuid).info({'enabled': str(instance_data[uuid]['enabled']),
                                              'last_isolated_check': str(instance_data[uuid].get('last_isolated_check', 'None')),
                                              'managed_by_policy': str(instance_data[uuid]['managed_by_policy']),
                                              'version': instance_data[uuid]['version']
                                              })

    instance_data = job_instance_counts(datetime.now())
    for node in instance_data:
        # skipping internal execution node (for system jobs)
        # TODO: determine if we should exclude execution_node from instance count
        if node == '':
            continue
        types = instance_data[node].get('launch_type', {})
        for launch_type, value in types.items():
            INSTANCE_LAUNCH_TYPE.labels(node=node, launch_type=launch_type).set(value)
        statuses = instance_data[node].get('status', {})
        for status, value in statuses.items():
            INSTANCE_STATUS.labels(node=node, status=status).set(value)


    return generate_latest()


__all__ = ['metrics']
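The module relies entirely on prometheus_client's default registry: module-level Gauge/Info instances register themselves at import time, labels() selects a child series, and generate_latest() renders everything in the exposition format. A self-contained sketch of that pattern outside AWX (the metric names here are illustrative):

    from prometheus_client import Gauge, Info, generate_latest

    DEMO_INFO = Info('demo_system', 'Example system information')
    DEMO_HOSTS = Gauge('demo_hosts_total', 'Number of hosts', ['type'])

    DEMO_INFO.info({'version': '1.0.0'})
    DEMO_HOSTS.labels(type='all').set(12)
    DEMO_HOSTS.labels(type='active').set(9)

    # generate_latest() returns bytes such as:
    #   # HELP demo_hosts_total Number of hosts
    #   # TYPE demo_hosts_total gauge
    #   demo_hosts_total{type="all"} 12.0
    print(generate_latest().decode('utf-8'))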
docs/prometheus.md (new file, +49)
@@ -0,0 +1,49 @@
# Prometheus Support

## Development

Start a Prometheus container:

    docker run --net=tools_default --link=tools_awx_1:awxweb --volume ./prometheus.yml:/prometheus.yml --name prometheus -d -p 127.0.0.1:9090:9090 prom/prometheus --web.enable-lifecycle --config.file=/prometheus.yml

Example Prometheus config (`prometheus.yml`):

    # my global config
    global:
      scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
      evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
      # scrape_timeout is set to the global default (10s).
    # Alertmanager configuration
    alerting:
      alertmanagers:
      - static_configs:
        - targets:
          # - alertmanager:9093
    # Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
    rule_files:
      # - "first_rules.yml"
      # - "second_rules.yml"
    # A scrape configuration containing exactly one endpoint to scrape:
    # Here it's Prometheus itself.
    scrape_configs:
      # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
      - job_name: 'prometheus'
        # metrics_path defaults to '/metrics'
        # scheme defaults to 'http'.
        static_configs:
        - targets: ['localhost:9090']
      - job_name: 'awx'
        tls_config:
          insecure_skip_verify: True
        metrics_path: /api/v2/metrics
        scrape_interval: 5s
        scheme: https
        params:
          format: ['txt']
        basic_auth:
          username: root
          password: reverse
        # bearer_token: <token_value>
        static_configs:
        - targets:
          - awxweb:8043
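Once Prometheus is scraping AWX with the config above, the new series can be checked through Prometheus' HTTP query API; a small sketch, assuming the container was published on 127.0.0.1:9090 as in the docker run command and using one of the metric names from awx/main/analytics/metrics.py:

    import requests

    # Prometheus as started above, published on 127.0.0.1:9090.
    PROMETHEUS_URL = 'http://127.0.0.1:9090'

    resp = requests.get(PROMETHEUS_URL + '/api/v1/query',
                        params={'query': 'awx_users_total'})
    resp.raise_for_status()
    for result in resp.json()['data']['result']:
        # Each result carries the label set and the latest [timestamp, value] sample.
        print(result['metric'], result['value'])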
@@ -29,6 +29,7 @@ jsonschema==2.6.0
Markdown==2.6.11 # used for formatting API help
ordereddict==1.1
pexpect==4.6.0
+prometheus_client==0.6.0
psutil==5.4.3
psycopg2==2.7.3.2 # problems with Segmentation faults / wheels on upgrade
pygerduty==0.37.0

@@ -74,6 +74,7 @@ oauthlib==2.0.6 # via django-oauth-toolkit, requests-oauthlib, social-
ordereddict==1.1
pexpect==4.6.0
pkgconfig==1.4.0 # via xmlsec
+prometheus_client==0.6.0
psutil==5.4.3
psycopg2==2.7.3.2
ptyprocess==0.6.0 # via pexpect