Merge pull request #12908 from rebeccahhh/devel

new example grafana alert rule
This commit is contained in:
Rebeccah Hunter 2022-09-26 10:49:49 -04:00 committed by GitHub
commit a66b27edff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 226 additions and 9 deletions

View File

@ -483,5 +483,11 @@ $ PROMETHEUS=yes GRAFANA=yes make docker-compose
### Alerts in Grafana
We are configuring alerts in grafana using the provisioning files method. This feature is new in Grafana as of August, 2022. Documentation can be found: https://grafana.com/docs/grafana/latest/administration/provisioning/#alerting however it does not fully show all parameters to the config. One way to understand how to build rules is to build them in the UI and use chrometools to inspect the payload as you save the rules. It appears that the "data" portion of the payload for each rule is the same syntax as needed in the provisioning file config. To reload the alerts without restarting the container, from within the container you can send a POST with `curl -X POST http://admin:admin@localhost:3000/api/admin/provisioning/alerting/relo
ad`. Keep in mind the grafana container does not default contain `curl` and you can get it with `apk add curl`.
We are configuring alerts in Grafana using the provisioning files method. This feature is new in Grafana as of August 2022. Documentation is available at https://grafana.com/docs/grafana/latest/administration/provisioning/#alerting but it does not fully show all parameters of the config.
One way to understand how to build rules is to build them in the UI and use Chrome DevTools to inspect the payload as you save the rules. It appears that the "data" portion of the payload for each rule uses the same syntax as the provisioning file config. To reload the alerts without restarting the container, from within the container you can send a POST with `curl -X POST http://admin:admin@localhost:3000/api/admin/provisioning/alerting/reload`. Keep in mind that the Grafana container does not contain `curl`. You can install it with the command `apk add curl`.
Another way to export rules is to explore the API.
1. Get all the folders: `GET` to `/api/folders`
2. Get the rules: `GET` to `/api/ruler/grafana/api/v1/rules/{{ Folder }}`

View File

@ -6,10 +6,167 @@ groups:
name: awx_rules
orgId: 1
rules:
- condition: A
- condition: if_failures_too_high
dashboardUid: awx
data:
- datasourceUid: PBFA97CFB590B2093
# Prometheus query: maximum 30-minute delta of the
# awx_instance_status_total series for instance awx1:8013 whose status
# label is "failed" or "error".
- refId: total_errors
  queryType: ''
  relativeTimeRange:
    # NOTE(review): the query window below is 30m while this range is
    # 600 seconds; confirm the mismatch is intended.
    from: 600
    to: 0
  datasourceUid: awx_alert
  model:
    editorMode: code
    # `=~` is the regex matcher. With plain `=` the selector would only
    # match a label value that is literally "failed|error", so the
    # query would never return any series.
    expr: >-
      max(delta(awx_instance_status_total{instance="awx1:8013",
      status=~"failed|error"}[30m]))
    hide: false
    intervalMs: 1000
    legendFormat: __auto
    maxDataPoints: 43200
    range: true
    refId: total_errors
# Expression step (datasource type __expr__): reduces the total_errors
# query to a single value using the max reducer.
- refId: max_errors
  queryType: ""
  relativeTimeRange:
    from: 0
    to: 0
  datasourceUid: "-100"
  model:
    # NOTE(review): this conditions entry looks like classic-condition
    # residue saved by the UI; with `type: reduce` the `expression` and
    # `reducer` keys below presumably drive evaluation - confirm.
    conditions:
      - evaluator:
          params: [80, 0]
          type: gt
        operator:
          type: and
        query:
          params: [total_errors]
        reducer:
          params: []
          type: max
        type: query
    datasource:
      name: Expression
      type: __expr__
      uid: __expr__
    expression: total_errors
    hide: false
    intervalMs: 1000
    maxDataPoints: 43200
    reducer: max
    refId: max_errors
    type: reduce
# Prometheus query: maximum 30-minute delta of the
# awx_instance_status_total series for instance awx1:8013 whose status
# label is "successful".
- refId: total_success
  queryType: ""
  relativeTimeRange:
    from: 600
    to: 0
  datasourceUid: awx_alert
  model:
    datasource:
      type: prometheus
      uid: awx_alert
    editorMode: code
    expr: >-
      max(delta(awx_instance_status_total{instance="awx1:8013",
      status="successful"}[30m]))
    hide: false
    intervalMs: 1000
    legendFormat: __auto
    maxDataPoints: 43200
    range: true
    refId: total_success
# Expression step (datasource type __expr__): reduces the total_success
# query to a single value using the max reducer.
- refId: max_success
  queryType: ""
  relativeTimeRange:
    from: 0
    to: 0
  datasourceUid: "-100"
  model:
    # NOTE(review): this conditions entry looks like classic-condition
    # residue saved by the UI; with `type: reduce` the `expression` and
    # `reducer` keys below presumably drive evaluation - confirm.
    conditions:
      - evaluator:
          params: [0, 0]
          type: gt
        operator:
          type: and
        query:
          params: [total_success]
        reducer:
          params: []
          type: max
        type: query
    datasource:
      name: Expression
      type: __expr__
      uid: __expr__
    expression: total_success
    hide: false
    intervalMs: 1000
    maxDataPoints: 43200
    reducer: max
    refId: max_success
    type: reduce
# Math expression that decides the alert: true when max_errors is at
# least 20% of (max_errors + max_success).
# The refId is set to if_failures_too_high because the rule's
# `condition` names that refId and no other data entry provides it; a
# condition has to point at an existing query or expression.
- refId: if_failures_too_high
  queryType: ''
  relativeTimeRange:
    from: 0
    to: 0
  datasourceUid: '-100'
  model:
    # NOTE(review): both entries below reference max_success and look
    # like residue saved by the UI; with `type: math` the `expression`
    # key presumably drives evaluation - confirm before pruning.
    conditions:
      - evaluator:
          params:
            - 0
            - 0
          type: gt
        operator:
          type: and
        query:
          params:
            - max_success
        reducer:
          params: []
          type: avg
        type: query
      - evaluator:
          params:
            - 0
            - 0
          type: gt
        operator:
          type: and
        query:
          params:
            - max_success
        reducer:
          params: []
          type: avg
        type: query
    datasource:
      name: Expression
      type: __expr__
      uid: __expr__
    expression: $max_errors / ($max_errors+$max_success) >= .2
    hide: false
    intervalMs: 1000
    maxDataPoints: 43200
    refId: if_failures_too_high
    type: math
for: 30m
noDataState: OK
panelId: 2
title: failure_rate_exceeded_20_percent
uid: failure_rate_exceeded_20_percent
- condition: if_redis_queue_too_large
dashboardUid: awx
data:
- datasourceUid: awx_alert
model:
editorMode: code
expr: irate(callback_receiver_events_insert_db{node='awx_1'}[1m])
@ -55,11 +212,11 @@ groups:
relativeTimeRange:
from: 0
to: 0
- datasourceUid: PBFA97CFB590B2093
- datasourceUid: awx_alert
model:
datasource:
type: prometheus
uid: PBFA97CFB590B2093
uid: awx_alert
editorMode: code
expr: callback_receiver_events_queue_size_redis{node='awx_1'}
hide: false
@ -125,9 +282,7 @@ groups:
name: Expression
type: __expr__
uid: __expr__
expression: '(
${mean_redis_queue_size} >
($mean_event_insertion_rate\ * 120))'
expression: '($mean_redis_queue_size > ($mean_event_insertion_rate * 120))'
hide: false
intervalMs: 1000
maxDataPoints: 43200
@ -143,3 +298,58 @@ groups:
panelId: 1
title: redis_queue_too_large_to_clear_in_2_min
uid: redis_queue_too_large_to_clear_in_2_min
# Alert rule: fires when the average of awx_instance_remaining_capacity
# for instance awx1:8013 stays below 20 for 30 minutes.
- condition: if_capacity_is_too_low
  dashboardUid: awx
  # camelCase keys, matching the `noDataState` key used by the other
  # rules in this file and the Grafana provisioning schema. The
  # snake_case spellings are the ruler-API payload names.
  noDataState: OK
  execErrState: Error
  data:
    # Prometheus query over the last 1800 seconds.
    - refId: remaining_capacity
      queryType: ''
      relativeTimeRange:
        from: 1800
        to: 0
      datasourceUid: awx_alert
      model:
        editorMode: builder
        expr: awx_instance_remaining_capacity{instance="awx1:8013"}
        hide: false
        intervalMs: 1000
        legendFormat: __auto
        maxDataPoints: 43200
        range: true
        refId: remaining_capacity
    # Classic condition named by the rule's `condition`: avg of
    # remaining_capacity is less than 20.
    - refId: if_capacity_is_too_low
      queryType: ''
      relativeTimeRange:
        from: 0
        to: 0
      datasourceUid: "-100"
      model:
        conditions:
          - evaluator:
              params:
                - 20
                - 0
              type: lt
            operator:
              type: when
            query:
              params:
                - remaining_capacity
            reducer:
              params: []
              type: avg
            type: query
        datasource:
          name: Expression
          type: __expr__
          uid: __expr__
        expression: remaining_capacity
        hide: false
        intervalMs: 1000
        maxDataPoints: 43200
        refId: if_capacity_is_too_low
        type: classic_conditions
  for: 30m
  title: if_capacity_is_too_low
  uid: if_capacity_is_too_low

View File

@ -10,3 +10,4 @@ datasources:
editable: true
jsonData:
timeInterval: 5s
uid: awx_alert