---
# Grafana alerting provisioning file: one rule group ("awx_rules") with three
# alert rules for an AWX instance scraped through the "awx_prometheus"
# datasource.
#
# Reading guide:
#   * Each rule's `data` list is a pipeline of queries/expressions identified
#     by `refId`.
#   * The rule-level `condition` must name the refId whose result decides
#     whether the rule fires.
#   * datasourceUid '-100' is the expression pseudo-datasource; it is quoted
#     everywhere so that it is parsed as a string, not the integer -100.
apiVersion: 1

groups:
  - folder: awx
    interval: 60s
    name: awx_rules
    orgId: 1
    rules:
      # ---------------------------------------------------------------------
      # Rule 1: fire when failed/error jobs are >= 20% of (failed + successful)
      # and that stays true for 30 minutes.
      # ---------------------------------------------------------------------
      # `condition` points at the final math expression below ("compare").
      - condition: compare
        dashboardUid: awx
        data:
          # Change in the failed/error job counters over 30m. `=~` is needed
          # so that "failed|error" is treated as a regex alternation; with `=`
          # it would only match the literal label value "failed|error".
          - refId: total_errors
            queryType: ''
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: awx_prometheus
            model:
              editorMode: code
              expr: >-
                max(delta(awx_instance_status_total{instance="awx1:8013",
                status=~"failed|error"}[30m]))
              hide: false
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: true
              refId: total_errors

          # Reduce the error series to a single number (max).
          # NOTE(review): the `conditions` list on reduce/math steps looks like
          # a UI-export leftover; it is kept, but the step is driven by
          # `expression`/`reducer`/`type` - confirm before relying on it.
          - refId: max_errors
            queryType: ''
            relativeTimeRange:
              from: 0
              to: 0
            datasourceUid: '-100'
            model:
              conditions:
                - evaluator:
                    params:
                      - 80
                      - 0
                    type: gt
                  operator:
                    type: and
                  query:
                    params:
                      - total_errors
                  reducer:
                    params: []
                    type: max
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: total_errors
              hide: false
              intervalMs: 1000
              maxDataPoints: 43200
              reducer: max
              refId: max_errors
              type: reduce

          # Change in the successful job counter over 30m.
          - refId: total_success
            queryType: ''
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: awx_prometheus
            model:
              datasource:
                type: prometheus
                uid: awx_prometheus
              editorMode: code
              expr: >-
                max(delta(awx_instance_status_total{instance="awx1:8013",
                status="successful"}[30m]))
              hide: false
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: true
              refId: total_success

          # Reduce the success series to a single number (max).
          - refId: max_success
            queryType: ''
            relativeTimeRange:
              from: 0
              to: 0
            datasourceUid: '-100'
            model:
              conditions:
                - evaluator:
                    params:
                      - 0
                      - 0
                    type: gt
                  operator:
                    type: and
                  query:
                    params:
                      - total_success
                  reducer:
                    params: []
                    type: max
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: total_success
              hide: false
              intervalMs: 1000
              maxDataPoints: 43200
              reducer: max
              refId: max_success
              type: reduce

          # Alert condition: error share of all finished jobs is >= 0.2.
          - refId: compare
            queryType: ''
            relativeTimeRange:
              from: 0
              to: 0
            datasourceUid: '-100'
            model:
              conditions:
                - evaluator:
                    params:
                      - 0
                      - 0
                    type: gt
                  operator:
                    type: and
                  query:
                    params:
                      - max_success
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: '$max_errors / ($max_errors+$max_success) >= .2'
              hide: false
              intervalMs: 1000
              maxDataPoints: 43200
              refId: compare
              type: math
        for: 30m
        noDataState: OK
        panelId: 2
        title: failure_rate_exceeded_20_percent
        uid: failure_rate_exceeded_20_percent

      # ---------------------------------------------------------------------
      # Rule 2: fire when the Redis callback queue holds more events than
      # 120 x the current per-second DB insertion rate (i.e. more than can be
      # drained in roughly two minutes), sustained for 60 seconds.
      # ---------------------------------------------------------------------
      # `condition` points at the final math expression below.
      - condition: redis_queue_growing_faster_than_insertion_rate
        dashboardUid: awx
        data:
          # Per-second rate of callback events inserted into the database.
          - datasourceUid: awx_prometheus
            model:
              editorMode: code
              expr: irate(callback_receiver_events_insert_db{node='awx_1'}[1m])
              hide: false
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: true
              refId: events_insertion_rate_per_second
            queryType: ''
            refId: events_insertion_rate_per_second
            relativeTimeRange:
              from: 300
              to: 0

          # Mean insertion rate over the query window.
          - datasourceUid: '-100'
            model:
              conditions:
                - evaluator:
                    params:
                      - 3
                    type: gt
                  operator:
                    type: and
                  query:
                    params:
                      - events_insertion_rate_per_second
                  reducer:
                    params: []
                    type: last
                  type: query
              datasource:
                type: __expr__
                uid: '-100'
              expression: events_insertion_rate_per_second
              hide: false
              intervalMs: 1000
              maxDataPoints: 43200
              reducer: mean
              refId: mean_event_insertion_rate
              type: reduce
            queryType: ''
            refId: mean_event_insertion_rate
            relativeTimeRange:
              from: 0
              to: 0

          # Current size of the Redis callback event queue.
          - datasourceUid: awx_prometheus
            model:
              datasource:
                type: prometheus
                uid: awx_prometheus
              editorMode: code
              expr: callback_receiver_events_queue_size_redis{node='awx_1'}
              hide: false
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: true
              refId: redis_queue_size
            queryType: ''
            refId: redis_queue_size
            relativeTimeRange:
              from: 300
              to: 0

          # Most recent queue size sample.
          - datasourceUid: '-100'
            model:
              conditions:
                - evaluator:
                    params:
                      - 3
                    type: gt
                  operator:
                    type: and
                  query:
                    params:
                      - redis_queue_size
                  reducer:
                    params: []
                    type: last
                  type: query
              datasource:
                type: __expr__
                uid: '-100'
              expression: redis_queue_size
              hide: false
              intervalMs: 1000
              maxDataPoints: 43200
              reducer: last
              refId: last_redis_queue_size
              type: reduce
            queryType: ''
            refId: last_redis_queue_size
            relativeTimeRange:
              from: 0
              to: 0

          # Alert condition: queue size exceeds 120 seconds' worth of inserts.
          - datasourceUid: '-100'
            model:
              conditions:
                - evaluator:
                    params:
                      - 0
                      - 0
                    type: gt
                  operator:
                    type: and
                  query:
                    params:
                      - last_redis_queue_size
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: '($last_redis_queue_size > ($mean_event_insertion_rate * 120))'
              hide: false
              intervalMs: 1000
              maxDataPoints: 43200
              refId: redis_queue_growing_faster_than_insertion_rate
              type: math
            queryType: ''
            refId: redis_queue_growing_faster_than_insertion_rate
            relativeTimeRange:
              from: 0
              to: 0
        for: 60s
        noDataState: OK
        panelId: 1
        title: redis_queue_too_large_to_clear_in_2_min
        uid: redis_queue_too_large_to_clear_in_2_min

      # ---------------------------------------------------------------------
      # Rule 3: fire when the average remaining capacity of the instance is
      # below 20 for 30 minutes.
      # ---------------------------------------------------------------------
      - condition: if_capacity_is_too_low
        dashboardUid: awx
        # camelCase keys, matching the other rules in this group.
        noDataState: OK
        execErrState: Error
        data:
          # Remaining capacity reported for the instance (30-minute window).
          - refId: remaining_capacity
            queryType: ''
            relativeTimeRange:
              from: 1800
              to: 0
            datasourceUid: awx_prometheus
            model:
              editorMode: builder
              expr: awx_instance_remaining_capacity{instance="awx1:8013"}
              hide: false
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: true
              refId: remaining_capacity

          # Alert condition (classic condition): avg(remaining_capacity) < 20.
          # Here the `conditions` list IS the evaluated logic.
          - refId: if_capacity_is_too_low
            queryType: ''
            relativeTimeRange:
              from: 0
              to: 0
            datasourceUid: '-100'
            model:
              conditions:
                - evaluator:
                    params:
                      - 20
                      - 0
                    type: lt
                  operator:
                    type: when
                  query:
                    params:
                      - remaining_capacity
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: remaining_capacity
              hide: false
              intervalMs: 1000
              maxDataPoints: 43200
              refId: if_capacity_is_too_low
              type: classic_conditions
        for: 30m
        title: if_capacity_is_too_low
        uid: if_capacity_is_too_low