mirror of
https://github.com/ansible/awx.git
synced 2026-02-01 09:38:10 -03:30
Centralized logging via otel
This commit is contained in:
committed by
Chris Meyers
parent
d0fe0ed796
commit
0eb465531c
@@ -613,3 +613,13 @@ docker exec -it -e VAULT_TOKEN=<token> tools_vault_1 vault kv get --address=http
|
||||
### Prometheus and Grafana integration
|
||||
|
||||
See docs at https://github.com/ansible/awx/blob/devel/tools/grafana/README.md
|
||||
|
||||
### OpenTelemetry Integration
|
||||
|
||||
```bash
|
||||
OTEL=true GRAFANA=true LOKI=true PROMETHEUS=true make docker-compose
|
||||
```
|
||||
|
||||
This starts the `tools_otel_1` sidecar container and configures AWX to send its logs to it. The OpenTelemetry Collector exports those logs to Loki, and Grafana is configured with Loki as a datasource, so AWX logs can be viewed in Grafana.
|
||||
|
||||
Grafana is available at `http://localhost:3001`.
|
||||
|
||||
@@ -269,6 +269,42 @@ services:
|
||||
# pg_notify will NOT work in transaction mode.
|
||||
PGBOUNCER_POOL_MODE: session
|
||||
{% endif %}
|
||||
{% if enable_otel|bool %}
  # OpenTelemetry Collector sidecar. Its configuration is mounted from
  # tools/otel/otel-collector-config.yaml.
  otel:
    image: otel/opentelemetry-collector-contrib:0.88.0
    container_name: tools_otel_1
    hostname: otel
    # Only the config flag is passed; the former trailing empty-string
    # argument carried no information and has been dropped.
    command: ["--config=/etc/otel-collector-config.yaml"]
    networks:
      - awx
    ports:
      - "4317:4317"  # OTLP gRPC receiver
      - "4318:4318"  # OTLP http receiver
      - "55679:55679"  # zpages http://localhost:55679/debug/servicez /tracez
    volumes:
      - "../../otel/otel-collector-config.yaml:/etc/otel-collector-config.yaml"
{% if enable_loki|bool %}
    # The loki service is only rendered when enable_loki is true, so the
    # dependency is guarded the same way; otherwise the compose file would
    # reference an undefined service.
    depends_on:
      - loki
{% endif %}
{% endif %}
|
||||
{% if enable_loki|bool %}
  # Loki log store, reachable on the awx network as http://loki:3100.
  loki:
    image: grafana/loki:2.9.5
    container_name: tools_loki_1
    hostname: loki
    command: '-config.file=/etc/loki/local-config.yaml'
    networks:
      - awx
    ports:
      - "3100:3100"
    volumes:
      # Named volume holding Loki's data directory.
      - "loki_storage:/loki:rw"
      # - "../../docker-compose/loki/volumes/index:/loki/index"
      # - "../../docker-compose/loki/volumes/boltdb-cache:/loki/boltdb-cache"
      # Configuration file passed to Loki via -config.file above.
      - "../../loki/local-config.yaml:/etc/loki/local-config.yaml"
    # NOTE(review): this presumably requires the grafana service to be
    # enabled together with loki - confirm against the grafana service guard.
    depends_on:
      - grafana
{% endif %}
|
||||
|
||||
{% if execution_node_count|int > 0 %}
|
||||
receptor-hop:
|
||||
image: {{ receptor_image }}
|
||||
@@ -360,6 +396,10 @@ volumes:
|
||||
grafana_storage:
|
||||
name: tools_grafana_storage
|
||||
{% endif %}
|
||||
{% if enable_loki|bool %}
  # Named volume mounted at /loki by the loki service.
  loki_storage:
    name: tools_loki_storage
{% endif %}
|
||||
|
||||
networks:
|
||||
awx:
|
||||
|
||||
@@ -46,6 +46,18 @@ OPTIONAL_API_URLPATTERN_PREFIX = '{{ api_urlpattern_prefix }}'
|
||||
# LOGGING['loggers']['django_auth_ldap']['handlers'] = ['console']
|
||||
# LOGGING['loggers']['django_auth_ldap']['level'] = 'DEBUG'
|
||||
|
||||
{% if enable_otel|bool %}
|
||||
LOGGING['handlers']['otel'] |= {
|
||||
'class': 'awx.main.utils.handlers.OTLPHandler',
|
||||
'endpoint': 'http://otel:4317',
|
||||
}
|
||||
# Add otel log handler to all log handlers
|
||||
for name in LOGGING['loggers'].keys():
|
||||
handler = LOGGING['loggers'][name].get('handlers', [])
|
||||
if 'otel' not in handler:
|
||||
LOGGING['loggers'][name].get('handlers', []).append('otel')
|
||||
{% endif %}
|
||||
|
||||
BROADCAST_WEBSOCKET_PORT = 8013
|
||||
BROADCAST_WEBSOCKET_VERIFY_CERT = False
|
||||
BROADCAST_WEBSOCKET_PROTOCOL = 'http'
|
||||
|
||||
11
tools/grafana/datasources/loki_source.yml
Normal file
11
tools/grafana/datasources/loki_source.yml
Normal file
@@ -0,0 +1,11 @@
|
||||
---
# Grafana datasource provisioning file: registers a datasource named "Loki"
# that Grafana reaches through its backend (proxy access).
apiVersion: 1

datasources:
  - name: Loki
    type: loki
    access: proxy
    # Address of the Loki HTTP API.
    url: http://loki:3100
    jsonData:
      # NOTE(review): presumably the request timeout in seconds - confirm
      # against the Grafana Loki datasource documentation.
      timeout: 60
      # NOTE(review): presumably the cap on log lines returned per query -
      # confirm against the Grafana Loki datasource documentation.
      maxLines: 100000
|
||||
96
tools/loki/local-config.yaml
Normal file
96
tools/loki/local-config.yaml
Normal file
@@ -0,0 +1,96 @@
|
||||
---
# Loki configuration for the AWX development environment.
auth_enabled: false

# HTTP listener plus raised gRPC message size limits.
server:
  http_listen_port: 3100
  grpc_server_max_recv_msg_size: 524288000  # 500 MiB (500 * 1024 * 1024)
  # 500 MiB, might be too much, be careful
  grpc_server_max_send_msg_size: 524288000

frontend_worker:
  match_max_concurrent: true
  grpc_client_config:
    max_send_msg_size: 524288000  # 500 MiB

ingester:
  max_chunk_age: 8766h

# Everything is stored on the local filesystem under /loki.
common:
  path_prefix: /loki
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory

# compactor:
#   retention_enabled: true
#   # cmeyers: YOLO. 1s seems wrong but it works so right
#   compaction_interval: 1s  # default 10m

schema_config:
  configs:
    - from: 2020-10-24
      store: boltdb-shipper
      object_store: filesystem
      schema: v11
      index:
        prefix: index_
        period: 24h

storage_config:
  boltdb_shipper:
    active_index_directory: /loki/index
    cache_location: /loki/boltdb-cache

ruler:
  alertmanager_url: http://localhost:9093

limits_config:
  retention_period: 3y
  # cmeyers: The default of 30m triggers a loop of queries that take a long
  # time to complete and the UI times out
  split_queries_by_interval: 1d
  # cmeyers: Default of 30d1h limits grafana time queries. Can't, for example,
  # query last 90 days
  max_query_length: 3y
  # cmeyers: Made the batch post request succeed.
  reject_old_samples: false
  # NOTE(review): presumably has no effect while reject_old_samples is
  # false - confirm.
  reject_old_samples_max_age: 365d

  ingestion_rate_mb: 32
  ingestion_burst_size_mb: 32
  per_stream_rate_limit: 32M
  per_stream_rate_limit_burst: 32M
  ingestion_rate_strategy: local  # Default: global
  max_global_streams_per_user: 100000000
  max_entries_limit_per_query: 100000000
  max_query_series: 1000000
  max_query_parallelism: 32  # Old Default: 14
  max_streams_per_user: 100000000  # Old Default: 10000

# Taken from aap-log-visualizer
frontend:
  max_outstanding_per_tenant: 2048

query_scheduler:
  max_outstanding_requests_per_tenant: 2048

query_range:
  parallelise_shardable_queries: false
  # NOTE(review): limits_config.split_queries_by_interval is set to 1d above;
  # confirm which of the two settings Loki 2.9.5 honors.
  split_queries_by_interval: 0

# By default, Loki will send anonymous, but uniquely-identifiable usage and
# configuration analytics to Grafana Labs. These statistics are sent to
# https://stats.grafana.org/
#
# Statistics help us better understand how Loki is used, and they show us
# performance levels for most users. This helps us prioritize features and
# documentation.
# For more information on what's sent, look at
# https://github.com/grafana/loki/blob/main/pkg/usagestats/stats.go
# Refer to the buildReport method to see what goes into a report.
#
# If you would like to disable reporting, uncomment the following lines:
# analytics:
#   reporting_enabled: false
|
||||
39
tools/otel/otel-collector-config.yaml
Normal file
39
tools/otel/otel-collector-config.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
---
# OpenTelemetry Collector configuration: receive logs over OTLP, batch them,
# and push them to Loki.
receivers:
  otlp:
    protocols:
      # An empty value enables the protocol with the receiver's defaults.
      grpc:
      # Enabled so that the published 4318 "OTLP http receiver" port of the
      # otel compose service actually has a listener behind it.
      http:

exporters:
  # Defined for troubleshooting but not referenced by any pipeline below; add
  # it to a pipeline's exporters list to use it.
  debug:
    verbosity: detailed

  loki:
    endpoint: http://loki:3100/loki/api/v1/push
    tls:
      insecure: true
    headers:
      "X-Scope-OrgID": "1"
    default_labels_enabled:
      exporter: true
      job: true
      instance: true
      level: true

processors:
  batch:

extensions:
  health_check:
  zpages:
    endpoint: ":55679"

service:
  pipelines:
    logs:
      receivers: [otlp]
      processors: [batch]
      exporters: [loki]

  extensions:
    - health_check
    - zpages
|
||||
Reference in New Issue
Block a user