mirror of
https://github.com/ansible/awx.git
synced 2026-01-09 23:12:08 -03:30
Centralized logging via otel
This commit is contained in:
parent
d0fe0ed796
commit
0eb465531c
@ -11,6 +11,8 @@ ignore: |
|
|||||||
# django template files
|
# django template files
|
||||||
awx/api/templates/instance_install_bundle/**
|
awx/api/templates/instance_install_bundle/**
|
||||||
.readthedocs.yaml
|
.readthedocs.yaml
|
||||||
|
tools/loki
|
||||||
|
tools/otel
|
||||||
|
|
||||||
extends: default
|
extends: default
|
||||||
|
|
||||||
|
|||||||
6
Makefile
6
Makefile
@ -47,6 +47,10 @@ VAULT ?= false
|
|||||||
VAULT_TLS ?= false
|
VAULT_TLS ?= false
|
||||||
# If set to true docker-compose will also start a tacacs+ instance
|
# If set to true docker-compose will also start a tacacs+ instance
|
||||||
TACACS ?= false
|
TACACS ?= false
|
||||||
|
# If set to true docker-compose will also start an OpenTelemetry Collector instance
|
||||||
|
OTEL ?= false
|
||||||
|
# If set to true docker-compose will also start a Loki instance
|
||||||
|
LOKI ?= false
|
||||||
# If set to true docker-compose will install editable dependencies
|
# If set to true docker-compose will install editable dependencies
|
||||||
EDITABLE_DEPENDENCIES ?= false
|
EDITABLE_DEPENDENCIES ?= false
|
||||||
|
|
||||||
@ -535,6 +539,8 @@ docker-compose-sources: .git/hooks/pre-commit
|
|||||||
-e enable_vault=$(VAULT) \
|
-e enable_vault=$(VAULT) \
|
||||||
-e vault_tls=$(VAULT_TLS) \
|
-e vault_tls=$(VAULT_TLS) \
|
||||||
-e enable_tacacs=$(TACACS) \
|
-e enable_tacacs=$(TACACS) \
|
||||||
|
-e enable_otel=$(OTEL) \
|
||||||
|
-e enable_loki=$(LOKI) \
|
||||||
-e install_editable_dependencies=$(EDITABLE_DEPENDENCIES) \
|
-e install_editable_dependencies=$(EDITABLE_DEPENDENCIES) \
|
||||||
$(EXTRA_SOURCES_ANSIBLE_OPTS)
|
$(EXTRA_SOURCES_ANSIBLE_OPTS)
|
||||||
|
|
||||||
|
|||||||
@ -2,9 +2,11 @@
|
|||||||
# All Rights Reserved.
|
# All Rights Reserved.
|
||||||
|
|
||||||
# Python
|
# Python
|
||||||
|
import base64
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
|
import os
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
# Django
|
# Django
|
||||||
@ -15,6 +17,15 @@ from django.utils.encoding import force_str
|
|||||||
# AWX
|
# AWX
|
||||||
from awx.main.exceptions import PostRunError
|
from awx.main.exceptions import PostRunError
|
||||||
|
|
||||||
|
# OTEL
|
||||||
|
from opentelemetry._logs import set_logger_provider
|
||||||
|
from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter as OTLPGrpcLogExporter
|
||||||
|
from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter as OTLPHttpLogExporter
|
||||||
|
|
||||||
|
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
|
||||||
|
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
|
||||||
|
from opentelemetry.sdk.resources import Resource
|
||||||
|
|
||||||
|
|
||||||
class RSysLogHandler(logging.handlers.SysLogHandler):
|
class RSysLogHandler(logging.handlers.SysLogHandler):
|
||||||
append_nul = False
|
append_nul = False
|
||||||
@ -133,3 +144,39 @@ if settings.COLOR_LOGS is True:
|
|||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
ColorHandler = logging.StreamHandler
|
ColorHandler = logging.StreamHandler
|
||||||
|
|
||||||
|
|
||||||
|
class OTLPHandler(LoggingHandler):
    """Logging handler that exports log records to an OTLP endpoint.

    On construction the handler builds a ``LoggerProvider`` tagged with the
    service name and instance id, registers it via ``set_logger_provider``,
    and attaches a ``BatchLogRecordProcessor`` wrapping either the gRPC or the
    HTTP OTLP log exporter.

    Parameters:
        endpoint: URL of the OTLP receiver. Required.
        protocol: ``'grpc'`` (default) or ``'http'``; selects the exporter.
        service_name: Value for the ``service.name`` resource attribute.
            Defaults to ``sys.argv[1]`` when present, otherwise ``sys.argv[0]``,
            otherwise ``'unknown_service'``.
        instance_id: Value for the ``service.instance.id`` resource attribute.
            Defaults to ``os.uname().nodename``.
        auth: Set to ``'basic'`` to send an HTTP Basic authorization header.
        username: User name for basic auth.
        password: Password for basic auth.

    Raises:
        ValueError: if ``endpoint`` is empty, if ``auth == 'basic'`` without
            both ``username`` and ``password``, or if ``protocol`` is not one
            of the supported values.
    """

    SUPPORTED_PROTOCOLS = ('grpc', 'http')

    def __init__(self, endpoint=None, protocol='grpc', service_name=None, instance_id=None, auth=None, username=None, password=None):
        if not endpoint:
            raise ValueError("endpoint required")

        if auth == 'basic' and (username is None or password is None):
            raise ValueError("auth type basic requires username and password parameters")

        # Validate before touching any global state: without this check an
        # unknown protocol would leave the exporter unassigned and fail later
        # with an UnboundLocalError, after the global provider was replaced.
        if protocol not in self.SUPPORTED_PROTOCOLS:
            raise ValueError(f"unsupported protocol {protocol!r}, expected one of {self.SUPPORTED_PROTOCOLS}")

        self.endpoint = endpoint
        self.service_name = service_name or (sys.argv[1] if len(sys.argv) > 1 else (sys.argv[0] or 'unknown_service'))
        self.instance_id = instance_id or os.uname().nodename

        logger_provider = LoggerProvider(
            resource=Resource.create(
                {
                    "service.name": self.service_name,
                    "service.instance.id": self.instance_id,
                }
            ),
        )
        set_logger_provider(logger_provider)

        headers = {}
        if auth == 'basic':
            secret = f'{username}:{password}'
            # Lowercase key: gRPC metadata keys must be lowercase, and HTTP
            # header names are case-insensitive, so this works for both exporters.
            headers['authorization'] = "Basic " + base64.b64encode(secret.encode()).decode()

        if protocol == 'grpc':
            # NOTE(review): insecure=True is hard-coded, so the gRPC channel is
            # always created without TLS -- confirm this is intended outside dev.
            otlp_exporter = OTLPGrpcLogExporter(endpoint=self.endpoint, insecure=True, headers=headers)
        else:  # 'http' (guaranteed by the protocol validation above)
            otlp_exporter = OTLPHttpLogExporter(endpoint=self.endpoint, headers=headers)
        logger_provider.add_log_record_processor(BatchLogRecordProcessor(otlp_exporter))

        # NOTSET so level filtering is left to the logging configuration.
        super().__init__(level=logging.NOTSET, logger_provider=logger_provider)
|
||||||
|
|||||||
@ -880,6 +880,7 @@ LOGGING = {
|
|||||||
'address': '/var/run/awx-rsyslog/rsyslog.sock',
|
'address': '/var/run/awx-rsyslog/rsyslog.sock',
|
||||||
'filters': ['external_log_enabled', 'dynamic_level_filter', 'guid'],
|
'filters': ['external_log_enabled', 'dynamic_level_filter', 'guid'],
|
||||||
},
|
},
|
||||||
|
'otel': {'class': 'logging.NullHandler'},
|
||||||
},
|
},
|
||||||
'loggers': {
|
'loggers': {
|
||||||
'django': {'handlers': ['console']},
|
'django': {'handlers': ['console']},
|
||||||
|
|||||||
@ -30,3 +30,9 @@ pip>=21.3 # PEP 660 – Editable installs for pyproject.toml based builds (wheel
|
|||||||
debugpy
|
debugpy
|
||||||
remote-pdb
|
remote-pdb
|
||||||
sdb
|
sdb
|
||||||
|
|
||||||
|
# OTEL
|
||||||
|
opentelemetry-api==1.24.0
|
||||||
|
opentelemetry-sdk==1.24.0
|
||||||
|
opentelemetry-instrumentation-logging
|
||||||
|
opentelemetry-exporter-otlp
|
||||||
|
|||||||
@ -613,3 +613,13 @@ docker exec -it -e VAULT_TOKEN=<token> tools_vault_1 vault kv get --address=http
|
|||||||
### Prometheus and Grafana integration
|
### Prometheus and Grafana integration
|
||||||
|
|
||||||
See docs at https://github.com/ansible/awx/blob/devel/tools/grafana/README.md
|
See docs at https://github.com/ansible/awx/blob/devel/tools/grafana/README.md
|
||||||
|
|
||||||
|
### OpenTelemetry Integration
|
||||||
|
|
||||||
|
```bash
|
||||||
|
OTEL=true GRAFANA=true LOKI=true PROMETHEUS=true make docker-compose
|
||||||
|
```
|
||||||
|
|
||||||
|
This will start the sidecar container `tools_otel_1` and configure AWX logging to send its logs to it. The OpenTelemetry Collector is configured to export logs to Loki, and Grafana is configured with Loki as a datasource, so AWX logs can be viewed in Grafana.
|
||||||
|
|
||||||
|
Grafana is available at `http://localhost:3001`.
|
||||||
|
|||||||
@ -269,6 +269,42 @@ services:
|
|||||||
# pg_notify will NOT work in transaction mode.
|
# pg_notify will NOT work in transaction mode.
|
||||||
PGBOUNCER_POOL_MODE: session
|
PGBOUNCER_POOL_MODE: session
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
{% if enable_otel|bool %}
|
||||||
|
otel:
|
||||||
|
image: otel/opentelemetry-collector-contrib:0.88.0
|
||||||
|
container_name: tools_otel_1
|
||||||
|
hostname: otel
|
||||||
|
command: ["--config=/etc/otel-collector-config.yaml", ""]
|
||||||
|
networks:
|
||||||
|
- awx
|
||||||
|
ports:
|
||||||
|
- "4317:4317" # OTLP gRPC receiver
|
||||||
|
- "4318:4318" # OTLP http receiver
|
||||||
|
- "55679:55679" # zpages http://localhost:55679/debug/servicez /tracez
|
||||||
|
volumes:
|
||||||
|
- "../../otel/otel-collector-config.yaml:/etc/otel-collector-config.yaml"
|
||||||
|
depends_on:
|
||||||
|
- loki
|
||||||
|
{% endif %}
|
||||||
|
{% if enable_loki|bool %}
|
||||||
|
loki:
|
||||||
|
image: grafana/loki:2.9.5
|
||||||
|
container_name: tools_loki_1
|
||||||
|
hostname: loki
|
||||||
|
ports:
|
||||||
|
- "3100:3100"
|
||||||
|
command: -config.file=/etc/loki/local-config.yaml
|
||||||
|
networks:
|
||||||
|
- awx
|
||||||
|
volumes:
|
||||||
|
- "loki_storage:/loki:rw"
|
||||||
|
#- "../../docker-compose/loki/volumes/index:/loki/index"
|
||||||
|
#- "../../docker-compose/loki/volumes/boltdb-cache:/loki/boltdb-cache"
|
||||||
|
- "../../loki/local-config.yaml:/etc/loki/local-config.yaml"
|
||||||
|
depends_on:
|
||||||
|
- grafana
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
{% if execution_node_count|int > 0 %}
|
{% if execution_node_count|int > 0 %}
|
||||||
receptor-hop:
|
receptor-hop:
|
||||||
image: {{ receptor_image }}
|
image: {{ receptor_image }}
|
||||||
@ -360,6 +396,10 @@ volumes:
|
|||||||
grafana_storage:
|
grafana_storage:
|
||||||
name: tools_grafana_storage
|
name: tools_grafana_storage
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
{% if enable_loki|bool %}
|
||||||
|
loki_storage:
|
||||||
|
name: tools_loki_storage
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
awx:
|
awx:
|
||||||
|
|||||||
@ -46,6 +46,18 @@ OPTIONAL_API_URLPATTERN_PREFIX = '{{ api_urlpattern_prefix }}'
|
|||||||
# LOGGING['loggers']['django_auth_ldap']['handlers'] = ['console']
|
# LOGGING['loggers']['django_auth_ldap']['handlers'] = ['console']
|
||||||
# LOGGING['loggers']['django_auth_ldap']['level'] = 'DEBUG'
|
# LOGGING['loggers']['django_auth_ldap']['level'] = 'DEBUG'
|
||||||
|
|
||||||
|
{% if enable_otel|bool %}
|
||||||
|
# Point the 'otel' handler entry at the OTLP handler and the collector sidecar.
LOGGING['handlers']['otel'].update(
    {
        'class': 'awx.main.utils.handlers.OTLPHandler',
        'endpoint': 'http://otel:4317',
    }
)
# Attach the otel handler to every logger that declares a handler list.
# Loggers without a 'handlers' key are left unchanged: the fallback list
# returned by .get() is a throwaway and is never stored back.
for logger_config in LOGGING['loggers'].values():
    attached = logger_config.get('handlers', [])
    if 'otel' in attached:
        continue
    attached.append('otel')
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
BROADCAST_WEBSOCKET_PORT = 8013
|
BROADCAST_WEBSOCKET_PORT = 8013
|
||||||
BROADCAST_WEBSOCKET_VERIFY_CERT = False
|
BROADCAST_WEBSOCKET_VERIFY_CERT = False
|
||||||
BROADCAST_WEBSOCKET_PROTOCOL = 'http'
|
BROADCAST_WEBSOCKET_PROTOCOL = 'http'
|
||||||
|
|||||||
11
tools/grafana/datasources/loki_source.yml
Normal file
11
tools/grafana/datasources/loki_source.yml
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
---
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
datasources:
|
||||||
|
- name: Loki
|
||||||
|
type: loki
|
||||||
|
access: proxy
|
||||||
|
url: http://loki:3100
|
||||||
|
jsonData:
|
||||||
|
timeout: 60
|
||||||
|
maxLines: 100000
|
||||||
96
tools/loki/local-config.yaml
Normal file
96
tools/loki/local-config.yaml
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
auth_enabled: false
|
||||||
|
|
||||||
|
server:
|
||||||
|
http_listen_port: 3100
|
||||||
|
grpc_server_max_recv_msg_size: 524288000 # 500 MB
|
||||||
|
grpc_server_max_send_msg_size: 524288000 # 500 MB, might be too much, be careful
|
||||||
|
|
||||||
|
frontend_worker:
|
||||||
|
match_max_concurrent: true
|
||||||
|
grpc_client_config:
|
||||||
|
max_send_msg_size: 524288000 # 500 MB
|
||||||
|
|
||||||
|
|
||||||
|
ingester:
|
||||||
|
max_chunk_age: 8766h
|
||||||
|
|
||||||
|
common:
|
||||||
|
path_prefix: /loki
|
||||||
|
storage:
|
||||||
|
filesystem:
|
||||||
|
chunks_directory: /loki/chunks
|
||||||
|
rules_directory: /loki/rules
|
||||||
|
replication_factor: 1
|
||||||
|
ring:
|
||||||
|
kvstore:
|
||||||
|
store: inmemory
|
||||||
|
|
||||||
|
# compactor:
|
||||||
|
# retention_enabled: true
|
||||||
|
# # cmeyers: YOLO. 1s seems wrong but it works so right
|
||||||
|
# compaction_interval: 1s # default 10m
|
||||||
|
|
||||||
|
schema_config:
|
||||||
|
configs:
|
||||||
|
- from: 2020-10-24
|
||||||
|
store: boltdb-shipper
|
||||||
|
object_store: filesystem
|
||||||
|
schema: v11
|
||||||
|
index:
|
||||||
|
prefix: index_
|
||||||
|
period: 24h
|
||||||
|
|
||||||
|
storage_config:
|
||||||
|
boltdb_shipper:
|
||||||
|
active_index_directory: /loki/index
|
||||||
|
cache_location: /loki/boltdb-cache
|
||||||
|
|
||||||
|
ruler:
|
||||||
|
alertmanager_url: http://localhost:9093
|
||||||
|
|
||||||
|
limits_config:
|
||||||
|
retention_period: 3y
|
||||||
|
# cmeyers: The default of 30m triggers a loop of queries that take a long time
|
||||||
|
# to complete and the UI times out
|
||||||
|
split_queries_by_interval: 1d
|
||||||
|
# cmeyers: Default of 30d1h limits grafana time queries. Can't, for example,
|
||||||
|
# query last 90 days
|
||||||
|
max_query_length: 3y
|
||||||
|
# cmeyers: Made the batch post request succeed.
|
||||||
|
reject_old_samples: false
|
||||||
|
reject_old_samples_max_age: 365d
|
||||||
|
|
||||||
|
ingestion_rate_mb: 32
|
||||||
|
ingestion_burst_size_mb: 32
|
||||||
|
per_stream_rate_limit: 32M
|
||||||
|
per_stream_rate_limit_burst: 32M
|
||||||
|
ingestion_rate_strategy: local # Default: global
|
||||||
|
max_global_streams_per_user: 100000000
|
||||||
|
max_entries_limit_per_query: 100000000
|
||||||
|
max_query_series: 1000000
|
||||||
|
max_query_parallelism: 32 # Old Default: 14
|
||||||
|
max_streams_per_user: 100000000 # Old Default: 10000
|
||||||
|
|
||||||
|
# Taken from aap-log-visualizer
|
||||||
|
frontend:
|
||||||
|
max_outstanding_per_tenant: 2048
|
||||||
|
|
||||||
|
query_scheduler:
|
||||||
|
max_outstanding_requests_per_tenant: 2048
|
||||||
|
|
||||||
|
query_range:
|
||||||
|
parallelise_shardable_queries: false
|
||||||
|
split_queries_by_interval: 0
|
||||||
|
|
||||||
|
# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration
|
||||||
|
# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/
|
||||||
|
#
|
||||||
|
# Statistics help us better understand how Loki is used, and they show us performance
|
||||||
|
# levels for most users. This helps us prioritize features and documentation.
|
||||||
|
# For more information on what's sent, look at
|
||||||
|
# https://github.com/grafana/loki/blob/main/pkg/usagestats/stats.go
|
||||||
|
# Refer to the buildReport method to see what goes into a report.
|
||||||
|
#
|
||||||
|
# If you would like to disable reporting, uncomment the following lines:
|
||||||
|
#analytics:
|
||||||
|
# reporting_enabled: false
|
||||||
39
tools/otel/otel-collector-config.yaml
Normal file
39
tools/otel/otel-collector-config.yaml
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
receivers:
|
||||||
|
otlp:
|
||||||
|
protocols:
|
||||||
|
grpc:
|
||||||
|
|
||||||
|
exporters:
|
||||||
|
debug:
|
||||||
|
verbosity: detailed
|
||||||
|
|
||||||
|
loki:
|
||||||
|
endpoint: http://loki:3100/loki/api/v1/push
|
||||||
|
tls:
|
||||||
|
insecure: true
|
||||||
|
headers:
|
||||||
|
"X-Scope-OrgID": "1"
|
||||||
|
default_labels_enabled:
|
||||||
|
exporter: true
|
||||||
|
job: true
|
||||||
|
instance: true
|
||||||
|
level: true
|
||||||
|
|
||||||
|
processors:
|
||||||
|
batch:
|
||||||
|
|
||||||
|
extensions:
|
||||||
|
health_check:
|
||||||
|
zpages:
|
||||||
|
endpoint: ":55679"
|
||||||
|
|
||||||
|
service:
|
||||||
|
pipelines:
|
||||||
|
logs:
|
||||||
|
receivers: [otlp]
|
||||||
|
processors: [batch]
|
||||||
|
exporters: [loki]
|
||||||
|
|
||||||
|
extensions:
|
||||||
|
- health_check
|
||||||
|
- zpages
|
||||||
Loading…
x
Reference in New Issue
Block a user