mirror of
https://github.com/ansible/awx.git
synced 2026-05-06 17:07:36 -02:30
Remove ORDER BY from partitioned events query
* The ORDER BY results in an in-memory sort that COULD exceed the Postgres worker memory buffer, forcing the sort to take place on disk.
* This WILL happen with the default 4MB Postgres worker memory buffer; we saw as much as 20MB used. Note that AWX defaults the Postgres worker memory buffer to 3% of the DB memory on external installs and 1% on same-node installs, so for a 16GB remote DB this would not be a problem.
* We are going to avoid this problem altogether by NOT sorting when gathering. Instead, we will sort remotely, in analytics.
This commit is contained in:
@@ -356,37 +356,37 @@ def _copy_table(table, query, path):
|
|||||||
return file.file_list()
|
return file.file_list()
|
||||||
|
|
||||||
|
|
||||||
def _events_table(since, full_path, until, tbl, project_job_created=False, **kwargs):
|
def _events_table(since, full_path, until, tbl, project_job_created=False, order_by=True, **kwargs):
|
||||||
def query(event_data):
|
def query(event_data):
|
||||||
# TODO: conditional job_created based on if the column exists or not in the table
|
query = f'''COPY (SELECT {tbl}.id,
|
||||||
# {tbl}.job_created,
|
{tbl}.created,
|
||||||
|
{tbl}.modified,
|
||||||
return f'''COPY (SELECT {tbl}.id,
|
{tbl + '.job_created' if project_job_created else 'NULL'} as job_created,
|
||||||
{tbl}.created,
|
{tbl}.uuid,
|
||||||
{tbl}.modified,
|
{tbl}.parent_uuid,
|
||||||
{tbl + '.job_created' if project_job_created else 'NULL'} as job_created,
|
{tbl}.event,
|
||||||
{tbl}.uuid,
|
task_action,
|
||||||
{tbl}.parent_uuid,
|
(CASE WHEN event = 'playbook_on_stats' THEN event_data END) as playbook_on_stats,
|
||||||
{tbl}.event,
|
{tbl}.failed,
|
||||||
task_action,
|
{tbl}.changed,
|
||||||
(CASE WHEN event = 'playbook_on_stats' THEN event_data END) as playbook_on_stats,
|
{tbl}.playbook,
|
||||||
{tbl}.failed,
|
{tbl}.play,
|
||||||
{tbl}.changed,
|
{tbl}.task,
|
||||||
{tbl}.playbook,
|
{tbl}.role,
|
||||||
{tbl}.play,
|
{tbl}.job_id,
|
||||||
{tbl}.task,
|
{tbl}.host_id,
|
||||||
{tbl}.role,
|
{tbl}.host_name,
|
||||||
{tbl}.job_id,
|
CAST(x.start AS TIMESTAMP WITH TIME ZONE) AS start,
|
||||||
{tbl}.host_id,
|
CAST(x.end AS TIMESTAMP WITH TIME ZONE) AS end,
|
||||||
{tbl}.host_name,
|
x.duration AS duration,
|
||||||
CAST(x.start AS TIMESTAMP WITH TIME ZONE) AS start,
|
x.res->'warnings' AS warnings,
|
||||||
CAST(x.end AS TIMESTAMP WITH TIME ZONE) AS end,
|
x.res->'deprecations' AS deprecations
|
||||||
x.duration AS duration,
|
FROM {tbl}, json_to_record({event_data}) AS x("res" json, "duration" text, "task_action" text, "start" text, "end" text)
|
||||||
x.res->'warnings' AS warnings,
|
WHERE ({tbl}.id > {since} AND {tbl}.id <= {until})'''
|
||||||
x.res->'deprecations' AS deprecations
|
if order_by:
|
||||||
FROM {tbl}, json_to_record({event_data}) AS x("res" json, "duration" text, "task_action" text, "start" text, "end" text)
|
query += f' ORDER BY {tbl}.id ASC'
|
||||||
WHERE ({tbl}.id > {since} AND {tbl}.id <= {until})
|
query += ') TO STDOUT WITH CSV HEADER'
|
||||||
ORDER BY {tbl}.id ASC) TO STDOUT WITH CSV HEADER'''
|
return query
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return _copy_table(table='events', query=query(f"{tbl}.event_data::json"), path=full_path)
|
return _copy_table(table='events', query=query(f"{tbl}.event_data::json"), path=full_path)
|
||||||
@@ -401,7 +401,7 @@ def events_table_unpartitioned(since, full_path, until, **kwargs):
|
|||||||
|
|
||||||
@register('events_table', '1.3', format='csv', description=_('Automation task records'), expensive=events_slicing_partitioned_modified)
|
@register('events_table', '1.3', format='csv', description=_('Automation task records'), expensive=events_slicing_partitioned_modified)
|
||||||
def events_table_partitioned_modified(since, full_path, until, **kwargs):
|
def events_table_partitioned_modified(since, full_path, until, **kwargs):
|
||||||
return _events_table(since, full_path, until, 'main_jobevent', project_job_created=True, **kwargs)
|
return _events_table(since, full_path, until, 'main_jobevent', project_job_created=True, order_by=False, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
@register('unified_jobs_table', '1.2', format='csv', description=_('Data on jobs run'), expensive=four_hour_slicing)
|
@register('unified_jobs_table', '1.2', format='csv', description=_('Data on jobs run'), expensive=four_hour_slicing)
|
||||||
|
|||||||
Reference in New Issue
Block a user