summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMichael Paquier <michael@paquier.xyz>2022-04-08 11:27:21 +0900
committerMichael Paquier <michael@paquier.xyz>2022-04-08 11:27:21 +0900
commitefb0ef909f605817da6b77c1f3fef0a24457ec16 (patch)
treecc85a34ce3c1918a439f69e3e6f0bab1191cd486 /src
parentdafae9707ab7e7079ce1ba22cebda4557d0cbaf3 (diff)
Track I/O timing for temporary file blocks in EXPLAIN (BUFFERS)
Previously, the output of EXPLAIN (BUFFERS) option showed only the I/O timing spent reading and writing shared and local buffers. This commit adds on top of that the I/O timing for temporary buffers in the output of EXPLAIN (for spilled external sorts, hashes, materialization. etc). This can be helpful for users in cases where the I/O related to temporary buffers is the bottleneck. Like its cousin, this information is available only when track_io_timing is enabled. Playing the patch, this is showing an extra overhead of up to 1% even when using gettimeofday() as implementation for interval timings, which is slightly within the usual range noise still that's measurable. Author: Masahiko Sawada Reviewed-by: Georgios Kokolatos, Melanie Plageman, Julien Rouhaud, Ranier Vilela Discussion: https://postgr.es/m/CAD21AoAJgotTeP83p6HiAGDhs_9Fw9pZ2J=_tYTsiO5Ob-V5GQ@mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r--src/backend/commands/explain.c42
-rw-r--r--src/backend/executor/instrument.c6
-rw-r--r--src/backend/storage/file/buffile.c29
-rw-r--r--src/include/executor/instrument.h6
-rw-r--r--src/test/regress/expected/explain.out112
-rw-r--r--src/test/regress/sql/explain.sql7
6 files changed, 140 insertions, 62 deletions
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 512b41dc3a2..d2a24798220 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3521,8 +3521,11 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage, bool planning)
usage->temp_blks_written > 0);
bool has_timing = (!INSTR_TIME_IS_ZERO(usage->blk_read_time) ||
!INSTR_TIME_IS_ZERO(usage->blk_write_time));
+ bool has_temp_timing = (!INSTR_TIME_IS_ZERO(usage->temp_blk_read_time) ||
+ !INSTR_TIME_IS_ZERO(usage->temp_blk_write_time));
bool show_planning = (planning && (has_shared ||
- has_local || has_temp || has_timing));
+ has_local || has_temp || has_timing ||
+ has_temp_timing));
if (show_planning)
{
@@ -3587,16 +3590,33 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage, bool planning)
}
/* As above, show only positive counter values. */
- if (has_timing)
+ if (has_timing || has_temp_timing)
{
ExplainIndentText(es);
appendStringInfoString(es->str, "I/O Timings:");
- if (!INSTR_TIME_IS_ZERO(usage->blk_read_time))
- appendStringInfo(es->str, " read=%0.3f",
- INSTR_TIME_GET_MILLISEC(usage->blk_read_time));
- if (!INSTR_TIME_IS_ZERO(usage->blk_write_time))
- appendStringInfo(es->str, " write=%0.3f",
- INSTR_TIME_GET_MILLISEC(usage->blk_write_time));
+
+ if (has_timing)
+ {
+ appendStringInfoString(es->str, " shared/local");
+ if (!INSTR_TIME_IS_ZERO(usage->blk_read_time))
+ appendStringInfo(es->str, " read=%0.3f",
+ INSTR_TIME_GET_MILLISEC(usage->blk_read_time));
+ if (!INSTR_TIME_IS_ZERO(usage->blk_write_time))
+ appendStringInfo(es->str, " write=%0.3f",
+ INSTR_TIME_GET_MILLISEC(usage->blk_write_time));
+ if (has_temp_timing)
+ appendStringInfoChar(es->str, ',');
+ }
+ if (has_temp_timing)
+ {
+ appendStringInfoString(es->str, " temp");
+ if (!INSTR_TIME_IS_ZERO(usage->temp_blk_read_time))
+ appendStringInfo(es->str, " read=%0.3f",
+ INSTR_TIME_GET_MILLISEC(usage->temp_blk_read_time));
+ if (!INSTR_TIME_IS_ZERO(usage->temp_blk_write_time))
+ appendStringInfo(es->str, " write=%0.3f",
+ INSTR_TIME_GET_MILLISEC(usage->temp_blk_write_time));
+ }
appendStringInfoChar(es->str, '\n');
}
@@ -3633,6 +3653,12 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage, bool planning)
ExplainPropertyFloat("I/O Write Time", "ms",
INSTR_TIME_GET_MILLISEC(usage->blk_write_time),
3, es);
+ ExplainPropertyFloat("Temp I/O Read Time", "ms",
+ INSTR_TIME_GET_MILLISEC(usage->temp_blk_read_time),
+ 3, es);
+ ExplainPropertyFloat("Temp I/O Write Time", "ms",
+ INSTR_TIME_GET_MILLISEC(usage->temp_blk_write_time),
+ 3, es);
}
}
}
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index c5ff02a8424..ceff4727d4a 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -237,6 +237,8 @@ BufferUsageAdd(BufferUsage *dst, const BufferUsage *add)
dst->temp_blks_written += add->temp_blks_written;
INSTR_TIME_ADD(dst->blk_read_time, add->blk_read_time);
INSTR_TIME_ADD(dst->blk_write_time, add->blk_write_time);
+ INSTR_TIME_ADD(dst->temp_blk_read_time, add->temp_blk_read_time);
+ INSTR_TIME_ADD(dst->temp_blk_write_time, add->temp_blk_write_time);
}
/* dst += add - sub */
@@ -259,6 +261,10 @@ BufferUsageAccumDiff(BufferUsage *dst,
add->blk_read_time, sub->blk_read_time);
INSTR_TIME_ACCUM_DIFF(dst->blk_write_time,
add->blk_write_time, sub->blk_write_time);
+ INSTR_TIME_ACCUM_DIFF(dst->temp_blk_read_time,
+ add->temp_blk_read_time, sub->temp_blk_read_time);
+ INSTR_TIME_ACCUM_DIFF(dst->temp_blk_write_time,
+ add->temp_blk_write_time, sub->temp_blk_write_time);
}
/* helper functions for WAL usage accumulation */
diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c
index 3108e96dc92..56b88594cc8 100644
--- a/src/backend/storage/file/buffile.c
+++ b/src/backend/storage/file/buffile.c
@@ -429,6 +429,8 @@ static void
BufFileLoadBuffer(BufFile *file)
{
File thisfile;
+ instr_time io_start;
+ instr_time io_time;
/*
* Advance to next component file if necessary and possible.
@@ -440,10 +442,14 @@ BufFileLoadBuffer(BufFile *file)
file->curOffset = 0L;
}
+ thisfile = file->files[file->curFile];
+
+ if (track_io_timing)
+ INSTR_TIME_SET_CURRENT(io_start);
+
/*
* Read whatever we can get, up to a full bufferload.
*/
- thisfile = file->files[file->curFile];
file->nbytes = FileRead(thisfile,
file->buffer.data,
sizeof(file->buffer),
@@ -458,6 +464,13 @@ BufFileLoadBuffer(BufFile *file)
FilePathName(thisfile))));
}
+ if (track_io_timing)
+ {
+ INSTR_TIME_SET_CURRENT(io_time);
+ INSTR_TIME_SUBTRACT(io_time, io_start);
+ INSTR_TIME_ADD(pgBufferUsage.temp_blk_read_time, io_time);
+ }
+
/* we choose not to advance curOffset here */
if (file->nbytes > 0)
@@ -485,6 +498,8 @@ BufFileDumpBuffer(BufFile *file)
while (wpos < file->nbytes)
{
off_t availbytes;
+ instr_time io_start;
+ instr_time io_time;
/*
* Advance to next component file if necessary and possible.
@@ -507,6 +522,10 @@ BufFileDumpBuffer(BufFile *file)
bytestowrite = (int) availbytes;
thisfile = file->files[file->curFile];
+
+ if (track_io_timing)
+ INSTR_TIME_SET_CURRENT(io_start);
+
bytestowrite = FileWrite(thisfile,
file->buffer.data + wpos,
bytestowrite,
@@ -517,6 +536,14 @@ BufFileDumpBuffer(BufFile *file)
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m",
FilePathName(thisfile))));
+
+ if (track_io_timing)
+ {
+ INSTR_TIME_SET_CURRENT(io_time);
+ INSTR_TIME_SUBTRACT(io_time, io_start);
+ INSTR_TIME_ADD(pgBufferUsage.temp_blk_write_time, io_time);
+ }
+
file->curOffset += bytestowrite;
wpos += bytestowrite;
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index 1b7157bdd15..2945cce3a97 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -33,8 +33,10 @@ typedef struct BufferUsage
int64 local_blks_written; /* # of local disk blocks written */
int64 temp_blks_read; /* # of temp blocks read */
int64 temp_blks_written; /* # of temp blocks written */
- instr_time blk_read_time; /* time spent reading */
- instr_time blk_write_time; /* time spent writing */
+ instr_time blk_read_time; /* time spent reading blocks */
+ instr_time blk_write_time; /* time spent writing blocks */
+ instr_time temp_blk_read_time; /* time spent reading temp blocks */
+ instr_time temp_blk_write_time; /* time spent writing temp blocks */
} BufferUsage;
/*
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index bc361759219..48620edbc2b 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -85,56 +85,6 @@ select explain_filter('explain (analyze, buffers, format text) select * from int
Execution Time: N.N ms
(3 rows)
-select explain_filter('explain (analyze, buffers, format json) select * from int8_tbl i8');
- explain_filter
-------------------------------------
- [ +
- { +
- "Plan": { +
- "Node Type": "Seq Scan", +
- "Parallel Aware": false, +
- "Async Capable": false, +
- "Relation Name": "int8_tbl",+
- "Alias": "i8", +
- "Startup Cost": N.N, +
- "Total Cost": N.N, +
- "Plan Rows": N, +
- "Plan Width": N, +
- "Actual Startup Time": N.N, +
- "Actual Total Time": N.N, +
- "Actual Rows": N, +
- "Actual Loops": N, +
- "Shared Hit Blocks": N, +
- "Shared Read Blocks": N, +
- "Shared Dirtied Blocks": N, +
- "Shared Written Blocks": N, +
- "Local Hit Blocks": N, +
- "Local Read Blocks": N, +
- "Local Dirtied Blocks": N, +
- "Local Written Blocks": N, +
- "Temp Read Blocks": N, +
- "Temp Written Blocks": N +
- }, +
- "Planning": { +
- "Shared Hit Blocks": N, +
- "Shared Read Blocks": N, +
- "Shared Dirtied Blocks": N, +
- "Shared Written Blocks": N, +
- "Local Hit Blocks": N, +
- "Local Read Blocks": N, +
- "Local Dirtied Blocks": N, +
- "Local Written Blocks": N, +
- "Temp Read Blocks": N, +
- "Temp Written Blocks": N +
- }, +
- "Planning Time": N.N, +
- "Triggers": [ +
- ], +
- "Execution Time": N.N +
- } +
- ]
-(1 row)
-
select explain_filter('explain (analyze, buffers, format xml) select * from int8_tbl i8');
explain_filter
--------------------------------------------------------
@@ -276,6 +226,68 @@ select explain_filter('explain (buffers, format json) select * from int8_tbl i8'
]
(1 row)
+-- Check output including I/O timings. These fields are conditional
+-- but always set in JSON format, so check them only in this case.
+set track_io_timing = on;
+select explain_filter('explain (analyze, buffers, format json) select * from int8_tbl i8');
+ explain_filter
+------------------------------------
+ [ +
+ { +
+ "Plan": { +
+ "Node Type": "Seq Scan", +
+ "Parallel Aware": false, +
+ "Async Capable": false, +
+ "Relation Name": "int8_tbl",+
+ "Alias": "i8", +
+ "Startup Cost": N.N, +
+ "Total Cost": N.N, +
+ "Plan Rows": N, +
+ "Plan Width": N, +
+ "Actual Startup Time": N.N, +
+ "Actual Total Time": N.N, +
+ "Actual Rows": N, +
+ "Actual Loops": N, +
+ "Shared Hit Blocks": N, +
+ "Shared Read Blocks": N, +
+ "Shared Dirtied Blocks": N, +
+ "Shared Written Blocks": N, +
+ "Local Hit Blocks": N, +
+ "Local Read Blocks": N, +
+ "Local Dirtied Blocks": N, +
+ "Local Written Blocks": N, +
+ "Temp Read Blocks": N, +
+ "Temp Written Blocks": N, +
+ "I/O Read Time": N.N, +
+ "I/O Write Time": N.N, +
+ "Temp I/O Read Time": N.N, +
+ "Temp I/O Write Time": N.N +
+ }, +
+ "Planning": { +
+ "Shared Hit Blocks": N, +
+ "Shared Read Blocks": N, +
+ "Shared Dirtied Blocks": N, +
+ "Shared Written Blocks": N, +
+ "Local Hit Blocks": N, +
+ "Local Read Blocks": N, +
+ "Local Dirtied Blocks": N, +
+ "Local Written Blocks": N, +
+ "Temp Read Blocks": N, +
+ "Temp Written Blocks": N, +
+ "I/O Read Time": N.N, +
+ "I/O Write Time": N.N, +
+ "Temp I/O Read Time": N.N, +
+ "Temp I/O Write Time": N.N +
+ }, +
+ "Planning Time": N.N, +
+ "Triggers": [ +
+ ], +
+ "Execution Time": N.N +
+ } +
+ ]
+(1 row)
+
+set track_io_timing = off;
-- SETTINGS option
-- We have to ignore other settings that might be imposed by the environment,
-- so printing the whole Settings field unfortunately won't do.
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index 5069fa87957..ae3f7a308d7 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -65,12 +65,17 @@ select explain_filter('explain select * from int8_tbl i8');
select explain_filter('explain (analyze) select * from int8_tbl i8');
select explain_filter('explain (analyze, verbose) select * from int8_tbl i8');
select explain_filter('explain (analyze, buffers, format text) select * from int8_tbl i8');
-select explain_filter('explain (analyze, buffers, format json) select * from int8_tbl i8');
select explain_filter('explain (analyze, buffers, format xml) select * from int8_tbl i8');
select explain_filter('explain (analyze, buffers, format yaml) select * from int8_tbl i8');
select explain_filter('explain (buffers, format text) select * from int8_tbl i8');
select explain_filter('explain (buffers, format json) select * from int8_tbl i8');
+-- Check output including I/O timings. These fields are conditional
+-- but always set in JSON format, so check them only in this case.
+set track_io_timing = on;
+select explain_filter('explain (analyze, buffers, format json) select * from int8_tbl i8');
+set track_io_timing = off;
+
-- SETTINGS option
-- We have to ignore other settings that might be imposed by the environment,
-- so printing the whole Settings field unfortunately won't do.