Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ADBDEV-6156 Count startup memory of each process when using resource groups #1023

Open
wants to merge 18 commits into
base: adb-6.x-dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/backend/utils/mmgr/vmem_tracker.c
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,12 @@ VmemTracker_ReleaseVmem(int64 toBeFreedRequested)
}
}

/*
 * Report the current value of startupChunks, the chunk count this file
 * keeps for the startup memory registered with the vmem tracker.
 */
int32
VmemTracker_GetStartupChunks(void)
{
	int32		chunks = startupChunks;

	return chunks;
}

/*
* Register the startup memory to vmem tracker.
*
Expand Down Expand Up @@ -670,6 +676,8 @@ VmemTracker_RegisterStartupMemory(int64 bytes)
pg_atomic_add_fetch_u32((pg_atomic_uint32 *) segmentVmemChunks,
startupChunks);

ResGroupProcAddStartupChunks(startupChunks);

/*
* Step 2, check if an OOM error should be raised by allocating 0 chunk.
*/
Expand All @@ -692,6 +700,8 @@ VmemTracker_UnregisterStartupMemory(void)
pg_atomic_sub_fetch_u32((pg_atomic_uint32 *) &MySessionState->sessionVmem,
startupChunks);

ResGroupProcSubStartupChunks(startupChunks);

trackedBytes -= startupBytes;
trackedVmemChunks -= startupChunks;

Expand Down
24 changes: 24 additions & 0 deletions src/backend/utils/resgroup/resgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -1594,6 +1594,30 @@ selfDetachResGroup(ResGroupData *group, ResGroupSlotData *slot)
selfUnsetGroup();
}

/*
 * Account startup memory to this process before a resource group has been
 * assigned to it.  The same amount is expected to be added to the resource
 * group later, via selfAttachResGroup.
 *
 * chunks: number of startup chunks to add to self->memUsage.
 *
 * Does nothing when resource groups are not enabled.
 */
void
ResGroupProcAddStartupChunks(int32 chunks)
{
	/* Nothing to account when resource groups are switched off. */
	if (!IsResGroupEnabled())
		return;

	self->memUsage = self->memUsage + chunks;
}

/*
 * Subtract startup memory from this process at cleanup.  By this point the
 * memory should already have been subtracted from the resource group via
 * selfDetachResGroup.
 *
 * Strictly speaking this is not needed, because a running process always
 * has startup memory consumption; it is kept for symmetry with
 * ResGroupProcAddStartupChunks.
 *
 * chunks: number of startup chunks to remove from self->memUsage.
 *
 * Does nothing when resource groups are not enabled.
 */
void
ResGroupProcSubStartupChunks(int32 chunks)
{
	/* Nothing to account when resource groups are switched off. */
	if (!IsResGroupEnabled())
		return;

	self->memUsage = self->memUsage - chunks;
}

/*
* Initialize the members of a slot
*/
Expand Down
3 changes: 3 additions & 0 deletions src/include/utils/resgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,9 @@ extern Oid ResGroupGetGroupIdBySessionId(int sessionId);
extern char *getCpuSetByRole(const char *cpuset);
extern void checkCpuSetByRole(const char *cpuset);

/*
 * Add / subtract startup memory chunks to / from self->memUsage.
 * Both are no-ops unless resource groups are enabled.
 */
extern void ResGroupProcAddStartupChunks(int32 chunks);
extern void ResGroupProcSubStartupChunks(int32 chunks);

/*
 * Forward the arguments to elog() only when Debug_resource_group is set.
 *
 * The expansion intentionally has no trailing semicolon: the caller's own
 * ';' terminates the do/while, so the macro acts as exactly one statement
 * and can be used in an unbraced if/else without detaching the else.
 */
#define LOG_RESGROUP_DEBUG(...) \
	do { if (Debug_resource_group) elog(__VA_ARGS__); } while (false)

Expand Down
1 change: 1 addition & 0 deletions src/include/utils/vmem_tracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ extern void VmemTracker_UnregisterStartupMemory(void);
extern void VmemTracker_RequestWaiver(int64 waiver_bytes);
extern void VmemTracker_ResetWaiver(void);
extern int64 VmemTracker_Fault(int32 reason, int64 arg);
extern int32 VmemTracker_GetStartupChunks(void);

extern int32 RedZoneHandler_GetRedZoneLimitChunks(void);
extern int32 RedZoneHandler_GetRedZoneLimitMB(void);
Expand Down
8 changes: 6 additions & 2 deletions src/test/isolation2/input/resgroup/resgroup_move_query.source
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
--
-- end_matchsubs

-- start_ignore
! gpstop -rai;
-- end_ignore

CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS
'@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc'
LANGUAGE C READS SQL DATA;
Expand Down Expand Up @@ -135,7 +139,7 @@ SELECT num_running FROM gp_toolkit.gp_resgroup_status WHERE rsgname='rg_move_que
1&: SELECT pg_sleep(3);
2: SET ROLE role_move_query_mem_small;
2: BEGIN;
2: SELECT hold_memory_by_percent_on_qe(1,0.1);
2: SELECT hold_memory_by_percent_on_qe(1,0.2);
3&: SELECT gp_toolkit.pg_resgroup_move_query(pid, 'rg_move_query') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND rsgname='rg_move_query_mem_small';
1<:
-- connection 1 finished, it will wake up connection 3
Expand All @@ -150,7 +154,7 @@ SELECT num_running FROM gp_toolkit.gp_resgroup_status WHERE rsgname='rg_move_que
1: ALTER RESOURCE GROUP rg_move_query SET memory_limit 0;
1: SET ROLE role_move_query_mem_small;
1: BEGIN;
1: SELECT hold_memory_by_percent_on_qe(1,0.1);
1: SELECT hold_memory_by_percent_on_qe(1,0.2);
2: SELECT gp_toolkit.pg_resgroup_move_query(pid, 'rg_move_query') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND rsgname='rg_move_query_mem_small';
2: SELECT is_session_in_group(pid, 'rg_move_query') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND state = 'idle in transaction';
1: END;
Expand Down
30 changes: 30 additions & 0 deletions src/test/isolation2/input/resgroup/resgroup_startup_memory.source
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
-- start_ignore
! gpconfig -c runaway_detector_activation_percent -v 20;
! gpstop -rai;
-- end_ignore

CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS
'@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc'
LANGUAGE C READS SQL DATA;

CREATE OR REPLACE FUNCTION resGroupPallocIgnoreStartup(float) RETURNS int AS
'@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPallocIgnoreStartup'
LANGUAGE C READS SQL DATA;

ALTER RESOURCE GROUP default_group SET memory_limit 20;
ALTER RESOURCE GROUP admin_group SET memory_limit 10;
ALTER RESOURCE GROUP admin_group set concurrency 1;

5: select * from gp_toolkit.gp_resgroup_config;
5q:

-- This query will be killed by redzone because resGroupPallocIgnoreStartup just allocates
-- the memory size of all resgroup memory. Before startup chunks were considered, a group had no memory tracked
-- at the moment we called resGroupPalloc, so this case shows that now a group tracks startup memory.
5: SELECT resGroupPallocIgnoreStartup(2.39) FROM gp_dist_random('gp_id') WHERE gp_segment_id = 1;
5q:

-- This won't fail because now resGroupPalloc subtracts startup chunks from the requested amount
-- of memory to keep all previews tests intact. Leaving resGroupPalloc without changes may break them.
5: SELECT resGroupPalloc(2.39) FROM gp_dist_random('gp_id') WHERE gp_segment_id = 1;
5q:
2 changes: 2 additions & 0 deletions src/test/isolation2/isolation2_resgroup_schedule
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,6 @@ test: resgroup/resgroup_dumpinfo
# test large group id
test: resgroup/resgroup_large_group_id

test: resgroup/resgroup_startup_memory
RekGRpth marked this conversation as resolved.
Show resolved Hide resolved

test: resgroup/disable_resgroup
22 changes: 11 additions & 11 deletions src/test/isolation2/output/resgroup/resgroup_bypass.source
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 0.0
rg_bypass_test | 1 | 12.0
rg_bypass_test | 1 | 24.0
bandetto marked this conversation as resolved.
Show resolved Hide resolved
(2 rows)
61: SELECT * FROM eat_memory_on_qd_large;
ERROR: Out of memory
Expand Down Expand Up @@ -202,8 +202,8 @@ BEGIN
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 4.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 0 | 16.0
rg_bypass_test | 1 | 12.0
(2 rows)
61: SELECT * FROM eat_memory_on_one_slice;
count
Expand All @@ -213,8 +213,8 @@ SELECT * FROM memory_result;
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 8.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 0 | 20.0
rg_bypass_test | 1 | 12.0
(2 rows)
61: SELECT * FROM eat_memory_on_one_slice;
ERROR: Out of memory (seg0 slice1 127.0.0.1:25432 pid=336)
Expand All @@ -234,7 +234,7 @@ SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 0.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 1 | 12.0
(2 rows)
61q: ... <quitting>

Expand All @@ -256,8 +256,8 @@ BEGIN
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 4.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 0 | 16.0
rg_bypass_test | 1 | 12.0
(2 rows)
61: SELECT * FROM eat_memory_on_slices;
count
Expand All @@ -267,8 +267,8 @@ SELECT * FROM memory_result;
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 8.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 0 | 20.0
rg_bypass_test | 1 | 12.0
(2 rows)
61: SELECT * FROM eat_memory_on_slices;
ERROR: Out of memory (seg0 slice2 127.0.0.1:25432 pid=354)
Expand All @@ -288,7 +288,7 @@ SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 0.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 1 | 12.0
(2 rows)
61q: ... <quitting>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
-----------------+----------+---------
rg1_memory_test | 0 | 0.0
rg1_memory_test | 1 | 20.0
rg1_memory_test | 1 | 30.0
rg2_memory_test | 0 | 0.0
rg2_memory_test | 1 | 0.0
(4 rows)
Expand Down Expand Up @@ -144,8 +144,8 @@ BEGIN
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
-----------------+----------+---------
rg1_memory_test | 0 | 20.0
rg1_memory_test | 1 | 0.0
rg1_memory_test | 0 | 30.0
rg1_memory_test | 1 | 10.0
rg2_memory_test | 0 | 0.0
rg2_memory_test | 1 | 0.0
(4 rows)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
--
-- end_matchsubs

-- start_ignore
! gpstop -rai;
-- end_ignore

CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS '@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc' LANGUAGE C READS SQL DATA;
CREATE

Expand Down Expand Up @@ -185,7 +189,7 @@ SET
SET
2: BEGIN;
BEGIN
2: SELECT hold_memory_by_percent_on_qe(1,0.1);
2: SELECT hold_memory_by_percent_on_qe(1,0.2);
hold_memory_by_percent_on_qe
------------------------------
0
Expand Down Expand Up @@ -220,7 +224,7 @@ ALTER
SET
1: BEGIN;
BEGIN
1: SELECT hold_memory_by_percent_on_qe(1,0.1);
1: SELECT hold_memory_by_percent_on_qe(1,0.2);
hold_memory_by_percent_on_qe
------------------------------
0
Expand Down
41 changes: 41 additions & 0 deletions src/test/isolation2/output/resgroup/resgroup_startup_memory.source
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
-- start_ignore
! gpconfig -c runaway_detector_activation_percent -v 20;
! gpstop -rai;
-- end_ignore

CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS '@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc' LANGUAGE C READS SQL DATA;
CREATE

CREATE OR REPLACE FUNCTION resGroupPallocIgnoreStartup(float) RETURNS int AS '@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPallocIgnoreStartup' LANGUAGE C READS SQL DATA;
CREATE

ALTER RESOURCE GROUP default_group SET memory_limit 20;
ALTER
ALTER RESOURCE GROUP admin_group SET memory_limit 10;
ALTER
ALTER RESOURCE GROUP admin_group set concurrency 1;
ALTER

5: select * from gp_toolkit.gp_resgroup_config;
groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset
---------+---------------+-------------+----------------+--------------+---------------------+--------------------+----------------+--------
6437 | default_group | 20 | 30 | 20 | 80 | 10 | vmtracker | -1
6438 | admin_group | 1 | 10 | 10 | 80 | 10 | vmtracker | -1
(2 rows)
5q: ... <quitting>

-- This query will be killed by redzone because resGroupPallocIgnoreStartup just allocates
-- the memory size of all resgroup memory. Before startup chunks were considered, a group had no memory tracked
-- at the moment we called resGroupPalloc, so this case shows that now a group tracks startup memory.
5: SELECT resGroupPallocIgnoreStartup(2.39) FROM gp_dist_random('gp_id') WHERE gp_segment_id = 1;
ERROR: Canceling query because of high VMEM usage. current group id is 6438, group memory usage 175 MB, group shared memory quota is 55 MB, slot memory quota is 13 MB, global freechunks memory is 371 MB, global safe memory threshold is 382 MB (runaway_cleaner.c:197) (seg1 slice1 10.92.43.77:6003 pid=29522) (runaway_cleaner.c:197)
5q: ... <quitting>

-- This won't fail because now resGroupPalloc subtracts startup chunks from the requested amount
-- of memory to keep all previews tests intact. Leaving resGroupPalloc without changes may break them.
5: SELECT resGroupPalloc(2.39) FROM gp_dist_random('gp_id') WHERE gp_segment_id = 1;
resgrouppalloc
----------------
0
(1 row)
5q: ... <quitting>
1 change: 1 addition & 0 deletions src/test/isolation2/sql/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ resgroup_bypass.sql
resgroup_cpuset.sql
gp_collation.sql
distributed_snapshot.sql
resgroup_startup_memory.sql
Loading
Loading