Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ADBDEV-6156 Count startup memory of each process when using resource groups #1023

Open
wants to merge 18 commits into
base: adb-6.x-dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/backend/utils/mmgr/vmem_tracker.c
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,12 @@ VmemTracker_ReleaseVmem(int64 toBeFreedRequested)
}
}

/*
 * Return the number of chunks currently recorded as this process's
 * startup memory (the same startupChunks value that the startup-memory
 * register/unregister routines add to and subtract from the trackers).
 */
int32
VmemTracker_GetStartupChunks(void)
{
	return startupChunks;
}

/*
* Register the startup memory to vmem tracker.
*
Expand Down Expand Up @@ -670,6 +676,8 @@ VmemTracker_RegisterStartupMemory(int64 bytes)
pg_atomic_add_fetch_u32((pg_atomic_uint32 *) segmentVmemChunks,
startupChunks);

ResGroupProcAddStartupChunks(startupChunks);

/*
* Step 2, check if an OOM error should be raised by allocating 0 chunk.
*/
Expand All @@ -692,6 +700,8 @@ VmemTracker_UnregisterStartupMemory(void)
pg_atomic_sub_fetch_u32((pg_atomic_uint32 *) &MySessionState->sessionVmem,
startupChunks);

ResGroupProcSubStartupChunks(startupChunks);

trackedBytes -= startupBytes;
trackedVmemChunks -= startupChunks;

Expand Down
24 changes: 24 additions & 0 deletions src/backend/utils/resgroup/resgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -1594,6 +1594,30 @@ selfDetachResGroup(ResGroupData *group, ResGroupSlotData *slot)
selfUnsetGroup();
}

/*
 * Account startup memory in this process's own usage counter.
 *
 * This runs before a resource group is assigned to the process; the
 * amount recorded here is later added to the group via
 * selfAttachResGroup.
 *
 * chunks: number of startup chunks to add to self->memUsage.
 */
void
ResGroupProcAddStartupChunks(int32 chunks)
{
	/* Nothing to account for when resource groups are disabled. */
	if (!IsResGroupEnabled())
		return;

	self->memUsage += chunks;
}

/*
 * Remove startup memory from this process's own usage counter at cleanup.
 *
 * By this point the memory should have already been subtracted from the
 * resource group via selfDetachResGroup.  Strictly speaking this call is
 * not needed, because a running process always has some startup memory
 * consumption, but it is kept for symmetry with
 * ResGroupProcAddStartupChunks.
 *
 * chunks: number of startup chunks to subtract from self->memUsage.
 */
void
ResGroupProcSubStartupChunks(int32 chunks)
{
	/* Nothing to account for when resource groups are disabled. */
	if (!IsResGroupEnabled())
		return;

	self->memUsage -= chunks;
}

/*
* Initialize the members of a slot
*/
Expand Down
3 changes: 3 additions & 0 deletions src/include/utils/resgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,9 @@ extern Oid ResGroupGetGroupIdBySessionId(int sessionId);
extern char *getCpuSetByRole(const char *cpuset);
extern void checkCpuSetByRole(const char *cpuset);

extern void ResGroupProcAddStartupChunks(int32 chunks);
extern void ResGroupProcSubStartupChunks(int32 chunks);

/*
 * Forward the arguments to elog() only when Debug_resource_group is set.
 *
 * NOTE(review): the expansion already ends with ';' after while(false), so
 * "LOG_RESGROUP_DEBUG(...);" expands to two statements and cannot be used
 * as the unbraced body of an if that has an else branch.  Confirm that no
 * caller depends on the trailing ';' before removing it.
 */
#define LOG_RESGROUP_DEBUG(...) \
do {if (Debug_resource_group) elog(__VA_ARGS__); } while(false);

Expand Down
1 change: 1 addition & 0 deletions src/include/utils/vmem_tracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ extern void VmemTracker_UnregisterStartupMemory(void);
extern void VmemTracker_RequestWaiver(int64 waiver_bytes);
extern void VmemTracker_ResetWaiver(void);
extern int64 VmemTracker_Fault(int32 reason, int64 arg);
extern int32 VmemTracker_GetStartupChunks(void);

extern int32 RedZoneHandler_GetRedZoneLimitChunks(void);
extern int32 RedZoneHandler_GetRedZoneLimitMB(void);
Expand Down
8 changes: 6 additions & 2 deletions src/test/isolation2/input/resgroup/resgroup_move_query.source
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
--
-- end_matchsubs

-- start_ignore
! gpstop -rai;
-- end_ignore

CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS
'@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc'
LANGUAGE C READS SQL DATA;
Expand Down Expand Up @@ -135,7 +139,7 @@ SELECT num_running FROM gp_toolkit.gp_resgroup_status WHERE rsgname='rg_move_que
1&: SELECT pg_sleep(3);
2: SET ROLE role_move_query_mem_small;
2: BEGIN;
2: SELECT hold_memory_by_percent_on_qe(1,0.1);
2: SELECT hold_memory_by_percent_on_qe(1,0.2);
3&: SELECT gp_toolkit.pg_resgroup_move_query(pid, 'rg_move_query') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND rsgname='rg_move_query_mem_small';
1<:
-- connection 1 finished, it will wake up connection 3
Expand All @@ -150,7 +154,7 @@ SELECT num_running FROM gp_toolkit.gp_resgroup_status WHERE rsgname='rg_move_que
1: ALTER RESOURCE GROUP rg_move_query SET memory_limit 0;
1: SET ROLE role_move_query_mem_small;
1: BEGIN;
1: SELECT hold_memory_by_percent_on_qe(1,0.1);
1: SELECT hold_memory_by_percent_on_qe(1,0.2);
2: SELECT gp_toolkit.pg_resgroup_move_query(pid, 'rg_move_query') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND rsgname='rg_move_query_mem_small';
2: SELECT is_session_in_group(pid, 'rg_move_query') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND state = 'idle in transaction';
1: END;
Expand Down
75 changes: 75 additions & 0 deletions src/test/isolation2/input/resgroup/resgroup_startup_memory.source
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
-- start_ignore
! gpconfig -c runaway_detector_activation_percent -v 100;
! gpstop -rai;

CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS
'@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc'
LANGUAGE C READS SQL DATA;
-- end_ignore

ALTER RESOURCE GROUP default_group SET memory_limit 10;
ALTER RESOURCE GROUP admin_group SET memory_limit 30;

CREATE RESOURCE GROUP test_group with (concurrency=1, memory_limit=59, memory_spill_ratio=100, cpu_rate_limit=20);
CREATE ROLE test_role RESOURCE GROUP test_group;
CREATE ROLE default_role RESOURCE GROUP default_group;

-- 1. Occupy all memory on a segment and fail with vmem tracker error
ALTER RESOURCE GROUP admin_group SET concurrency 1;
ALTER RESOURCE GROUP default_group SET concurrency 1;

-- Start a session which will be detached from a group when the query is done
-- resource groups can't see startup chunks occupied by a detached session but
-- the vmem tracker can
0: SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE pid != (SELECT pg_backend_pid());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here and below

Suggested change
0: SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE pid != (SELECT pg_backend_pid());
0: SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE pid != pg_backend_pid();

Do we really want to kill ALL backends except the current one?!

Copy link
Collaborator Author

@dnskvlnk dnskvlnk Nov 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's the point of the test, I don't want other processes to interfere. I can change it if you explain what problems it can cause

0: CREATE TABLE test (a int);

-- Now we need to occupy all memory in all resgroups
1: SET ROLE test_role;
1: BEGIN;
1: SELECT resGroupPalloc(1.0) FROM gp_dist_random('gp_id') WHERE gp_segment_id = 1;

2: SET ROLE default_role;
2: BEGIN;
2: SELECT resGroupPalloc(1.0) FROM gp_dist_random('gp_id') WHERE gp_segment_id = 1;

-- The sum of all resource groups' memory is 99% and 1% is global shared memory aka
-- freeChunks. On this test cluster each segment has 682mb of memory, so 1% is ~7mb.
-- The minimum number of startup chunks a process can have is 8mb.
3: BEGIN;
3: SELECT resGroupPalloc(1.0) FROM gp_dist_random('gp_id') WHERE gp_segment_id = 1;

1q:
2q:
3q:

-- start_ignore
! gpconfig -c runaway_detector_activation_percent -v 20;
! gpstop -rai;

5: CREATE OR REPLACE FUNCTION resGroupPallocIgnoreStartup(float) RETURNS int AS
'@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPallocIgnoreStartup'
LANGUAGE C READS SQL DATA;
-- end_ignore

5: drop role test_role;
5: drop resource group test_group;
5: alter resource group default_group set memory_limit 20;
5: alter resource group admin_group set memory_limit 10;
5: alter resource group admin_group set concurrency 1;
5: select * from gp_toolkit.gp_resgroup_config;
5q:

-- This query will be killed by redzone because resGroupPallocIgnoreStartup just allocates
-- the memory size of all resgroup memory. Before the patch, a group had no memory tracked
bandetto marked this conversation as resolved.
Show resolved Hide resolved
-- at the moment we called resGroupPalloc, so this case shows that now a group tracks startup memory.
5: SELECT resGroupPallocIgnoreStartup(2.39) FROM gp_dist_random('gp_id') WHERE gp_segment_id = 1;
5q:

-- This won't fail because now resGroupPalloc subtracts startup chunks from the requested amount
-- of memory to keep all previous tests intact. Leaving resGroupPalloc without changes may break them.
5: SELECT resGroupPalloc(2.39) FROM gp_dist_random('gp_id') WHERE gp_segment_id = 1;
5q:

5: DROP TABLE test;
5: DROP ROLE default_role;
2 changes: 2 additions & 0 deletions src/test/isolation2/isolation2_resgroup_schedule
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,6 @@ test: resgroup/resgroup_dumpinfo
# test large group id
test: resgroup/resgroup_large_group_id

test: resgroup/resgroup_startup_memory
RekGRpth marked this conversation as resolved.
Show resolved Hide resolved

test: resgroup/disable_resgroup
22 changes: 11 additions & 11 deletions src/test/isolation2/output/resgroup/resgroup_bypass.source
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 0.0
rg_bypass_test | 1 | 12.0
rg_bypass_test | 1 | 24.0
bandetto marked this conversation as resolved.
Show resolved Hide resolved
(2 rows)
61: SELECT * FROM eat_memory_on_qd_large;
ERROR: Out of memory
Expand Down Expand Up @@ -202,8 +202,8 @@ BEGIN
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 4.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 0 | 16.0
rg_bypass_test | 1 | 12.0
(2 rows)
61: SELECT * FROM eat_memory_on_one_slice;
count
Expand All @@ -213,8 +213,8 @@ SELECT * FROM memory_result;
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 8.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 0 | 20.0
rg_bypass_test | 1 | 12.0
(2 rows)
61: SELECT * FROM eat_memory_on_one_slice;
ERROR: Out of memory (seg0 slice1 127.0.0.1:25432 pid=336)
Expand All @@ -234,7 +234,7 @@ SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 0.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 1 | 12.0
(2 rows)
61q: ... <quitting>

Expand All @@ -256,8 +256,8 @@ BEGIN
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 4.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 0 | 16.0
rg_bypass_test | 1 | 12.0
(2 rows)
61: SELECT * FROM eat_memory_on_slices;
count
Expand All @@ -267,8 +267,8 @@ SELECT * FROM memory_result;
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 8.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 0 | 20.0
rg_bypass_test | 1 | 12.0
(2 rows)
61: SELECT * FROM eat_memory_on_slices;
ERROR: Out of memory (seg0 slice2 127.0.0.1:25432 pid=354)
Expand All @@ -288,7 +288,7 @@ SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
----------------+----------+---------
rg_bypass_test | 0 | 0.0
rg_bypass_test | 1 | 0.0
rg_bypass_test | 1 | 12.0
(2 rows)
61q: ... <quitting>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
-----------------+----------+---------
rg1_memory_test | 0 | 0.0
rg1_memory_test | 1 | 20.0
rg1_memory_test | 1 | 30.0
rg2_memory_test | 0 | 0.0
rg2_memory_test | 1 | 0.0
(4 rows)
Expand Down Expand Up @@ -144,8 +144,8 @@ BEGIN
SELECT * FROM memory_result;
rsgname | ismaster | avg_mem
-----------------+----------+---------
rg1_memory_test | 0 | 20.0
rg1_memory_test | 1 | 0.0
rg1_memory_test | 0 | 30.0
rg1_memory_test | 1 | 10.0
rg2_memory_test | 0 | 0.0
rg2_memory_test | 1 | 0.0
(4 rows)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
--
-- end_matchsubs

-- start_ignore
! gpstop -rai;
-- end_ignore

CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS '@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc' LANGUAGE C READS SQL DATA;
CREATE

Expand Down Expand Up @@ -185,7 +189,7 @@ SET
SET
2: BEGIN;
BEGIN
2: SELECT hold_memory_by_percent_on_qe(1,0.1);
2: SELECT hold_memory_by_percent_on_qe(1,0.2);
hold_memory_by_percent_on_qe
------------------------------
0
Expand Down Expand Up @@ -220,7 +224,7 @@ ALTER
SET
1: BEGIN;
BEGIN
1: SELECT hold_memory_by_percent_on_qe(1,0.1);
1: SELECT hold_memory_by_percent_on_qe(1,0.2);
hold_memory_by_percent_on_qe
------------------------------
0
Expand Down
Loading
Loading