Skip to content

Commit 19aba57

Browse files
committed
Add a warning to promote --aggregated if files are too big
Users tend to run memray on long-running programs, but they often don't know about the --aggregated option, and in most cases they don't need the extra features that running in the default mode provides. Signed-off-by: Pablo Galindo <[email protected]>
1 parent 578e02d commit 19aba57

File tree

7 files changed

+46
-0
lines changed

7 files changed

+46
-0
lines changed

src/memray/_memray.pyx

+5
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,10 @@ cdef _create_metadata(header, peak_memory):
746746
PythonAllocatorType.PYTHON_ALLOCATOR_MALLOC: "malloc",
747747
PythonAllocatorType.PYTHON_ALLOCATOR_OTHER: "unknown",
748748
}
749+
file_format = (
750+
FileFormat.ALL_ALLOCATIONS if header["file_format"] == _FileFormat.ALL_ALLOCATIONS
751+
else FileFormat.AGGREGATED_ALLOCATIONS
752+
)
749753
return Metadata(
750754
start_time=millis_to_dt(stats["start_time"]),
751755
end_time=millis_to_dt(stats["end_time"]),
@@ -757,6 +761,7 @@ cdef _create_metadata(header, peak_memory):
757761
python_allocator=allocator_id_to_name[header["python_allocator"]],
758762
has_native_traces=header["native_traces"],
759763
trace_python_allocators=header["trace_python_allocators"],
764+
file_format=file_format,
760765
)
761766

762767

src/memray/_metadata.py

+1
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@ class Metadata:
1414
python_allocator: str
1515
has_native_traces: bool
1616
trace_python_allocators: bool
17+
file_format: int

src/memray/commands/common.py

+21
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from memray import FileReader
2121
from memray import MemorySnapshot
2222
from memray._errors import MemrayCommandError
23+
from memray._memray import FileFormat
2324
from memray._memray import SymbolicSupport
2425
from memray._memray import TemporalAllocationRecord
2526
from memray._memray import get_symbolic_support
@@ -77,6 +78,24 @@ def warn_if_not_enough_symbols() -> None:
7778
return
7879

7980

81+
# Print a warning suggesting the `--aggregated` option when the capture file
# was not recorded in aggregated mode and is larger than FILE_SIZE_LIMIT.
#
# Parameters:
#   reader: FileReader whose `metadata.file_format` is inspected.
#   result_path: Path to the capture file on disk; its size is read with
#       `result_path.stat()`.
#
# Returns None. Nothing is printed when the file format is not
# FileFormat.ALL_ALLOCATIONS or when the file is at or below the limit.
# NOTE(review): `result_path.stat()` is only evaluated when the format check
# passes (short-circuit `and`); it raises if the path does not exist, so
# callers are presumably expected to have validated the path -- confirm.
def warn_if_file_is_not_aggregated_and_is_too_big(
82+
reader: FileReader, result_path: Path
83+
) -> None:
84+
# Threshold compared against `st_size`, which is in bytes: 1e7 == 10,000,000.
FILE_SIZE_LIMIT = 1e7
85+
if (
86+
reader.metadata.file_format == FileFormat.ALL_ALLOCATIONS
87+
and result_path.stat().st_size > FILE_SIZE_LIMIT
88+
):
89+
# NOTE(review): `pprint` is imported outside this hunk; the `:warning:` and
# `[bold yellow] ... [/]` tokens look like Rich console markup -- confirm.
pprint(
90+
":warning: [bold yellow] The file is large and may take a long time to "
91+
"process [/] :warning:\n\n"
92+
"Consider using the `--aggregated` option when running the memray command "
93+
"to reduce the size of the file.\n"
94+
"Check https://bloomberg.github.io/memray/run.html#aggregated-capture-files "
95+
"for more information.\n"
96+
)
97+
98+
8099
class HighWatermarkCommand:
81100
def __init__(
82101
self,
@@ -140,6 +159,8 @@ def write_report(
140159
if reader.metadata.has_native_traces:
141160
warn_if_not_enough_symbols()
142161

162+
warn_if_file_is_not_aggregated_and_is_too_big(reader, result_path)
163+
143164
if temporal:
144165
assert self.temporal_reporter_factory is not None
145166
if show_memory_leaks:

src/memray/commands/summary.py

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from memray import FileReader
77
from memray._errors import MemrayCommandError
8+
from memray.commands.common import warn_if_file_is_not_aggregated_and_is_too_big
89
from memray.commands.common import warn_if_not_enough_symbols
910
from memray.reporters.summary import SummaryReporter
1011

@@ -67,6 +68,7 @@ def run(self, args: argparse.Namespace, parser: argparse.ArgumentParser) -> None
6768
reader = FileReader(os.fspath(args.results), report_progress=True)
6869
if reader.metadata.has_native_traces:
6970
warn_if_not_enough_symbols()
71+
warn_if_file_is_not_aggregated_and_is_too_big(reader, result_path)
7072

7173
if args.temporary_allocation_threshold >= 0:
7274
snapshot = iter(

src/memray/commands/tree.py

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from memray import FileReader
77
from memray._errors import MemrayCommandError
8+
from memray.commands.common import warn_if_file_is_not_aggregated_and_is_too_big
89
from memray.commands.common import warn_if_not_enough_symbols
910
from memray.reporters.tree import TreeReporter
1011

@@ -56,6 +57,7 @@ def run(self, args: argparse.Namespace, parser: argparse.ArgumentParser) -> None
5657
reader = FileReader(os.fspath(args.results), report_progress=True)
5758
if reader.metadata.has_native_traces:
5859
warn_if_not_enough_symbols()
60+
warn_if_file_is_not_aggregated_and_is_too_big(reader, result_path)
5961
if args.temporary_allocation_threshold >= 0:
6062
snapshot = iter(
6163
reader.get_temporary_allocation_records(

tests/unit/test_highwatermark_command.py

+12
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import os
2+
import sys
23
from pathlib import Path
4+
from unittest.mock import ANY
35
from unittest.mock import MagicMock
46
from unittest.mock import Mock
57
from unittest.mock import call
@@ -8,6 +10,7 @@
810
import pytest
911

1012
from memray._errors import MemrayCommandError
13+
from memray._memray import FileFormat
1114
from memray.commands.common import HighWatermarkCommand
1215

1316

@@ -165,6 +168,9 @@ def test_tracker_and_reporter_interactions_for_peak(self, tmp_path, merge_thread
165168
calls = [
166169
call(os.fspath(result_path), report_progress=True),
167170
call().metadata.has_native_traces.__bool__(),
171+
call().metadata.file_format.__eq__(FileFormat.ALL_ALLOCATIONS)
172+
if sys.version_info > (3, 7, 0)
173+
else ANY,
168174
call().get_high_watermark_allocation_records(merge_threads=merge_threads),
169175
call().get_memory_snapshots(),
170176
]
@@ -195,6 +201,9 @@ def test_tracker_and_reporter_interactions_for_leak(self, tmp_path, merge_thread
195201
calls = [
196202
call(os.fspath(result_path), report_progress=True),
197203
call().metadata.has_native_traces.__bool__(),
204+
call().metadata.file_format.__eq__(FileFormat.ALL_ALLOCATIONS)
205+
if sys.version_info > (3, 7, 0)
206+
else ANY,
198207
call().get_leaked_allocation_records(merge_threads=merge_threads),
199208
call().get_memory_snapshots(),
200209
]
@@ -227,6 +236,9 @@ def test_tracker_and_reporter_interactions_for_temporary_allocations(
227236
calls = [
228237
call(os.fspath(result_path), report_progress=True),
229238
call().metadata.has_native_traces.__bool__(),
239+
call().metadata.file_format.__eq__(FileFormat.ALL_ALLOCATIONS)
240+
if sys.version_info > (3, 7, 0)
241+
else ANY,
230242
call().get_temporary_allocation_records(
231243
threshold=3, merge_threads=merge_threads
232244
),

tests/unit/test_stats_reporter.py

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import pytest
1010

1111
from memray import AllocatorType as AT
12+
from memray._memray import FileFormat
1213
from memray._metadata import Metadata
1314
from memray._stats import Stats
1415
from memray.reporters.stats import StatsReporter
@@ -97,6 +98,7 @@ def fake_stats():
9798
python_allocator="pymalloc",
9899
has_native_traces=False,
99100
trace_python_allocators=True,
101+
file_format=FileFormat.ALL_ALLOCATIONS,
100102
),
101103
total_num_allocations=20,
102104
total_memory_allocated=sum(mem_allocation_list),
@@ -435,6 +437,7 @@ def test_stats_output_json(fake_stats, tmp_path):
435437
"python_allocator": "pymalloc",
436438
"has_native_traces": False,
437439
"trace_python_allocators": True,
440+
"file_format": 0,
438441
},
439442
}
440443
actual = json.loads(output_file.read_text())

0 commit comments

Comments
 (0)