
Commit 0597dfb

Add cpu_memory.py and track cpu memory by default, too.
1 parent 3088003 commit 0597dfb

9 files changed: +108 -26 lines changed


.gitignore

Lines changed: 4 additions & 0 deletions
@@ -1 +1,5 @@
 /.idea/
+/build/
+/dist/
+.benchmarks/
+*.egg-info

README.md

Lines changed: 10 additions & 2 deletions
@@ -80,9 +80,17 @@ toma.explicit.batch(..., toma_cache_type=toma.GlobalBatchsizeCache)
 ### `StacktraceMemoryBatchsizeCache`: Stacktrace & Available Memory (*the default*)
 
 This memorizes the successful batchsizes for a given call trace and available memory at that point.
-For most machine learning code this is sufficient to know the right batchsize without having to look at the actual arguments and understanding more of the semantics.
+For most machine learning code, this is sufficient to remember the right batchsize without having to look at the actual arguments or understand more of the semantics.
 
-The implicit assumption is that after a few iterations a stable state will be reached in regards to memory usage.
+The implicit assumption is that after a few iterations a stable state will be reached with regard to GPU and CPU memory usage.
+
+To limit the CPU memory of the process, toma provides:
+```python
+import toma.cpu_memory
+
+toma.cpu_memory.set_cpu_memory_limit(8)
+```
+This can also be useful to avoid accidental swap thrashing.
 
 ### `GlobalBatchsizeCache`: Global per Function
 
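For context, a minimal end-to-end sketch of the README additions, assuming the same decorator API that `tests/test_cpu_mem_limit.py` below exercises; the function name, batchsize, and tensor shape are illustrative only:

```python
import torch

from toma import toma
from toma import cpu_memory

# Illustrative cap: limit this process to 8 GB of address space.
cpu_memory.set_cpu_memory_limit(8)


@toma.batch(initial_batchsize=4096)
def run(batchsize):
    # If this allocation exceeds the available CPU (or GPU) memory,
    # toma halves the batchsize and retries the decorated function.
    features = torch.empty((batchsize, 1024), dtype=torch.float32)


run()
```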
setup.py

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@
     # your project is installed. For an analysis of "install_requires" vs pip's
     # requirements files see:
     # https://packaging.python.org/en/latest/requirements.html
-    install_requires=["torch"],
+    install_requires=["torch", "psutil"],
     # List additional groups of dependencies here (e.g. development
     # dependencies). You can install these using the following syntax,
     # for example:

tests/test_benchmark.py

Lines changed: 3 additions & 0 deletions
@@ -1,6 +1,9 @@
 import torch
 import pytest_benchmark
 
+# Preload this import
+import resource
+
 from toma import simple, toma, explicit, NoBatchsizeCache
 
 
tests/test_cpu_mem_limit.py

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+import torch
+
+from toma import toma
+from toma import cpu_memory
+
+
+def test_cpu_mem_limit():
+    cpu_memory.set_cpu_memory_limit(2)
+
+    batchsize = None
+
+    @toma.batch(initial_batchsize=2048)
+    def allocate_gigabytes(bs):
+        torch.empty((bs, 1024, 1024 // 4), dtype=torch.float32)
+
+        nonlocal batchsize
+        batchsize = bs
+
+    allocate_gigabytes()
+
+    assert batchsize <= 512
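For reference, the sizes in this test work out as follows (my arithmetic, not part of the diff): each call requests `bs * 1024 * (1024 // 4)` float32 values at 4 bytes each, i.e. `bs` MiB, so the initial batchsize of 2048 asks for about 2 GiB against the 2 GB limit set above; with interpreter and torch overhead on top, at least two halvings are presumably needed, which is what the final `<= 512` assertion checks.

```python
# bs * 1024 * 256 float32 elements, 4 bytes each -> bs MiB per call.
def bytes_requested(bs: int) -> int:
    return bs * 1024 * (1024 // 4) * 4

assert bytes_requested(2048) == 2 * 2 ** 30   # 2 GiB, already at the 2 GB cap
assert bytes_requested(512) == 512 * 2 ** 20  # 512 MiB, well below it
```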

toma/__init__.py

Lines changed: 19 additions & 14 deletions
@@ -8,9 +8,10 @@
 
 import torch
 
-import toma.torch_cuda_memory as tcm
 import toma.stacktrace as tst
 from toma.batchsize_cache import StacktraceMemoryBatchsizeCache, NoBatchsizeCache, GlobalBatchsizeCache
+from toma.cpu_memory import is_out_of_cpu_memory
+from toma.torch_cuda_memory import is_cuda_out_of_memory, is_cudnn_snafu, gc_cuda
 
 
 DEFAULT_CACHE_TYPE = StacktraceMemoryBatchsizeCache
@@ -23,22 +24,22 @@ class simple:
 
     @staticmethod
     def batch(func, initial_batchsize: int, *args, **kwargs):
-        tcm.gc_cuda()
+        gc_cuda()
 
         batchsize = initial_batchsize
         while True:
             try:
                 return func(batchsize, *args, **kwargs)
             except RuntimeError as exception:
-                if batchsize > 1 and tcm.should_reduce_batch_size(exception):
+                if batchsize > 1 and should_reduce_batch_size(exception):
                     batchsize //= 2
-                    tcm.gc_cuda()
+                    gc_cuda()
                 else:
                     raise
 
     @staticmethod
     def range(func, start: int, end: int, initial_step: int, *args, **kwargs):
-        tcm.gc_cuda()
+        gc_cuda()
 
         stepsize = initial_step
         current = start
@@ -47,9 +48,9 @@ def range(func, start: int, end: int, initial_step: int, *args, **kwargs):
                 func(current, min(current + stepsize, end), *args, **kwargs)
                 current += stepsize
             except RuntimeError as exception:
-                if stepsize > 1 and tcm.should_reduce_batch_size(exception):
+                if stepsize > 1 and should_reduce_batch_size(exception):
                     stepsize //= 2
-                    tcm.gc_cuda()
+                    gc_cuda()
                 else:
                     raise
 
@@ -170,7 +171,7 @@ class explicit:
     def batch(
         func, initial_batchsize: int, *args, toma_context=None, toma_cache_type: Type = DEFAULT_CACHE_TYPE, **kwargs
     ):
-        tcm.gc_cuda()
+        gc_cuda()
 
         cache = get_cache_for_context(toma_cache_type, toma_context or func)
 
@@ -181,9 +182,9 @@ def batch(
                 value = batchsize.get()
                 return func(value, *args, **kwargs)
             except RuntimeError as exception:
-                if value > 1 and tcm.should_reduce_batch_size(exception):
+                if value > 1 and should_reduce_batch_size(exception):
                     batchsize.decrease_batchsize()
-                    tcm.gc_cuda()
+                    gc_cuda()
                 else:
                     raise
 
@@ -198,22 +199,22 @@ def range(
         toma_cache_type: Type = DEFAULT_CACHE_TYPE,
         **kwargs,
     ):
-        tcm.gc_cuda()
+        gc_cuda()
 
         cache = get_cache_for_context(toma_cache_type, toma_context or func)
 
         batchsize = cache.get_batchsize(initial_step)
 
-        tcm.gc_cuda()
+        gc_cuda()
         current = start
         while current < end:
             try:
                 func(current, min(current + batchsize.get(), end), *args, **kwargs)
                 current += batchsize.get()
             except RuntimeError as exception:
-                if batchsize.get() > 1 and tcm.should_reduce_batch_size(exception):
+                if batchsize.get() > 1 and should_reduce_batch_size(exception):
                     batchsize.decrease_batchsize()
-                    tcm.gc_cuda()
+                    gc_cuda()
                 else:
                     raise
 
@@ -242,3 +243,7 @@ def body(start: int, end: int):
             toma_context=toma_context or func,
             toma_cache_type=toma_cache_type,
         )
+
+
+def should_reduce_batch_size(exception):
+    return is_cuda_out_of_memory(exception) or is_cudnn_snafu(exception) or is_out_of_cpu_memory(exception)
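The retry condition shared by `simple`, `toma`, and `explicit` now lives here and also treats CPU allocator failures as a signal to shrink the batch. A small sketch of that classification, using the exact message substring `is_out_of_cpu_memory()` matches on (the exception object itself is synthetic):

```python
from toma import should_reduce_batch_size

# PyTorch's CPU allocator raises a RuntimeError containing this substring when
# an allocation fails; toma now treats it like a CUDA OOM and retries with a
# smaller batchsize instead of propagating the error.
cpu_oom = RuntimeError("DefaultCPUAllocator: can't allocate memory")
assert should_reduce_batch_size(cpu_oom)
```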

toma/batchsize_cache.py

Lines changed: 12 additions & 4 deletions
@@ -2,6 +2,7 @@
 from dataclasses import dataclass
 from typing import Optional
 
+import toma.cpu_memory
 from toma import stacktrace as tst, torch_cuda_memory as tcm
 import weakref
 
@@ -50,7 +51,9 @@ def get_batchsize(self, initial_batchsize: int) -> Batchsize:
 
 
 class StacktraceMemoryBatchsizeCache(BatchsizeCache):
-    LRU_CACHE_SIZE = 128
+    LRU_CACHE_SIZE: int = 128
+    TRACK_RAM: bool = True
+
     initial_batchsize: Optional[int]
 
     def __init__(self, lru_cache_size=None):
@@ -59,15 +62,20 @@ def __init__(self, lru_cache_size=None):
         self.initial_batchsize = None
 
         @functools.lru_cache(lru_cache_size or StacktraceMemoryBatchsizeCache.LRU_CACHE_SIZE)
-        def get_batchsize_from_cache(stacktrace, available_memory):
+        def get_batchsize_from_cache(stacktrace, cpu_available_memory, gpu_available_memory):
             return Batchsize(self.initial_batchsize)
 
         self.get_batchsize_from_cache = get_batchsize_from_cache
 
     def get_batchsize(self, initial_batchsize: int):
         stacktrace = tst.get_simple_traceback(2)
-        available_memory_256MB = int(tcm.get_cuda_assumed_available_memory() // 2 ** 28)
 
-        batchsize = self.get_batchsize_from_cache(stacktrace, available_memory_256MB)
+        gpu_available_memory_256MB = int(tcm.get_cuda_assumed_available_memory() // 2 ** 28)
+        if self.TRACK_RAM:
+            cpu_available_memory_256MB = int(toma.cpu_memory.get_available_cpu_memory() // 2 ** 28)
+        else:
+            cpu_available_memory_256MB = -1
+
+        batchsize = self.get_batchsize_from_cache(stacktrace, cpu_available_memory_256MB, gpu_available_memory_256MB)
         batchsize.set_initial_batchsize(initial_batchsize)
         return batchsize
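With this change the cache key becomes (stacktrace, free CPU memory, free GPU memory), with both memory figures quantized into 256 MB buckets (`// 2 ** 28`) so that small fluctuations still map to the same cached batchsize. A hedged sketch of the bucketing and of the new class-level switch; `to_bucket` is an illustrative helper, not part of the library:

```python
from toma.batchsize_cache import StacktraceMemoryBatchsizeCache

# Illustrative helper mirroring the `// 2 ** 28` quantization above.
def to_bucket(free_bytes: int) -> int:
    return int(free_bytes // 2 ** 28)  # 256 MB buckets

assert to_bucket(8 * 2 ** 30) == 32      # exactly 8 GiB free -> bucket 32
assert to_bucket(8 * 2 ** 30 - 1) == 31  # just under 8 GiB -> bucket 31

# Opting out of keying on CPU memory: the CPU slot is then pinned to -1
# and only the stacktrace and the GPU figure distinguish cache entries.
StacktraceMemoryBatchsizeCache.TRACK_RAM = False
```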

toma/cpu_memory.py

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+import psutil
+
+
+def get_available_cpu_memory():
+    this_process = psutil.Process()
+    available_memory = psutil.virtual_memory().available
+
+    try:
+        import resource
+
+        soft_mem_limit, hard_mem_limit = resource.getrlimit(resource.RLIMIT_AS)
+        if hard_mem_limit != resource.RLIM_INFINITY:
+            used_memory = this_process.memory_info().vms
+            available_memory = min(hard_mem_limit - used_memory, available_memory)
+    except ImportError:
+        pass
+
+    return available_memory
+
+
+def set_cpu_memory_limit(num_gigabytes):
+    try:
+        import resource
+
+        num_bytes = int(num_gigabytes * 2 ** 30)
+        resource.setrlimit(resource.RLIMIT_AS, (num_bytes, num_bytes))
+    except ImportError:
+        pass
+
+
+def is_out_of_cpu_memory(exception):
+    return (
+        isinstance(exception, RuntimeError)
+        and len(exception.args) == 1
+        and "DefaultCPUAllocator: can't allocate memory" in exception.args[0]
+    )
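A usage sketch for the new module on a POSIX system, where the `resource` module exists; on platforms without it, `set_cpu_memory_limit()` silently does nothing and `get_available_cpu_memory()` falls back to `psutil.virtual_memory().available`. This assumes no stricter `RLIMIT_AS` hard limit is already in place:

```python
import resource

from toma import cpu_memory

# Cap this process at 4 GB of address space (illustrative value).
cpu_memory.set_cpu_memory_limit(4)

# Both the soft and the hard RLIMIT_AS limit now sit at 4 GiB.
soft, hard = resource.getrlimit(resource.RLIMIT_AS)
assert soft == hard == 4 * 2 ** 30

# Reported availability is min(hard limit - current VMS, free system memory),
# so it already accounts for the cap set above.
print(cpu_memory.get_available_cpu_memory())
```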

toma/torch_cuda_memory.py

Lines changed: 2 additions & 5 deletions
@@ -8,7 +8,7 @@
 
 
 def gc_cuda():
-    """Gargage collect Torch cuda memory."""
+    """Garbage collect Torch (CUDA) memory."""
     gc.collect()
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
@@ -32,6 +32,7 @@ def get_cuda_available_memory():
         return get_cuda_assumed_available_memory() - get_cuda_blocked_memory()
     return 0
 
+
 def get_cuda_blocked_memory():
     if not torch.cuda.is_available():
         return 0
@@ -70,10 +71,6 @@ def is_cudnn_snafu(exception):
     )
 
 
-def should_reduce_batch_size(exception):
-    return is_cuda_out_of_memory(exception) or is_cudnn_snafu(exception)
-
-
 def cuda_meminfo():
     if not torch.cuda.is_available():
         return
