Big memory leak problem #129

st0rmbtw · 2024-08-15T01:40:54Z

On macOS with the Metal renderer, memory leaks every frame.
In 3 minutes memory usage of a simple app reached 6 GB.

Minimal repro:

quad.metal:

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

struct VertexIn
{
    float2 position [[attribute(0)]];
    float2 uv       [[attribute(1)]];
};

struct VertexOut
{
    float4 position [[position]];
    float2 uv;
};

vertex VertexOut VS(VertexIn inp [[stage_in]])
{
    VertexOut outp;

    outp.position = float4(inp.position, 0.0, 1.0);
    outp.uv    = inp.uv;

    return outp;
}

fragment float4 PS(VertexOut inp [[stage_in]])
{
    return float4(inp.uv, 1.0, 1.0);
}

main.mm:

#include <stdio.h>
#include <LLGL/LLGL.h>
#include <LLGL/Platform/NativeHandle.h>
#include <LLGL/Utils/VertexFormat.h>
#include <LLGL/Utils/TypeNames.h>
#include <memory>

#define GLFW_EXPOSE_NATIVE_COCOA

#include <GLFW/glfw3.h>
#include <GLFW/glfw3native.h>
#include <LLGL/LLGL.h>

class CustomSurface : public LLGL::Surface {
public:
    CustomSurface(GLFWwindow *window, const LLGL::Extent2D& size) : m_size(size), m_wnd(window) {}
    ~CustomSurface();

    bool GetNativeHandle(void* nativeHandle, std::size_t nativeHandleSize) override;
    LLGL::Extent2D GetContentSize() const override { return m_size; };
    bool AdaptForVideoMode(LLGL::Extent2D* resolution, bool* fullscreen) override;
    void ResetPixelFormat() override {};
    LLGL::Display* FindResidentDisplay() const override { return LLGL::Display::GetPrimary(); };

    bool ProcessEvents();
private:
    LLGL::Extent2D m_size;
    GLFWwindow* m_wnd = nullptr;
};

CustomSurface::~CustomSurface() {
    glfwDestroyWindow(m_wnd);
}

bool CustomSurface::GetNativeHandle(void* nativeHandle, std::size_t nativeHandleSize) {
    auto handle = reinterpret_cast<LLGL::NativeHandle*>(nativeHandle);
    handle->responder = glfwGetCocoaWindow(m_wnd);
    return true;
}

bool CustomSurface::AdaptForVideoMode(LLGL::Extent2D *resolution, bool *fullscreen) {
    m_size = *resolution;
    glfwSetWindowSize(m_wnd, m_size.width, m_size.height);
    return true;
}

bool CustomSurface::ProcessEvents() {
    glfwPollEvents();
    return !glfwWindowShouldClose(m_wnd);
}

int main(void) {
    LLGL::Log::RegisterCallbackStd();

    if (!glfwInit()) return -1;

    GLFWwindow *window = glfwCreateWindow(1280, 720, "AAA", nullptr, nullptr);

    LLGL::Report report;
    auto context = LLGL::RenderSystem::Load("Metal", &report);

    const LLGL::Display* display = LLGL::Display::GetPrimary();
    const std::uint32_t resScale = (display != nullptr ? static_cast<std::uint32_t>(display->GetScale()) : 1u);

    const auto resolution = LLGL::Extent2D(1280 * resScale, 720 * resScale);

    LLGL::SwapChainDescriptor swapChainDesc;
    swapChainDesc.resolution = resolution;

    auto surface = std::make_shared<CustomSurface>(window, resolution);

    auto swapChain = context->CreateSwapChain(swapChainDesc, surface);
    swapChain->SetVsyncInterval(0);

    auto commands = context->CreateCommandBuffer(LLGL::CommandBufferFlags::ImmediateSubmit);

    const auto& info = context->GetRendererInfo();

    float vertices[] = {
        -0.5f, -0.5f, 0.0f, 0.0f,
        0.5f,  -0.5f, 1.0f, 0.0f,
        -0.5f, 0.5f,  0.0f, 1.0f,
        0.5f,  0.5f,  1.0f, 1.0f,
    };

    LLGL::VertexFormat vertexFormat;
    vertexFormat.AppendAttribute({"a_position", LLGL::Format::RG32Float});
    vertexFormat.AppendAttribute({"a_uv", LLGL::Format::RG32Float});
    vertexFormat.SetStride(sizeof(float) * 4);

    LLGL::BufferDescriptor vertexBufferDesc;
    vertexBufferDesc.size = sizeof(vertices);
    vertexBufferDesc.bindFlags = LLGL::BindFlags::VertexBuffer;
    vertexBufferDesc.vertexAttribs = vertexFormat.attributes;

    LLGL::Buffer* vertexBuffer = context->CreateBuffer(vertexBufferDesc, vertices);

    LLGL::ShaderDescriptor vertexShaderDesc, fragmentShaderDesc;
    vertexShaderDesc = { LLGL::ShaderType::Vertex,   "assets/shaders/quad.metal", "VS", "1.1" };
    fragmentShaderDesc = { LLGL::ShaderType::Fragment, "assets/shaders/quad.metal", "PS", "1.1" };

    vertexShaderDesc.vertex.inputAttribs = vertexFormat.attributes;

    LLGL::Shader* vertexShader = context->CreateShader(vertexShaderDesc);
    LLGL::Shader* fragmentShader = context->CreateShader(fragmentShaderDesc);

    LLGL::PipelineLayoutDescriptor pipelineLayoutDesc;

    LLGL::PipelineLayout* pipelineLayout = context->CreatePipelineLayout(pipelineLayoutDesc);

    LLGL::GraphicsPipelineDescriptor pipelineDesc;
    pipelineDesc.vertexShader = vertexShader;
    pipelineDesc.fragmentShader = fragmentShader;
    pipelineDesc.pipelineLayout = pipelineLayout;
    pipelineDesc.indexFormat = LLGL::Format::R32UInt;
    pipelineDesc.primitiveTopology = LLGL::PrimitiveTopology::TriangleStrip;
    pipelineDesc.renderPass = swapChain->GetRenderPass();

    LLGL::PipelineState* pipelineState = context->CreatePipelineState(pipelineDesc);
    if (const LLGL::Report* report = pipelineState->GetReport()) {
        if (report->HasErrors()) LLGL::Log::Errorf("%s", report->GetText());
        return -1;
    }

    double prevTick = glfwGetTime();
    
    while (surface->ProcessEvents()) {
        double currentTick = glfwGetTime();
        const double deltaTime = (currentTick - prevTick);
        prevTick = currentTick;

        commands->Begin();
        {
            commands->SetVertexBuffer(*vertexBuffer);

            commands->BeginRenderPass(*swapChain);
            {
                commands->Clear(LLGL::ClearFlags::Color, LLGL::ClearValue(0.0f, 0.0f, 0.0f, 1.0f));
                commands->SetViewport(swapChain->GetResolution());
                commands->SetPipelineState(*pipelineState);

                commands->Draw(4, 0);
            }
            commands->EndRenderPass();
        }
        commands->End();

        swapChain->Present();
    }

    return 0;
}

The text was updated successfully, but these errors were encountered:

LukasBanana · 2024-08-15T03:17:30Z

Thanks for the simple repro. I haven't done much memory analysis yet and my examples relied on the autorelease pool feature in macOS, which I think is fine to use, but it would be nicer not having to rely on it. Wrapping your main loop or at least all LLGL frame drawing into the following block fixes the problem:

while (ProcessEvents())
{
    @autoreleasepool
    {
        /* Frame rendering ... */
    }
}

ExampleBase does it this way, too, but I am concerned this will always fail on iOS as this platform doesn't support garbage collection iirc.

st0rmbtw · 2024-08-15T13:38:19Z

Well, with autoreleasepool memory leaks much slower, but it still leaks about 0.1 MB in 5–10 seconds.

When debugging with Xcode memory leaks faster: about 100 MB in 1 minute

LukasBanana · 2024-08-15T14:58:52Z

Did you notice any of such leaks in the examples as well? My activity monitor showed a steady memory usage for the examples. What happens if you wrap the entire main function into the autoreleasepool-block?

FWIW: There are only a handful of retain calls in the Metal backend like in GetNativeHandle() which is also documented (see CommandBuffer.h:1004). I therefore don't think LLGL is causing a retain cycle, which means it might be possible to track this down with Xcode's leak detection tools (I won't have time for this until next week, though).

st0rmbtw · 2024-08-15T15:54:29Z

What happens if you wrap the entire main function into the autoreleasepool-block?

If I wrap the entire main function nothing happens, autoreleasepool just doesn't work and memory leaks with the speed of light.

Did you notice any of such leaks in the examples as well?

In the PBR example memory leaks about 0.1 MB in 5–10 seconds.
In the Instancing example memory leaks very slow, I ran the example for 5 minutes and memory usage was increased by about 0.4 MB.

LukasBanana · 2024-08-15T16:36:32Z

I can't think of a tremendous amount of objects being allocated that LLGL wouldn't clean up. Can you test this by replacing the commandBuffer calls with commandBufferWithUnretainedReferences at these two places:

I think the Metal backend should already take care of maintaining the buffer live time during command encoding, so the default MTLCommandBuffer with strong references might not be necessary.

I can take a closer look tomorrow.

st0rmbtw · 2024-08-15T17:01:45Z

Can you test this by replacing the commandBuffer calls with commandBufferWithUnretainedReferences

Looks like nothing has changed.

LukasBanana · 2024-08-16T15:51:41Z

With the example you posted I am having a hard time reproducing a memory leak after adding the @autoreleasepool block around the commands->Begin() and swapChain->Present() calls. That app stays around 68.2 MB pretty consistently. When I resize the window, the memory goes down to 54.1 MB and stays there even after several minutes (I have an Intel Iris Pro Graphics with Metal 1.4, but the M1* GPUs use shared memory as well). I'll try a few things without the autorelease pool, but I think this is pretty common practice since this also works with automatic reference counting (ARC) as opposed to garbage collection.

st0rmbtw · 2024-08-16T16:05:21Z

Try to capture the GPU workload in Xcode, memory usage should be increasing pretty fast.

You can do that by pressing this button:

LukasBanana · 2024-08-16T16:41:38Z

Unfortunately, my Xcode version doesn't have that feature and my MacBook is too old to upgrade. So I'll have to stick with more oldschool debugging techniques.

As I'm testing a bit with explicitly releasing the MTLCommandBuffer objects, it looks like it's not feasible to ditch the autoreleasepool block without significant refactoring. I'll also be on vacation for a couple of days, but feel free to post any more details you can find and I'll continue debugging this issue when I'm back.

LukasBanana self-assigned this Aug 15, 2024

LukasBanana added bug macOS labels Aug 15, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Big memory leak problem #129

Big memory leak problem #129

st0rmbtw commented Aug 15, 2024 •

edited

Loading

LukasBanana commented Aug 15, 2024 •

edited

Loading

st0rmbtw commented Aug 15, 2024 •

edited

Loading

LukasBanana commented Aug 15, 2024

st0rmbtw commented Aug 15, 2024

LukasBanana commented Aug 15, 2024 •

edited

Loading

st0rmbtw commented Aug 15, 2024

LukasBanana commented Aug 16, 2024

st0rmbtw commented Aug 16, 2024

LukasBanana commented Aug 16, 2024

Big memory leak problem #129

Big memory leak problem #129

Comments

st0rmbtw commented Aug 15, 2024 • edited Loading

Minimal repro:

LukasBanana commented Aug 15, 2024 • edited Loading

st0rmbtw commented Aug 15, 2024 • edited Loading

LukasBanana commented Aug 15, 2024

st0rmbtw commented Aug 15, 2024

LukasBanana commented Aug 15, 2024 • edited Loading

st0rmbtw commented Aug 15, 2024

LukasBanana commented Aug 16, 2024

st0rmbtw commented Aug 16, 2024

LukasBanana commented Aug 16, 2024

st0rmbtw commented Aug 15, 2024 •

edited

Loading

LukasBanana commented Aug 15, 2024 •

edited

Loading

st0rmbtw commented Aug 15, 2024 •

edited

Loading

LukasBanana commented Aug 15, 2024 •

edited

Loading