Skip to content

[bug]: Runtime error when enable_partial_loading is enabled #8850

@Intrinsically-Sublime

Description

@Intrinsically-Sublime

Is there an existing issue for this problem?

  • I have searched the existing issues

Install method

Docker-Compose

Operating system

Linux

GPU vendor

Nvidia (CUDA)

GPU model

RTX 2060

GPU VRAM

6GB

Version number

6.11.0.post1

Browser

Vivaldi 7.8.3925.56

System Information

{
    "version": "6.11.0.post1",
    "dependencies": {
        "absl-py"                 : "2.3.1",
        "accelerate"              : "1.8.1",
        "annotated-types"         : "0.7.0",
        "anyio"                   : "4.9.0",
        "attrs"                   : "25.3.0",
        "bidict"                  : "0.23.1",
        "bitsandbytes"            : "0.46.1",
        "blake3"                  : "1.0.5",
        "certifi"                 : "2025.6.15",
        "cffi"                    : "1.17.1",
        "charset-normalizer"      : "3.4.2",
        "click"                   : "8.2.1",
        "coloredlogs"             : "15.0.1",
        "compel"                  : "2.1.1",
        "contourpy"               : "1.3.2",
        "CUDA"                    : "12.8",
        "cycler"                  : "0.12.1",
        "Deprecated"              : "1.2.18",
        "diffusers"               : "0.36.0",
        "dnspython"               : "2.7.0",
        "dynamicprompts"          : "0.31.0",
        "einops"                  : "0.8.1",
        "fastapi"                 : "0.118.3",
        "fastapi-events"          : "0.12.2",
        "filelock"                : "3.18.0",
        "flatbuffers"             : "25.2.10",
        "fonttools"               : "4.58.5",
        "fsspec"                  : "2025.5.1",
        "gguf"                    : "0.17.1",
        "h11"                     : "0.16.0",
        "hf-xet"                  : "1.1.5",
        "httpcore"                : "1.0.9",
        "httptools"               : "0.6.4",
        "httpx"                   : "0.28.1",
        "huggingface-hub"         : "0.34.4",
        "humanfriendly"           : "10.0",
        "idna"                    : "3.10",
        "importlib_metadata"      : "8.7.0",
        "InvokeAI"                : "6.11.0.post1",
        "jax"                     : "0.6.2",
        "jaxlib"                  : "0.6.2",
        "Jinja2"                  : "3.1.6",
        "kiwisolver"              : "1.4.8",
        "MarkupSafe"              : "3.0.1",
        "matplotlib"              : "3.10.3",
        "mediapipe"               : "0.10.14",
        "ml_dtypes"               : "0.5.1",
        "mpmath"                  : "1.3.0",
        "networkx"                : "3.5",
        "numpy"                   : "1.26.4",
        "nvidia-cublas-cu12"      : "12.8.3.14",
        "nvidia-cuda-cupti-cu12"  : "12.8.57",
        "nvidia-cuda-nvrtc-cu12"  : "12.8.61",
        "nvidia-cuda-runtime-cu12": "12.8.57",
        "nvidia-cudnn-cu12"       : "9.7.1.26",
        "nvidia-cufft-cu12"       : "11.3.3.41",
        "nvidia-cufile-cu12"      : "1.13.0.11",
        "nvidia-curand-cu12"      : "10.3.9.55",
        "nvidia-cusolver-cu12"    : "11.7.2.55",
        "nvidia-cusparse-cu12"    : "12.5.7.53",
        "nvidia-cusparselt-cu12"  : "0.6.3",
        "nvidia-nccl-cu12"        : "2.26.2",
        "nvidia-nvjitlink-cu12"   : "12.8.61",
        "nvidia-nvtx-cu12"        : "12.8.55",
        "onnx"                    : "1.16.1",
        "onnxruntime"             : "1.19.2",
        "opencv-contrib-python"   : "4.11.0.86",
        "opt_einsum"              : "3.4.0",
        "packaging"               : "25.0",
        "picklescan"              : "0.0.26",
        "pillow"                  : "11.3.0",
        "prompt_toolkit"          : "3.0.51",
        "protobuf"                : "4.25.8",
        "psutil"                  : "7.0.0",
        "pycparser"               : "2.22",
        "pydantic"                : "2.11.7",
        "pydantic-settings"       : "2.10.1",
        "pydantic_core"           : "2.33.2",
        "pyparsing"               : "3.2.3",
        "PyPatchMatch"            : "1.0.2",
        "python-dateutil"         : "2.9.0.post0",
        "python-dotenv"           : "1.1.1",
        "python-engineio"         : "4.12.2",
        "python-multipart"        : "0.0.20",
        "python-socketio"         : "5.13.0",
        "PyWavelets"              : "1.8.0",
        "PyYAML"                  : "6.0.2",
        "regex"                   : "2024.11.6",
        "requests"                : "2.32.4",
        "safetensors"             : "0.5.3",
        "scipy"                   : "1.16.0",
        "semver"                  : "3.0.4",
        "sentencepiece"           : "0.2.0",
        "setuptools"              : "80.9.0",
        "simple-websocket"        : "1.1.0",
        "six"                     : "1.17.0",
        "sniffio"                 : "1.3.1",
        "sounddevice"             : "0.5.2",
        "spandrel"                : "0.4.1",
        "starlette"               : "0.46.2",
        "sympy"                   : "1.14.0",
        "tokenizers"              : "0.22.0",
        "torch"                   : "2.7.1+cu128",
        "torchsde"                : "0.2.6",
        "torchvision"             : "0.22.1+cu128",
        "tqdm"                    : "4.67.1",
        "trampoline"              : "0.1.2",
        "transformers"            : "4.56.0",
        "triton"                  : "3.3.1",
        "typing-inspection"       : "0.4.1",
        "typing_extensions"       : "4.14.0",
        "urllib3"                 : "2.5.0",
        "uvicorn"                 : "0.35.0",
        "uvloop"                  : "0.21.0",
        "watchfiles"              : "1.1.0",
        "wcwidth"                 : "0.2.13",
        "websockets"              : "15.0.1",
        "wrapt"                   : "1.17.2",
        "wsproto"                 : "1.2.0",
        "zipp"                    : "3.23.0"
    },
    "config": {
        "schema_version": "4.0.2",
        "legacy_models_yaml_path": null,
        "host": "0.0.0.0",
        "port": 9090,
        "allow_origins": [],
        "allow_credentials": true,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
        "ssl_certfile": null,
        "ssl_keyfile": null,
        "log_tokenization": false,
        "patchmatch": true,
        "models_dir": "models",
        "convert_cache_dir": "models/.convert_cache",
        "download_cache_dir": "models/.download_cache",
        "legacy_conf_dir": "configs",
        "db_dir": "databases",
        "outputs_dir": "outputs",
        "custom_nodes_dir": "nodes",
        "style_presets_dir": "style_presets",
        "workflow_thumbnails_dir": "workflow_thumbnails",
        "log_handlers": ["console"],
        "log_format": "color",
        "log_level": "info",
        "log_sql": false,
        "log_level_network": "warning",
        "use_memory_db": false,
        "dev_reload": false,
        "profile_graphs": false,
        "profile_prefix": null,
        "profiles_dir": "profiles",
        "max_cache_ram_gb": null,
        "max_cache_vram_gb": null,
        "log_memory_usage": false,
        "model_cache_keep_alive_min": 0,
        "device_working_mem_gb": 3,
        "enable_partial_loading": true,
        "keep_ram_copy_of_weights": true,
        "ram": null,
        "vram": null,
        "lazy_offload": true,
        "pytorch_cuda_alloc_conf": "backend:cudaMallocAsync",
        "device": "auto",
        "precision": "auto",
        "sequential_guidance": false,
        "attention_type": "auto",
        "attention_slice_size": "auto",
        "force_tiled_decode": false,
        "pil_compress_level": 1,
        "max_queue_size": 10000,
        "clear_queue_on_startup": false,
        "allow_nodes": null,
        "deny_nodes": null,
        "node_cache_size": 512,
        "hashing_algorithm": "blake3_single",
        "remote_api_tokens": null,
        "scan_models_on_startup": false,
        "unsafe_disable_picklescan": false,
        "allow_unknown_models": true
    },
    "set_config_fields": [
        "host",                    "legacy_models_yaml_path", "enable_partial_loading",  "pytorch_cuda_alloc_conf",
        "port"
    ]
}

What happened

After updating my Docker container to the latest Invoke, I started getting an error:

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

If I set `enable_partial_loading: false`, I no longer get the error, but I do run out of memory.

What you expected to happen

I expect Invoke to run my model and produce an image.

How to reproduce the problem

It does not depend on any particular model. It does, however, require a prompt, such as "A vivid, surreal digital artwork depicting a fantastical forest". With NO prompt it seems to produce a picture, usually of a person.

Additional context

I have been using Invoke for the past year without issue and it had been a few weeks since the last time I updated it.

Discord username

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug — Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions