Skip to content

[bug]: Runtime error when enable_partial_loading is enabled #8850

@Intrinsically-Sublime

Description

@Intrinsically-Sublime

Is there an existing issue for this problem?

  • I have searched the existing issues

Install method

Docker-Compose

Operating system

Linux

GPU vendor

Nvidia (CUDA)

GPU model

RTX 2060

GPU VRAM

6GB

Version number

6.11.0.post1

Browser

Vivaldi 7.8.3925.56

System Information

{
    "version": "6.11.0.post1",
    "dependencies": {
        "absl-py"                 : "2.3.1",
        "accelerate"              : "1.8.1",
        "annotated-types"         : "0.7.0",
        "anyio"                   : "4.9.0",
        "attrs"                   : "25.3.0",
        "bidict"                  : "0.23.1",
        "bitsandbytes"            : "0.46.1",
        "blake3"                  : "1.0.5",
        "certifi"                 : "2025.6.15",
        "cffi"                    : "1.17.1",
        "charset-normalizer"      : "3.4.2",
        "click"                   : "8.2.1",
        "coloredlogs"             : "15.0.1",
        "compel"                  : "2.1.1",
        "contourpy"               : "1.3.2",
        "CUDA"                    : "12.8",
        "cycler"                  : "0.12.1",
        "Deprecated"              : "1.2.18",
        "diffusers"               : "0.36.0",
        "dnspython"               : "2.7.0",
        "dynamicprompts"          : "0.31.0",
        "einops"                  : "0.8.1",
        "fastapi"                 : "0.118.3",
        "fastapi-events"          : "0.12.2",
        "filelock"                : "3.18.0",
        "flatbuffers"             : "25.2.10",
        "fonttools"               : "4.58.5",
        "fsspec"                  : "2025.5.1",
        "gguf"                    : "0.17.1",
        "h11"                     : "0.16.0",
        "hf-xet"                  : "1.1.5",
        "httpcore"                : "1.0.9",
        "httptools"               : "0.6.4",
        "httpx"                   : "0.28.1",
        "huggingface-hub"         : "0.34.4",
        "humanfriendly"           : "10.0",
        "idna"                    : "3.10",
        "importlib_metadata"      : "8.7.0",
        "InvokeAI"                : "6.11.0.post1",
        "jax"                     : "0.6.2",
        "jaxlib"                  : "0.6.2",
        "Jinja2"                  : "3.1.6",
        "kiwisolver"              : "1.4.8",
        "MarkupSafe"              : "3.0.1",
        "matplotlib"              : "3.10.3",
        "mediapipe"               : "0.10.14",
        "ml_dtypes"               : "0.5.1",
        "mpmath"                  : "1.3.0",
        "networkx"                : "3.5",
        "numpy"                   : "1.26.4",
        "nvidia-cublas-cu12"      : "12.8.3.14",
        "nvidia-cuda-cupti-cu12"  : "12.8.57",
        "nvidia-cuda-nvrtc-cu12"  : "12.8.61",
        "nvidia-cuda-runtime-cu12": "12.8.57",
        "nvidia-cudnn-cu12"       : "9.7.1.26",
        "nvidia-cufft-cu12"       : "11.3.3.41",
        "nvidia-cufile-cu12"      : "1.13.0.11",
        "nvidia-curand-cu12"      : "10.3.9.55",
        "nvidia-cusolver-cu12"    : "11.7.2.55",
        "nvidia-cusparse-cu12"    : "12.5.7.53",
        "nvidia-cusparselt-cu12"  : "0.6.3",
        "nvidia-nccl-cu12"        : "2.26.2",
        "nvidia-nvjitlink-cu12"   : "12.8.61",
        "nvidia-nvtx-cu12"        : "12.8.55",
        "onnx"                    : "1.16.1",
        "onnxruntime"             : "1.19.2",
        "opencv-contrib-python"   : "4.11.0.86",
        "opt_einsum"              : "3.4.0",
        "packaging"               : "25.0",
        "picklescan"              : "0.0.26",
        "pillow"                  : "11.3.0",
        "prompt_toolkit"          : "3.0.51",
        "protobuf"                : "4.25.8",
        "psutil"                  : "7.0.0",
        "pycparser"               : "2.22",
        "pydantic"                : "2.11.7",
        "pydantic-settings"       : "2.10.1",
        "pydantic_core"           : "2.33.2",
        "pyparsing"               : "3.2.3",
        "PyPatchMatch"            : "1.0.2",
        "python-dateutil"         : "2.9.0.post0",
        "python-dotenv"           : "1.1.1",
        "python-engineio"         : "4.12.2",
        "python-multipart"        : "0.0.20",
        "python-socketio"         : "5.13.0",
        "PyWavelets"              : "1.8.0",
        "PyYAML"                  : "6.0.2",
        "regex"                   : "2024.11.6",
        "requests"                : "2.32.4",
        "safetensors"             : "0.5.3",
        "scipy"                   : "1.16.0",
        "semver"                  : "3.0.4",
        "sentencepiece"           : "0.2.0",
        "setuptools"              : "80.9.0",
        "simple-websocket"        : "1.1.0",
        "six"                     : "1.17.0",
        "sniffio"                 : "1.3.1",
        "sounddevice"             : "0.5.2",
        "spandrel"                : "0.4.1",
        "starlette"               : "0.46.2",
        "sympy"                   : "1.14.0",
        "tokenizers"              : "0.22.0",
        "torch"                   : "2.7.1+cu128",
        "torchsde"                : "0.2.6",
        "torchvision"             : "0.22.1+cu128",
        "tqdm"                    : "4.67.1",
        "trampoline"              : "0.1.2",
        "transformers"            : "4.56.0",
        "triton"                  : "3.3.1",
        "typing-inspection"       : "0.4.1",
        "typing_extensions"       : "4.14.0",
        "urllib3"                 : "2.5.0",
        "uvicorn"                 : "0.35.0",
        "uvloop"                  : "0.21.0",
        "watchfiles"              : "1.1.0",
        "wcwidth"                 : "0.2.13",
        "websockets"              : "15.0.1",
        "wrapt"                   : "1.17.2",
        "wsproto"                 : "1.2.0",
        "zipp"                    : "3.23.0"
    },
    "config": {
        "schema_version": "4.0.2",
        "legacy_models_yaml_path": null,
        "host": "0.0.0.0",
        "port": 9090,
        "allow_origins": [],
        "allow_credentials": true,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
        "ssl_certfile": null,
        "ssl_keyfile": null,
        "log_tokenization": false,
        "patchmatch": true,
        "models_dir": "models",
        "convert_cache_dir": "models/.convert_cache",
        "download_cache_dir": "models/.download_cache",
        "legacy_conf_dir": "configs",
        "db_dir": "databases",
        "outputs_dir": "outputs",
        "custom_nodes_dir": "nodes",
        "style_presets_dir": "style_presets",
        "workflow_thumbnails_dir": "workflow_thumbnails",
        "log_handlers": ["console"],
        "log_format": "color",
        "log_level": "info",
        "log_sql": false,
        "log_level_network": "warning",
        "use_memory_db": false,
        "dev_reload": false,
        "profile_graphs": false,
        "profile_prefix": null,
        "profiles_dir": "profiles",
        "max_cache_ram_gb": null,
        "max_cache_vram_gb": null,
        "log_memory_usage": false,
        "model_cache_keep_alive_min": 0,
        "device_working_mem_gb": 3,
        "enable_partial_loading": true,
        "keep_ram_copy_of_weights": true,
        "ram": null,
        "vram": null,
        "lazy_offload": true,
        "pytorch_cuda_alloc_conf": "backend:cudaMallocAsync",
        "device": "auto",
        "precision": "auto",
        "sequential_guidance": false,
        "attention_type": "auto",
        "attention_slice_size": "auto",
        "force_tiled_decode": false,
        "pil_compress_level": 1,
        "max_queue_size": 10000,
        "clear_queue_on_startup": false,
        "allow_nodes": null,
        "deny_nodes": null,
        "node_cache_size": 512,
        "hashing_algorithm": "blake3_single",
        "remote_api_tokens": null,
        "scan_models_on_startup": false,
        "unsafe_disable_picklescan": false,
        "allow_unknown_models": true
    },
    "set_config_fields": [
        "host",                    "legacy_models_yaml_path", "enable_partial_loading",  "pytorch_cuda_alloc_conf",
        "port"
    ]
}

What happened

After updating my Docker container to the latest Invoke, I started getting an error:

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

If I set `enable_partial_loading: false`, I no longer get the error, but I do run out of memory.

What you expected to happen

I expect Invoke to run my model and produce an image.

How to reproduce the problem

It does not depend on any particular model. It does, however, require a prompt, such as "A vivid, surreal digital artwork depicting a fantastical forest". With NO prompt it seems to produce a picture, usually of a person.

Additional context

I have been using Invoke for the past year without issue and it had been a few weeks since the last time I updated it.

Discord username

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug — Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions