MemoryError when Increasing Number of Variables #189

Closed
LucasBoTang opened this issue Jul 23, 2024 · 4 comments

@LucasBoTang
Contributor

Description:

I encountered a MemoryError while solving a problem that involves a large number of decision variables and parameters. The error appears when the problem size exceeds 16 decision variables and parameters, which should be a reasonable size. However, the code functions correctly for smaller problem sizes, such as 15 or fewer variables and parameters.

Code:

It is a multiple knapsack problem with parametric objective coefficients. You can find the code at the link or below.

"""
Parametric Multi-Dimensional Knapsack
"""

import numpy as np
import neuromancer as nm

def knapsack(var_keys, param_keys, weights, caps, penalty_weight=100):
    # mutable parameters
    params = {}
    for p in param_keys:
        params[p] = nm.constraint.variable(p)
    # decision variables
    vars = {}
    for v in var_keys:
        vars[v] = nm.constraint.variable(v)
    obj = [get_obj(vars, params)]
    constrs = get_constrs(vars, weights, caps, penalty_weight)
    return obj, constrs

def get_obj(vars, params):
    """
    Get neuroMANCER objective component
    """
    # get decision variables
    x, = vars.values()
    # get mutable parameters
    c = params.values()
    # objective function c^T x
    f = sum(- ci * xi for ci, xi in zip(c, x))
    obj = f.minimize(weight=1.0, name="obj")
    return obj

def get_constrs(vars, weights, caps, penalty_weight):
    """
    Get neuroMANCER constraint component
    """
    # problem size
    dim, num_var = weights.shape
    # get decision variables
    x, = vars.values()
    # constraints
    constraints = []
    for i in range(dim):
        g = sum(weights[i, j] * x[:, j] for j in range(num_var))
        con = penalty_weight * (g <= caps[i])
        con.name = "cap_{}".format(i)
        constraints.append(con)
    return constraints

if __name__ == "__main__":

    import torch
    from torch import nn

    # random seed
    np.random.seed(42)
    torch.manual_seed(42)

    # init
    num_var = 32      # number of variables
    #num_var = 15      # number of variables
    dim = 2           # dimension of constraints
    caps = [20] * dim # capacity
    num_data = 5000   # number of data
    test_size = 1000  # number of test size
    val_size = 1000   # number of validation size

    # data sample from PyEPO
    import pyepo
    # generate data
    weights, x, c = pyepo.data.knapsack.genData(num_data=num_data, num_features=5,
                                                num_items=num_var, dim=dim,
                                                deg=4, noise_width=0.5)
    c_samples = torch.FloatTensor(c)
    data = {"c":c_samples}
    # data split
    from src.utlis import data_split
    data_train, data_test, data_dev = data_split(data, test_size=test_size, val_size=val_size)
    # torch dataloaders
    from torch.utils.data import DataLoader
    loader_train = DataLoader(data_train, batch_size=32, num_workers=0,
                              collate_fn=data_train.collate_fn, shuffle=True)
    loader_test  = DataLoader(data_test, batch_size=32, num_workers=0,
                              collate_fn=data_test.collate_fn, shuffle=True)
    loader_dev   = DataLoader(data_dev, batch_size=32, num_workers=0,
                              collate_fn=data_dev.collate_fn, shuffle=True)

    # get objective function & constraints
    obj, constrs = knapsack(["x"], ["c"], weights=weights, caps=caps, penalty_weight=100)

    # define neural architecture for the solution map smap(c) -> x
    import neuromancer as nm
    func = nm.modules.blocks.MLP(insize=num_var, outsize=num_var, bias=True,
                                 linear_map=nm.slim.maps["linear"],
                                 nonlin=nn.ReLU, hsizes=[64]*2)
    components = [nm.system.Node(func, ["c"], ["x"], name="smap")]

    # build neuromancer problems
    loss = nm.loss.PenaltyLoss(obj, constrs)
    problem = nm.problem.Problem(components, loss)

    # training
    lr = 0.001    # step size for gradient descent
    epochs = 400  # number of training epochs
    warmup = 40   # number of epochs to wait before enacting early stopping policy
    patience = 40 # number of epochs with no improvement in eval metric to allow before early stopping
    # set adamW as optimizer
    optimizer = torch.optim.AdamW(problem.parameters(), lr=lr)
    # define trainer
    trainer = nm.trainer.Trainer(
        problem,
        loader_train,
        loader_dev,
        loader_test,
        optimizer,
        epochs=epochs,
        patience=patience,
        warmup=warmup)
    # train solution map
    best_model = trainer.train()
    # load best model dict
    problem.load_state_dict(best_model)

Error Message:

Traceback (most recent call last):
  File "C:\Users\lucas\anaconda3\envs\neuromancer\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\lucas\anaconda3\envs\neuromancer\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "C:\OneDrive\Study\UofT\Research\DMIPL\src\problem\neuromancer\knapsack.py", line 116, in <module>
    trainer = nm.trainer.Trainer(
  File "C:\Users\lucas\anaconda3\envs\neuromancer\lib\site-packages\neuromancer\trainer.py", line 227, in __init__
    self.best_model = deepcopy(self.model.state_dict())
  File "C:\Users\lucas\anaconda3\envs\neuromancer\lib\site-packages\torch\nn\modules\module.py", line 1916, in state_dict
    module.state_dict(destination=destination, prefix=prefix + name + '.', keep_vars=keep_vars)
  File "C:\Users\lucas\anaconda3\envs\neuromancer\lib\site-packages\torch\nn\modules\module.py", line 1916, in state_dict
    module.state_dict(destination=destination, prefix=prefix + name + '.', keep_vars=keep_vars)
  File "C:\Users\lucas\anaconda3\envs\neuromancer\lib\site-packages\torch\nn\modules\module.py", line 1916, in state_dict
    module.state_dict(destination=destination, prefix=prefix + name + '.', keep_vars=keep_vars)
  [Previous line repeated 30 more times]
  File "C:\Users\lucas\anaconda3\envs\neuromancer\lib\site-packages\torch\nn\modules\module.py", line 1909, in state_dict
    destination._metadata[prefix[:-1]] = local_metadata
MemoryError

Environment:

  • OS: Windows 11
  • Python version: 3.10.13
  • PyTorch version: 2.3.0
  • NeuroMANCER version: 1.5.1
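
Since the traceback ends inside deepcopy(self.model.state_dict()), a quick check is to see how large the Problem's module tree and state_dict already are before the Trainer is ever built. This is only a minimal diagnostic sketch using standard torch.nn.Module methods; run it right after problem = nm.problem.Problem(components, loss):

# diagnostic sketch: count nested modules and state_dict entries of the Problem
# (note: the state_dict() call may itself raise the MemoryError, which would
#  localize the blow-up to the Problem's module tree rather than the Trainer)
num_modules = sum(1 for _ in problem.modules())
state = problem.state_dict()
print("modules:", num_modules,
      "| state_dict entries:", len(state),
      "| total elements:", sum(v.numel() for v in state.values()))
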
@drgona
Contributor

drgona commented Aug 14, 2024

@LucasBoTang I can confirm that this code crashed when running in Colab; the session crashed after using all available RAM.
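
For anyone reproducing this, one way to see how much memory is consumed around the Trainer construction is to print the process resident set size before and after. This is just a sketch; it assumes psutil is installed and reuses the objects from the script above:

import psutil

proc = psutil.Process()
print("RSS before Trainer: {:.2f} GB".format(proc.memory_info().rss / 1e9))
trainer = nm.trainer.Trainer(problem, loader_train, loader_dev, loader_test,
                             optimizer, epochs=epochs, patience=patience, warmup=warmup)
# the second print is not reached if the constructor itself raises MemoryError
print("RSS after Trainer:  {:.2f} GB".format(proc.memory_info().rss / 1e9))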

@LucasBoTang
Contributor Author

When increasing the dimensions in a parametric Rosenbrock problem (with 32 decision variables and 17 parameters), the code runs for a very long time without producing any output, suggesting similar issues in handling higher dimensions.

Code for Rosenbrock Problem:

import numpy as np
import neuromancer as nm

def rosenbrock(var_keys, param_keys, steepness, num_blocks, penalty_weight=50):
    # mutable parameters
    params = {}
    for p in param_keys:
        params[p] = nm.constraint.variable(p)
    # decision variables
    vars = {}
    for v in var_keys:
        vars[v] = nm.constraint.variable(v)
    obj = [get_obj(vars, params, steepness, num_blocks)]
    constrs = get_constrs(vars, params, num_blocks, penalty_weight)
    return obj, constrs


def get_obj(vars, params, steepness, num_blocks):
    """
    Get neuroMANCER objective component
    """
    # get decision variables
    x, = vars.values()
    # get mutable parameters
    p, a = params.values()
    # objective function: sum_i (a_i - x_{2i})^2 + steepness * (x_{2i+1} - x_{2i}^2)^2
    f = sum((a[:, i] - x[:, 2*i]) ** 2 + steepness * (x[:, 2*i+1] - x[:, 2*i] ** 2) ** 2
             for i in range(num_blocks))
    obj = f.minimize(weight=1.0, name="obj")
    return obj


def get_constrs(vars, params, num_blocks, penalty_weight):
    """
    Get neuroMANCER constraint component
    """
    # get decision variables
    x, = vars.values()
    # get mutable parameters
    p, a = params.values()
    # constraints
    constraints = []
    # inner
    g = sum(x[:, 2*i+1] for i in range(num_blocks))
    con = penalty_weight * (g >= num_blocks * p[:, 0] / 2)
    con.name = "c_inner"
    constraints.append(con)
    # outer
    g = sum(x[:, 2*i] ** 2 for i in range(num_blocks))
    con = penalty_weight * (g <= num_blocks * p[:, 0])
    con.name = "c_outer"
    constraints.append(con)
    return constraints


if __name__ == "__main__":

    import torch
    from torch import nn

    # random seed
    np.random.seed(42)
    torch.manual_seed(42)

    # init
    steepness = 30    # steepness factor
    num_blocks = 16   # number of expression blocks
    num_data = 5000   # number of data
    test_size = 1000  # number of test size
    val_size = 1000   # number of validation size

    # data sample from uniform distribution
    p_low, p_high = 1.0, 8.0
    a_low, a_high = 0.5, 4.5
    p_samples = torch.FloatTensor(num_data, 1).uniform_(p_low, p_high)
    a_samples = torch.FloatTensor(num_data, num_blocks).uniform_(a_low, a_high)
    data = {"p":p_samples, "a":a_samples}
    # data split
    from src.utlis import data_split
    data_train, data_test, data_dev = data_split(data, test_size=test_size, val_size=val_size)
    # torch dataloaders
    from torch.utils.data import DataLoader
    loader_train = DataLoader(data_train, batch_size=32, num_workers=0,
                              collate_fn=data_train.collate_fn, shuffle=True)
    loader_test  = DataLoader(data_test, batch_size=32, num_workers=0,
                              collate_fn=data_test.collate_fn, shuffle=True)
    loader_dev   = DataLoader(data_dev, batch_size=32, num_workers=0,
                              collate_fn=data_dev.collate_fn, shuffle=True)

    # get objective function & constraints
    obj, constrs = rosenbrock(["x"], ["p", "a"], steepness=steepness,
                              num_blocks=num_blocks, penalty_weight=100)

    # define neural architecture for the solution map smap(p, a) -> x
    import neuromancer as nm
    func = nm.modules.blocks.MLP(insize=num_blocks+1, outsize=2*num_blocks, bias=True,
                                 linear_map=nm.slim.maps["linear"],
                                 nonlin=nn.ReLU, hsizes=[32]*4)
    components = [nm.system.Node(func, ["p", "a"], ["x"], name="smap")]

    # build neuromancer problems
    loss = nm.loss.PenaltyLoss(obj, constrs)
    problem = nm.problem.Problem(components, loss)

    # training
    lr = 0.001    # step size for gradient descent
    epochs = 400  # number of training epochs
    warmup = 40   # number of epochs to wait before enacting early stopping policy
    patience = 40 # number of epochs with no improvement in eval metric to allow before early stopping
    # set adamW as optimizer
    optimizer = torch.optim.AdamW(problem.parameters(), lr=lr)
    # define trainer
    trainer = nm.trainer.Trainer(
        problem,
        loader_train,
        loader_dev,
        loader_test,
        optimizer,
        epochs=epochs,
        patience=patience,
        warmup=warmup)
    # train solution map
    best_model = trainer.train()
    # load best model dict
    problem.load_state_dict(best_model)
    print()

    # init mathematical model
    from src.problem.math_solver.rosenbrock import rosenbrock
    model = rosenbrock(steepness=steepness, num_blocks=num_blocks)

    # test neuroMANCER
    from src.utlis import nm_test_solve
    p, a = data_train[0]["p"].tolist(), data_train[0]["a"].tolist()
    datapoint = {"p": torch.tensor([p], dtype=torch.float32),
                 "a": torch.tensor([a], dtype=torch.float32),
                 "name":"test"}
    model.set_param_val({"p":p, "a":a})
    print("neuroMANCER:")
    nm_test_solve(["x"], problem, datapoint, model)
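
A simple way to measure how long a single epoch actually takes is to rebuild the trainer with epochs=1 and wrap the call with a timer. This is only a sketch, reusing problem, the loaders, and optimizer constructed above:

# timing sketch: one epoch only, to separate per-epoch cost from setup overhead
import time
t0 = time.perf_counter()
timing_trainer = nm.trainer.Trainer(problem, loader_train, loader_dev, loader_test,
                                    optimizer, epochs=1, patience=1, warmup=1)
timing_trainer.train()
print("one epoch took {:.1f} s".format(time.perf_counter() - t0))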

@RBirmiwal
Collaborator

RBirmiwal commented Oct 9, 2024

@LucasBoTang For the Rosenbrock problem, how long did it take to finish one epoch? I was able to train the model on CPU using our Lightning framework + Lightning trainer (https://github.com/pnnl/neuromancer/tree/master/examples/lightning_integration_examples), with NeuroMANCER v1.5.1.

The core part of the code is shown below:

import numpy as np
import torch
from torch import nn
import neuromancer as nm
# random seed
np.random.seed(42)
torch.manual_seed(42)

# init
steepness = 30    # steepness factor
num_blocks = 16   # number of expression blocks
num_data = 5000   # number of data
test_size = 1000  # number of test size
val_size = 1000   # number of validation size

# data sample from uniform distribution
p_low, p_high = 1.0, 8.0
a_low, a_high = 0.5, 4.5
p_samples = torch.FloatTensor(num_data, 1).float().uniform_(p_low, p_high)
a_samples = torch.FloatTensor(num_data, num_blocks).float().uniform_(a_low, a_high)
data = {"p":p_samples, "a":a_samples}
# data split

data_train, data_test, data_dev = data_split(data, test_size=test_size, val_size=val_size)

def data_setup_function(bs): 
    data_train, data_test, data_dev = data_split(data, test_size=test_size, val_size=val_size)
    data_train.name='train'
    data_test.name='test'
    data_dev.name='dev'
    return data_train, data_dev, data_test, bs

# torch dataloaders
from torch.utils.data import DataLoader
loader_train = DataLoader(data_train, batch_size=32, num_workers=0,
                            collate_fn=data_train.collate_fn, shuffle=True)
loader_test  = DataLoader(data_test, batch_size=32, num_workers=0,
                            collate_fn=data_test.collate_fn, shuffle=True)
loader_dev   = DataLoader(data_dev, batch_size=32, num_workers=0,
                            collate_fn=data_dev.collate_fn, shuffle=True)

# get objective function & constraints
obj, constrs = rosenbrock(["x"], ["p", "a"], steepness=steepness,
                            num_blocks=num_blocks, penalty_weight=100)

# define neural architecture for the solution map smap(p, a) -> x
import neuromancer as nm
func = nm.modules.blocks.MLP(insize=num_blocks+1, outsize=2*num_blocks, bias=True,
                                linear_map=nm.slim.maps["linear"],
                                nonlin=nn.ReLU, hsizes=[32]*4)
components = [nm.system.Node(func, ["p", "a"], ["x"], name="smap")]

# build neuromancer problems
loss = nm.loss.PenaltyLoss(obj, constrs)
problem = nm.problem.Problem(components, loss)

# training
lr = 0.001    # step size for gradient descent
epochs = 5  # number of training epochs
warmup = 40   # number of epochs to wait before enacting early stopping policy
patience = 40 # number of epochs with no improvement in eval metric to allow before early stopping

lit_trainer = nm.trainer.LitTrainer(epochs=epochs, warmup=warmup, patience=patience)
lit_trainer.fit(problem,data_setup_function, bs=32)

The original NeuroMANCER trainer was not designed to be efficient, whereas the Lightning trainer likely is. I would encourage all future testing to use the Lightning trainer.

@drgona

@RBirmiwal
Collaborator

I am going to close this for now. Thanks for continuing to test the extremes of the library; it is very important to us.
