Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cuda detection and fix problem due to PyTorch Bug #43227 #19

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion config.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
{
"name" : "Urban Testing",
"csvPath" : "metadata/UrbanSound8K.csv",
"data" : {
"type" : "CSVDataManager",
"path" : "/home/kiran/Documents/DATA/UrbanSound8K",
"path" : "/data/UrbanSound8K",
"format" : "audio",
"loader" : {
"shuffle" : true,
Expand Down
5 changes: 4 additions & 1 deletion data/data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,10 @@ def __init__(self, config):

self.load_func = load_formats[config['format']]

mfile = os.path.join(self.dir_path, 'metadata/UrbanSound8K.csv')
csvPath = 'metadata/UrbanSound8K.csv' if not config['csvPath'] else config['csvPath']
print(f'Loading csv configuration from {csvPath}..')

mfile = os.path.join(self.dir_path, csvPath)
metadata_df = pd.read_csv(mfile).sample(frac=1)
self.metadata_df = self._remove_too_small(metadata_df, 1)

Expand Down
4 changes: 2 additions & 2 deletions net/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def forward(self, batch):
# xt -> (batch, time, channel*freq)
batch, time = x.size()[:2]
x = x.reshape(batch, time, -1)
x_pack = torch.nn.utils.rnn.pack_padded_sequence(x, lengths, batch_first=True)
x_pack = torch.nn.utils.rnn.pack_padded_sequence(x, lengths.cpu(), batch_first=True)

# x -> (batch, time, lstm_out)
x_pack, hidden = self.net['recur'](x_pack)
Expand Down Expand Up @@ -123,7 +123,7 @@ def forward(self, batch):
# x -> (batch, time, channel*freq)
batch, time = x.size()[:2]
x = x.reshape(batch, time, -1)
x_pack = torch.nn.utils.rnn.pack_padded_sequence(x, lengths, batch_first=True)
x_pack = torch.nn.utils.rnn.pack_padded_sequence(x, lengths.cpu(), batch_first=True)

# x -> (batch, time, lstm_out)
x_pack, hidden = self.net['recur'](x_pack)
Expand Down
14 changes: 13 additions & 1 deletion run.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@

from eval import ClassificationEvaluator, AudioInference

def p(msg):
    """Write ``msg`` to standard output; thin shorthand for ``print``."""
    print(msg)

def isCuda():
    """Report whether PyTorch can actually use a CUDA device.

    Constructing ``torch.device("cuda")`` never fails and never inspects the
    hardware, so comparing its string form to ``'cuda'`` is always true. The
    device is therefore chosen from ``torch.cuda.is_available()`` first.

    Returns:
        bool: True when a CUDA device is available, False otherwise.
    """
    # Imported locally so that merely importing this module does not require
    # torch to be loaded before the check is requested.
    import torch

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f'typeof(device)={type(device)} device={device}')
    # ``device.type`` ignores any device index (e.g. "cuda:0"), unlike str().
    return device.type == 'cuda'

# Print the CUDA detection result returned by isCuda().
# NOTE(review): this appears to sit at module level, so it would run on every
# import of the module — confirm that is intended.
print(f"IsCuda? {isCuda()}")

def _get_transform(config, name):
tsf_name = config['transforms']['type']
Expand Down Expand Up @@ -52,6 +61,7 @@ def eval_main(checkpoint):

def infer_main(file_path, config, checkpoint):
# Fix bugs
p('infer_main')
if checkpoint is None:
model = getattr(net_module, config['model']['type'])()
else:
Expand Down Expand Up @@ -84,6 +94,7 @@ def train_main(config, resume):
m_name = config['model']['type']
model = getattr(net_module, m_name)(classes, config=config)
num_classes = len(classes)
print(f'num_classes = {num_classes}')


loss = getattr(net_module, config['train']['loss'])
Expand Down Expand Up @@ -112,10 +123,11 @@ def train_main(config, resume):
lr_scheduler=lr_scheduler,
train_logger=train_logger)

p('calling train()')
trainer.train()
return trainer
#duration = 1; freq = 440
#os.system('play --no-show-progress --null --channels 1 synth %s sine %f'%(duration, freq))
#os.system('play --no-show-progress --null --channels 1 synth %s sine %f'%(duration, freq))

def _test_loader(config):

Expand Down
1 change: 1 addition & 0 deletions train/base_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def __init__(self, model, loss, metrics, optimizer, resume, config, train_logger
self.logger = logging.getLogger(self.__class__.__name__)

self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'BaseTrainer.device={self.device}')

self.model = model.to(self.device)

Expand Down
2 changes: 2 additions & 0 deletions train/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,12 @@ def _train_epoch(self, epoch):
self.writer.set_step(epoch)

_trange = tqdm(self.data_loader, leave=True, desc='')
print(f'tqdm _trange={_trange}')

for batch_idx, batch in enumerate(_trange):
batch = [b.to(self.device) for b in batch]
data, target = batch[:-1], batch[-1]
print(f'len(data) = {len(data)}')
data = data if len(data) > 1 else data[0]
#data, target = data.to(self.device), target.to(self.device)

Expand Down