Replaced 'Sync Graph' with 'New Model' setting on UI. #831

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
4 changes: 2 additions & 2 deletions assets/Applio_NoUI.ipynb
@@ -606,7 +606,7 @@
 "pitch_guidance = True # @param{type:\"boolean\"}\n",
 "auto_backups = True # @param{type:\"boolean\"}\n",
 "pretrained = True # @param{type:\"boolean\"}\n",
-"sync_graph = False # @param{type:\"boolean\"}\n",
+"cleanup = False # @param{type:\"boolean\"}\n",
 "cache_data_in_gpu = False # @param{type:\"boolean\"}\n",
 "tensorboard = True # @param{type:\"boolean\"}\n",
 "# @markdown ### ➡️ Choose how many epochs your model will be stored\n",
@@ -638,7 +638,7 @@
 " if tensorboard == True:\n",
 " %load_ext tensorboard\n",
 " %tensorboard --logdir /content/Applio/logs/\n",
-" !python core.py train --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --save_every_epoch \"{save_every_epoch}\" --save_only_latest \"{save_only_latest}\" --save_every_weights \"{save_every_weights}\" --total_epoch \"{total_epoch}\" --sample_rate \"{sr}\" --batch_size \"{batch_size}\" --gpu \"{gpu}\" --pitch_guidance \"{pitch_guidance}\" --pretrained \"{pretrained}\" --custom_pretrained \"{custom_pretrained}\" --g_pretrained_path \"{g_pretrained_path}\" --d_pretrained_path \"{d_pretrained_path}\" --overtraining_detector \"{overtraining_detector}\" --overtraining_threshold \"{overtraining_threshold}\" --sync_graph \"{sync_graph}\" --cache_data_in_gpu \"{cache_data_in_gpu}\"\n",
+" !python core.py train --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --save_every_epoch \"{save_every_epoch}\" --save_only_latest \"{save_only_latest}\" --save_every_weights \"{save_every_weights}\" --total_epoch \"{total_epoch}\" --sample_rate \"{sr}\" --batch_size \"{batch_size}\" --gpu \"{gpu}\" --pitch_guidance \"{pitch_guidance}\" --pretrained \"{pretrained}\" --custom_pretrained \"{custom_pretrained}\" --g_pretrained_path \"{g_pretrained_path}\" --d_pretrained_path \"{d_pretrained_path}\" --overtraining_detector \"{overtraining_detector}\" --overtraining_threshold \"{overtraining_threshold}\" --cleanup \"{cleanup}\" --cache_data_in_gpu \"{cache_data_in_gpu}\"\n",
 "\n",
 "\n",
 "server_thread = threading.Thread(target=start_train)\n",
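For illustration, with the renamed option enabled the cell above expands to a command of this shape (every value below is a placeholder; the flag list simply mirrors the updated cell):

    python core.py train --model_name "my_model" --rvc_version "v2" --save_every_epoch "10" --save_only_latest "False" --save_every_weights "True" --total_epoch "500" --sample_rate "40000" --batch_size "8" --gpu "0" --pitch_guidance "True" --pretrained "True" --custom_pretrained "False" --g_pretrained_path "" --d_pretrained_path "" --overtraining_detector "False" --overtraining_threshold "50" --cleanup "True" --cache_data_in_gpu "False"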
4 changes: 2 additions & 2 deletions assets/i18n/languages/en_US.json
@@ -85,8 +85,8 @@
   "Overtraining Detector Settings": "Overtraining Detector Settings",
   "Overtraining Threshold": "Overtraining Threshold",
   "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "Set the maximum number of epochs you want your model to stop training if no improvement is detected.",
-  "Sync Graph": "Sync Graph",
-  "Synchronize the graph of the tensorbaord. Only enable this setting if you are training a new model.": "Synchronize the graph of the tensorbaord. Only enable this setting if you are training a new model.",
+  "New Model": "New Model",
+  "Enable this setting only if you are training a new model from scratch or restarting the training. Deletes all previously generated weights and tensorboard logs.": "Enable this setting only if you are training a new model from scratch or restarting the training. Deletes all previously generated weights and tensorboard logs.",
   "Start Training": "Start Training",
   "Stop Training": "Stop Training",
   "Generate Index": "Generate Index",
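Only en_US.json is updated by this PR. As a hedged sketch of how the renamed key is consumed by the UI layer (the real i18n helper lives in assets/i18n and is assumed here to behave like a dictionary lookup with a fallback):

    # Hypothetical minimal stand-in for the project's i18n helper;
    # the actual implementation in assets/i18n may differ in detail.
    import json

    def load_i18n(locale_path: str):
        with open(locale_path, encoding="utf8") as f:
            table = json.load(f)
        # Fall back to the key itself when a locale lacks the new entry,
        # e.g. language files not touched by this PR.
        return lambda key: table.get(key, key)

    i18n = load_i18n("assets/i18n/languages/en_US.json")
    print(i18n("New Model"))  # -> "New Model"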
10 changes: 5 additions & 5 deletions core.py
@@ -529,7 +529,7 @@ def run_train_script(
     overtraining_detector: bool,
     overtraining_threshold: int,
     pretrained: bool,
-    sync_graph: bool,
+    cleanup: bool,
     index_algorithm: str = "Auto",
     cache_data_in_gpu: bool = False,
     custom_pretrained: bool = False,
@@ -575,7 +575,7 @@ def run_train_script(
                 cache_data_in_gpu,
                 overtraining_detector,
                 overtraining_threshold,
-                sync_graph,
+                cleanup,
             ],
         ),
     ]
@@ -2129,10 +2129,10 @@ def parse_arguments():
         default=50,
     )
     train_parser.add_argument(
-        "--sync_graph",
+        "--cleanup",
         type=lambda x: bool(strtobool(x)),
         choices=[True, False],
-        help="Enable graph synchronization for distributed training.",
+        help="Cleanup previous training attempt.",
         default=False,
     )
     train_parser.add_argument(
@@ -2529,7 +2529,7 @@ def main():
            overtraining_threshold=args.overtraining_threshold,
            pretrained=args.pretrained,
            custom_pretrained=args.custom_pretrained,
-           sync_graph=args.sync_graph,
+           cleanup=args.cleanup,
            index_algorithm=args.index_algorithm,
            cache_data_in_gpu=args.cache_data_in_gpu,
            g_pretrained_path=args.g_pretrained_path,
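The renamed --cleanup flag keeps the same boolean parsing that --sync_graph used. A minimal sketch of that behavior, assuming the strtobool imported in core.py is the standard distutils.util.strtobool:

    # Mirrors the argparse type above: type=lambda x: bool(strtobool(x)).
    # strtobool accepts "y/yes/t/true/on/1" and "n/no/f/false/off/0".
    from distutils.util import strtobool

    parse_bool = lambda x: bool(strtobool(x))

    print(parse_bool("True"))   # True
    print(parse_bool("0"))      # False
    # parse_bool("maybe") raises ValueError, so argparse rejects bad input.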
176 changes: 52 additions & 124 deletions rvc/train/train.py
@@ -74,7 +74,7 @@
 cache_data_in_gpu = strtobool(sys.argv[13])
 overtraining_detector = strtobool(sys.argv[14])
 overtraining_threshold = int(sys.argv[15])
-sync_graph = strtobool(sys.argv[16])
+cleanup = strtobool(sys.argv[16])
 
 current_dir = os.getcwd()
 experiment_dir = os.path.join(current_dir, "logs", model_name)
@@ -198,8 +198,8 @@ def start():
                 pretrainG,
                 pretrainD,
                 pitch_guidance,
-                custom_total_epoch,
-                custom_save_every_weights,
+                total_epoch,
+                save_every_weights,
                 config,
                 device,
             ),
@@ -246,56 +246,9 @@ def continue_overtrain_detector(training_file_path):
             smoothed_loss_gen_history,
         ) = load_from_json(training_file_path)
 
-if sync_graph:
-    print(
-        "Sync graph is now activated! With sync graph enabled, the model undergoes a single epoch of training. Once the graphs are synchronized, training proceeds for the previously specified number of epochs."
-    )
-    custom_total_epoch = 1
-    custom_save_every_weights = True
-
-    start()
-
-    # Synchronize graphs by modifying config files
-    model_config_file = os.path.join(experiment_dir, "config.json")
-    rvc_config_file = os.path.join(
-        now_dir, "rvc", "configs", version, str(sample_rate) + ".json"
-    )
-    if not os.path.exists(rvc_config_file):
-        rvc_config_file = os.path.join(
-            now_dir, "rvc", "configs", "v1", str(sample_rate) + ".json"
-        )
-
-    pattern = rf"{os.path.basename(model_name)}_(\d+)e_(\d+)s\.pth"
-
-    for filename in os.listdir(experiment_dir):
-        match = re.match(pattern, filename)
-        if match:
-            steps = int(match.group(2))
-
-    def edit_config(config_file):
-        """
-        Edits the config file to synchronize graphs.
-
-        Args:
-            config_file (str): Path to the config file.
-        """
-        with open(config_file, "r", encoding="utf8") as json_file:
-            config_data = json.load(json_file)
-
-        config_data["train"]["log_interval"] = steps
-
-        with open(config_file, "w", encoding="utf8") as json_file:
-            json.dump(
-                config_data,
-                json_file,
-                indent=2,
-                separators=(",", ": "),
-                ensure_ascii=False,
-            )
-
-    edit_config(model_config_file)
-    edit_config(rvc_config_file)
-
+if cleanup:
+    print("Removing files from the prior training attempt...")
+
     # Clean up unnecessary files
     for root, dirs, files in os.walk(
         os.path.join(now_dir, "logs", model_name), topdown=False
@@ -319,17 +272,10 @@ def edit_config(config_file):
                         os.remove(item_path)
                 os.rmdir(folder_path)
 
-    print("Successfully synchronized graphs!")
-    custom_total_epoch = total_epoch
-    custom_save_every_weights = save_every_weights
-    continue_overtrain_detector(training_file_path)
-    start()
-else:
-    custom_total_epoch = total_epoch
-    custom_save_every_weights = save_every_weights
-    continue_overtrain_detector(training_file_path)
-    start()
-
+    print("Cleanup done!")
+
+continue_overtrain_detector(training_file_path)
+start()
 
 def run(
     rank,
@@ -729,9 +675,7 @@ def train_and_evaluate(
                 y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(wave, y_hat)
             with autocast(enabled=False):
                 loss_mel = F.l1_loss(y_mel, y_hat_mel) * config.train.c_mel
-                loss_kl = (
-                    kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * config.train.c_kl
-                )
+                loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * config.train.c_kl
                 loss_fm = feature_loss(fmap_r, fmap_g)
                 loss_gen, losses_gen = generator_loss(y_d_hat_g)
                 loss_gen_all = loss_gen + loss_fm + loss_mel + loss_kl
@@ -753,66 +697,50 @@
             scaler.step(optim_g)
             scaler.update()
 
-            # Logging and checkpointing
-            if rank == 0:
-                if global_step % config.train.log_interval == 0:
-                    lr = optim_g.param_groups[0]["lr"]
-                    if loss_mel > 75:
-                        loss_mel = 75
-                    if loss_kl > 9:
-                        loss_kl = 9
-                    scalar_dict = {
-                        "loss/g/total": loss_gen_all,
-                        "loss/d/total": loss_disc,
-                        "learning_rate": lr,
-                        "grad_norm_d": grad_norm_d,
-                        "grad_norm_g": grad_norm_g,
-                    }
-                    scalar_dict.update(
-                        {
-                            "loss/g/fm": loss_fm,
-                            "loss/g/mel": loss_mel,
-                            "loss/g/kl": loss_kl,
-                        }
-                    )
-                    scalar_dict.update(
-                        {f"loss/g/{i}": v for i, v in enumerate(losses_gen)}
-                    )
-                    scalar_dict.update(
-                        {f"loss/d_r/{i}": v for i, v in enumerate(losses_disc_r)}
-                    )
-                    scalar_dict.update(
-                        {f"loss/d_g/{i}": v for i, v in enumerate(losses_disc_g)}
-                    )
-                    image_dict = {
-                        "slice/mel_org": plot_spectrogram_to_numpy(
-                            y_mel[0].data.cpu().numpy()
-                        ),
-                        "slice/mel_gen": plot_spectrogram_to_numpy(
-                            y_hat_mel[0].data.cpu().numpy()
-                        ),
-                        "all/mel": plot_spectrogram_to_numpy(mel[0].data.cpu().numpy()),
-                    }
-
-                    with torch.no_grad():
-                        if hasattr(net_g, "module"):
-                            o, *_ = net_g.module.infer(*reference)
-                        else:
-                            o, *_ = net_g.infer(*reference)
-                    audio_dict = {f"gen/audio_{global_step:07d}": o[0, :, :]}
-
-                    summarize(
-                        writer=writer,
-                        global_step=global_step,
-                        images=image_dict,
-                        scalars=scalar_dict,
-                        audios=audio_dict,
-                        audio_sample_rate=config.data.sample_rate,
-                    )
-
             global_step += 1
             pbar.update(1)
 
+        # Logging and checkpointing
+        if rank == 0:
+            lr = optim_g.param_groups[0]["lr"]
+            if loss_mel > 75:
+                loss_mel = 75
+            if loss_kl > 9:
+                loss_kl = 9
+            scalar_dict = {
+                "loss/g/total": loss_gen_all,
+                "loss/d/total": loss_disc,
+                "learning_rate": lr,
+                "grad_norm_d": grad_norm_d,
+                "grad_norm_g": grad_norm_g,
+                "loss/g/fm": loss_fm,
+                "loss/g/mel": loss_mel,
+                "loss/g/kl": loss_kl,
+            }
+            # commented out
+            #scalar_dict.update({f"loss/g/{i}": v for i, v in enumerate(losses_gen)})
+            #scalar_dict.update({f"loss/d_r/{i}": v for i, v in enumerate(losses_disc_r)})
+            #scalar_dict.update({f"loss/d_g/{i}": v for i, v in enumerate(losses_disc_g)})
+
+            image_dict = {
+                "slice/mel_org": plot_spectrogram_to_numpy(y_mel[0].data.cpu().numpy()),
+                "slice/mel_gen": plot_spectrogram_to_numpy(y_hat_mel[0].data.cpu().numpy()),
+                "all/mel": plot_spectrogram_to_numpy(mel[0].data.cpu().numpy()),
+            }
+
+            with torch.no_grad():
+                o, *_ = net_g.infer(*reference)
+            audio_dict = {f"gen/audio_{global_step:07d}": o[0, :, :]}
+
+            summarize(
+                writer=writer,
+                global_step=global_step,
+                images=image_dict,
+                scalars=scalar_dict,
+                audios=audio_dict,
+                audio_sample_rate=config.data.sample_rate,
+            )
+
         # Save checkpoint
         model_add = []
         model_del = []
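The new cleanup branch near the top of this file walks the model's log directory and deletes prior artifacts before a fresh run. A self-contained sketch of that pattern (illustrative only; the PR's exact file filters sit in the collapsed region of the diff above):

    # Hypothetical standalone version of the cleanup walk; which files count
    # as stale weights or logs is decided by filters collapsed in the diff view.
    import os

    def cleanup_logs(log_dir: str) -> None:
        # Walk bottom-up so each directory is empty before it is removed;
        # log_dir itself is kept, only its contents are deleted.
        for root, dirs, files in os.walk(log_dir, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))

    # Example: cleanup_logs(os.path.join("logs", "my_model"))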
8 changes: 4 additions & 4 deletions tabs/train/train.py
@@ -622,10 +622,10 @@ def train_tab():
                     interactive=True,
                 )
             with gr.Column():
-                sync_graph = gr.Checkbox(
-                    label=i18n("Sync Graph"),
+                cleanup = gr.Checkbox(
+                    label=i18n("New Model"),
                     info=i18n(
-                        "Synchronize the graph of the tensorbaord. Only enable this setting if you are training a new model."
+                        "Enable this setting only if you are training a new model from scratch or restarting the training. Deletes all previously generated weights and tensorboard logs."
                     ),
                     value=False,
                     interactive=True,
@@ -768,7 +768,7 @@ def train_tab():
             overtraining_detector,
             overtraining_threshold,
             pretrained,
-            sync_graph,
+            cleanup,
             index_algorithm,
             cache_dataset_in_gpu,
             custom_pretrained,
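For reference, the renamed control is a plain Gradio checkbox; a minimal standalone sketch (the i18n stand-in below is hypothetical, matching the call sites in this diff):

    # Standalone sketch; in the PR the checkbox lives inside train_tab()'s
    # layout and is passed as the `cleanup` input of the training job.
    import gradio as gr

    def i18n(key: str) -> str:  # stand-in for the project's i18n helper
        return key

    cleanup = gr.Checkbox(
        label=i18n("New Model"),
        info=i18n(
            "Enable this setting only if you are training a new model from scratch or restarting the training. Deletes all previously generated weights and tensorboard logs."
        ),
        value=False,
        interactive=True,
    )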