sync with applio

blaisewf · Aug 26, 2024 · 40f27e1 · 40f27e1
1 parent e5b8d9a
commit 40f27e1
Show file tree

Hide file tree

Showing 14 changed files with 200 additions and 108 deletions.
diff --git a/install.bat b/install.bat
@@ -1,73 +1,75 @@
 @echo off
-setlocal
-title Installer
+setlocal enabledelayedexpansion
+title Applio Installer
+
+echo Welcome to the RVC CLI Installer!
+echo.
 
 set "principal=%cd%"
-set "URL_EXTRA=https://huggingface.co/IAHispano/applio/resolve/main"
 set "CONDA_ROOT_PREFIX=%UserProfile%\Miniconda3"
 set "INSTALL_ENV_DIR=%principal%\env"
 set "MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-py39_23.9.0-0-Windows-x86_64.exe"
 set "CONDA_EXECUTABLE=%CONDA_ROOT_PREFIX%\Scripts\conda.exe"
 
-if not exist "%cd%\env.zip" (
- echo Downloading the fairseq build...
- curl -s -LJO %URL_EXTRA%/env.zip -o env.zip
-)
-
-if not exist "%cd%\env.zip" (
- echo Download failed, trying with the powershell method
- powershell -Command "& {Invoke-WebRequest -Uri '%URL_EXTRA%/env.zip' -OutFile 'env.zip'}"
-)
-
-if not exist "%cd%\env" (
- echo Extracting the file...
- powershell -command "& { Add-Type -AssemblyName System.IO.Compression.FileSystem ; [System.IO.Compression.ZipFile]::ExtractToDirectory('%cd%\env.zip', '%cd%') }"
-)
-
-if not exist "%cd%\env" (
- echo Extracting failed trying with the tar method...
- tar -xf %cd%\env.zip
-)
-
-if exist "%cd%\env" (
- del env.zip
-) else (
- echo Theres a problem extracting the file please download the file and extract it manually.
- echo https://huggingface.co/IAHispano/applio/resolve/main/env.zip
- pause
- exit
+echo Cleaning up unnecessary files...
+for %%F in (Makefile Dockerfile docker-compose.yaml *.sh) do (
+ if exist "%%F" del "%%F"
 )
+echo Cleanup complete.
+echo.
 
 if not exist "%CONDA_EXECUTABLE%" (
- echo Downloading Miniconda from %MINICONDA_DOWNLOAD_URL%...
- curl %MINICONDA_DOWNLOAD_URL% -o miniconda.exe
-
- if not exist "%principal%\miniconda.exe" (
- echo Download failed trying with the powershell method.
- powershell -Command "& {Invoke-WebRequest -Uri '%MINICONDA_DOWNLOAD_URL%' -OutFile 'miniconda.exe'}"
+ echo Miniconda not found. Starting download and installation...
+ echo Downloading Miniconda...
+ powershell -Command "& {Invoke-WebRequest -Uri '%MINICONDA_DOWNLOAD_URL%' -OutFile 'miniconda.exe'}"
+ if not exist "miniconda.exe" (
+ echo Download failed. Please check your internet connection and try again.
+ goto :error
  )
 
- echo Installing Miniconda to %CONDA_ROOT_PREFIX%...
+ echo Installing Miniconda...
  start /wait "" miniconda.exe /InstallationType=JustMe /RegisterPython=0 /S /D=%CONDA_ROOT_PREFIX%
+ if errorlevel 1 (
+ echo Miniconda installation failed.
+ goto :error
+ )
  del miniconda.exe
+ echo Miniconda installation complete.
+) else (
+ echo Miniconda already installed. Skipping installation.
 )
+echo.
 
+echo Creating Conda environment...
 call "%CONDA_ROOT_PREFIX%\_conda.exe" create --no-shortcuts -y -k --prefix "%INSTALL_ENV_DIR%" python=3.9
+if errorlevel 1 goto :error
+echo Conda environment created successfully.
+echo.
 
-if exist "%cd%\env\python.exe" (
- echo Installing pip version less than 24.1...
- "%cd%\env\python.exe" -m pip install "pip<24.1"
+if exist "%INSTALL_ENV_DIR%\python.exe" (
+ echo Installing specific pip version...
+ "%INSTALL_ENV_DIR%\python.exe" -m pip install "pip<24.1"
+ if errorlevel 1 goto :error
+ echo Pip installation complete.
+ echo.
 )
 
-echo Installing the dependencies...
-call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%"
-pip install --upgrade setuptools
-pip install -r "%principal%\requirements.txt"
+echo Installing dependencies...
+call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || goto :error
+pip install --upgrade setuptools || goto :error
+pip install -r "%principal%\requirements.txt" || goto :error
 pip uninstall torch torchvision torchaudio -y
-pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/cu121
+pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/cu121 || goto :error
 call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" deactivate
-echo.
+echo Dependencies installation complete.
+echo.
 
 echo RVC CLI has been installed successfully!
+echo.
+pause
+exit /b 0
+
+:error
+echo An error occurred during installation. Please check the output above for details.
 pause
-cls
+exit /b 1
diff --git a/requirements.txt b/requirements.txt
@@ -21,8 +21,6 @@ pedalboard
 
 # Machine learning
 omegaconf==2.0.5; sys_platform == 'darwin'
-git+https://github.com/IAHispano/fairseq; sys_platform == 'linux'
-fairseq==0.12.2; sys_platform == 'darwin' or sys_platform == 'win32'
 numba; sys_platform == 'linux'
 numba==0.57.0; sys_platform == 'darwin' or sys_platform == 'win32'
 torchaudio==2.1.1
@@ -32,6 +30,7 @@ torchvision==0.16.1
 einops
 libf0
 torchfcpe
+transformers==4.44.2
 
 # Visualization
 matplotlib==3.7.2

diff --git a/rvc/configs/config.py b/rvc/configs/config.py
@@ -131,6 +131,13 @@ def device_config(self) -> tuple:
  def set_cuda_config(self):
  i_device = int(self.device.split(":")[-1])
  self.gpu_name = torch.cuda.get_device_name(i_device)
+ # Zluda
+ if self.gpu_name.endswith("[ZLUDA]"):
+ print("Zluda compatibility enabled, experimental feature.")
+ torch.backends.cudnn.enabled = False
+ torch.backends.cuda.enable_flash_sdp(False)
+ torch.backends.cuda.enable_math_sdp(True)
+ torch.backends.cuda.enable_mem_efficient_sdp(False)
  low_end_gpus = ["16", "P40", "P10", "1060", "1070", "1080"]
  if (
  any(gpu in self.gpu_name for gpu in low_end_gpus)

diff --git a/rvc/infer/infer.py b/rvc/infer/infer.py
@@ -53,6 +53,7 @@ def __init__(self):
  self.hubert_model = (
  None # Initialize the Hubert model (for embedding extraction)
  )
+ self.last_embedder_model = None # Last used embedder model
  self.tgt_sr = None # Target sampling rate for the output audio
  self.net_g = None # Generator network for voice conversion
  self.vc = None # Voice conversion pipeline instance
@@ -69,8 +70,8 @@ def load_hubert(self, embedder_model: str, embedder_model_custom: str = None):
  embedder_model (str): Path to the pre-trained HuBERT model.
  embedder_model_custom (str): Path to the custom HuBERT model.
  """
- models, _, _ = load_embedding(embedder_model, embedder_model_custom)
- self.hubert_model = models[0].to(self.config.device)
+ self.hubert_model = load_embedding(embedder_model, embedder_model_custom)
+ self.hubert_model.to(self.config.device)
  self.hubert_model = (
  self.hubert_model.half()
  if self.config.is_half
@@ -333,8 +334,9 @@ def convert_audio(
  if audio_max > 1:
  audio /= audio_max
 
- if not self.hubert_model:
+ if not self.hubert_model or embedder_model != self.last_embedder_model:
  self.load_hubert(embedder_model, embedder_model_custom)
+ self.last_embedder_model = embedder_model
 
  file_index = (
  index_path.strip()
@@ -637,8 +639,9 @@ def convert_audio_batch(
  with open(pid_file_path, "w") as pid_file:
  pid_file.write(str(pid))
  try:
- if not self.hubert_model:
+ if not self.hubert_model or embedder_model != self.last_embedder_model:
  self.load_hubert(embedder_model, embedder_model_custom)
+ self.last_embedder_model = embedder_model
  self.get_vc(model_path, sid)
  file_index = (
  index_path.strip()

diff --git a/rvc/infer/pipeline.py b/rvc/infer/pipeline.py
@@ -425,14 +425,11 @@ def voice_conversion(
  feats = feats.view(1, -1)
  padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
 
- inputs = {
- "source": feats.to(self.device),
- "padding_mask": padding_mask,
- "output_layer": 9 if version == "v1" else 12,
- }
  with torch.no_grad():
- logits = model.extract_features(**inputs)
- feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
+ feats = model(feats.to(self.device))["last_hidden_state"]
+ feats = (
+ model.final_proj(feats[0]).unsqueeze(0) if version == "v1" else feats
+ )
  if protect < 0.5 and pitch != None and pitchf != None:
  feats0 = feats.clone()
  if (

diff --git a/rvc/lib/algorithm/commons.py b/rvc/lib/algorithm/commons.py
@@ -157,6 +157,24 @@ def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
  return acts
 
 
+# Zluda, same as previous, but without jit.script
+def fused_add_tanh_sigmoid_multiply_no_jit(input_a, input_b, n_channels):
+ """
+ Sum both inputs, then return tanh(first channels) * sigmoid(remaining channels).
+
+ Args:
+ input_a: The first input tensor (indexed as 3-D; dim 1 is split).
+ input_b: The second input tensor, added element-wise to input_a.
+ n_channels: Indexable whose element 0 is the split index along dim 1.
+ """
+ n_channels_int = n_channels[0]
+ in_act = input_a + input_b
+ t_act = torch.tanh(in_act[:, :n_channels_int, :])
+ s_act = torch.sigmoid(in_act[:, n_channels_int:, :])
+ acts = t_act * s_act
+ return acts
+
+
 def convert_pad_shape(pad_shape: List[List[int]]) -> List[int]:
  """
  Convert the pad shape to a list of integers.

diff --git a/rvc/lib/algorithm/modules.py b/rvc/lib/algorithm/modules.py
@@ -1,5 +1,8 @@
 import torch
-from rvc.lib.algorithm.commons import fused_add_tanh_sigmoid_multiply
+from rvc.lib.algorithm.commons import (
+ fused_add_tanh_sigmoid_multiply_no_jit,
+ fused_add_tanh_sigmoid_multiply,
+)
 
 
 class WaveNet(torch.nn.Module):
@@ -85,6 +88,11 @@ def forward(self, x, x_mask, g=None, **kwargs):
  if g is not None:
  g = self.cond_layer(g)
 
+ # Zluda
+ is_zluda = x.device.type == "cuda" and torch.cuda.get_device_name().endswith(
+ "[ZLUDA]"
+ )
+
  for i in range(self.n_layers):
  x_in = self.in_layers[i](x)
  if g is not None:
@@ -93,7 +101,14 @@ def forward(self, x, x_mask, g=None, **kwargs):
  else:
  g_l = torch.zeros_like(x_in)
 
- acts = fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor)
+ # Preventing HIP crash by not using jit-decorated function
+ if is_zluda:
+ acts = fused_add_tanh_sigmoid_multiply_no_jit(
+ x_in, g_l, n_channels_tensor
+ )
+ else:
+ acts = fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor)
+
  acts = self.drop(acts)
 
  res_skip_acts = self.res_skip_layers[i](acts)

diff --git a/rvc/lib/predictors/RMVPE.py b/rvc/lib/predictors/RMVPE.py
@@ -408,6 +408,15 @@ def forward(self, audio, keyshift=0, speed=1, center=True):
  self.hann_window[keyshift_key] = torch.hann_window(win_length_new).to(
  audio.device
  )
+
+ # Zluda, fall-back to CPU for FFTs since HIP SDK has no cuFFT alternative
+ source_device = audio.device
+ if audio.device.type == "cuda" and torch.cuda.get_device_name().endswith(
+ "[ZLUDA]"
+ ):
+ audio = audio.to("cpu")
+ self.hann_window[keyshift_key] = self.hann_window[keyshift_key].to("cpu")
+
  fft = torch.stft(
  audio,
  n_fft=n_fft_new,
@@ -416,7 +425,8 @@ def forward(self, audio, keyshift=0, speed=1, center=True):
  window=self.hann_window[keyshift_key],
  center=center,
  return_complex=True,
- )
+ ).to(source_device)
+
  magnitude = torch.sqrt(fft.real.pow(2) + fft.imag.pow(2))
  if keyshift != 0:
  size = self.n_fft // 2 + 1

diff --git a/rvc/lib/tools/prerequisites_download.py b/rvc/lib/tools/prerequisites_download.py
@@ -44,7 +44,7 @@
  )
 ]
 models_list = [("predictors/", ["rmvpe.pt", "fcpe.pt"])]
-embedders_list = [("embedders/", ["contentvec_base.pt"])]
+embedders_list = [("embedders/contentvec/", ["pytorch_model.bin", "config.json"])]
 linux_executables_list = [("formant/", ["stftpitchshift"])]
 executables_list = [
  ("", ["ffmpeg.exe", "ffprobe.exe"]),
@@ -54,7 +54,7 @@
 folder_mapping_list = {
  "pretrained_v1/": "rvc/models/pretraineds/pretrained_v1/",
  "pretrained_v2/": "rvc/models/pretraineds/pretrained_v2/",
- "embedders/": "rvc/models/embedders/",
+ "embedders/contentvec/": "rvc/models/embedders/contentvec/",
  "predictors/": "rvc/models/predictors/",
  "formant/": "rvc/models/formant/",
 }