Commit 837b945

removed fp16 / mixed precision, minor fixes
AznamirWoW committed Jan 7, 2025
1 parent a32d84e commit 837b945
Showing 8 changed files with 58 additions and 135 deletions.
core.py (2 changes: 1 addition & 1 deletion)
@@ -519,7 +519,7 @@ def run_train_script(

if custom_pretrained == False:
pg, pd = pretrained_selector(
str(vocoder), True, int(sample_rate)
str(vocoder), int(sample_rate)
)
else:
if g_pretrained_path is None or d_pretrained_path is None:
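
After this hunk, core.py calls pretrained_selector with only the vocoder name and the sample rate; the boolean that used to be hard-coded as the second argument (True) is gone. A minimal sketch of a selector with that two-argument shape is below; the checkpoint directory layout and file names here are assumptions for illustration only, not the repository's real path construction:

    def pretrained_selector(vocoder: str, sample_rate: int) -> tuple[str, str]:
        # Hypothetical layout, used only to illustrate the new two-argument call shape.
        base = f"rvc/models/pretraineds/{vocoder}"
        pg = f"{base}/f0G{sample_rate}.pth"  # generator pretrain
        pd = f"{base}/f0D{sample_rate}.pth"  # discriminator pretrain
        return pg, pd

    pg, pd = pretrained_selector("HiFi-GAN", 40000)  # example values
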
rvc/configs/config.py (17 changes: 1 addition & 16 deletions)
@@ -25,7 +25,6 @@ def get_instance(*args, **kwargs):
class Config:
def __init__(self):
self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
self.is_half = self.device != "cpu"
self.gpu_name = (
torch.cuda.get_device_name(int(self.device.split(":")[-1]))
if self.device.startswith("cuda")
@@ -82,13 +81,9 @@ def device_config(self):
self.set_cuda_config()
else:
self.device = "cpu"
self.is_half = False
self.set_precision("fp32")

# Configuration for 6GB GPU memory
x_pad, x_query, x_center, x_max = (
(3, 10, 60, 65) if self.is_half else (1, 6, 38, 41)
)
x_pad, x_query, x_center, x_max = (1, 6, 38, 41)
if self.gpu_mem is not None and self.gpu_mem <= 4:
# Configuration for 5GB GPU memory
x_pad, x_query, x_center, x_max = (1, 5, 30, 32)
@@ -98,19 +93,10 @@ def set_cuda_config(self):
def set_cuda_config(self):
i_device = int(self.device.split(":")[-1])
self.gpu_name = torch.cuda.get_device_name(i_device)
low_end_gpus = ["16", "P40", "P10", "1060", "1070", "1080"]
if (
any(gpu in self.gpu_name for gpu in low_end_gpus)
and "V100" not in self.gpu_name.upper()
):
self.is_half = False
self.set_precision("fp32")

self.gpu_mem = torch.cuda.get_device_properties(i_device).total_memory // (
1024**3
)


def max_vram_gpu(gpu):
if torch.cuda.is_available():
gpu_properties = torch.cuda.get_device_properties(gpu)
@@ -119,7 +105,6 @@ def max_vram_gpu(gpu):
else:
return "8"


def get_gpu_info():
ngpu = torch.cuda.device_count()
gpu_infos = []
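
Taken together, the config.py hunks drop the is_half flag and the low-end-GPU special case: every device now runs float32, and the pipeline buffer sizes default to the former fp32 tuple. A condensed, self-contained sketch of the resulting sizing logic (the real code lives in Config.device_config and Config.set_cuda_config):

    import torch

    def device_config_sketch() -> tuple[int, int, int, int]:
        # Float32-only sizing as in this commit; no is_half branch remains.
        device = "cuda:0" if torch.cuda.is_available() else "cpu"
        gpu_mem = None
        if device.startswith("cuda"):
            i_device = int(device.split(":")[-1])
            gpu_mem = torch.cuda.get_device_properties(i_device).total_memory // (1024**3)

        # Configuration for 6GB GPU memory (previously 3, 10, 60, 65 when is_half was set)
        x_pad, x_query, x_center, x_max = (1, 6, 38, 41)
        if gpu_mem is not None and gpu_mem <= 4:
            # Configuration for 5GB GPU memory
            x_pad, x_query, x_center, x_max = (1, 5, 30, 32)
        return x_pad, x_query, x_center, x_max
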
rvc/infer/infer.py (12 changes: 3 additions & 9 deletions)
@@ -70,12 +70,7 @@ def load_hubert(self, embedder_model: str, embedder_model_custom: str = None):
embedder_model_custom (str): Path to the custom HuBERT model.
"""
self.hubert_model = load_embedding(embedder_model, embedder_model_custom)
self.hubert_model.to(self.config.device)
self.hubert_model = (
self.hubert_model.half()
if self.config.is_half
else self.hubert_model.float()
)
self.hubert_model = self.hubert_model.to(self.config.device).float()
self.hubert_model.eval()

@staticmethod
@@ -482,13 +477,12 @@ def setup_network(self):
*self.cpt["config"],
use_f0=self.use_f0,
text_enc_hidden_dim=self.text_enc_hidden_dim,
is_half=False,
vocoder=self.vocoder,
)
del self.net_g.enc_q
self.net_g.load_state_dict(self.cpt["weight"], strict=False)
self.net_g.eval().to(self.config.device)
self.net_g = self.net_g.float()
self.net_g = self.net_g.to(self.config.device).float()
self.net_g.eval()

def setup_vc_instance(self):
"""
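
Both infer.py hunks converge on the same load pattern: move the module to the configured device, cast it to float32 unconditionally, and switch to eval mode. A small sketch of that pattern as a helper; the diff applies it inline to self.hubert_model and self.net_g:

    import torch

    def to_eval_fp32(model: torch.nn.Module, device: str) -> torch.nn.Module:
        # Device placement, unconditional float32 cast, inference mode.
        model = model.to(device).float()
        model.eval()
        return model

In the spirit of the diff: self.hubert_model = to_eval_fp32(load_embedding(embedder_model, embedder_model_custom), self.config.device).
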
rvc/infer/pipeline.py (10 changes: 1 addition & 9 deletions)
@@ -133,7 +133,6 @@ def __init__(self, tgt_sr, config):
self.x_query = config.x_query
self.x_center = config.x_center
self.x_max = config.x_max
self.is_half = config.is_half
self.sample_rate = 16000
self.window = 160
self.t_pad = self.sample_rate * self.x_pad
@@ -208,7 +207,6 @@ def __init__(self, tgt_sr, config):
self.note_dict = self.autotune.note_dict
self.model_rmvpe = RMVPE0Predictor(
os.path.join("rvc", "models", "predictors", "rmvpe.pt"),
is_half=self.is_half,
device=self.device,
)

@@ -440,11 +438,7 @@ def voice_conversion(
with torch.no_grad():
pitch_guidance = pitch != None and pitchf != None
# prepare source audio
feats = (
torch.from_numpy(audio0).half()
if self.is_half
else torch.from_numpy(audio0).float()
)
feats = torch.from_numpy(audio0).float()
feats = feats.mean(-1) if feats.dim() == 2 else feats
assert feats.dim() == 1, feats.dim()
feats = feats.view(1, -1).to(self.device)
@@ -498,12 +492,10 @@ def voice_conversion(

def _retrieve_speaker_embeddings(self, feats, index, big_npy, index_rate):
npy = feats[0].cpu().numpy()
npy = npy.astype("float32") if self.is_half else npy
score, ix = index.search(npy, k=8)
weight = np.square(1 / score)
weight /= weight.sum(axis=1, keepdims=True)
npy = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
npy = npy.astype("float16") if self.is_half else npy
feats = (
torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate
+ (1 - index_rate) * feats
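
With is_half gone from the pipeline, source features stay float32 from torch.from_numpy(audio0).float() through the FAISS lookup, so the float32/float16 round-trips around index.search are removed. A standalone sketch of the retrieval-and-blend step performed by _retrieve_speaker_embeddings (NumPy in, NumPy out; the real method blends back into a torch tensor on the device):

    import numpy as np

    def blend_with_index(feats, index, big_npy, index_rate):
        # feats: (frames, dims) float32 features; index: a FAISS index built over big_npy.
        score, ix = index.search(feats, k=8)               # 8 nearest training frames
        weight = np.square(1 / score)                      # inverse-square distance weighting
        weight /= weight.sum(axis=1, keepdims=True)
        retrieved = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
        return index_rate * retrieved + (1 - index_rate) * feats
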
rvc/lib/predictors/F0Extractor.py (1 change: 0 additions & 1 deletion)
@@ -79,7 +79,6 @@ def extract_f0(self):
elif method == "rmvpe":
model_rmvpe = RMVPE0Predictor(
os.path.join("rvc", "models", "predictors", "rmvpe.pt"),
is_half=config.is_half,
device=config.device,
# hop_length=80
)
rvc/lib/predictors/RMVPE.py (16 changes: 2 additions & 14 deletions)
@@ -344,7 +344,6 @@ class MelSpectrogram(torch.nn.Module):
Extracts Mel-spectrogram features from audio.
Args:
is_half (bool): Whether to use half-precision floating-point numbers.
n_mel_channels (int): Number of Mel-frequency bands.
sample_rate (int): Sampling rate of the audio.
win_length (int): Length of the window function in samples.
@@ -357,7 +356,6 @@ class MelSpectrogram(torch.nn.Module):

def __init__(
self,
is_half,
n_mel_channels,
sample_rate,
win_length,
@@ -386,7 +384,6 @@ def __init__(
self.sample_rate = sample_rate
self.n_mel_channels = n_mel_channels
self.clamp = clamp
self.is_half = is_half

def forward(self, audio, keyshift=0, speed=1, center=True):
factor = 2 ** (keyshift / 12)
@@ -416,8 +413,6 @@ def forward(self, audio, keyshift=0, speed=1, center=True):
magnitude = F.pad(magnitude, (0, 0, 0, size - resize))
magnitude = magnitude[:, :size, :] * self.win_length / win_length_new
mel_output = torch.matmul(self.mel_basis, magnitude)
if self.is_half:
mel_output = mel_output.half()
log_mel_spec = torch.log(torch.clamp(mel_output, min=self.clamp))
return log_mel_spec

@@ -428,24 +423,19 @@ class RMVPE0Predictor:
Args:
model_path (str): Path to the RMVPE0 model file.
is_half (bool): Whether to use half-precision floating-point numbers.
device (str, optional): Device to use for computation. Defaults to None, which uses CUDA if available.
"""

def __init__(self, model_path, is_half, device=None):
def __init__(self, model_path, device=None):
self.resample_kernel = {}
model = E2E(4, 1, (2, 2))
ckpt = torch.load(model_path, map_location="cpu")
model.load_state_dict(ckpt)
model.eval()
if is_half:
model = model.half()
self.model = model
self.resample_kernel = {}
self.is_half = is_half
self.device = device
self.mel_extractor = MelSpectrogram(
is_half, N_MELS, 16000, 1024, 160, None, 30, 8000
self.mel_extractor = MelSpectrogram(N_MELS, 16000, 1024, 160, None, 30, 8000
).to(device)
self.model = self.model.to(device)
cents_mapping = 20 * np.arange(N_CLASS) + 1997.3794084376191
@@ -491,8 +481,6 @@ def infer_from_audio(self, audio, thred=0.03):
mel = self.mel_extractor(audio, center=True)
hidden = self.mel2hidden(mel)
hidden = hidden.squeeze(0).cpu().numpy()
if self.is_half == True:
hidden = hidden.astype("float32")
f0 = self.decode(hidden, thred=thred)
return f0

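
After the RMVPE.py hunks, the predictor is built as RMVPE0Predictor(model_path, device=device) and the mel extractor as MelSpectrogram(N_MELS, 16000, 1024, 160, None, 30, 8000); the mel output is no longer cast to half before the log/clamp, and hidden states come back as float32 without the extra astype. A minimal sketch of the float32 tail of MelSpectrogram.forward (the clamp default is an assumption, matching the usual 1e-5):

    import torch

    def log_mel(mel_basis: torch.Tensor, magnitude: torch.Tensor, clamp: float = 1e-5) -> torch.Tensor:
        # The matmul result stays float32 now; no .half() cast before the log.
        mel_output = torch.matmul(mel_basis, magnitude)
        return torch.log(torch.clamp(mel_output, min=clamp))
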
rvc/train/extract/extract.py (7 changes: 3 additions & 4 deletions)
@@ -105,7 +105,6 @@ def process_files(self, files, f0_method, hop_length, device, threads):
if f0_method == "rmvpe":
self.model_rmvpe = RMVPE0Predictor(
os.path.join("rvc", "models", "predictors", "rmvpe.pt"),
is_half=False,
device=device,
)

@@ -146,15 +145,15 @@ def run_pitch_extraction(files, devices, f0_method, hop_length, threads):
def process_file_embedding(
files, embedder_model, embedder_model_custom, device_num, device, n_threads
):
dtype = torch.float16 if (config.is_half and "cuda" in device) else torch.float32
model = load_embedding(embedder_model, embedder_model_custom).to(dtype).to(device)
model = load_embedding(embedder_model, embedder_model_custom).to(device).float()
model.eval()
n_threads = max(1, n_threads)

def worker(file_info):
wav_file_path, _, _, out_file_path = file_info
if os.path.exists(out_file_path):
return
feats = torch.from_numpy(load_audio(wav_file_path, 16000)).to(dtype).to(device)
feats = torch.from_numpy(load_audio(wav_file_path, 16000)).to(device).float()
feats = feats.view(1, -1)
with torch.no_grad():
result = model(feats)["last_hidden_state"]
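
The embedding extraction worker likewise drops the dtype switch: the embedder and the 16 kHz audio are both cast to float32 on the target device. A compact sketch of that step (audio_16k stands in for the array returned by the project's load_audio helper):

    import numpy as np
    import torch

    def embed_audio(model: torch.nn.Module, audio_16k: np.ndarray, device: str) -> torch.Tensor:
        # Mirrors the float32 path in process_file_embedding's worker.
        feats = torch.from_numpy(audio_16k).to(device).float().view(1, -1)
        with torch.no_grad():
            return model(feats)["last_hidden_state"]
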