Skip to content

fix Chinese and English mixing #237

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions api/src/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ class Settings(BaseSettings):
allow_local_voice_saving: bool = (
False # Whether to allow saving combined voices locally
)
repo_id: str = "hexgrad/Kokoro-82M"
kokoro_v1_file: str = "v1_0/kokoro-v1_0.pth"

# Container absolute paths
model_dir: str = "/app/api/src/models" # Absolute path in container
Expand Down
3 changes: 2 additions & 1 deletion api/src/core/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

from pydantic import BaseModel, Field
from core.config import settings


class KokoroV1Config(BaseModel):
Expand Down Expand Up @@ -36,7 +37,7 @@ class ModelConfig(BaseModel):

# Model filename
pytorch_kokoro_v1_file: str = Field(
"v1_0/kokoro-v1_0.pth", description="PyTorch Kokoro V1 model filename"
settings.kokoro_v1_file, description="PyTorch Kokoro V1 model filename"
)

# Backend config
Expand Down
16 changes: 14 additions & 2 deletions api/src/inference/kokoro_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ async def load_model(self, path: str) -> None:
logger.info(f"Model path: {model_path}")

# Load model and let KModel handle device mapping
self._model = KModel(config=config_path, model=model_path).eval()
self._model = KModel(config=config_path, model=model_path, repo_id=settings.repo_id).eval()
# Move to CUDA if needed
if self._device == "cuda":
self._model = self._model.cuda()
Expand All @@ -57,6 +57,9 @@ async def load_model(self, path: str) -> None:
except Exception as e:
raise RuntimeError(f"Failed to load Kokoro model: {e}")

def en_callable(self, text: str) -> str:
    """Return English phonemes for ``text`` using the English ('a') pipeline.

    This method is handed to ``KPipeline`` as its ``en_callable`` argument.
    Only the first result produced by the English pipeline is used.

    Args:
        text: The English text fragment to phonemize.

    Returns:
        The ``phonemes`` attribute of the first pipeline result, or an empty
        string when the pipeline yields no result for ``text``.

    Raises:
        KeyError: If the English pipeline has not been created in
            ``self._pipelines`` yet.
    """
    # A bare next() would raise StopIteration on an empty result stream;
    # escaping from a callback that can silently stop an outer iterator or
    # turn into RuntimeError inside a generator, so use a default instead.
    first_result = next(self._pipelines['a'](text), None)
    if first_result is None:
        return ""
    return first_result.phonemes

def _get_pipeline(self, lang_code: str) -> KPipeline:
"""Get or create pipeline for language code.

Expand All @@ -69,10 +72,19 @@ def _get_pipeline(self, lang_code: str) -> KPipeline:
if not self._model:
raise RuntimeError("Model not loaded")

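# Chinese text may contain English: en_callable (passed to the pipeline below) looks up
# self._pipelines['a'], so create the English pipeline (model=False) before the Chinese one.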
if 'a' not in self._pipelines and lang_code == 'z':
lang_en = 'a'
logger.info(f"Creating new pipeline for language code: {lang_en}")
self._pipelines[lang_en] = KPipeline(
lang_code=lang_en, model=False, repo_id=settings.repo_id
)

if lang_code not in self._pipelines:
logger.info(f"Creating new pipeline for language code: {lang_code}")
self._pipelines[lang_code] = KPipeline(
lang_code=lang_code, model=self._model, device=self._device
lang_code=lang_code, model=self._model, device=self._device, repo_id=settings.repo_id,
en_callable=self.en_callable
)
return self._pipelines[lang_code]

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ dependencies = [
"matplotlib>=3.10.0",
"mutagen>=1.47.0",
"psutil>=6.1.1",
"kokoro @ git+https://github.com/hexgrad/kokoro.git@31a2b6337b8c1b1418ef68c48142328f640da938",
'misaki[en,ja,ko,zh] @ git+https://github.com/hexgrad/misaki.git@ebc76c21b66c5fc4866ed0ec234047177b396170',
"kokoro>=0.8.2",
'misaki[en,ja,ko,zh]>=0.8.2',
"spacy==3.7.2",
"en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl",
"inflect>=7.5.0",
Expand Down
5 changes: 5 additions & 0 deletions start-gpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ export MODEL_DIR=src/models
export VOICES_DIR=src/voices/v1_0
export WEB_PLAYER_PATH=$PROJECT_ROOT/web

# Optional: uncomment the exports below to use the Chinese model (Kokoro v1.1-zh) and a Chinese default voice
# export DEFAULT_VOICE=zf_xiaobei
# export REPO_ID=hexgrad/Kokoro-82M-v1.1-zh
# export KOKORO_V1_FILE=v1_1-zh/kokoro-v1_1-zh.pth

# Run FastAPI with GPU extras using uv run
uv pip install -e ".[gpu]"
uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880