remsky · chai51 · Mar 13, 2025
diff --git a/api/src/core/config.py b/api/src/core/config.py
@@ -18,6 +18,8 @@ class Settings(BaseSettings):
     allow_local_voice_saving: bool = (
         False  # Whether to allow saving combined voices locally
     )
+    repo_id: str = "hexgrad/Kokoro-82M"
+    kokoro_v1_file: str = "v1_0/kokoro-v1_0.pth"
 
     # Container absolute paths
     model_dir: str = "/app/api/src/models"  # Absolute path in container

diff --git a/api/src/core/model_config.py b/api/src/core/model_config.py
@@ -6,6 +6,7 @@
 """
 
 from pydantic import BaseModel, Field
+from core.config import settings
 
 
 class KokoroV1Config(BaseModel):
@@ -36,7 +37,7 @@ class ModelConfig(BaseModel):
 
     # Model filename
     pytorch_kokoro_v1_file: str = Field(
-        "v1_0/kokoro-v1_0.pth", description="PyTorch Kokoro V1 model filename"
+        settings.kokoro_v1_file, description="PyTorch Kokoro V1 model filename"
     )
 
     # Backend config

diff --git a/api/src/inference/kokoro_v1.py b/api/src/inference/kokoro_v1.py
@@ -47,7 +47,7 @@ async def load_model(self, path: str) -> None:
             logger.info(f"Model path: {model_path}")
 
             # Load model and let KModel handle device mapping
-            self._model = KModel(config=config_path, model=model_path).eval()
+            self._model = KModel(config=config_path, model=model_path, repo_id=settings.repo_id).eval()
             # Move to CUDA if needed
             if self._device == "cuda":
                 self._model = self._model.cuda()
@@ -57,6 +57,9 @@ async def load_model(self, path: str) -> None:
         except Exception as e:
             raise RuntimeError(f"Failed to load Kokoro model: {e}")
 
+    def en_callable(self, text):
+        return next(self._pipelines['a'](text)).phonemes
+
     def _get_pipeline(self, lang_code: str) -> KPipeline:
         """Get or create pipeline for language code.
 
@@ -69,10 +72,19 @@ def _get_pipeline(self, lang_code: str) -> KPipeline:
         if not self._model:
             raise RuntimeError("Model not loaded")
 
+        # Chinese text may contain embedded English: make sure the English ('a') pipeline exists first, since en_callable relies on it.
+        if 'a' not in self._pipelines and lang_code == 'z':
+            lang_en = 'a'
+            logger.info(f"Creating new pipeline for language code: {lang_en}")
+            self._pipelines[lang_en] = KPipeline(
+                lang_code=lang_en, model=False, repo_id=settings.repo_id
+            )
+
         if lang_code not in self._pipelines:
             logger.info(f"Creating new pipeline for language code: {lang_code}")
             self._pipelines[lang_code] = KPipeline(
-                lang_code=lang_code, model=self._model, device=self._device
+                lang_code=lang_code, model=self._model, device=self._device, repo_id=settings.repo_id,
+                en_callable=self.en_callable
             )
         return self._pipelines[lang_code]
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -31,8 +31,8 @@ dependencies = [
     "matplotlib>=3.10.0",
     "mutagen>=1.47.0",
     "psutil>=6.1.1",
-    "kokoro @ git+https://github.com/hexgrad/kokoro.git@31a2b6337b8c1b1418ef68c48142328f640da938",
-    'misaki[en,ja,ko,zh] @ git+https://github.com/hexgrad/misaki.git@ebc76c21b66c5fc4866ed0ec234047177b396170',
+    "kokoro>=0.8.2",
+    'misaki[en,ja,ko,zh]>=0.8.2',
     "spacy==3.7.2",
     "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl",
     "inflect>=7.5.0",

diff --git a/start-gpu.sh b/start-gpu.sh
@@ -11,6 +11,11 @@ export MODEL_DIR=src/models
 export VOICES_DIR=src/voices/v1_0
 export WEB_PLAYER_PATH=$PROJECT_ROOT/web
 
+# Uncomment the following environment variables to use the Chinese model and voice
+# export DEFAULT_VOICE=zf_xiaobei
+# export REPO_ID=hexgrad/Kokoro-82M-v1.1-zh
+# export KOKORO_V1_FILE=v1_1-zh/kokoro-v1_1-zh.pth
+
 # Run FastAPI with GPU extras using uv run
 uv pip install -e ".[gpu]"
 uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880