working ok

2026-04-16 10:00:37 -04:00
parent 9debc56137
commit 129df7d1fa
24 changed files with 674 additions and 539 deletions
@@ -32,8 +32,13 @@ class MuseTalkEngine:
    def _load_impl(model_path: str):
        """Load the MuseTalk inference implementation.

-        If none of the known entry points work the error message points at
-        this file so you know where to fix it.
+        Upstream MuseTalk has no library-style entry point — it's a bundle
+        of training/inference CLI scripts. The bhetherman/MuseTalk fork at
+        ``third_party/MuseTalk`` adds package metadata but the low-level
+        API is still the raw ``musetalk.utils.*`` and ``musetalk.models.*``
+        modules. We import them here to verify the install succeeded; the
+        actual pipeline (VAE, UNet, Whisper, face detection, blending)
+        belongs in ``MuseTalkEngine.lip_sync``, which is not wired up yet.
        """
        resolved = model_path
        if not os.path.isdir(model_path) and "/" in model_path:
@@ -43,28 +48,19 @@ class MuseTalkEngine:
            except Exception as e:  # pragma: no cover
                log.warning("Could not snapshot_download MuseTalk repo: %s", e)

-        # Try upstream MuseTalk repo layout.
        try:
-            from musetalk.musetalk_inference import MuseTalkInference  # type: ignore[import-not-found]
-            return MuseTalkInference(model_path=resolved)
-        except ImportError:
-            pass
-        try:
-            from musetalk.inference import MuseTalkInfer  # type: ignore[import-not-found]
-            return MuseTalkInfer(model_path=resolved)
-        except ImportError:
-            pass
-        try:
-            from musetalk import Inference  # type: ignore[import-not-found]
-            return Inference(model_path=resolved)
-        except ImportError:
-            pass
+            from musetalk.utils.utils import load_all_model  # type: ignore[import-not-found] # noqa: F401
+            from musetalk.utils.audio_processor import AudioProcessor  # type: ignore[import-not-found] # noqa: F401
+        except ImportError as e:
+            raise RuntimeError(
+                "MuseTalk Python package is not importable. "
+                "Check that third_party/MuseTalk was installed via "
+                "`pip install /opt/MuseTalk` in the Dockerfile."
+            ) from e

-        raise RuntimeError(
-            "MuseTalk is installed but no known Python entry point was found. "
-            "Update server/video_models/musetalk.py::MuseTalkEngine._load_impl "
-            "to match the installed MuseTalk version."
-        )
+        # Return the resolved path unloaded; lip_sync is meant to load models
+        # lazily on first call so load failures don't block voice-only startup.
+        return {"model_path": resolved, "loaded": False}

    # --- Inference ---------------------------------------------------------

@@ -98,31 +94,22 @@ class MuseTalkEngine:
        if target_t > 0 and len(frames) != target_t:
            frames = _fit_frames_to_length(frames, target_t)

-        # The real MuseTalk call signature varies. Most common is a method
-        # like ``run(frames, audio, sr, fps)`` or ``infer(...)``.
-        for method_name in ("run", "infer", "lip_sync", "__call__"):
-            method = getattr(self._infer, method_name, None)
-            if method is None:
-                continue
-            try:
-                result = method(
-                    frames=frames,
-                    audio=audio,
-                    sample_rate=sample_rate,
-                    fps=fps,
-                )
-                return _ensure_uint8_rgb(result)
-            except TypeError:
-                # Try positional
-                try:
-                    result = method(frames, audio, sample_rate, fps)
-                    return _ensure_uint8_rgb(result)
-                except TypeError:
-                    continue
-
-        raise RuntimeError(
-            "MuseTalk wrapper could not find a working inference method. "
-            "Update server/video_models/musetalk.py::MuseTalkEngine.lip_sync."
+        # MuseTalk's real inference path (see third_party/MuseTalk/scripts/
+        # realtime_inference.py::Avatar.inference) needs:
+        #   - mmpose + mmcv + mmengine (dwpose keypoint detection)
+        #   - face_alignment (bbox)
+        #   - MuseTalk UNet + VAE weights (TMElyralab/MuseTalk HF repo)
+        #   - Whisper encoder (openai/whisper-tiny)
+        #   - face_parsing weights
+        # Plus its preprocessing module has import-time side effects that
+        # load dwpose weights from a CWD-relative path. Turn the full
+        # pipeline on by extending this method once those deps are
+        # installed and weights are resolved — until then, callers should
+        # keep ``config.video.musetalk.enabled: false`` and VideoEngine
+        # will skip the lip-sync pass.
+        raise NotImplementedError(
+            "MuseTalk lip-sync pipeline is not wired up yet. "
+            "Set config.video.musetalk.enabled=false to bypass."
        )