working ok

This commit is contained in:
2026-04-16 10:00:37 -04:00
parent 9debc56137
commit 129df7d1fa
24 changed files with 674 additions and 539 deletions
+34 -47
View File
@@ -32,8 +32,13 @@ class MuseTalkEngine:
def _load_impl(model_path: str):
"""Load the MuseTalk inference implementation.
If none of the known entry points work the error message points at
this file so you know where to fix it.
Upstream MuseTalk has no library-style entry point — it's a bundle
of training/inference CLI scripts. The bhetherman/MuseTalk fork at
``third_party/MuseTalk`` adds package metadata but the low-level
API is still the raw ``musetalk.utils.*`` and ``musetalk.models.*``
modules. We import them here only to verify the install succeeded;
the actual pipeline (VAE, UNet, Whisper, face detection, blending)
still has to be wired up inside ``MuseTalkEngine.lip_sync``.
"""
resolved = model_path
if not os.path.isdir(model_path) and "/" in model_path:
@@ -43,28 +48,19 @@ class MuseTalkEngine:
except Exception as e: # pragma: no cover
log.warning("Could not snapshot_download MuseTalk repo: %s", e)
# Try upstream MuseTalk repo layout.
try:
from musetalk.musetalk_inference import MuseTalkInference # type: ignore[import-not-found]
return MuseTalkInference(model_path=resolved)
except ImportError:
pass
try:
from musetalk.inference import MuseTalkInfer # type: ignore[import-not-found]
return MuseTalkInfer(model_path=resolved)
except ImportError:
pass
try:
from musetalk import Inference # type: ignore[import-not-found]
return Inference(model_path=resolved)
except ImportError:
pass
from musetalk.utils.utils import load_all_model # type: ignore[import-not-found] # noqa: F401
from musetalk.utils.audio_processor import AudioProcessor # type: ignore[import-not-found] # noqa: F401
except ImportError as e:
raise RuntimeError(
"MuseTalk Python package is not importable. "
"Check that third_party/MuseTalk was installed via "
"`pip install /opt/MuseTalk` in the Dockerfile."
) from e
raise RuntimeError(
"MuseTalk is installed but no known Python entry point was found. "
"Update server/video_models/musetalk.py::MuseTalkEngine._load_impl "
"to match the installed MuseTalk version."
)
# Return the resolved weight path in a state dict (``loaded`` stays False).
# lip_sync is meant to load the models lazily on first call (not implemented
# yet) so import-time failures don't block voice-only startup.
return {"model_path": resolved, "loaded": False}
# --- Inference ---------------------------------------------------------
@@ -98,31 +94,22 @@ class MuseTalkEngine:
if target_t > 0 and len(frames) != target_t:
frames = _fit_frames_to_length(frames, target_t)
# The real MuseTalk call signature varies. Most common is a method
# like ``run(frames, audio, sr, fps)`` or ``infer(...)``.
for method_name in ("run", "infer", "lip_sync", "__call__"):
method = getattr(self._infer, method_name, None)
if method is None:
continue
try:
result = method(
frames=frames,
audio=audio,
sample_rate=sample_rate,
fps=fps,
)
return _ensure_uint8_rgb(result)
except TypeError:
# Try positional
try:
result = method(frames, audio, sample_rate, fps)
return _ensure_uint8_rgb(result)
except TypeError:
continue
raise RuntimeError(
"MuseTalk wrapper could not find a working inference method. "
"Update server/video_models/musetalk.py::MuseTalkEngine.lip_sync."
# MuseTalk's real inference path (see third_party/MuseTalk/scripts/
# realtime_inference.py::Avatar.inference) needs:
# - mmpose + mmcv + mmengine (dwpose keypoint detection)
# - face_alignment (bbox)
# - MuseTalk UNet + VAE weights (TMElyralab/MuseTalk HF repo)
# - Whisper encoder (openai/whisper-tiny)
# - face_parsing weights
# In addition, MuseTalk's preprocessing module has import-time side
# effects: it loads dwpose weights from a CWD-relative path. Enable the
# full pipeline by extending this method once those deps are installed
# and the weights are resolved. Until then, callers should keep
# ``config.video.musetalk.enabled: false`` and VideoEngine
# will skip the lip-sync pass.
raise NotImplementedError(
"MuseTalk lip-sync pipeline is not wired up yet. "
"Set config.video.musetalk.enabled=false to bypass."
)