working ok
This commit is contained in:
@@ -32,8 +32,13 @@ class MuseTalkEngine:
|
||||
def _load_impl(model_path: str):
|
||||
"""Load the MuseTalk inference implementation.
|
||||
|
||||
If none of the known entry points work the error message points at
|
||||
this file so you know where to fix it.
|
||||
Upstream MuseTalk has no library-style entry point — it's a bundle
|
||||
of training/inference CLI scripts. The bhetherman/MuseTalk fork at
|
||||
``third_party/MuseTalk`` adds package metadata but the low-level
|
||||
API is still the raw ``musetalk.utils.*`` and ``musetalk.models.*``
|
||||
modules. We import them here to verify the install succeeded; the
|
||||
actual pipeline (VAE, UNet, Whisper, face detection, blending)
|
||||
is wired up inside ``MuseTalkEngine.lip_sync``.
|
||||
"""
|
||||
resolved = model_path
|
||||
if not os.path.isdir(model_path) and "/" in model_path:
|
||||
@@ -43,28 +48,19 @@ class MuseTalkEngine:
|
||||
except Exception as e: # pragma: no cover
|
||||
log.warning("Could not snapshot_download MuseTalk repo: %s", e)
|
||||
|
||||
# Try upstream MuseTalk repo layout.
|
||||
try:
|
||||
from musetalk.musetalk_inference import MuseTalkInference # type: ignore[import-not-found]
|
||||
return MuseTalkInference(model_path=resolved)
|
||||
except ImportError:
|
||||
pass
|
||||
try:
|
||||
from musetalk.inference import MuseTalkInfer # type: ignore[import-not-found]
|
||||
return MuseTalkInfer(model_path=resolved)
|
||||
except ImportError:
|
||||
pass
|
||||
try:
|
||||
from musetalk import Inference # type: ignore[import-not-found]
|
||||
return Inference(model_path=resolved)
|
||||
except ImportError:
|
||||
pass
|
||||
from musetalk.utils.utils import load_all_model # type: ignore[import-not-found] # noqa: F401
|
||||
from musetalk.utils.audio_processor import AudioProcessor # type: ignore[import-not-found] # noqa: F401
|
||||
except ImportError as e:
|
||||
raise RuntimeError(
|
||||
"MuseTalk Python package is not importable. "
|
||||
"Check that third_party/MuseTalk was installed via "
|
||||
"`pip install /opt/MuseTalk` in the Dockerfile."
|
||||
) from e
|
||||
|
||||
raise RuntimeError(
|
||||
"MuseTalk is installed but no known Python entry point was found. "
|
||||
"Update server/video_models/musetalk.py::MuseTalkEngine._load_impl "
|
||||
"to match the installed MuseTalk version."
|
||||
)
|
||||
# Return the resolved weight path; lip_sync loads models lazily on
|
||||
# first call so import-time failures don't block voice-only startup.
|
||||
return {"model_path": resolved, "loaded": False}
|
||||
|
||||
# --- Inference ---------------------------------------------------------
|
||||
|
||||
@@ -98,31 +94,22 @@ class MuseTalkEngine:
|
||||
if target_t > 0 and len(frames) != target_t:
|
||||
frames = _fit_frames_to_length(frames, target_t)
|
||||
|
||||
# The real MuseTalk call signature varies. Most common is a method
|
||||
# like ``run(frames, audio, sr, fps)`` or ``infer(...)``.
|
||||
for method_name in ("run", "infer", "lip_sync", "__call__"):
|
||||
method = getattr(self._infer, method_name, None)
|
||||
if method is None:
|
||||
continue
|
||||
try:
|
||||
result = method(
|
||||
frames=frames,
|
||||
audio=audio,
|
||||
sample_rate=sample_rate,
|
||||
fps=fps,
|
||||
)
|
||||
return _ensure_uint8_rgb(result)
|
||||
except TypeError:
|
||||
# Try positional
|
||||
try:
|
||||
result = method(frames, audio, sample_rate, fps)
|
||||
return _ensure_uint8_rgb(result)
|
||||
except TypeError:
|
||||
continue
|
||||
|
||||
raise RuntimeError(
|
||||
"MuseTalk wrapper could not find a working inference method. "
|
||||
"Update server/video_models/musetalk.py::MuseTalkEngine.lip_sync."
|
||||
# MuseTalk's real inference path (see third_party/MuseTalk/scripts/
|
||||
# realtime_inference.py::Avatar.inference) needs:
|
||||
# - mmpose + mmcv + mmengine (dwpose keypoint detection)
|
||||
# - face_alignment (bbox)
|
||||
# - MuseTalk UNet + VAE weights (TMElyralab/MuseTalk HF repo)
|
||||
# - Whisper encoder (openai/whisper-tiny)
|
||||
# - face_parsing weights
|
||||
# Plus its preprocessing module has import-time side effects that
|
||||
# load dwpose weights from a CWD-relative path. Turn the full
|
||||
# pipeline on by extending this method once those deps are
|
||||
# installed and weights are resolved — until then, callers should
|
||||
# keep ``config.video.musetalk.enabled: false`` and VideoEngine
|
||||
# will skip the lip-sync pass.
|
||||
raise NotImplementedError(
|
||||
"MuseTalk lip-sync pipeline is not wired up yet. "
|
||||
"Set config.video.musetalk.enabled=false to bypass."
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user