6 Commits

Author SHA1 Message Date
NeRF-Factory 8795fa1425 feat: windows infer & gradio 2025-04-12 23:19:41 +08:00
zzzweakman be656b199b fix: dependencies 2025-04-12 01:40:40 +08:00
zzzweakman b9b459a119 feat: v1.5 gradio for windows&linux 2025-04-11 02:43:04 +08:00
zzzweakman 2e5b74a257 docs: update readme 2025-04-10 14:02:24 +08:00
zzzweakman a0834ec2c2 docs: update readme 2025-04-10 13:59:59 +08:00
zzzweakman 0702078902 fix: windows infer 2025-04-10 13:54:16 +08:00
14 changed files with 36 additions and 83 deletions
+1 -1
View File
@@ -5,7 +5,7 @@
*.pyc *.pyc
.ipynb_checkpoints .ipynb_checkpoints
results/ results/
/models/ models/
**/__pycache__/ **/__pycache__/
*.py[cod] *.py[cod]
*$py.class *$py.class
+12 -8
View File
@@ -14,28 +14,32 @@ mkdir %CheckpointsDir%\sd-vae-ft-mse
mkdir %CheckpointsDir%\whisper mkdir %CheckpointsDir%\whisper
:: Install required packages :: Install required packages
pip install -U "huggingface_hub[hf_xet]" pip install -U "huggingface_hub[cli]"
pip install gdown
:: Set HuggingFace endpoint :: Set HuggingFace endpoint
set HF_ENDPOINT=https://hf-mirror.com set HF_ENDPOINT=https://hf-mirror.com
:: Download MuseTalk weights :: Download MuseTalk weights
hf download TMElyralab/MuseTalk --local-dir %CheckpointsDir% huggingface-cli download TMElyralab/MuseTalk --local-dir %CheckpointsDir%
:: Download SD VAE weights :: Download SD VAE weights
hf download stabilityai/sd-vae-ft-mse --local-dir %CheckpointsDir%\sd-vae --include "config.json" "diffusion_pytorch_model.bin" huggingface-cli download stabilityai/sd-vae-ft-mse --local-dir %CheckpointsDir%\sd-vae --include "config.json" "diffusion_pytorch_model.bin"
:: Download Whisper weights :: Download Whisper weights
hf download openai/whisper-tiny --local-dir %CheckpointsDir%\whisper --include "config.json" "pytorch_model.bin" "preprocessor_config.json" huggingface-cli download openai/whisper-tiny --local-dir %CheckpointsDir%\whisper --include "config.json" "pytorch_model.bin" "preprocessor_config.json"
:: Download DWPose weights :: Download DWPose weights
hf download yzd-v/DWPose --local-dir %CheckpointsDir%\dwpose --include "dw-ll_ucoco_384.pth" huggingface-cli download yzd-v/DWPose --local-dir %CheckpointsDir%\dwpose --include "dw-ll_ucoco_384.pth"
:: Download SyncNet weights :: Download SyncNet weights
hf download ByteDance/LatentSync --local-dir %CheckpointsDir%\syncnet --include "latentsync_syncnet.pt" huggingface-cli download ByteDance/LatentSync --local-dir %CheckpointsDir%\syncnet --include "latentsync_syncnet.pt"
:: Download face-parse-bisent weights :: Download Face Parse Bisent weights (using gdown)
hf download ManyOtherFunctions/face-parse-bisent --local-dir %CheckpointsDir%\face-parse-bisent --include "79999_iter.pth" "resnet18-5c106cde.pth" gdown --id 154JgKpzCPW82qINcVieuPH3fZ2e0P812 -O %CheckpointsDir%\face-parse-bisent\79999_iter.pth
:: Download ResNet weights
curl -L https://download.pytorch.org/models/resnet18-5c106cde.pth -o %CheckpointsDir%\face-parse-bisent\resnet18-5c106cde.pth
echo All weights have been downloaded successfully! echo All weights have been downloaded successfully!
endlocal endlocal
+13 -27
View File
@@ -4,48 +4,34 @@
CheckpointsDir="models" CheckpointsDir="models"
# Create necessary directories # Create necessary directories
mkdir -p models/musetalk models/musetalkV15 models/syncnet models/dwpose models/face-parse-bisent models/sd-vae models/whisper mkdir -p $CheckpointsDir/{musetalk,musetalkV15,syncnet,dwpose,face-parse-bisent,sd-vae-ft-mse,whisper}
# Install required packages # Install required packages
pip install -U "huggingface_hub[cli]" pip install -U "huggingface_hub[cli]"
pip install gdown pip install gdown
# Set HuggingFace mirror endpoint # Set HuggingFace endpoint
export HF_ENDPOINT=https://hf-mirror.com export HF_ENDPOINT=https://hf-mirror.com
# Download MuseTalk V1.0 weights # Download MuseTalk weights
huggingface-cli download TMElyralab/MuseTalk \ huggingface-cli download TMElyralab/MuseTalk --local-dir $CheckpointsDir
--local-dir $CheckpointsDir \
--include "musetalk/musetalk.json" "musetalk/pytorch_model.bin"
# Download MuseTalk V1.5 weights (unet.pth)
huggingface-cli download TMElyralab/MuseTalk \
--local-dir $CheckpointsDir \
--include "musetalkV15/musetalk.json" "musetalkV15/unet.pth"
# Download SD VAE weights # Download SD VAE weights
huggingface-cli download stabilityai/sd-vae-ft-mse \ huggingface-cli download stabilityai/sd-vae-ft-mse --local-dir $CheckpointsDir/sd-vae --include "config.json" "diffusion_pytorch_model.bin"
--local-dir $CheckpointsDir/sd-vae \
--include "config.json" "diffusion_pytorch_model.bin"
# Download Whisper weights # Download Whisper weights
huggingface-cli download openai/whisper-tiny \ huggingface-cli download openai/whisper-tiny --local-dir $CheckpointsDir/whisper --include "config.json" "pytorch_model.bin" "preprocessor_config.json"
--local-dir $CheckpointsDir/whisper \
--include "config.json" "pytorch_model.bin" "preprocessor_config.json"
# Download DWPose weights # Download DWPose weights
huggingface-cli download yzd-v/DWPose \ huggingface-cli download yzd-v/DWPose --local-dir $CheckpointsDir/dwpose --include "dw-ll_ucoco_384.pth"
--local-dir $CheckpointsDir/dwpose \
--include "dw-ll_ucoco_384.pth"
# Download SyncNet weights # Download SyncNet weights
huggingface-cli download ByteDance/LatentSync \ huggingface-cli download ByteDance/LatentSync --local-dir $CheckpointsDir/syncnet --include "latentsync_syncnet.pt"
--local-dir $CheckpointsDir/syncnet \
--include "latentsync_syncnet.pt"
# Download Face Parse Bisent weights # Download Face Parse Bisent weights (using gdown)
gdown --id 154JgKpzCPW82qINcVieuPH3fZ2e0P812 -O $CheckpointsDir/face-parse-bisent/79999_iter.pth gdown --id 154JgKpzCPW82qINcVieuPH3fZ2e0P812 -O $CheckpointsDir/face-parse-bisent/79999_iter.pth
curl -L https://download.pytorch.org/models/resnet18-5c106cde.pth \
-o $CheckpointsDir/face-parse-bisent/resnet18-5c106cde.pth
echo "✅ All weights have been downloaded successfully!" # Download ResNet weights
curl -L https://download.pytorch.org/models/resnet18-5c106cde.pth -o $CheckpointsDir/face-parse-bisent/resnet18-5c106cde.pth
echo "All weights have been downloaded successfully!"
View File
View File
+4 -7
View File
@@ -15,7 +15,6 @@ from decord.ndarray import cpu
from musetalk.data.sample_method import get_src_idx, shift_landmarks_to_face_coordinates, resize_landmark from musetalk.data.sample_method import get_src_idx, shift_landmarks_to_face_coordinates, resize_landmark
from musetalk.data import audio from musetalk.data import audio
from musetalk.utils.audio_utils import ensure_wav
syncnet_mel_step_size = math.ceil(16 / 5 * 16) # latentsync syncnet_mel_step_size = math.ceil(16 / 5 * 16) # latentsync
@@ -172,8 +171,7 @@ class FaceDataset(Dataset):
""" """
if not os.path.exists(wav_path): if not os.path.exists(wav_path):
return None return None
wav_path_converted = ensure_wav(wav_path) audio_input_librosa, sampling_rate = librosa.load(wav_path, sr=16000)
audio_input_librosa, sampling_rate = librosa.load(wav_path_converted, sr=16000)
assert sampling_rate == 16000 assert sampling_rate == 16000
while start_index >= 25 * 30: while start_index >= 25 * 30:
@@ -208,12 +206,11 @@ class FaceDataset(Dataset):
if not os.path.exists(wav_path): if not os.path.exists(wav_path):
return None return None
wav_path_converted = ensure_wav(wav_path) audio_input, sampling_rate = librosa.load(wav_path, sr=16000)
audio_input_librosa, sampling_rate = librosa.load(wav_path_converted, sr=16000)
assert sampling_rate == 16000 assert sampling_rate == 16000
audio_mel = self.mel_feature_extractor(audio_input_librosa) audio_input = self.mel_feature_extractor(audio_input)
return audio_mel, start_index return audio_input, start_index
def mel_feature_extractor(self, audio_input): def mel_feature_extractor(self, audio_input):
"""Extract mel spectrogram features """Extract mel spectrogram features
View File
View File
-17
View File
@@ -1,17 +0,0 @@
import os, subprocess
def ensure_wav(input_path: str, target_path: str | None = None) -> str:
"""
Convert any audio (mp3/ogg/m4a/wav/…) to 16kHz mono PCM WAV via ffmpeg.
Returns path to the converted .wav (original if already correct).
"""
if not isinstance(input_path, str) or not os.path.exists(input_path):
return input_path
base, ext = os.path.splitext(input_path)
ext = ext.lower()
if target_path is None:
target_path = base + "_16k.wav"
cmd = ["ffmpeg", "-y", "-i", input_path, "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le", target_path]
subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
return target_path
View File
+1 -2
View File
@@ -118,8 +118,7 @@ def get_landmark_and_bbox(img_list,upperbondrange =0):
if upperbondrange != 0: if upperbondrange != 0:
half_face_coord[1] = upperbondrange+half_face_coord[1] #手动调整 + 向下(偏29) - 向上(偏28) half_face_coord[1] = upperbondrange+half_face_coord[1] #手动调整 + 向下(偏29) - 向上(偏28)
half_face_dist = np.max(face_land_mark[:,1]) - half_face_coord[1] half_face_dist = np.max(face_land_mark[:,1]) - half_face_coord[1]
min_upper_bond = 0 upper_bond = half_face_coord[1]-half_face_dist
upper_bond = max(min_upper_bond, half_face_coord[1] - half_face_dist)
f_landmark = (np.min(face_land_mark[:, 0]),int(upper_bond),np.max(face_land_mark[:, 0]),np.max(face_land_mark[:,1])) f_landmark = (np.min(face_land_mark[:, 0]),int(upper_bond),np.max(face_land_mark[:, 0]),np.max(face_land_mark[:,1]))
x1, y1, x2, y2 = f_landmark x1, y1, x2, y2 = f_landmark
-15
View File
@@ -1,15 +0,0 @@
# Build backend: setuptools (version 64 or newer) drives the build.
[build-system]
requires = ["setuptools>=64"]
build-backend = "setuptools.build_meta"
# Project metadata. No dependencies are declared in this file; per the
# description below, they are expected to be managed by the consumer.
[project]
name = "musetalk"
version = "1.5.0"
description = "MuseTalk: audio-driven lip-sync (source-only install; dependencies managed by the consumer)"
readme = "README.md"
requires-python = ">=3.10"
license = { text = "MIT" }
# Package discovery: include only packages matching "musetalk*" and
# explicitly exclude the scripts, assets, data and configs trees.
[tool.setuptools.packages.find]
include = ["musetalk*"]
exclude = ["scripts*", "assets*", "data*", "configs*"]
+3 -3
View File
@@ -1,9 +1,6 @@
import os import os
import argparse import argparse
import subprocess import subprocess
import torch
import numpy as np
from tqdm import tqdm
from omegaconf import OmegaConf from omegaconf import OmegaConf
from typing import Tuple, List, Union from typing import Tuple, List, Union
import decord import decord
@@ -12,6 +9,9 @@ import cv2
from musetalk.utils.face_detection import FaceAlignment,LandmarksType from musetalk.utils.face_detection import FaceAlignment,LandmarksType
from mmpose.apis import inference_topdown, init_model from mmpose.apis import inference_topdown, init_model
from mmpose.structures import merge_data_samples from mmpose.structures import merge_data_samples
import torch
import numpy as np
from tqdm import tqdm
import sys import sys
def fast_check_ffmpeg(): def fast_check_ffmpeg():
-1
View File
@@ -235,7 +235,6 @@ class Avatar:
cv2.imwrite(f"{self.avatar_path}/tmp/{str(self.idx).zfill(8)}.png", combine_frame) cv2.imwrite(f"{self.avatar_path}/tmp/{str(self.idx).zfill(8)}.png", combine_frame)
self.idx = self.idx + 1 self.idx = self.idx + 1
@torch.no_grad()
def inference(self, audio_path, out_vid_name, fps, skip_save_images): def inference(self, audio_path, out_vid_name, fps, skip_save_images):
os.makedirs(self.avatar_path + '/tmp', exist_ok=True) os.makedirs(self.avatar_path + '/tmp', exist_ok=True)
print("start inference") print("start inference")