Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 8795fa1425 | |||
| be656b199b | |||
| b9b459a119 | |||
| 2e5b74a257 | |||
| a0834ec2c2 | |||
| 0702078902 |
+1
-1
@@ -5,7 +5,7 @@
|
||||
*.pyc
|
||||
.ipynb_checkpoints
|
||||
results/
|
||||
/models/
|
||||
models/
|
||||
**/__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
+13
-9
@@ -14,28 +14,32 @@ mkdir %CheckpointsDir%\sd-vae-ft-mse
|
||||
mkdir %CheckpointsDir%\whisper
|
||||
|
||||
:: Install required packages
|
||||
pip install -U "huggingface_hub[hf_xet]"
|
||||
pip install -U "huggingface_hub[cli]"
|
||||
pip install gdown
|
||||
|
||||
:: Set HuggingFace endpoint
|
||||
set HF_ENDPOINT=https://hf-mirror.com
|
||||
|
||||
:: Download MuseTalk weights
|
||||
hf download TMElyralab/MuseTalk --local-dir %CheckpointsDir%
|
||||
huggingface-cli download TMElyralab/MuseTalk --local-dir %CheckpointsDir%
|
||||
|
||||
:: Download SD VAE weights
|
||||
hf download stabilityai/sd-vae-ft-mse --local-dir %CheckpointsDir%\sd-vae --include "config.json" "diffusion_pytorch_model.bin"
|
||||
huggingface-cli download stabilityai/sd-vae-ft-mse --local-dir %CheckpointsDir%\sd-vae --include "config.json" "diffusion_pytorch_model.bin"
|
||||
|
||||
:: Download Whisper weights
|
||||
hf download openai/whisper-tiny --local-dir %CheckpointsDir%\whisper --include "config.json" "pytorch_model.bin" "preprocessor_config.json"
|
||||
huggingface-cli download openai/whisper-tiny --local-dir %CheckpointsDir%\whisper --include "config.json" "pytorch_model.bin" "preprocessor_config.json"
|
||||
|
||||
:: Download DWPose weights
|
||||
hf download yzd-v/DWPose --local-dir %CheckpointsDir%\dwpose --include "dw-ll_ucoco_384.pth"
|
||||
huggingface-cli download yzd-v/DWPose --local-dir %CheckpointsDir%\dwpose --include "dw-ll_ucoco_384.pth"
|
||||
|
||||
:: Download SyncNet weights
|
||||
hf download ByteDance/LatentSync --local-dir %CheckpointsDir%\syncnet --include "latentsync_syncnet.pt"
|
||||
huggingface-cli download ByteDance/LatentSync --local-dir %CheckpointsDir%\syncnet --include "latentsync_syncnet.pt"
|
||||
|
||||
:: Download face-parse-bisent weights
|
||||
hf download ManyOtherFunctions/face-parse-bisent --local-dir %CheckpointsDir%\face-parse-bisent --include "79999_iter.pth" "resnet18-5c106cde.pth"
|
||||
:: Download Face Parse Bisent weights (using gdown)
|
||||
gdown --id 154JgKpzCPW82qINcVieuPH3fZ2e0P812 -O %CheckpointsDir%\face-parse-bisent\79999_iter.pth
|
||||
|
||||
:: Download ResNet weights
|
||||
curl -L https://download.pytorch.org/models/resnet18-5c106cde.pth -o %CheckpointsDir%\face-parse-bisent\resnet18-5c106cde.pth
|
||||
|
||||
echo All weights have been downloaded successfully!
|
||||
endlocal
|
||||
endlocal
|
||||
+13
-27
@@ -4,48 +4,34 @@
|
||||
CheckpointsDir="models"
|
||||
|
||||
# Create necessary directories
|
||||
mkdir -p models/musetalk models/musetalkV15 models/syncnet models/dwpose models/face-parse-bisent models/sd-vae models/whisper
|
||||
mkdir -p $CheckpointsDir/{musetalk,musetalkV15,syncnet,dwpose,face-parse-bisent,sd-vae-ft-mse,whisper}
|
||||
|
||||
# Install required packages
|
||||
pip install -U "huggingface_hub[cli]"
|
||||
pip install gdown
|
||||
|
||||
# Set HuggingFace mirror endpoint
|
||||
# Set HuggingFace endpoint
|
||||
export HF_ENDPOINT=https://hf-mirror.com
|
||||
|
||||
# Download MuseTalk V1.0 weights
|
||||
huggingface-cli download TMElyralab/MuseTalk \
|
||||
--local-dir $CheckpointsDir \
|
||||
--include "musetalk/musetalk.json" "musetalk/pytorch_model.bin"
|
||||
|
||||
# Download MuseTalk V1.5 weights (unet.pth)
|
||||
huggingface-cli download TMElyralab/MuseTalk \
|
||||
--local-dir $CheckpointsDir \
|
||||
--include "musetalkV15/musetalk.json" "musetalkV15/unet.pth"
|
||||
# Download MuseTalk weights
|
||||
huggingface-cli download TMElyralab/MuseTalk --local-dir $CheckpointsDir
|
||||
|
||||
# Download SD VAE weights
|
||||
huggingface-cli download stabilityai/sd-vae-ft-mse \
|
||||
--local-dir $CheckpointsDir/sd-vae \
|
||||
--include "config.json" "diffusion_pytorch_model.bin"
|
||||
huggingface-cli download stabilityai/sd-vae-ft-mse --local-dir $CheckpointsDir/sd-vae --include "config.json" "diffusion_pytorch_model.bin"
|
||||
|
||||
# Download Whisper weights
|
||||
huggingface-cli download openai/whisper-tiny \
|
||||
--local-dir $CheckpointsDir/whisper \
|
||||
--include "config.json" "pytorch_model.bin" "preprocessor_config.json"
|
||||
huggingface-cli download openai/whisper-tiny --local-dir $CheckpointsDir/whisper --include "config.json" "pytorch_model.bin" "preprocessor_config.json"
|
||||
|
||||
# Download DWPose weights
|
||||
huggingface-cli download yzd-v/DWPose \
|
||||
--local-dir $CheckpointsDir/dwpose \
|
||||
--include "dw-ll_ucoco_384.pth"
|
||||
huggingface-cli download yzd-v/DWPose --local-dir $CheckpointsDir/dwpose --include "dw-ll_ucoco_384.pth"
|
||||
|
||||
# Download SyncNet weights
|
||||
huggingface-cli download ByteDance/LatentSync \
|
||||
--local-dir $CheckpointsDir/syncnet \
|
||||
--include "latentsync_syncnet.pt"
|
||||
huggingface-cli download ByteDance/LatentSync --local-dir $CheckpointsDir/syncnet --include "latentsync_syncnet.pt"
|
||||
|
||||
# Download Face Parse Bisent weights
|
||||
# Download Face Parse Bisent weights (using gdown)
|
||||
gdown --id 154JgKpzCPW82qINcVieuPH3fZ2e0P812 -O $CheckpointsDir/face-parse-bisent/79999_iter.pth
|
||||
curl -L https://download.pytorch.org/models/resnet18-5c106cde.pth \
|
||||
-o $CheckpointsDir/face-parse-bisent/resnet18-5c106cde.pth
|
||||
|
||||
echo "✅ All weights have been downloaded successfully!"
|
||||
# Download ResNet weights
|
||||
curl -L https://download.pytorch.org/models/resnet18-5c106cde.pth -o $CheckpointsDir/face-parse-bisent/resnet18-5c106cde.pth
|
||||
|
||||
echo "All weights have been downloaded successfully!"
|
||||
@@ -15,7 +15,6 @@ from decord.ndarray import cpu
|
||||
|
||||
from musetalk.data.sample_method import get_src_idx, shift_landmarks_to_face_coordinates, resize_landmark
|
||||
from musetalk.data import audio
|
||||
from musetalk.utils.audio_utils import ensure_wav
|
||||
|
||||
syncnet_mel_step_size = math.ceil(16 / 5 * 16) # latentsync
|
||||
|
||||
@@ -172,8 +171,7 @@ class FaceDataset(Dataset):
|
||||
"""
|
||||
if not os.path.exists(wav_path):
|
||||
return None
|
||||
wav_path_converted = ensure_wav(wav_path)
|
||||
audio_input_librosa, sampling_rate = librosa.load(wav_path_converted, sr=16000)
|
||||
audio_input_librosa, sampling_rate = librosa.load(wav_path, sr=16000)
|
||||
assert sampling_rate == 16000
|
||||
|
||||
while start_index >= 25 * 30:
|
||||
@@ -208,12 +206,11 @@ class FaceDataset(Dataset):
|
||||
if not os.path.exists(wav_path):
|
||||
return None
|
||||
|
||||
wav_path_converted = ensure_wav(wav_path)
|
||||
audio_input_librosa, sampling_rate = librosa.load(wav_path_converted, sr=16000)
|
||||
audio_input, sampling_rate = librosa.load(wav_path, sr=16000)
|
||||
assert sampling_rate == 16000
|
||||
|
||||
audio_mel = self.mel_feature_extractor(audio_input_librosa)
|
||||
return audio_mel, start_index
|
||||
audio_input = self.mel_feature_extractor(audio_input)
|
||||
return audio_input, start_index
|
||||
|
||||
def mel_feature_extractor(self, audio_input):
|
||||
"""Extract mel spectrogram features
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
import os, subprocess
|
||||
|
||||
def ensure_wav(input_path: str, target_path: str | None = None) -> str:
|
||||
"""
|
||||
Convert any audio (mp3/ogg/m4a/wav/…) to 16kHz mono PCM WAV via ffmpeg.
|
||||
Returns path to the converted .wav (original if already correct).
|
||||
"""
|
||||
if not isinstance(input_path, str) or not os.path.exists(input_path):
|
||||
return input_path
|
||||
base, ext = os.path.splitext(input_path)
|
||||
ext = ext.lower()
|
||||
|
||||
if target_path is None:
|
||||
target_path = base + "_16k.wav"
|
||||
cmd = ["ffmpeg", "-y", "-i", input_path, "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le", target_path]
|
||||
subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
||||
return target_path
|
||||
@@ -118,8 +118,7 @@ def get_landmark_and_bbox(img_list,upperbondrange =0):
|
||||
if upperbondrange != 0:
|
||||
half_face_coord[1] = upperbondrange+half_face_coord[1] #手动调整 + 向下(偏29) - 向上(偏28)
|
||||
half_face_dist = np.max(face_land_mark[:,1]) - half_face_coord[1]
|
||||
min_upper_bond = 0
|
||||
upper_bond = max(min_upper_bond, half_face_coord[1] - half_face_dist)
|
||||
upper_bond = half_face_coord[1]-half_face_dist
|
||||
|
||||
f_landmark = (np.min(face_land_mark[:, 0]),int(upper_bond),np.max(face_land_mark[:, 0]),np.max(face_land_mark[:,1]))
|
||||
x1, y1, x2, y2 = f_landmark
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
[build-system]
|
||||
requires = ["setuptools>=64"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "musetalk"
|
||||
version = "1.5.0"
|
||||
description = "MuseTalk: audio-driven lip-sync (source-only install; dependencies managed by the consumer)"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
license = { text = "MIT" }
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["musetalk*"]
|
||||
exclude = ["scripts*", "assets*", "data*", "configs*"]
|
||||
@@ -1,9 +1,6 @@
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import torch
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
from omegaconf import OmegaConf
|
||||
from typing import Tuple, List, Union
|
||||
import decord
|
||||
@@ -12,6 +9,9 @@ import cv2
|
||||
from musetalk.utils.face_detection import FaceAlignment,LandmarksType
|
||||
from mmpose.apis import inference_topdown, init_model
|
||||
from mmpose.structures import merge_data_samples
|
||||
import torch
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
import sys
|
||||
|
||||
def fast_check_ffmpeg():
|
||||
@@ -331,4 +331,4 @@ if __name__ == "__main__":
|
||||
config = OmegaConf.load(args.config)
|
||||
|
||||
main(config)
|
||||
|
||||
|
||||
@@ -235,7 +235,6 @@ class Avatar:
|
||||
cv2.imwrite(f"{self.avatar_path}/tmp/{str(self.idx).zfill(8)}.png", combine_frame)
|
||||
self.idx = self.idx + 1
|
||||
|
||||
@torch.no_grad()
|
||||
def inference(self, audio_path, out_vid_name, fps, skip_save_images):
|
||||
os.makedirs(self.avatar_path + '/tmp', exist_ok=True)
|
||||
print("start inference")
|
||||
|
||||
Reference in New Issue
Block a user