diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5c4d088 --- /dev/null +++ b/.gitignore @@ -0,0 +1,180 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# PyPI configuration file +.pypirc + +*-bk +*_bk +*-bk* +*_bk* +*copy* + +# MacOS +.DS_Store diff --git a/README.md b/README.md index 70d753f..3859dae 100644 --- a/README.md +++ b/README.md @@ -94,16 +94,31 @@ conda env create -f environment.yml source activate deep3d_pytorch ``` -2. 
Install Nvdiffrast library: -``` -git clone -b 0.3.0 https://github.com/NVlabs/nvdiffrast -cd nvdiffrast # ./Deep3DFaceRecon_pytorch/nvdiffrast -pip install . -``` +2. Install mesh renderer: + 1. Nvdiffrast library (necessary for training, optional for testing): + ``` + git clone -b 0.3.0 https://github.com/NVlabs/nvdiffrast + cd nvdiffrast # ./Deep3DFaceRecon_pytorch/nvdiffrast + pip install . + cd .. # ./Deep3DFaceRecon_pytorch + ``` + 2. Use a cpu renderer from 3DDFA-V3 instead for testing (which can work on MacOS): + ``` + git clone --depth=1 https://github.com/wang-zidu/3DDFA-V3 + cp 3DDFA-V3/utils/cpu_renderer.py ./util/ + cp -r 3DDFA-V3/utils/cython_renderer ./util/ + + pip install Cython + + cd util/cython_renderer/ + python setup.py build_ext -i + cd ../.. # ./Deep3DFaceRecon_pytorch + ``` + 3. Skip this step for inference/test, but you need to run test.py with "--renderer_type none --no_viz" options + 3. Install Arcface Pytorch: ``` -cd .. # ./Deep3DFaceRecon_pytorch git clone https://github.com/deepinsight/insightface.git cp -r ./insightface/recognition/arcface_torch ./models/ ``` @@ -137,6 +152,24 @@ Deep3DFaceRecon_pytorch ``` ### Test with custom images + +#### Face detection +To detect 5 facial landmarks from test images, first we need to install [InsightFace](https://github.com/deepinsight/insightface) library: +``` +pip install insightface onnxruntime +``` +or +``` +pip install insightface onnxruntime-gpu +``` + +and then, run the following command: +``` +python detect_faces_by_insightface.py path/to/your/images +``` + +#### Face reconstruction + To reconstruct 3d faces from test images, organize the test image folder as follows: ``` Deep3DFaceRecon_pytorch @@ -159,6 +192,37 @@ python test.py --name= --epoch=20 --img_folder= --epoch=20 --img_folder=./datasets/examples ``` + +On **MacOS**, you can run the test script with CPU or Apple Silicon (M1, M2, M3 chips) by adding "--device cpu" or "--device mps" to the command.
For example, + +run with MPS: +``` +# get reconstruction results of your custom images +python test.py --name= --epoch=20 --img_folder= --device mps --renderer_type face3d + +# no visualization +python test.py --name= --epoch=20 --img_folder= --device mps --renderer_type none --no_viz + +# get reconstruction results of example images +python test.py --name= --epoch=20 --img_folder=./datasets/examples --device mps --renderer_type face3d + +# no visualization +python test.py --name= --epoch=20 --img_folder=./datasets/examples --device mps --renderer_type none --no_viz +``` + +or run with CPU: +``` +# get reconstruction results of your custom images +python test.py --name= --epoch=20 --img_folder= --device cpu --renderer_type face3d + +python test.py --name= --epoch=20 --img_folder= --device cpu --renderer_type none --no_viz + +# get reconstruction results of example images +python test.py --name= --epoch=20 --img_folder=./datasets/examples --device cpu --renderer_type face3d + +python test.py --name= --epoch=20 --img_folder=./datasets/examples --device cpu --renderer_type none --no_viz +``` + **_Following [#108](https://github.com/sicxu/Deep3DFaceRecon_pytorch/issues/108), if you don't have OpenGL environment, you can simply add "--use_opengl False" to use CUDA context. Make sure you have updated the nvdiffrast to the latest version._** Results will be saved into ./checkpoints//results/, which contain the following files: diff --git a/detect_faces_by_insightface.py b/detect_faces_by_insightface.py new file mode 100644 index 0000000..5d0e838 --- /dev/null +++ b/detect_faces_by_insightface.py @@ -0,0 +1,75 @@ +# coding=utf-8 +""" +Detect faces and landmarks in images using the InsightFace library.
def detect_faces(input_dir):
    """Detect the best face in each image of ``input_dir`` and save its key points.

    Every ``.png`` / ``.jpg`` / ``.jpeg`` / ``.bmp`` file directly inside
    ``input_dir`` is read with OpenCV and passed to InsightFace. For the face
    with the highest ``det_score`` the ``kps`` key points are written to
    ``detections/NAME.txt`` under ``input_dir`` (``NAME`` being the image file
    name without its extension), one ``x y`` pair per line.

    Images that cannot be decoded, or in which no face is found, are reported
    on stdout and skipped; no text file is written for them.

    Args:
        input_dir: Directory containing the images to process.
    """
    # Initialize the face analysis app; providers are listed in order of preference.
    app = FaceAnalysis(
        allowed_modules=["detection", "alignment"],
        providers=[
            "CUDAExecutionProvider",
            "CoreMLExecutionProvider",
            "CPUExecutionProvider",
        ],
    )
    app.prepare(ctx_id=0, det_size=(640, 640))

    # Create the output directory if it doesn't exist.
    output_dir = osp.join(input_dir, "detections")
    os.makedirs(output_dir, exist_ok=True)

    # Sort so the processing order is deterministic (os.listdir order is arbitrary).
    image_exts = {".png", ".jpg", ".jpeg", ".bmp"}
    file_list = sorted(
        ff for ff in os.listdir(input_dir) if osp.splitext(ff)[-1].lower() in image_exts
    )

    print(f"--> image files: \n{file_list}")

    for ii, filename in enumerate(file_list):
        print(f"--> {ii}: {filename}")
        img = cv2.imread(osp.join(input_dir, filename))

        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising; feeding that to the detector would crash the run.
        if img is None:
            print(f"Failed to read image, skipping: {filename}")
            continue

        faces = app.get(img)
        if not faces:
            print("No faces detected")
            continue

        print(f"Detected {len(faces)} faces")
        # Keep only the most confident detection.
        best_face = max(faces, key=lambda face: face["det_score"])
        keypoints = best_face["kps"]

        output_path = osp.join(output_dir, f"{osp.splitext(filename)[0]}.txt")
        with open(output_path, "w") as f:
            f.writelines(f"{point[0]} {point[1]}\n" for point in keypoints)
a/models/base_model.py b/models/base_model.py index 2a05d3a..10e9816 100644 --- a/models/base_model.py +++ b/models/base_model.py @@ -152,15 +152,16 @@ def eval(self): net = getattr(self, name) net.eval() - def test(self): + def test(self, do_render=True): """Forward function used in test time. This function wraps function in no_grad() so we don't save intermediate steps for backprop It also calls to produce additional visualization results """ with torch.no_grad(): - self.forward() - self.compute_visuals() + self.forward(do_render=do_render) + if do_render: + self.compute_visuals() def compute_visuals(self): """Calculate additional output images for visdom and HTML visualization""" diff --git a/models/facerecon_model.py b/models/facerecon_model.py index dfaaea9..28f08d0 100644 --- a/models/facerecon_model.py +++ b/models/facerecon_model.py @@ -8,7 +8,6 @@ from .bfm import ParametricFaceModel from .losses import perceptual_loss, photo_loss, reg_loss, reflectance_loss, landmark_loss from util import util -from util.nvdiffrast import MeshRenderer from util.preprocess import estimate_norm_torch import trimesh @@ -85,7 +84,12 @@ def __init__(self, opt): self.visual_names = ['output_vis'] self.model_names = ['net_recon'] - self.parallel_names = self.model_names + ['renderer'] + self.renderer_type = opt.renderer_type + + self.parallel_names = self.model_names + + if opt.renderer_type == "nvdiffrast": + self.parallel_names.append('renderer') self.net_recon = networks.define_net_recon( net_recon=opt.net_recon, use_last_fc=opt.use_last_fc, init_path=opt.init_path @@ -96,12 +100,28 @@ def __init__(self, opt): is_train=self.isTrain, default_name=opt.bfm_model ) - fov = 2 * np.arctan(opt.center / opt.focal) * 180 / np.pi - self.renderer = MeshRenderer( - rasterize_fov=fov, znear=opt.z_near, zfar=opt.z_far, rasterize_size=int(2 * opt.center), use_opengl=opt.use_opengl - ) + if opt.renderer_type == "nvdiffrast": + from util.nvdiffrast import MeshRenderer + + fov = 2 * 
np.arctan(opt.center / opt.focal) * 180 / np.pi + self.renderer = MeshRenderer( + rasterize_fov=fov, znear=opt.z_near, zfar=opt.z_far, rasterize_size=int(2 * opt.center), use_opengl=opt.use_opengl + ) + elif opt.renderer_type == "face3d" or opt.renderer_type == "cpu": + from util.cpu_renderer import MeshRenderer_cpu as MeshRenderer + + fov = 2 * np.arctan(opt.center / opt.focal) * 180 / np.pi + self.renderer = MeshRenderer( + rasterize_fov=fov, znear=opt.z_near, zfar=opt.z_far, rasterize_size=int(2 * opt.center) + ) + else: + self.renderer = None + + self.output_vis = None if self.isTrain: + assert self.renderer_type == "nvdiffrast", f"{self.cls.__name__} should be initialized with opt.renderer_type='nvdiffrast'" + self.loss_names = ['all', 'feat', 'color', 'lm', 'reg', 'gamma', 'reflc'] self.net_recog = networks.define_net_recog( @@ -125,19 +145,30 @@ def set_input(self, input): Parameters: input: a dictionary that contains the data itself and its metadata information. """ - self.input_img = input['imgs'].to(self.device) - self.atten_mask = input['msks'].to(self.device) if 'msks' in input else None - self.gt_lm = input['lms'].to(self.device) if 'lms' in input else None - self.trans_m = input['M'].to(self.device) if 'M' in input else None - self.image_paths = input['im_paths'] if 'im_paths' in input else None - - def forward(self): + if self.device.type == 'mps': # torch.mps is not supported for torch.float64 + self.input_img = input['imgs'].to(dtype=torch.float32, device=self.device) + self.atten_mask = input['msks'].to(dtype=torch.float32, device=self.device) if 'msks' in input else None + self.gt_lm = input['lms'].to(dtype=torch.float32, device=self.device) if 'lms' in input else None + self.trans_m = input['M'].to(dtype=torch.float32, device=self.device) if 'M' in input else None + self.image_paths = input['im_paths'] if 'im_paths' in input else None + else: + self.input_img = input['imgs'].to(self.device) + self.atten_mask = input['msks'].to(self.device) if 
def forward(self, do_render=True):
    """Predict face coefficients and, optionally, render the reconstructed face.

    The reconstruction network is applied to ``self.input_img``; the face
    model turns the coefficients into ``pred_vertex``, ``pred_tex``,
    ``pred_color`` and ``pred_lm``, and ``pred_coeffs_dict`` receives the
    split coefficients.

    Parameters:
        do_render (bool) -- when true, ``self.renderer`` is called and the
                            first and third of its outputs are stored as
                            ``pred_mask`` and ``pred_face``. When false the
                            renderer is not touched and those two attributes
                            are left as they were.

    Raises:
        AssertionError -- if ``do_render`` is true but the model was built
                          without a renderer (``self.renderer is None``).
    """
    if do_render and self.renderer is None:
        # Raised explicitly (not via `assert`) so the check survives `python -O`;
        # type(self) is used because it is always available on the instance.
        raise AssertionError(
            f"{type(self).__name__} has no renderer: initialize it with "
            "opt.renderer_type set to 'nvdiffrast', 'face3d' or 'cpu', "
            "or call forward(do_render=False)"
        )

    output_coeff = self.net_recon(self.input_img)
    self.facemodel.to(self.device)
    self.pred_vertex, self.pred_tex, self.pred_color, self.pred_lm = \
        self.facemodel.compute_for_render(output_coeff)

    if do_render:
        # Renderers may return more than three values; only the first three are used.
        self.pred_mask, _, self.pred_face = self.renderer(
            self.pred_vertex, self.facemodel.face_buf, feat=self.pred_color)[:3]

    self.pred_coeffs_dict = self.facemodel.split_coeff(output_coeff)
use -1 for CPU') parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here') parser.add_argument('--vis_batch_nums', type=float, default=1, help='batch nums of images for visulization') @@ -41,6 +42,9 @@ def initialize(self, parser): # model parameters parser.add_argument('--model', type=str, default='facerecon', help='chooses which model to use.') + # renderer parameters + parser.add_argument('--renderer_type', type=str, default='nvdiffrast', help='chooses which renderer to use. [nvdiffrast | face3d | cpu | none], cpu=face3d') + # additional parameters parser.add_argument('--epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model') parser.add_argument('--verbose', action='store_true', help='if specified, print more debugging information') diff --git a/options/test_options.py b/options/test_options.py index 4ff3ad1..b14dbd1 100644 --- a/options/test_options.py +++ b/options/test_options.py @@ -1,5 +1,4 @@ -"""This script contains the test options for Deep3DFaceRecon_pytorch -""" +"""This script contains the test options for Deep3DFaceRecon_pytorch""" from .base_options import BaseOptions @@ -12,9 +11,40 @@ class TestOptions(BaseOptions): def initialize(self, parser): parser = BaseOptions.initialize(self, parser) # define shared options - parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') - parser.add_argument('--dataset_mode', type=str, default=None, help='chooses how datasets are loaded. [None | flist]') - parser.add_argument('--img_folder', type=str, default='examples', help='folder for test images.') + parser.add_argument( + "--phase", + type=str, + default="test", + help="train, val, test, etc", + ) + parser.add_argument( + "--dataset_mode", + type=str, + default=None, + help="chooses how datasets are loaded. [None | flist]", + ) + parser.add_argument( + "--img_folder", type=str, default="examples", help="folder for test images." 
"""This script is the test script for Deep3DFaceRecon_pytorch"""

import os
import os.path as osp

import numpy as np
import torch
from PIL import Image

from models import create_model
from options.test_options import TestOptions
from util.load_mats import load_lm3d
from util.preprocess import align_img
from util.visualizer import MyVisualizer


def get_data_path(root="examples"):
    """Collect image paths under ``root`` and the landmark file expected for each.

    A file counts as an image when its lower-cased extension is one that PIL
    has registered. The landmark file of ``NAME.ext`` is
    ``detections/NAME.txt`` inside ``root``; its existence is not checked here.

    Returns:
        (im_path, lm_path): two lists of equal length, sorted by file name.
    """
    supported_extensions = set(Image.registered_extensions())

    file_list = [
        ff
        for ff in sorted(os.listdir(root))
        if osp.splitext(ff)[-1].lower() in supported_extensions
    ]
    im_path = [osp.join(root, ff) for ff in file_list]
    lm_path = [
        osp.join(root, "detections", osp.splitext(ff)[0] + ".txt") for ff in file_list
    ]

    return im_path, lm_path


def read_data(im_path, lm_path, lm3d_std, to_tensor=True):
    """Load one image with its landmarks and align them with ``align_img``.

    Args:
        im_path: Path of the image; it is converted to RGB.
        lm_path: Text file readable by ``np.loadtxt``; reshaped to rows of two values.
        lm3d_std: Reference landmarks forwarded to ``align_img``.
        to_tensor: When true, the image is returned as a float32 tensor scaled
            by 1/255 with a leading batch axis and channels first, and the
            landmarks as a tensor with a leading batch axis.

    Returns:
        (im, lm): the aligned image and landmarks.
    """
    # to RGB
    im = Image.open(im_path).convert("RGB")
    _, H = im.size
    lm = np.loadtxt(lm_path).astype(np.float32)
    lm = lm.reshape([-1, 2])
    # Mirror the last coordinate against the image height.
    lm[:, -1] = H - 1 - lm[:, -1]
    _, im, lm, _ = align_img(im, lm, lm3d_std)
    if to_tensor:
        im = (
            torch.tensor(np.array(im) / 255.0, dtype=torch.float32)
            .permute(2, 0, 1)
            .unsqueeze(0)
        )
        lm = torch.tensor(lm).unsqueeze(0)
    return im, lm


def _first_gpu_id(gpu_ids):
    """Return the first GPU index from a "0,1"-style string or a sequence of ints."""
    if isinstance(gpu_ids, str):
        gpu_ids = [tok for tok in gpu_ids.split(",") if tok.strip()]
    if not gpu_ids:
        raise ValueError("--device cuda requires at least one GPU id in --gpu_ids")
    gpu_id = int(gpu_ids[0])
    if gpu_id < 0:
        raise ValueError(
            f"--device cuda requires a non-negative GPU id, got {gpu_id}; "
            "use --device cpu to run on CPU"
        )
    return gpu_id


def _resolve_device(opt):
    """Translate ``opt.device`` (and ``opt.gpu_ids`` for CUDA) into a ``torch.device``."""
    if opt.device == "mps":
        # torch.backends.mps is the documented availability check and exists in
        # every PyTorch release that ships the MPS backend.
        if not torch.backends.mps.is_available():
            raise RuntimeError("MPS is not available")
        print("--> Running on mps")
        return torch.device("mps")

    if opt.device == "cuda":
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA is not available")
        gpu_id = _first_gpu_id(opt.gpu_ids)
        device = torch.device(f"cuda:{gpu_id}")
        torch.cuda.set_device(device)
        print(f"--> Running on cuda:{gpu_id}")
        return device

    # Any other value falls back to the CPU.
    print("--> Running on cpu")
    return torch.device("cpu")


def main(opt):
    """Reconstruct every image in ``opt.img_folder`` and save meshes and coefficients.

    Images without a landmark file are reported and skipped. With
    ``opt.do_visualize`` the results are also rendered through ``MyVisualizer``
    and stored below its image directory; otherwise they go to a sibling
    folder named after ``opt.img_folder`` with a ``-results`` suffix.

    Raises:
        RuntimeError: if the requested device is unavailable, or visualization
            is requested for a model that has no renderer.
        ValueError: if ``--device cuda`` is used without a valid GPU id.
    """
    device = _resolve_device(opt)

    model = create_model(opt)
    model.setup(opt)
    model.device = device
    model.parallelize()
    model.eval()

    # normpath drops a trailing separator, which would otherwise make the
    # dataset name an empty string.
    img_folder = osp.normpath(opt.img_folder)
    dataset_name = osp.basename(img_folder)

    visualizer = None
    if opt.do_visualize:
        if model.renderer is None:
            raise RuntimeError(
                "Visualization is only supported for models with a renderer"
            )
        visualizer = MyVisualizer(opt)
        save_dir = osp.join(
            visualizer.img_dir,
            dataset_name,
            "epoch_%s_%06d" % (opt.epoch, 0),
        )
    else:
        save_dir = img_folder + "-results"
    print(f"--> Save dir: {save_dir}")

    os.makedirs(save_dir, exist_ok=True)

    im_paths, lm_paths = get_data_path(opt.img_folder)
    lm3d_std = load_lm3d(opt.bfm_folder)

    for i, (im_file, lm_file) in enumerate(zip(im_paths, lm_paths)):
        print(i, im_file)
        img_name = osp.splitext(osp.basename(im_file))[0]

        if not osp.isfile(lm_file):
            print("%s is not found !!!" % lm_file)
            continue

        im_tensor, lm_tensor = read_data(im_file, lm_file, lm3d_std)
        data = {
            "imgs": im_tensor,
            "lms": lm_tensor,
        }
        model.set_input(data)  # unpack data from data loader
        model.test(do_render=opt.do_visualize)  # run inference

        if visualizer is not None:
            visuals = model.get_current_visuals()  # get image results
            visualizer.display_current_results(
                visuals,
                0,
                opt.epoch,
                dataset=dataset_name,
                save_results=True,
                count=i,
                name=img_name,
                add_image=False,
            )

        # save reconstruction meshes
        model.save_mesh(osp.join(save_dir, img_name + ".obj"))
        # save predicted coefficients
        model.save_coeff(osp.join(save_dir, img_name + ".mat"))

    print(f"--> Results saved under dir: {save_dir}")


if __name__ == "__main__":
    opt = TestOptions().parse()  # get test options
    main(opt)
100644 --- a/util/preprocess.py +++ b/util/preprocess.py @@ -196,6 +196,7 @@ def align_img(img, lm, lm3D, mask=None, target_size=224., rescale_factor=102.): # calculate translation and scale factors using 5 facial landmarks and standard landmarks of a 3D face t, s = POS(lm5p.transpose(), lm3D.transpose()) s = rescale_factor/s + t = t.squeeze() # (2,1) -> (2,) # processing the image img_new, lm_new, mask_new = resize_n_crop_img(img, lm, t, s, target_size=target_size, mask=mask)