diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5c4d088
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,180 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# PyPI configuration file
+.pypirc
+
+*-bk
+*_bk
+*-bk*
+*_bk*
+*copy*
+
+# MacOS
+.DS_Store
diff --git a/README.md b/README.md
index 70d753f..3859dae 100644
--- a/README.md
+++ b/README.md
@@ -94,16 +94,31 @@ conda env create -f environment.yml
 source activate deep3d_pytorch
 ```
 
-2. Install Nvdiffrast library:
-```
-git clone -b 0.3.0 https://github.com/NVlabs/nvdiffrast
-cd nvdiffrast    # ./Deep3DFaceRecon_pytorch/nvdiffrast
-pip install .
-```
+2. Install mesh renderer:
+   1.  Nvdiffrast library (necessary for training, optional for testing):
+    ```
+    git clone -b 0.3.0 https://github.com/NVlabs/nvdiffrast
+    cd nvdiffrast    # ./Deep3DFaceRecon_pytorch/nvdiffrast
+    pip install .
+    cd ..    # ./Deep3DFaceRecon_pytorch
+    ```
+   2.  Alternatively, use the CPU renderer from 3DDFA-V3 for testing (this also works on macOS):
+    ```
+    git clone --depth=1 https://github.com/wang-zidu/3DDFA-V3
+    cp 3DDFA-V3/utils/cpu_renderer.py ./util/
+    cp -r 3DDFA-V3/utils/cython_renderer ./util/
+
+    pip install Cython
+
+    cd util/cython_renderer/
+    python setup.py build_ext -i
+    cd ../..     # ./Deep3DFaceRecon_pytorch
+    ```
+   3.  Skip this step if you only need inference/testing without rendering; in that case, run test.py with the "--renderer_type none --no_viz" options.
+
 
 3. Install Arcface Pytorch:
 ```
-cd ..    # ./Deep3DFaceRecon_pytorch
 git clone https://github.com/deepinsight/insightface.git
 cp -r ./insightface/recognition/arcface_torch ./models/
 ```
@@ -137,6 +152,24 @@ Deep3DFaceRecon_pytorch
 ```
 
 ### Test with custom images
+
+#### Face detection
+To detect the 5 facial landmarks in the test images, first install the [InsightFace](https://github.com/deepinsight/insightface) library:
+```
+pip install insightface onnxruntime
+```
+or 
+```
+pip install insightface onnxruntime-gpu
+```
+
+and then, run the following command:
+```
+python detect_faces_by_insightface.py <folder_to_test_images>
+```
+
+#### Face reconstruction
+
 To reconstruct 3d faces from test images, organize the test image folder as follows:
 ```
 Deep3DFaceRecon_pytorch
@@ -159,6 +192,37 @@ python test.py --name=<model_name> --epoch=20 --img_folder=<folder_to_test_image
 # get reconstruction results of example images
 python test.py --name=<model_name> --epoch=20 --img_folder=./datasets/examples
 ```
+
+On **MacOS**, you can run the test script with CPU or Apple Silicon (M1, M2, M3 chips) by adding "--device cpu" or "--device mps" to the command. For example,
+
+run with MPS:
+```
+# get reconstruction results of your custom images
+python test.py --name=<model_name> --epoch=20 --img_folder=<folder_to_test_images> --device mps --renderer_type face3d
+
+# no visualization
+python test.py --name=<model_name> --epoch=20 --img_folder=<folder_to_test_images> --device mps --renderer_type none --no_viz
+
+# get reconstruction results of example images
+python test.py --name=<model_name> --epoch=20 --img_folder=./datasets/examples --device mps --renderer_type face3d
+
+# no visualization
+python test.py --name=<model_name> --epoch=20 --img_folder=./datasets/examples --device mps --renderer_type none --no_viz
+```
+
+or run with CPU:
+```
+# get reconstruction results of your custom images
+python test.py --name=<model_name> --epoch=20 --img_folder=<folder_to_test_images> --device cpu --renderer_type face3d
+
+python test.py --name=<model_name> --epoch=20 --img_folder=<folder_to_test_images> --device cpu --renderer_type none --no_viz
+
+# get reconstruction results of example images
+python test.py --name=<model_name> --epoch=20 --img_folder=./datasets/examples --device cpu --renderer_type face3d
+
+python test.py --name=<model_name> --epoch=20 --img_folder=./datasets/examples --device cpu --renderer_type none --no_viz
+```
+
 **_Following [#108](https://github.com/sicxu/Deep3DFaceRecon_pytorch/issues/108), if you don't have OpenGL environment, you can simply add "--use_opengl False" to use CUDA context. Make sure you have updated the nvdiffrast to the latest version._**
 
 Results will be saved into ./checkpoints/<model_name>/results/<folder_to_test_images>, which contain the following files:
diff --git a/detect_faces_by_insightface.py b/detect_faces_by_insightface.py
new file mode 100644
index 0000000..5d0e838
--- /dev/null
+++ b/detect_faces_by_insightface.py
@@ -0,0 +1,75 @@
+# coding=utf-8
+"""
+Detect faces and landmarks in images using the InsightFace library.
+
+Authors:
+    zhaoyafei (zhaoyafei0210@gmail.com, https://github.com/walkoncross)
+"""
+
+import os
+import os.path as osp
+
+import cv2
+from insightface.app import FaceAnalysis
+
+
+def detect_faces(input_dir):
+    """Detect 5-point face landmarks for every image in ``input_dir``.
+
+    For each image, the key points of the highest-scoring face are written
+    to ``<input_dir>/detections/<image stem>.txt``, one "x y" pair per line.
+    Unreadable images and images without a detected face are skipped.
+    """
+    app = FaceAnalysis(
+        allowed_modules=["detection", "alignment"],
+        providers=[
+            "CUDAExecutionProvider",
+            "CoreMLExecutionProvider",
+            "CPUExecutionProvider",
+        ],
+    )
+    app.prepare(ctx_id=0, det_size=(640, 640))
+
+    output_dir = osp.join(input_dir, "detections")
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Sorted so the processing order does not depend on the file system.
+    image_exts = {".png", ".jpg", ".jpeg", ".bmp"}
+    file_list = sorted(
+        ff
+        for ff in os.listdir(input_dir)
+        if osp.splitext(ff)[-1].lower() in image_exts
+    )
+    print(f"--> image files: \n{file_list}")
+
+    for ii, filename in enumerate(file_list):
+        print(f"--> {ii}: {filename}")
+        img = cv2.imread(osp.join(input_dir, filename))
+        if img is None:
+            # cv2.imread signals an unreadable file by returning None.
+            print("Failed to read image, skipped")
+            continue
+
+        faces = app.get(img)
+        if not faces:
+            print("No faces detected")
+            continue
+
+        print(f"Detected {len(faces)} faces")
+        # Keep only the most confident detection.
+        best_face = max(faces, key=lambda face: face["det_score"])
+        # The landmark file shares the image's stem.
+        output_path = osp.join(output_dir, f"{osp.splitext(filename)[0]}.txt")
+        with open(output_path, "w") as f:
+            for point in best_face["kps"]:
+                f.write(f"{point[0]} {point[1]}\n")
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("input_dir", help="Directory containing images")
+    args = parser.parse_args()
+
+    detect_faces(args.input_dir)
diff --git a/models/.gitignore b/models/.gitignore
new file mode 100644
index 0000000..b8c04b1
--- /dev/null
+++ b/models/.gitignore
@@ -0,0 +1 @@
+arcface_torch/
\ No newline at end of file
diff --git a/models/base_model.py b/models/base_model.py
index 2a05d3a..10e9816 100644
--- a/models/base_model.py
+++ b/models/base_model.py
@@ -152,15 +152,16 @@ def eval(self):
                 net = getattr(self, name)
                 net.eval()
 
-    def test(self):
+    def test(self, do_render=True):
         """Forward function used in test time.
 
         This function wraps <forward> function in no_grad() so we don't save intermediate steps for backprop
         It also calls <compute_visuals> to produce additional visualization results
         """
         with torch.no_grad():
-            self.forward()
-            self.compute_visuals()
+            self.forward(do_render=do_render)
+            if do_render:
+                self.compute_visuals()
 
     def compute_visuals(self):
         """Calculate additional output images for visdom and HTML visualization"""
diff --git a/models/facerecon_model.py b/models/facerecon_model.py
index dfaaea9..28f08d0 100644
--- a/models/facerecon_model.py
+++ b/models/facerecon_model.py
@@ -8,7 +8,6 @@
 from .bfm import ParametricFaceModel
 from .losses import perceptual_loss, photo_loss, reg_loss, reflectance_loss, landmark_loss
 from util import util 
-from util.nvdiffrast import MeshRenderer
 from util.preprocess import estimate_norm_torch
 
 import trimesh
@@ -85,7 +84,12 @@ def __init__(self, opt):
         
         self.visual_names = ['output_vis']
         self.model_names = ['net_recon']
-        self.parallel_names = self.model_names + ['renderer']
+        self.renderer_type = opt.renderer_type
+
+        # Copy the list: appending to an alias would also add 'renderer' to model_names.
+        self.parallel_names = list(self.model_names)
+        if opt.renderer_type == "nvdiffrast":
+            self.parallel_names.append('renderer')
 
         self.net_recon = networks.define_net_recon(
             net_recon=opt.net_recon, use_last_fc=opt.use_last_fc, init_path=opt.init_path
@@ -96,12 +100,28 @@ def __init__(self, opt):
             is_train=self.isTrain, default_name=opt.bfm_model
         )
         
-        fov = 2 * np.arctan(opt.center / opt.focal) * 180 / np.pi
-        self.renderer = MeshRenderer(
-            rasterize_fov=fov, znear=opt.z_near, zfar=opt.z_far, rasterize_size=int(2 * opt.center), use_opengl=opt.use_opengl
-        )
+        if opt.renderer_type == "nvdiffrast":
+            from util.nvdiffrast import MeshRenderer
+
+            fov = 2 * np.arctan(opt.center / opt.focal) * 180 / np.pi
+            self.renderer = MeshRenderer(
+                rasterize_fov=fov, znear=opt.z_near, zfar=opt.z_far, rasterize_size=int(2 * opt.center), use_opengl=opt.use_opengl
+            )
+        elif opt.renderer_type == "face3d" or opt.renderer_type == "cpu":
+            from util.cpu_renderer import MeshRenderer_cpu as MeshRenderer
+
+            fov = 2 * np.arctan(opt.center / opt.focal) * 180 / np.pi
+            self.renderer = MeshRenderer(
+                rasterize_fov=fov, znear=opt.z_near, zfar=opt.z_far, rasterize_size=int(2 * opt.center)
+            )
+        else:
+            self.renderer = None
+
+        self.output_vis = None
 
         if self.isTrain:
+            assert self.renderer_type == "nvdiffrast", f"{type(self).__name__} should be initialized with opt.renderer_type='nvdiffrast'"
+
             self.loss_names = ['all', 'feat', 'color', 'lm', 'reg', 'gamma', 'reflc']
 
             self.net_recog = networks.define_net_recog(
@@ -125,19 +145,30 @@ def set_input(self, input):
         Parameters:
             input: a dictionary that contains the data itself and its metadata information.
         """
-        self.input_img = input['imgs'].to(self.device) 
-        self.atten_mask = input['msks'].to(self.device) if 'msks' in input else None
-        self.gt_lm = input['lms'].to(self.device)  if 'lms' in input else None
-        self.trans_m = input['M'].to(self.device) if 'M' in input else None
-        self.image_paths = input['im_paths'] if 'im_paths' in input else None
-
-    def forward(self):
+        if self.device.type == 'mps': # torch.mps is not supported for torch.float64
+            self.input_img = input['imgs'].to(dtype=torch.float32, device=self.device) 
+            self.atten_mask = input['msks'].to(dtype=torch.float32, device=self.device) if 'msks' in input else None
+            self.gt_lm = input['lms'].to(dtype=torch.float32, device=self.device)  if 'lms' in input else None
+            self.trans_m = input['M'].to(dtype=torch.float32, device=self.device) if 'M' in input else None
+            self.image_paths = input['im_paths'] if 'im_paths' in input else None
+        else:
+            self.input_img = input['imgs'].to(self.device) 
+            self.atten_mask = input['msks'].to(self.device) if 'msks' in input else None
+            self.gt_lm = input['lms'].to(self.device)  if 'lms' in input else None
+            self.trans_m = input['M'].to(self.device) if 'M' in input else None
+            self.image_paths = input['im_paths'] if 'im_paths' in input else None
+
+    def forward(self, do_render=True):
+        if do_render:
+            assert self.renderer is not None, f"{type(self).__name__} needs a renderer: set opt.renderer_type to 'nvdiffrast', 'face3d' or 'cpu'"
         output_coeff = self.net_recon(self.input_img)
         self.facemodel.to(self.device)
         self.pred_vertex, self.pred_tex, self.pred_color, self.pred_lm = \
             self.facemodel.compute_for_render(output_coeff)
-        self.pred_mask, _, self.pred_face = self.renderer(
-            self.pred_vertex, self.facemodel.face_buf, feat=self.pred_color)
+
+        if do_render:
+            self.pred_mask, _, self.pred_face = self.renderer(
+                self.pred_vertex, self.facemodel.face_buf, feat=self.pred_color)[:3]
         
         self.pred_coeffs_dict = self.facemodel.split_coeff(output_coeff)
 
diff --git a/options/base_options.py b/options/base_options.py
index 67375d0..6be9d88 100644
--- a/options/base_options.py
+++ b/options/base_options.py
@@ -28,6 +28,7 @@ def initialize(self, parser):
         """Define the common options that are used in both training and test."""
         # basic parameters
         parser.add_argument('--name', type=str, default='face_recon', help='name of the experiment. It decides where to store samples and models')
+        parser.add_argument('--device', type=str, default='cuda', help='device to run the model, [cuda | cpu | mps]')
         parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0  0,1,2, 0,2. use -1 for CPU')
         parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here')
         parser.add_argument('--vis_batch_nums', type=float, default=1, help='batch nums of images for visulization')
@@ -41,6 +42,9 @@ def initialize(self, parser):
         # model parameters
         parser.add_argument('--model', type=str, default='facerecon', help='chooses which model to use.')
 
+        # renderer parameters
+        parser.add_argument('--renderer_type', type=str, default='nvdiffrast', help='chooses which renderer to use. [nvdiffrast | face3d | cpu | none], cpu=face3d')
+
         # additional parameters
         parser.add_argument('--epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
         parser.add_argument('--verbose', action='store_true', help='if specified, print more debugging information')
diff --git a/options/test_options.py b/options/test_options.py
index 4ff3ad1..b14dbd1 100644
--- a/options/test_options.py
+++ b/options/test_options.py
@@ -1,5 +1,4 @@
-"""This script contains the test options for Deep3DFaceRecon_pytorch
-"""
+"""This script contains the test options for Deep3DFaceRecon_pytorch"""
 
 from .base_options import BaseOptions
 
@@ -12,9 +11,40 @@ class TestOptions(BaseOptions):
 
     def initialize(self, parser):
         parser = BaseOptions.initialize(self, parser)  # define shared options
-        parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc')
-        parser.add_argument('--dataset_mode', type=str, default=None, help='chooses how datasets are loaded. [None | flist]')
-        parser.add_argument('--img_folder', type=str, default='examples', help='folder for test images.')
+        parser.add_argument(
+            "--phase",
+            type=str,
+            default="test",
+            help="train, val, test, etc",
+        )
+        parser.add_argument(
+            "--dataset_mode",
+            type=str,
+            default=None,
+            help="chooses how datasets are loaded. [None | flist]",
+        )
+        parser.add_argument(
+            "--img_folder", type=str, default="examples", help="folder for test images."
+        )
+        
+        group = parser.add_mutually_exclusive_group(required=False)
+        group.add_argument(
+            "--visualize",
+            "--vis",
+            "--viz",
+            dest="do_visualize",
+            action="store_true",
+            default=True,
+            help="render/visualize the results.",
+        )
+        group.add_argument(
+            "--no_visualize",
+            "--no_vis",
+            "--no_viz",
+            dest="do_visualize",
+            action="store_false",
+            help="do not render/visualize the results.",
+        )
 
         # Dropout and Batchnorm has different behavior during training and test.
         self.isTrain = False
diff --git a/test.py b/test.py
index 13e1a7d..474dddc 100644
--- a/test.py
+++ b/test.py
@@ -1,74 +1,134 @@
-"""This script is the test script for Deep3DFaceRecon_pytorch
-"""
+"""This script is the test script for Deep3DFaceRecon_pytorch"""
 
 import os
-from options.test_options import TestOptions
-from data import create_dataset
-from models import create_model
-from util.visualizer import MyVisualizer
-from util.preprocess import align_img
-from PIL import Image
+import os.path as osp
+
 import numpy as np
+import torch
+from PIL import Image
+
+from models import create_model
+from options.test_options import TestOptions
 from util.load_mats import load_lm3d
-import torch 
-from data.flist_dataset import default_flist_reader
-from scipy.io import loadmat, savemat
+from util.preprocess import align_img
+from util.visualizer import MyVisualizer
+
 
-def get_data_path(root='examples'):
-    
-    im_path = [os.path.join(root, i) for i in sorted(os.listdir(root)) if i.endswith('png') or i.endswith('jpg')]
-    lm_path = [i.replace('png', 'txt').replace('jpg', 'txt') for i in im_path]
-    lm_path = [os.path.join(i.replace(i.split(os.path.sep)[-1],''),'detections',i.split(os.path.sep)[-1]) for i in lm_path]
+def get_data_path(root="examples"):
+    supported_extensions = list(Image.registered_extensions().keys())
+    # print(f"--> supported_extensions: {supported_extensions}")
+
+    file_list = [
+        ff
+        for ff in sorted(os.listdir(root))
+        if osp.splitext(ff)[-1].lower() in supported_extensions
+    ]
+    im_path = [osp.join(root, ff) for ff in file_list]
+    lm_path = [
+        osp.join(root, "detections", osp.splitext(ff)[0] + ".txt") for ff in file_list
+    ]
 
     return im_path, lm_path
 
+
 def read_data(im_path, lm_path, lm3d_std, to_tensor=True):
-    # to RGB 
-    im = Image.open(im_path).convert('RGB')
-    W,H = im.size
+    # to RGB
+    im = Image.open(im_path).convert("RGB")
+    W, H = im.size
     lm = np.loadtxt(lm_path).astype(np.float32)
     lm = lm.reshape([-1, 2])
     lm[:, -1] = H - 1 - lm[:, -1]
     _, im, lm, _ = align_img(im, lm, lm3d_std)
     if to_tensor:
-        im = torch.tensor(np.array(im)/255., dtype=torch.float32).permute(2, 0, 1).unsqueeze(0)
+        im = (
+            torch.tensor(np.array(im) / 255.0, dtype=torch.float32)
+            .permute(2, 0, 1)
+            .unsqueeze(0)
+        )
         lm = torch.tensor(lm).unsqueeze(0)
     return im, lm
 
-def main(rank, opt, name='examples'):
-    device = torch.device(rank)
-    torch.cuda.set_device(device)
+
+def main(opt):
+    if opt.device == "mps":
+        assert torch.backends.mps.is_available(), "MPS is not available"
+        device = torch.device("mps")
+        print("--> Running on mps")
+    elif opt.device == "cuda":
+        assert torch.cuda.is_available(), "CUDA is not available"
+        device = torch.device(opt.gpu_ids[0])
+        torch.cuda.set_device(device)
+        print(f"--> Running on cuda:{opt.gpu_ids[0]}")
+    else:
+        device = torch.device("cpu")
+        print("--> Running on cpu")
+
     model = create_model(opt)
     model.setup(opt)
     model.device = device
     model.parallelize()
     model.eval()
-    visualizer = MyVisualizer(opt)
 
-    im_path, lm_path = get_data_path(name)
-    lm3d_std = load_lm3d(opt.bfm_folder) 
+    if opt.do_visualize:
+        assert (
+            model.renderer is not None
+        ), "Visualization is only supported for models with a renderer"
+
+        visualizer = MyVisualizer(opt)
+        save_dir = osp.join(
+            visualizer.img_dir,
+            opt.img_folder.split(osp.sep)[-1],
+            "epoch_%s_%06d" % (opt.epoch, 0),
+        )
+    else:
+        save_dir = opt.img_folder + "-results"
+    print(f"--> Save dir: {save_dir}")
+
+    if not osp.exists(save_dir):
+        os.makedirs(save_dir)
+
+    im_path, lm_path = get_data_path(opt.img_folder)
+    lm3d_std = load_lm3d(opt.bfm_folder)
 
     for i in range(len(im_path)):
         print(i, im_path[i])
-        img_name = im_path[i].split(os.path.sep)[-1].replace('.png','').replace('.jpg','')
-        if not os.path.isfile(lm_path[i]):
-            print("%s is not found !!!"%lm_path[i])
+        img_name = osp.splitext(osp.basename(im_path[i]))[0]
+
+        if not osp.isfile(lm_path[i]):
+            print("%s is not found !!!" % lm_path[i])
             continue
         im_tensor, lm_tensor = read_data(im_path[i], lm_path[i], lm3d_std)
         data = {
-            'imgs': im_tensor,
-            'lms': lm_tensor
+            "imgs": im_tensor,
+            "lms": lm_tensor,
         }
         model.set_input(data)  # unpack data from data loader
-        model.test()           # run inference
-        visuals = model.get_current_visuals()  # get image results
-        visualizer.display_current_results(visuals, 0, opt.epoch, dataset=name.split(os.path.sep)[-1], 
-            save_results=True, count=i, name=img_name, add_image=False)
+        model.test(do_render=opt.do_visualize)  # run inference
+
+        if opt.do_visualize:
+            visuals = model.get_current_visuals()  # get image results
+            visualizer.display_current_results(
+                visuals,
+                0,
+                opt.epoch,
+                dataset=osp.basename(opt.img_folder),
+                save_results=True,
+                count=i,
+                name=img_name,
+                add_image=False,
+            )
+
+        model.save_mesh(
+            osp.join(save_dir, img_name + ".obj")
+        )  # save reconstruction meshes
+
+        model.save_coeff(
+            osp.join(save_dir, img_name + ".mat")
+        )  # save predicted coefficients
+
+    print(f"--> Results saved under dir: {save_dir}")
 
-        model.save_mesh(os.path.join(visualizer.img_dir, name.split(os.path.sep)[-1], 'epoch_%s_%06d'%(opt.epoch, 0),img_name+'.obj')) # save reconstruction meshes
-        model.save_coeff(os.path.join(visualizer.img_dir, name.split(os.path.sep)[-1], 'epoch_%s_%06d'%(opt.epoch, 0),img_name+'.mat')) # save predicted coefficients
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     opt = TestOptions().parse()  # get test options
-    main(0, opt,opt.img_folder)
-    
+    main(opt)
diff --git a/util/preprocess.py b/util/preprocess.py
index c516f45..bdc619d 100644
--- a/util/preprocess.py
+++ b/util/preprocess.py
@@ -196,6 +196,7 @@ def align_img(img, lm, lm3D, mask=None, target_size=224., rescale_factor=102.):
     # calculate translation and scale factors using 5 facial landmarks and standard landmarks of a 3D face
     t, s = POS(lm5p.transpose(), lm3D.transpose())
     s = rescale_factor/s
+    t = t.squeeze() # (2,1) -> (2,)
 
     # processing the image
     img_new, lm_new, mask_new = resize_n_crop_img(img, lm, t, s, target_size=target_size, mask=mask)