Add gradio and fixes
aleksandr-mokrov committed Sep 26, 2024
1 parent f196227 commit e53ff6e
Showing 3 changed files with 95 additions and 21 deletions.
25 changes: 25 additions & 0 deletions notebooks/wav2lip/gradio_helper.py
@@ -0,0 +1,25 @@
from typing import Callable
import gradio as gr
import numpy as np


examples = [
    [
        "data_video_sun_5s.mp4",
        "data_audio_sun_5s.wav",
    ],
]


def make_demo(fn: Callable):
    demo = gr.Interface(
        fn=fn,
        inputs=[
            gr.Video(label="Face video"),
            gr.Audio(label="Audio", type="filepath"),
        ],
        outputs="video",
        examples=examples,
        allow_flagging="never",
    )
    return demo
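
A minimal usage sketch (not part of this commit) of how the helper above might be wired up, assuming a hypothetical run_inference callable that accepts the face-video path and the audio path and returns the path of the generated video:

from gradio_helper import make_demo


def run_inference(face_video: str, audio: str) -> str:
    # Hypothetical stand-in for the real OpenVINO pipeline (ov_inference):
    # the callable must return the path of the lip-synced video so the
    # "video" output component can display it.
    return "results/result_voice.mp4"


demo = make_demo(fn=run_inference)
demo.launch()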
5 changes: 2 additions & 3 deletions notebooks/wav2lip/ov_inference.py
@@ -87,7 +87,6 @@ def batch_detect(net, imgs, device):
    imgs = torch.from_numpy(imgs).float().to(device)
    BB, CC, HH, WW = imgs.size()

    print("imgs.shape: ", imgs.shape)
    results = net({"x": imgs.numpy()})
    olist = [torch.Tensor(results[i]) for i in range(12)]

@@ -334,8 +333,6 @@ def __init__(

    def get_detections_for_batch(self, images):
        images = images[..., ::-1]
        print("OVFaceAlignment get_detections_for_batch called!")
        print("images.shape: ", images.shape)
        detected_faces = self.face_detector.detect_from_batch(images.copy())
        results = []

@@ -553,3 +550,5 @@ def ov_inference(

    command = "ffmpeg -y -i {} -i {} -strict -2 -q:v 1 {}".format(audio_path, "Wav2Lip/temp/result.avi", outfile)
    subprocess.call(command, shell=platform.system() != "Windows")

    return outfile
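
The ffmpeg call above muxes the driving audio with the silent video written to Wav2Lip/temp/result.avi, and the newly added return of outfile is what lets the Gradio demo hand the result straight to its video output. A rough standalone restatement of that step (illustrative only; paths are placeholders), with the flags spelled out in comments:

import platform
import subprocess


def mux_audio(audio_path: str, video_path: str, outfile: str) -> str:
    # -y: overwrite the output file if it already exists
    # -i ... -i ...: take the driving audio and the silent Wav2Lip video as inputs
    # -strict -2: allow experimental codecs (historically needed for the native AAC encoder)
    # -q:v 1: highest variable video quality while re-encoding
    command = "ffmpeg -y -i {} -i {} -strict -2 -q:v 1 {}".format(audio_path, video_path, outfile)
    subprocess.call(command, shell=platform.system() != "Windows")
    return outfile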
86 changes: 68 additions & 18 deletions notebooks/wav2lip/wav2lip.ipynb
@@ -44,18 +44,6 @@
"[back to top ⬆️](#Table-of-contents:)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "57aaf7ac-b7b1-4d69-a27a-8fa1757cf330",
"metadata": {},
"outputs": [],
"source": [
"%pip install -q \"openvino>=2024.3.0\"\n",
"%pip install -q huggingface_hub \"torch>=2.1\" --extra-index-url https://download.pytorch.org/whl/cpu\n",
"%pip install -q \"librosa==0.9.2\" opencv-contrib-python opencv-python tqdm numba"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -68,8 +56,30 @@
"r = requests.get(\n",
" url=\"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py\",\n",
")\n",
"open(\"notebook_utils.py\", \"w\").write(r.text)\n",
"\n",
"from notebook_utils import pip_install\n",
"\n",
"pip_install(\"-q\", \"openvino>=2024.3.0\")\n",
"pip_install(\n",
" \"-q\",\n",
" \"huggingface_hub\",\n",
" \"torch>=2.1\",\n",
" \"gradio>=4.19\",\n",
" \"librosa==0.9.2\",\n",
" \"opencv-contrib-python\",\n",
" \"opencv-python\",\n",
" \"tqdm\",\n",
" \"numba\",\n",
" \"--extra-index-url\",\n",
" \"https://download.pytorch.org/whl/cpu\",\n",
")\n",
"\n",
"open(\"notebook_utils.py\", \"w\").write(r.text)"
"helpers = [\"gradio_helper.py\", \"ov_inference.py\", \"bbox.py\", \"ov_wav2lip_helper.py\"]\n",
"for helper_file in helpers:\n",
" if not Path(helper_file).exists():\n",
" r = requests.get(url=f\"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/wav2lip/{helper_file}\")\n",
" open(helper_file, \"w\").write(r.text)"
]
},
{
@@ -79,15 +89,17 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"from pathlib import Path\n",
"\n",
"\n",
"wav2lip_path = Path(\"Wav2Lip\")\n",
"\n",
"if not wav2lip_path.exists():\n",
" wav2lip_path.mkdir(parents=True, exist_ok=True)\n",
" !git clone https://github.com/Rudrabha/Wav2Lip\n",
" exit_code = os.system(\"git clone https://github.com/Rudrabha/Wav2Lip\")\n",
" if exit_code != 0:\n",
" raise Exception(\"Failed to clone the repository!\")\n",
"\n",
"sys.path.append(str(wav2lip_path))"
]
@@ -205,7 +217,7 @@
"id": "0933d0af-4934-4348-8f33-a989e7f2ae74",
"metadata": {},
"source": [
"Here is an example to compare original video and generated video after the Wav2Lip pipeline:"
"Here is an example to compare the original video and the generated video after the Wav2Lip pipeline:"
]
},
{
@@ -217,7 +229,15 @@
"source": [
"from IPython.display import Video\n",
"\n",
"Video(\"data_video_sun_5s.mp4\")"
"Video(\"data_video_sun_5s.mp4\", embed=True)"
]
},
{
"cell_type": "markdown",
"id": "8364cd95",
"metadata": {},
"source": [
"The generated video:"
]
},
{
@@ -227,7 +247,37 @@
"metadata": {},
"outputs": [],
"source": [
"Video(\"results/result_voice.mp4\")"
"Video(\"results/result_voice.mp4\", embed=True)"
]
},
{
"cell_type": "markdown",
"id": "41a4fefe",
"metadata": {},
"source": [
"## Interactive inference\n",
"[back to top ⬆️](#Table-of-contents:)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "063d1573",
"metadata": {},
"outputs": [],
"source": [
"from gradio_helper import make_demo\n",
"\n",
"\n",
"demo = make_demo(fn=ov_inference)\n",
"\n",
"try:\n",
" demo.queue().launch(debug=True)\n",
"except Exception:\n",
" demo.queue().launch(debug=True, share=True)\n",
"# if you are launching remotely, specify server_name and server_port\n",
"# demo.launch(server_name='your server name', server_port='server port in int')\n",
"# Read more in the docs: https://gradio.app/docs/\""
]
}
],
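
Condensed, the notebook changes amount to roughly the following flow (a sketch for orientation only; it mirrors the cells above rather than adding anything new, and the ov_inference import is assumed from the downloaded ov_inference.py helper):

import requests
from pathlib import Path

# Fetch the shared notebook utilities, then install dependencies through its
# pip_install helper instead of %pip cell magics.
r = requests.get("https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py")
Path("notebook_utils.py").write_text(r.text)

from notebook_utils import pip_install

pip_install("-q", "openvino>=2024.3.0")
pip_install("-q", "gradio>=4.19", "torch>=2.1", "--extra-index-url", "https://download.pytorch.org/whl/cpu")

# Build and launch the interactive demo around the OpenVINO inference function.
from gradio_helper import make_demo
from ov_inference import ov_inference  # assumed import path for the helper shown above

demo = make_demo(fn=ov_inference)
demo.queue().launch(debug=True)  # fall back to share=True if the local URL is unreachable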
