Commit

Draw bounding box for detected face
rohhn authored and rohhn committed Feb 26, 2024
1 parent 3e0ed5a commit ded86b4
Showing 5 changed files with 132 additions and 39 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/github_deploy.yml
@@ -39,11 +39,13 @@ jobs:
PORT: ${{ secrets.PORT }}
key: ${{ secrets.SSHKEY }}
script: |
# Update the repository with latest code
cd Facial-Emotion-Recognition
git pull origin main
# Install Python dependencies
/home/deploy/miniconda3/envs/fer_env/bin/python -m pip install -r requirements.txt
$HOME/miniconda3/envs/fer_env/bin/python -m pip install -r requirements.txt
# Build front-end
export NVM_DIR=~/.nvm
16 changes: 14 additions & 2 deletions data.py
@@ -105,6 +105,14 @@ def __getitem__(self, idx) -> tuple[dict, torch.Tensor]:


def cv2_face_segmentation(img, padding=25, convert_grayscale=False):
"""
Detect and crop image to only include face.
:param img:
:param padding:
:param convert_grayscale:
:return: Returns face segmented image along with bounding if found.
"""

face_clf = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

@@ -116,9 +124,13 @@ def cv2_face_segmentation(img, padding=25, convert_grayscale=False):
if len(face_op) > 0: # face detected
x, y, w, h = face_op[0]

face = img[y - padding: y + h + padding, x - padding: x + w + padding]
# clamp with max(0, ...) so the padding does not produce negative indices
face = img[max(0, y - padding): y + h + padding, max(0, x - padding): x + w + padding]
# face = cv2.resize(face, (48, 48))

face_bounding = (x, y, w, h)
else:
face = img
face_bounding = None

return face
return face, face_bounding
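
For context, a minimal usage sketch of the updated helper; the sample image path, grayscale loading, and output file are assumptions for illustration, not part of the commit:

import cv2

from data import cv2_face_segmentation

# Hypothetical test image, loaded as grayscale to match how predict.py feeds the helper
img = cv2.imread("sample.jpg", cv2.IMREAD_GRAYSCALE)

face, face_bounding = cv2_face_segmentation(img, padding=25)

if face_bounding is not None:
    x, y, w, h = face_bounding
    # Draw the detected box on a copy of the original image for visual inspection
    annotated = cv2.rectangle(img.copy(), (x, y), (x + w, y + h), 255, 2)
    cv2.imwrite("annotated.jpg", annotated)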
7 changes: 4 additions & 3 deletions predict.py
@@ -29,14 +29,15 @@ def make_prediction(model_name, img):

img_tensor = Image.open(img).convert('L') # read image

img_tensor = cv2_face_segmentation(np.array(img_tensor, dtype='uint8')) # segment face if found
# segment face if found
segmented_face, segmented_bounds = cv2_face_segmentation(np.array(img_tensor, dtype='uint8'))

img_tensor = IMAGE_TRANSFORMER_TORCH(img_tensor)
img_tensor = IMAGE_TRANSFORMER_TORCH(segmented_face)

y_pred = model(**{"x": img_tensor.unsqueeze(0)})
y_pred = target_encoder.inverse_transform(F.softmax(y_pred, dim=1).argmax(-1).cpu().numpy())[0]

return y_pred
return y_pred, segmented_bounds


if __name__ == "__main__":
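A hedged sketch of how a caller would consume the new two-value return; the model name and image path below are illustrative placeholders, not names taken from this repository:

from predict import make_prediction

# "some_model" and "face.jpg" are placeholders; the real model name comes from configuration
emotion, bounds = make_prediction("some_model", "face.jpg")

print("Predicted emotion:", emotion)
if bounds is not None:
    x, y, w, h = bounds
    print(f"Face bounding box: x={x}, y={y}, w={w}, h={h}")
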
21 changes: 18 additions & 3 deletions server.py
@@ -31,14 +31,29 @@ def home(path):

@app.route("/api/predict", methods=['POST'])
def get_emotion_prediction():

response = {}

data = request.data.decode('utf-8')
data = json.loads(data)['data']
img = re.sub('^data:image/.+;base64,', '', data)
img = BytesIO(b64decode(img))
emotion = make_prediction(MODEL_NAME, img)
emotion, segmentation_bounds = make_prediction(MODEL_NAME, img)

response['emotion'] = emotion

if segmentation_bounds is not None:
response['segmentation_bounds'] = {
'x': int(segmentation_bounds[0]),
'y': int(segmentation_bounds[1]),
'w': int(segmentation_bounds[2]),
'h': int(segmentation_bounds[3])
}
else:
response['segmentation_bounds'] = None

response = make_response(emotion, 200)
response.mimetype = "text/plain"
response = make_response(response, 200)
response.mimetype = "application/json"
return response


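To illustrate the updated /api/predict contract, a hedged client-side sketch; the host, port, test image, and the sample values in the expected response are assumptions:

import json
from base64 import b64encode

import requests

with open("face.jpg", "rb") as f:  # hypothetical test image
    data_url = "data:image/jpeg;base64," + b64encode(f.read()).decode("utf-8")

resp = requests.post(
    "http://localhost:5000/api/predict",  # assumed local dev host/port
    data=json.dumps({"data": data_url}),
    headers={"Content-Type": "application/json"},
)

# Expected shape after this change, e.g.
# {"emotion": "happy", "segmentation_bounds": {"x": 120, "y": 80, "w": 200, "h": 200}}
print(resp.json())
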
123 changes: 93 additions & 30 deletions svelte_client/src/App.svelte
@@ -14,25 +14,48 @@
</style>

<script>
let videoSource = null;
let any_btn_clicked = false;
let video = null;
let canvas = null;
// let img_inp_canvas = document.getElementById("img_inp_canvas");
let emotion = "";
let height = 0;
let width = 0;
// let height = 0;
// let width = 0;
async function getEmotion(data) {
async function getEmotion(data, mirrored=false) {
fetch('./api/predict', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({data})
}).then(response => response.text()).then(data => {
emotion = data;
}).then(response => response.json()).then(data => {
console.log(data);
if (data.segmentation_bounds) {
let bounds = data.segmentation_bounds;
let img_inp_canvas = document.getElementById("img_inp_canvas");
const context = img_inp_canvas.getContext("2d");
if (mirrored) {
context.translate(img_inp_canvas.width, 0);
context.scale(-1,1);
}
context.beginPath();
context.rect(bounds.x, bounds.y, bounds.w, bounds.h);
context.lineWidth = 4;
context.strokeStyle = 'red';
context.stroke();
}
emotion = data.emotion;
}).catch((error) => {
console.error('Error:', error);
@@ -57,9 +80,10 @@
let cam_d = document.getElementById("camera_d");
cam_d.style.display = "block";
if (canvas) {
canvas.setAttribute('height', 0);
canvas.setAttribute('width', 0);
let img_inp_canvas = document.getElementById("img_inp_canvas");
if (img_inp_canvas) {
img_inp_canvas.setAttribute('height', 0);
img_inp_canvas.setAttribute('width', 0);
}
// File Upload reset
@@ -97,24 +121,27 @@
function takeSnapshot() {
video = document.getElementById("video");
height = video.videoHeight;
width = video.videoWidth;
// height = video.videoHeight;
// width = video.videoWidth;
let cam_d = document.getElementById("camera_d");
cam_d.style.display = "none";
canvas = document.getElementById("img_canvas");
canvas.setAttribute('height', height);
canvas.setAttribute('width', width);
const context = canvas.getContext("2d");
context.translate(canvas.width, 0);
context.scale(-1,1);
// img_inp_canvas = document.getElementById("img_inp_canvas");
// img_inp_canvas.setAttribute('height', height);
// img_inp_canvas.setAttribute('width', width);
//
// const context = img_inp_canvas.getContext("2d");
//
// context.translate(img_inp_canvas.width, 0);
// context.scale(-1,1);
//
// context.drawImage(video, 0, 0, width, height);
context.drawImage(video, 0, 0, width, height);
const data = displayImage(video.videoHeight, video.videoWidth, video, true);
const data = canvas.toDataURL("image/png");
// let img_inp_canvas = document.getElementById("img_inp_canvas");
// const data = img_inp_canvas.toDataURL("image/png");
// Stop video after image capture
if (video) {
@@ -123,7 +150,7 @@
video.pause();
}
getEmotion(data);
getEmotion(data, true);
}
@@ -141,16 +168,48 @@
const uploadFile =(e) => {
let img = e.target.files[0];
let img_file = e.target.files[0];
let reader = new FileReader();
reader.readAsDataURL(img);
reader.readAsDataURL(img_file);
reader.onloadend = e => {
let image = new Image();
image.src = e.target.result;
console.log("Image height: " + image.height);
console.log("Image width: " + image.width);
image.onload = ev => {
const data = displayImage(image.height, image.width, image);
getEmotion(data);
}
reader.onload = e => {
getEmotion(e.target.result);
};
}
function displayImage(height, width, data, mirror=false) {
let img_inp_canvas = document.getElementById("img_inp_canvas");
console.log("displayImage height: " + height);
console.log("displayImage width: " + width);
img_inp_canvas.setAttribute('height', height);
img_inp_canvas.setAttribute('width', width);
const context = img_inp_canvas.getContext("2d");
if (mirror) {
context.translate(img_inp_canvas.width, 0);
context.scale(-1,1);
}
context.drawImage(data, 0, 0, width, height);
return img_inp_canvas.toDataURL("image/png");
}
</script>

<div class="row align-items-start">
@@ -183,16 +242,20 @@
<button id="img_snap_b" on:click={takeSnapshot} class="btn btn-sm btn-secondary">Take Photo</button>
</div>

<div id="photo_d" class="m-5 align-items-center text-center">
<!-- This is where the image will be displayed after capture-->
<canvas id="img_canvas"></canvas>
</div>
<!-- <div id="photo_d" class="m-5 align-items-center text-center">-->
<!-- &lt;!&ndash; This is where the image will be displayed after capture&ndash;&gt;-->
<!-- <canvas id="img_inp_canvas"></canvas>-->
<!-- </div>-->
</div>

<div id="img_upl_d" class="container-fluid align-items-center text-center">
<input id="upl_img" type="file" accept="image/*" on:change={(e)=>uploadFile(e)} class="form-control my-3 bg-dark text-light"/>
</div>

<canvas id="img_inp_canvas"></canvas>

</div>

</div>

<div class="col-md-12 col-lg-4 p-2">
