Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated code #4

Merged
merged 25 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 17 additions & 9 deletions recognizeapp/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from multiprocessing import Pool, cpu_count
from pathlib import Path
from threading import Lock

import os
import cv2
import face_recognition
import numpy as np
Expand All @@ -16,7 +16,7 @@
from face_recognition import compare_faces, face_encodings

from .models import Individual
from .utils import encode_faces, find_duplicates, get_face_detections
from .utils import encode_faces, find_duplicates, get_face_detections, generate_html_report

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -65,25 +65,21 @@ def generate_face_encoding(individual_id, tolerance=config.TOLERANCE):
start_time = time.time()
process = psutil.Process()
ram_before = process.memory_info().rss / (1024**2)
logger.info("Starting face recognition for individual")

try:
individual = Individual.objects.get(id=individual_id)
if not individual.photo or not default_storage.exists(individual.photo.path):
logger.error(f"Photo for individual ID {individual_id} is missing or invalid.")
return
image_path = individual.photo.path
image_path, regions = get_face_detections(image_path)

if not regions:
logger.error(f"No face detected in the image for individual ID {individual_id}.")
return
image = cv2.imread(image_path)
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
encodings = face_recognition.face_encodings(rgb_image, known_face_locations=regions)

if not encodings:
logger.error(f"No encodings generated for the image of individual ID {individual_id}.")
return

current_encoding = encodings[0]
Expand Down Expand Up @@ -131,14 +127,26 @@ def nightly_face_encoding_task(folder_path, threshold=config.TOLERANCE):
face_data[image_path] = encodings
else:
images_without_faces_count += 1
duplicates = find_duplicates(face_data, threshold)
model_choice = config.FACE_MODEL.lower()
metric = "euclidean" if model_choice == "dnn" else "cosine"
duplicates = find_duplicates(face_data, threshold, metric=metric)
save_encodings(face_data)

end_time = time.time()
ram_after = process.memory_info().rss / (1024**2)
elapsed_time = end_time - start_time
ram_used = ram_after - ram_before
output_file = os.path.join(folder_path, model_choice + "_duplicates_report.html")
generate_html_report(
duplicates,
output_file,
elapsed_time=elapsed_time,
ram_used=ram_used,
images_without_faces_count=images_without_faces_count
)

logger.info(
f"Nightly face encoding task completed in {elapsed_time:.2f} seconds, using approximately {ram_used} MB of RAM "
f"found {len(duplicates)} duplicates, {images_without_faces_count} images without faces"
f"Nightly face encoding task completed in {elapsed_time:.2f} seconds, using approximately {ram_used} MB of RAM. "
f"Found {len(duplicates)} duplicates and {images_without_faces_count} images without faces. "
f"Report generated: {output_file}"
)
118 changes: 93 additions & 25 deletions recognizeapp/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import os
import time
from multiprocessing import Pool, cpu_count
from pathlib import Path
Expand All @@ -11,9 +12,92 @@

logger = logging.getLogger(__name__)

from pathlib import Path

from constance import config
from insightface.app import FaceAnalysis
from pathlib import Path


def generate_html_report(duplicates, output_file, elapsed_time, ram_used, images_without_faces_count):
    """
    Generate an HTML report of duplicate image comparisons.

    Each entry of ``duplicates`` becomes one table row whose cells follow the
    header order: first image name, first image, second image, second image
    name, distance. A path that does not exist on disk is rendered as
    "N/A" / "Image unavailable" instead of a name / ``<img>`` tag.

    :param duplicates: Iterable of tuples (path1, path2, distance). The third
        element is printed with eight decimals under the "Distance" column.
        NOTE(review): for the "cosine" metric the producer appears to pass a
        similarity rather than a distance - confirm the column label.
    :param output_file: Path to the HTML file to save (written as UTF-8).
    :param elapsed_time: Number shown as "Total time taken", two decimals.
    :param ram_used: Number shown as "Total RAM used" in MB, two decimals.
    :param images_without_faces_count: Value shown as "Images without faces detected".
    """
    # Local import keeps this block self-contained; paths come from the file
    # system and may contain characters that are special in HTML.
    from html import escape

    # Literal CSS braces are doubled because the template goes through str.format.
    html_template = """
<!DOCTYPE html>
<html>
<head>
    <title>Duplicate Images Report</title>
    <style>
        table {{
            width: 100%;
            border-collapse: collapse;
        }}
        th, td {{
            border: 1px solid #ddd;
            padding: 8px;
            text-align: center;
        }}
        th {{
            background-color: #f4f4f4;
        }}
        img {{
            max-width: 100px;
            max-height: 100px;
        }}
    </style>
</head>
<body>
    <h1>Duplicate Images Report</h1>
    <table>
        <thead>
            <tr>
                <th>First Image Name</th>
                <th>First Image</th>
                <th>Second Image</th>
                <th>Second Image Name</th>
                <th>Distance</th>
            </tr>
        </thead>
        <tbody>
{rows}
        </tbody>
    </table>
    <h3>Task Metrics</h3>
    <ul>
        <li>Total time taken: {elapsed_time:.2f} seconds</li>
        <li>Total RAM used: {ram_used:.2f} MB</li>
        <li>Images without faces detected: {images_without_faces_count}</li>
    </ul>
</body>
</html>
"""

    def _cells(path, alt_text):
        """Return the (name cell, image cell) contents for one image path."""
        path = str(path)
        if not os.path.exists(path):
            return "N/A", "Image unavailable"
        image_tag = f'<img src="{escape(path, quote=True)}" alt="{alt_text}">'
        return escape(Path(path).name), image_tag

    row_chunks = []
    for path1, path2, distance in duplicates:
        name1, image1 = _cells(path1, "Image 1")
        name2, image2 = _cells(path2, "Image 2")
        # Cell order must mirror the <thead> above: name, image, image, name.
        row_chunks.append(f"""
            <tr>
                <td>{name1}</td>
                <td>{image1}</td>
                <td>{image2}</td>
                <td>{name2}</td>
                <td>{distance:.8f}</td>
            </tr>
""")

    html_content = html_template.format(
        rows="".join(row_chunks),
        elapsed_time=elapsed_time,
        ram_used=ram_used,
        images_without_faces_count=images_without_faces_count,
    )
    # Explicit encoding so non-ASCII file names do not depend on the locale.
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(html_content)
    print(f"Report generated: {output_file}")


def cosine_similarity(embedding1, embedding2):
Expand Down Expand Up @@ -45,16 +129,6 @@ def get_face_detections(image_path):
raise ValueError(f"Unsupported face model: {model_choice}")


def preprocess_image(image_path):
    """
    Read an image from disk and make sure it is not a bare 2-D array.

    :param image_path: Location of the image file handed to ``cv2.imread``.
    :return: The loaded array; a two-dimensional (single-channel) result is
        expanded with ``cv2.COLOR_GRAY2RGB``, anything else is returned as loaded.
    :raises ValueError: If ``cv2.imread`` returns ``None`` for the path.

    NOTE(review): multi-channel images are returned without any colour
    conversion; OpenCV documents ``imread`` as producing BGR order by
    default - confirm callers expect that.
    """
    loaded = cv2.imread(image_path)
    if loaded is None:
        raise ValueError(f"Unable to load image at path: {image_path}")

    is_single_channel = len(loaded.shape) == 2
    return cv2.cvtColor(loaded, cv2.COLOR_GRAY2RGB) if is_single_channel else loaded


def get_face_detections_dnn(image_path, prototxt=settings.PROTOTXT, caffemodel=settings.CAFFEMODEL):
try:
net = cv2.dnn.readNetFromCaffe(prototxt, caffemodel)
Expand Down Expand Up @@ -140,7 +214,7 @@ def find_duplicates(face_encodings, threshold=0.2, metric="cosine"):
:param face_encodings: Dictionary of {path: [embeddings]}.
:param threshold: Threshold for duplicate detection.
:param metric: Similarity metric ("cosine" or "euclidean").
:return: List of duplicate pairs.
:return: List of duplicate pairs with distances [(path1, path2, distance)].
"""
duplicates = []
encoding_list = list(face_encodings.items())
Expand All @@ -153,26 +227,20 @@ def find_duplicates(face_encodings, threshold=0.2, metric="cosine"):
for encoding2 in encodings2:
if metric == "cosine":
similarity = cosine_similarity(encoding1, encoding2)
if similarity >= 1 - threshold: # Adjust threshold for cosine
logger.info(
f"Duplicate found between {Path(path1).name} and {Path(path2).name} with similarity: {similarity}"
)
duplicates.append((path1, path2))
if similarity >= 1 - threshold:
duplicates.append((path1, path2, similarity))
break
elif metric == "euclidean":
distance = euclidean_distance(encoding1, encoding2)
print(f"Cosine distance: {distance}")
if distance <= threshold:
logger.info(
f"Duplicate found between {Path(path1).name} and {Path(path2).name} with distance: {distance}"
)
duplicates.append((path1, path2))
break
distances = face_recognition.face_distance(encodings2, encoding1)
valid_indices = np.where(distances <= threshold)[0]
for idx in valid_indices:
duplicates.append((path1, path2, distances[idx]))
else:
raise ValueError(f"Unsupported metric: {metric}")
return duplicates



def process_folder_parallel(folder_path, prototxt, caffemodel):
start_time = time.time()
image_paths = list(Path(folder_path).glob("*.jpg")) + list(Path(folder_path).glob("*.png"))
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ psutil
tzdata

# Development (optional, move to requirements-dev.txt if preferred)
ipdb
ipdb