diff --git a/.gitignore b/.gitignore
index ae1f867..329e41d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -76,3 +76,4 @@ dataset
*.pyc
flask_monitoringdashboard.db
uploads
+conf
diff --git a/api/Api.py b/api/Api.py
index ad782af..0336a02 100644
--- a/api/Api.py
+++ b/api/Api.py
@@ -2,15 +2,13 @@
"""
Custom functions that will be wrapped to be HTTP compliant
"""
-import os
-import pickle
+
import time
-import zipfile
+from datetime import datetime
from logging import getLogger
-from os.path import join as path_join
from datastructure.Response import Response
-from utils.util import print_prediction_on_image, random_string, remove_dir, unzip_data
+from utils.util import print_prediction_on_image, random_string, retrieve_dataset
log = getLogger()
@@ -24,21 +22,22 @@ def predict_image(img_path, clf, PREDICTION_PATH):
:return: JSON-serializable Response dictionary
"""
response = Response()
- log.debug("predict_image | Predicting {}".format(img_path))
if clf is None:
+ log.error("predict_image | FATAL | Classifier is None!")
prediction = None
else:
+ log.debug("predict_image | Predicting {}".format(img_path))
prediction = clf.predict(img_path)
- log.debug("predict_image | Image analyzed!")
+ log.debug("predict_image | Result: {}".format(prediction))
# Manage success
- if prediction is not None and isinstance(prediction, list) and len(prediction) == 1:
+ if isinstance(prediction, dict) and isinstance(prediction.get("predictions"), list):
img_name = random_string() + ".png"
- log.debug("predict_image | Generated a random name: {}".format(img_path))
+ log.debug("predict_image | Generated a random name: {}".format(img_name))
log.debug("predict_image | Visualizing face recognition ...")
- print_prediction_on_image(img_path, prediction, PREDICTION_PATH, img_name)
- response.status = "OK"
- response.description = img_name
- response.data = prediction[0][0]
+ print_prediction_on_image(img_path, prediction["predictions"], PREDICTION_PATH, img_name)
+ return Response(status="OK", description=img_name, data={"name": prediction["predictions"][0][0],
+ "distance": prediction[
+ "score"]}).__dict__
# Manage error
elif prediction is None:
@@ -61,7 +60,7 @@ def predict_image(img_path, clf, PREDICTION_PATH):
# TODO: Add custom algorithm that "try to understand" who has never been recognized
response.error = "FACE_NOT_RECOGNIZED"
response.description = "Seems that this face is related to nobody that i've seen before ..."
- log.error("predict_image | Seems that this face is lated to nobody that i've seen before ...")
+ log.error("predict_image | Seems that this face is related to nobody that i've seen before ...")
elif prediction == -2:
response.error = "FILE_NOT_VALID"
@@ -79,23 +78,23 @@ def train_network(folder_uncompress, zip_file, clf):
:param clf:
:return:
"""
- log.debug("train_network | uncompressing zip file ...")
- folder_name = path_join(folder_uncompress, random_string())
- zip_ref = zipfile.ZipFile(zip_file)
- zip_ref.extractall(folder_name)
- zip_ref.close()
- log.debug("train_network | zip file uncompressed!")
- clf.init_peoples_list(peoples_path=folder_name)
- dataset = clf.init_dataset()
- neural_model_file = clf.train(dataset["X"], dataset["Y"])
- log.debug("train_network | Removing unzipped files")
- remove_dir(folder_name)
- response = Response()
- response.status = "OK"
- response.data = neural_model_file
- response.description = "Model succesfully trained!"
- return response.__dict__
+ log.debug("train_network | Starting training phase ...")
+ dataset = retrieve_dataset(folder_uncompress, zip_file, clf)
+
+ if dataset is None:
+ return Response(error="ERROR DURING LOADING DAT", description="Seems that the dataset is not valid").__dict__
+
+ else:
+ timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+ neural_model_file, elapsed_time = clf.train(dataset["X"], dataset["Y"], timestamp)
+
+ response = Response(status="OK", data=neural_model_file)
+ response.description = "Model succesfully trained! | {}".format(
+ time.strftime("%H:%M:%S.%f", time.gmtime(elapsed_time)))
+ log.debug("train_network | Tuning phase finihsed! | {}".format(response.description))
+
+ return response.__dict__
def tune_network(folder_uncompress, zip_file, clf):
@@ -106,50 +105,19 @@ def tune_network(folder_uncompress, zip_file, clf):
:param clf:
:return:
"""
- log.debug("tune_network | uncompressing zip file ...")
- check = verify_extension(zip_file.filename)
- if check == "zip": # Image provided
- folder_name = unzip_data(folder_uncompress, zip_file)
- log.debug("tune_network | zip file uncompressed!")
- clf.init_peoples_list(peoples_path=folder_name)
- dataset = clf.init_dataset()
- elif check == "dat":
- dataset = pickle.load(zip_file)
+ log.debug("tune_network | Starting tuning phase ...")
+ dataset = retrieve_dataset(folder_uncompress, zip_file, clf)
+
+ if dataset is None:
+ return Response(error="ERROR DURING LOADING DAT", description="Seems that the dataset is not valid").__dict__
+
else:
- dataset = None
-
- if dataset is not None:
- start_time = time.time()
- neural_model_file = clf.tuning(dataset["X"], dataset["Y"])
- elapsed_time = time.time() - start_time
-
- log.debug("tune_network | Removing unzipped files")
- if check == "zip":
- # TODO: Refactor this method :/
- remove_dir(folder_name)
- response = Response()
- response.status = "OK"
- response.data = neural_model_file
+ timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+ neural_model_file, elapsed_time = clf.tuning(dataset["X"], dataset["Y"], timestamp)
+
+ response = Response(status="OK", data=neural_model_file)
response.description = "Model succesfully trained! | {}".format(
time.strftime("%H:%M:%S.%f", time.gmtime(elapsed_time)))
- else:
- response = Response()
- response.error = "ERROR DURING LOADING DAT"
- return response.__dict__
+ log.debug("train_network | Tuning phase finihsed! | {}".format(response.description))
-
-def verify_extension(file):
- """
- Wrapper for validate file
- :param file:
- :return:
- """
- extension = os.path.splitext(file)[1]
- log.debug("verify_extension | File: {} | Ext: {}".format(file, extension))
- if extension == ".zip":
- # In this case we have to analyze the photos
- return "zip"
- elif extension == ".dat":
- # Photos have been alredy analyzed, dataset is ready!
- return "dat"
- return None
+ return response.__dict__
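
Note on the new response contract: Classifier.predict now returns a dict instead of a bare
list, and predict_image forwards both the best match and its distance. A minimal sketch of
the shape a client should expect (field values are illustrative, not from this PR):

    # prediction, as returned by Classifier.predict:
    #   {"predictions": [(name, (top, right, bottom, left)), ...], "score": min_distance}
    # predict_image then serializes a Response like:
    response = {
        "status": "OK",
        "description": "f3k9x2ab.png",                   # random name of the annotated image
        "data": {"name": "john_doe", "distance": 0.41},  # lower distance => closer match
        "error": None,
        "date": "2019-05-22 17:02:46.123456",
    }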
diff --git a/api/templates/train.html b/api/templates/train.html
index 619e7be..ab9a3e7 100644
--- a/api/templates/train.html
+++ b/api/templates/train.html
@@ -10,6 +10,7 @@
Upload a zip file with all the people that you want to save!
{% with messages = get_flashed_messages() %}
{% if messages %}
diff --git a/api/templates/upload.html b/api/templates/upload.html
index a38c97d..0fc7ce8 100644
--- a/api/templates/upload.html
+++ b/api/templates/upload.html
@@ -10,6 +10,7 @@ Upload new File
{% with messages = get_flashed_messages() %}
{% if messages %}
diff --git a/conf/dashboard.ini b/conf/dashboard.ini
index 33dd9e4..00b35e2 100644
--- a/conf/dashboard.ini
+++ b/conf/dashboard.ini
@@ -15,4 +15,4 @@ GUEST_PASSWORD = ['guest', 'password']
[database]
TABLE_PREFIX = fmd
-DATABASE = sqlite:///log/flask_monitoringdashboard.db
\ No newline at end of file
+DATABASE = sqlite:///conf/flask_monitoringdashboard.db
\ No newline at end of file
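
The dashboard database now lives under conf/ (the directory newly added to .gitignore).
For reference, a sketch of how this ini is typically bound with flask_monitoringdashboard's
documented API; the actual binding is assumed to live in main.py, outside the hunks shown:

    import flask_monitoringdashboard as dashboard
    from flask import Flask

    app = Flask(__name__)
    dashboard.config.init_from(file="conf/dashboard.ini")  # picks up [database] DATABASE
    dashboard.bind(app)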
diff --git a/conf/ssl/localhost.crt b/conf/ssl/localhost.crt
new file mode 100644
index 0000000..41bbfc1
--- /dev/null
+++ b/conf/ssl/localhost.crt
@@ -0,0 +1,18 @@
+-----BEGIN CERTIFICATE-----
+MIIC5TCCAc2gAwIBAgIJAJPNi4jjHSy3MA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV
+BAMMCWxvY2FsaG9zdDAeFw0xOTA1MjIxNjMxMDJaFw0xOTA2MjExNjMxMDJaMBQx
+EjAQBgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
+ggEBANskAjz6LENzhnpGkyJHztmIf3Pno8h/k70fjEI13osonv7W5alA3vgQ9az3
+ivD7cp6YPXkv5lK+mTx6dKccrdAPQLWQDZBqaotasTX1hBxaqILqNvh25QY5gjbz
+jdfK27E+82QDZUzdYsFDyZQ4ORQ8qVUz0k42ulS4WMpluBEaLk8rHkDIyZSM4psv
+EK+IcI7mN8z1YI8mS3jOW2ouQQVwRb60ZOe4b9wcFPYR7+NdNQM7rCR9UQU9ymjC
+U4VmTUrIonmXML1gRPHs0Z694AsQe+Mr5O3OxeYhbsFb7d1Ry4WcZiPM+ugJJiNS
+Fkpf4SDT7nHAcHbqFzibpSJPP7cCAwEAAaM6MDgwFAYDVR0RBA0wC4IJbG9jYWxo
+b3N0MAsGA1UdDwQEAwIHgDATBgNVHSUEDDAKBggrBgEFBQcDATANBgkqhkiG9w0B
+AQsFAAOCAQEADz/YL1DOV8n/15/ApaWCQhzcFGOPSv1DcnI6sY46I4zRKyG9yuHE
+N11XqkCmQuKF9UnowhFFMLIfxzlqkUTWjKtaWKasnOdAd/LOqO9Eh4cnsyC4yEBB
+aMO00YdUAdFb0eV3bR/UY3srji6LjRy9215Ad3eXYxjdTTB/btIsN75XTTsZLnbR
+F0V3TRkZlxCQXcYh/lpfPHG9xWLxPZ8g8e+hrwJhsmW3a0BMzYNF8nJdzhZi7Dls
+ldR2V8IqVP/Ip6dpsygn/CzbDlZVcZVV4jqhec8bbijsXdSizwm8bfc57TssRA1C
+HlvLlwAsoiDj6PZ4PwRCvc5k6ydDbXNftw==
+-----END CERTIFICATE-----
diff --git a/conf/ssl/localhost.key b/conf/ssl/localhost.key
new file mode 100644
index 0000000..8753f14
--- /dev/null
+++ b/conf/ssl/localhost.key
@@ -0,0 +1,28 @@
+-----BEGIN PRIVATE KEY-----
+MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDbJAI8+ixDc4Z6
+RpMiR87ZiH9z56PIf5O9H4xCNd6LKJ7+1uWpQN74EPWs94rw+3KemD15L+ZSvpk8
+enSnHK3QD0C1kA2QamqLWrE19YQcWqiC6jb4duUGOYI2843XytuxPvNkA2VM3WLB
+Q8mUODkUPKlVM9JONrpUuFjKZbgRGi5PKx5AyMmUjOKbLxCviHCO5jfM9WCPJkt4
+zltqLkEFcEW+tGTnuG/cHBT2Ee/jXTUDO6wkfVEFPcpowlOFZk1KyKJ5lzC9YETx
+7NGeveALEHvjK+TtzsXmIW7BW+3dUcuFnGYjzProCSYjUhZKX+Eg0+5xwHB26hc4
+m6UiTz+3AgMBAAECggEBAIMpqFVK/9dXfDQPrd0k0cAOHQsIqFVHVuwpx8+RYqQ0
+KgYqJcgKVepwbDuc5oKaXd5jDNhOPTNldV5nhQ7I8ZfIqViC4juAFklWfR7o1qwJ
+7zZ8bW6F60qwfSna2RlCCACsxw0joyxAje1TX4HhrPhZ3phqrgO2agxvUmXCQEur
+HmZXEXP2grR0XdWiXazWI5jlG0MsX6J+qsMHFCApGR/9KcsB8Lwe8RAiszc1SPPp
+TNGZopojkH1GK8DAXMFvODmTdwlStpDh1g711cX5KoINKlX5ppJjsoqcGOLhbEee
+uCsfckXGrHJm51GbJePPZ16x7Op/BUdyKjYvSL31fuECgYEA7mumpBMDq4NQ1gju
+n7kmU75k2ddrXSycvFJ5yxKCCec+hdJBtKm6WrGGD+uchjxFhZP37JRTimV/F5RL
+Ps6xVwgwX3DtSLpwyOelLR8Zo2wT1cDFKp6EfD4ltDVbTsOW2X8yyKeJHac23/wT
+HIRyv+8DUUo0GU4JMl4VAW9PwWkCgYEA60xv/8c0AfjOZIGlxdk2RCKWnZas6Rdk
+STChPXoIOj5T75B7OfxJukY4R8d7jzXOwX5WX3wS/rtEuom5tFW5+fLl16HWUyz5
+pXa7/QW5dQa7GLB3K6HBKhfTm7/fDkaFKDu/c+sF46RWoP7vxqct1ir0L0Z1BFnk
+/qSpSbhBtB8CgYA1/ajR9QBawbT3kzQ+dVYplq8N6cuFYQnpV5//DaTnCzfMZC2+
+9MSfrx3V0xwyBcoUksqNB5XXfF6If2t+wJ3GQLN7mX4Sfy31QQfVrPpIWLwxJqM/
+oIAOBqDRK1gPARnTDQv6Bn51eZ1ioZnOVmwJ7N1KdkxQAqzwe/+zwHpGKQKBgQCH
+e/Pha2pe2Ey/QoeZbID6qo/fHatia72rBv1Q0Lt8Dfd2sdLCiKpLP7OYYRycUXdD
+ouNJB8BIPLxOTI9JbzMu4NXHW8B1FCiLRdrozisDX2TLypBT50e6XQ3TWJ+vMJvr
+lruem21ArpfTC/g0gn66GvGPZxpp7vkURuvTLu1mMQKBgQDI0yvH+FqxiXmnZjY6
+4rqoq7shenmrHxbywHOCJbXMVlFMhFovZUCKZtJ0G14e3yGystA3wkNj8CJtBYj4
+/R1ucQIXBeiGJHKY9lVuRuJI258jUrIQ8z6hNv8zXVW/2oM0R58dJXL2UJVFHDpU
+ETwkYWrY5QeX4J4mxX2AfsrZ8Q==
+-----END PRIVATE KEY-----
diff --git a/conf/test.json b/conf/test.json
index 5fcf24f..179c272 100644
--- a/conf/test.json
+++ b/conf/test.json
@@ -1,6 +1,6 @@
{
"PyRecognizer": {
- "Version": "0.0.1",
+ "Version": "0.1.2",
"temp_upload_training": "uploads/training/",
"temp_upload_predict": "uploads/predict/",
"temp_upload": "uploads/upload"
@@ -11,21 +11,26 @@
"level": "debug"
},
"network": {
- "host": "locahost",
+ "host": "0.0.0.0",
"port": 11001,
"templates": "api/templates/",
"SSL": {
- "enabled": false,
- "cert.pub": "/dev/null",
- "cert.priv": "/dev/null"
+ "enabled": true,
+ "cert.pub": "conf/ssl/localhost.crt",
+ "cert.priv": "conf/ssl/localhost.key"
}
},
"classifier": {
"trainin_dir": "dataset/images/",
"model_path": "dataset/model/",
- "model": "model-20190518_191827.clf",
- "n_neighbors": "",
- "knn_algo": ""
+ "timestamp": "20190522_170246",
+ "params": {
+ "algorithm": "ball_tree",
+ "metric": "minkowski",
+ "n_neighbors": 80,
+ "p": 2,
+ "weights": "distance"
+ }
},
"data": {
"test_data": "/tmp/test_data/"
diff --git a/dataset/model/model-20190519_210950.json b/dataset/model/20190522_170246/model.json
similarity index 56%
rename from dataset/model/model-20190519_210950.json
rename to dataset/model/20190522_170246/model.json
index 2cee954..b313bc7 100644
--- a/dataset/model/model-20190519_210950.json
+++ b/dataset/model/20190522_170246/model.json
@@ -1,9 +1,9 @@
{
- "classifier_file": "dataset/model/model-20190519_210950",
+ "classifier_file": "20190522_170246/model.clf",
"params": {
"algorithm": "ball_tree",
"metric": "minkowski",
- "n_neighbors": 78,
+ "n_neighbors": 80,
"p": 2,
"weights": "distance"
}
diff --git a/dataset/model/model-20190519_210950.clf b/dataset/model/model-20190519_210950.clf
deleted file mode 100644
index d0198cd..0000000
Binary files a/dataset/model/model-20190519_210950.clf and /dev/null differ
diff --git a/datastructure/Classifier.py b/datastructure/Classifier.py
index 5841615..1e01c89 100644
--- a/datastructure/Classifier.py
+++ b/datastructure/Classifier.py
@@ -6,9 +6,8 @@
import logging
import os
import pickle
-from datetime import datetime
+import time
from math import sqrt
-from multiprocessing.pool import ThreadPool
from pprint import pformat
import face_recognition
@@ -16,6 +15,7 @@
precision_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
+from tqdm import tqdm
from datastructure.Person import Person
from utils.util import dump_dataset
@@ -32,19 +32,22 @@ def __init__(self):
self.training_dir = None
self.model_path = None
self.n_neighbors = None
- self.knn_algo = None
+ self.algorithm = None
+ self.metric = None
+ self.p = None
+ self.weights = None
self.peoples_list = []
self.classifier = None
- def init_knn_algo(self, knn_algo):
+ def init_algorithm(self, algorithm):
"""
- Initialize the knn_algorithm for the neural network. If not provided the 'ball_tree' will
+ Initialize the algorithm for the neural network. If not provided, 'ball_tree' will
be used as default
- :param knn_algo: 'ball_tree' as default
+ :param algorithm: 'ball_tree' as default
"""
- log.debug("init_knn_algo | Initializing knn algorithm ...")
- if self.knn_algo is None:
- self.knn_algo = knn_algo
+ log.debug("init_algorithm | Initializing knn algorithm ...")
+ if self.algorithm is None:
+ self.algorithm = algorithm
def init_n_neighbors(self, X_len=10):
"""
@@ -63,45 +66,51 @@ def init_classifier(self):
"""
if self.classifier is None:
log.debug("init_classifier | START!")
- if self.knn_algo is not None and self.n_neighbors is not None:
+ if self.algorithm is not None and self.n_neighbors is not None:
log.debug("init_classifier | Initializing a new classifier ... | {0}".format(pformat(self.__dict__)))
self.classifier = KNeighborsClassifier(
- n_neighbors=self.n_neighbors, algorithm=self.knn_algo, weights='distance')
+ n_neighbors=self.n_neighbors, algorithm=self.algorithm, weights='distance')
else:
- log.error("init_classifier | Mandatory parameter not provided :/")
- self.classifier = None
+ log.error("init_classifier | Mandatory parameter not provided | Init a new KNN Classifier")
+ self.classifier = KNeighborsClassifier()
- def init_specs(self, X_len, knn_algo='ball_tree'):
+ def load_classifier_from_file(self, timestamp):
"""
- Initalize the classifier
- :param knn_algo:
- :param X_len:
- """
- log.debug("init_specs | Init knn algorithm ...")
- self.init_knn_algo(knn_algo)
- self.init_n_neighbors(X_len)
- self.init_classifier()
-
- def load_classifier_from_file(self, classifier_file):
- """
- Initalize the classifier from file
- :param classifier_file:
+ Initialize the classifier from file.
+ The given timestamp is the name of the directory that contains the classifier to load.
+
+ The tree structure of the model folder will look like this:
+
+ Structure:
+ model/
+ ├── <20190520_095119>/ --> Timestamp in which the model was created
+ │ ├── model.dat --> Dataset generated by encoding the faces and pickling them
+ │ ├── model.clf --> Classifier delegated to recognize a given face
+ │ ├── model.json --> Hyperparameters related to the current classifier
+ ├── <20190519_210950>/
+ │ ├── model.dat
+ │ ├── model.clf
+ │ ├── model.json
+ └── ...
+
+ :param timestamp:
:return:
"""
- log.debug("load_classifier_from_file | Loading classifier from file ... | File: {}".format(classifier_file))
+ log.debug("load_classifier_from_file | Loading classifier from file ... | File: {}".format(timestamp))
# Load a trained KNN model (if one was passed in)
err = None
if self.classifier is None:
if self.model_path is None or not os.path.isdir(self.model_path):
raise Exception("Model folder not provided!")
- log.debug("load_classifier_from_file | Loading classifier from file ...")
- log.debug("load_classifier_from_file | Path {} exist ...".format(self.model_path))
- filename = os.path.join(self.model_path, classifier_file)
+ # Adding the conventional name used for the classifier -> 'model.clf'
+ filename = os.path.join(self.model_path, timestamp, "model.clf")
+ log.debug("load_classifier_from_file | Loading classifier from file: {}".format(filename))
if os.path.isfile(filename):
- log.debug("load_classifier_from_file | File {} exist ...".format(filename))
+ log.debug("load_classifier_from_file | File {} exist!".format(filename))
with open(filename, 'rb') as f:
self.classifier = pickle.load(f)
+ log.debug("load_classifier_from_file | Classifier loaded!")
else:
err = "load_classifier_from_file | FATAL | File {} DOES NOT EXIST ...".format(filename)
else:
@@ -113,32 +122,44 @@ def load_classifier_from_file(self, classifier_file):
self.classifier = None
return
- def train(self, X, Y):
+ def train(self, X, Y, timestamp):
"""
Train a new model by the given data [X] related to the given target [Y]
:param X:
:param Y:
+ :param timestamp:
"""
log.debug("train | START")
- if self.classifier is not None:
- log.debug("train | Training ...")
- X_train, x_test, Y_train, y_test = train_test_split(X, Y, test_size=0.25)
- self.classifier.fit(X_train, Y_train)
- log.debug("train | Model Trained!")
- log.debug("train | Checking performance ...")
- y_pred = self.classifier.predict(x_test)
- # Static method
- self.verify_performance(y_test, y_pred)
- return self.dump_model(self.model_path, "model")
-
- def tuning(self, X, Y):
+ if self.classifier is None:
+ self.init_classifier()
+
+ dump_dataset(X, Y, os.path.join(self.model_path, timestamp))
+
+ start_time = time.time()
+
+ X_train, x_test, Y_train, y_test = train_test_split(X, Y, test_size=0.25)
+ log.debug("train | Training ...")
+ self.classifier.fit(X_train, Y_train)
+ log.debug("train | Model Trained!")
+ log.debug("train | Checking performance ...")
+ y_pred = self.classifier.predict(x_test)
+ # Static method
+ self.verify_performance(y_test, y_pred)
+
+ return self.dump_model(timestamp=timestamp, classifier=self.classifier), time.time() - start_time
+
+ def tuning(self, X, Y, timestamp):
"""
Tune the hyperparameter of a new model by the given data [X] related to the given target [Y]
:param X:
:param Y:
+ :param timestamp:
:return:
"""
+ start_time = time.time()
+ dump_dataset(X, Y, os.path.join(self.model_path, timestamp))
+
X_train, x_test, Y_train, y_test = train_test_split(X, Y, test_size=0.25)
self.classifier = KNeighborsClassifier()
# Hyperparameters of the neural network (KNN)
@@ -159,7 +180,7 @@ def tuning(self, X, Y):
'p': power_range,
}
log.debug("tuning | Parameter -> {}".format(pformat(parameter_space)))
- grid = GridSearchCV(self.classifier, parameter_space, cv=3, scoring='accuracy', verbose=10, n_jobs=3)
+ grid = GridSearchCV(self.classifier, parameter_space, cv=3, scoring='accuracy', verbose=20, n_jobs=2)
grid.fit(X_train, Y_train)
log.info("TUNING COMPLETE | DUMPING DATA!")
# log.info("tuning | Grid Scores: {}".format(pformat(grid.grid_scores_)))
@@ -171,7 +192,8 @@ def tuning(self, X, Y):
self.verify_performance(y_test, y_pred)
- return self.dump_model(params=grid.best_params_)
+ return self.dump_model(timestamp=timestamp, params=grid.best_params_,
+ classifier=grid.best_estimator_), time.time() - start_time
@staticmethod
def verify_performance(y_test, y_pred):
@@ -183,42 +205,47 @@ def verify_performance(y_test, y_pred):
"""
log.debug("verify_performance | Analyzing performance ...")
- # log.info("Computing classifier score --> {}".format(pformat(clf.score(y_test,y_pred))))
log.info("Classification Report: {}".format(pformat(classification_report(y_test, y_pred))))
log.info("balanced_accuracy_score: {}".format(pformat(balanced_accuracy_score(y_test, y_pred))))
log.info("accuracy_score: {}".format(pformat(accuracy_score(y_test, y_pred))))
log.info("precision_score: {}".format(pformat(precision_score(y_test, y_pred, average='weighted'))))
- def dump_model(self, params, path=None, file=None):
+ def dump_model(self, timestamp, classifier, params=None, path=None):
"""
Dump the model to the given path, file
:param params:
+ :param timestamp:
+ :param classifier:
:param path:
- :param file:
+
"""
+ log.debug("dump_model | Dumping model ...")
if path is None:
if self.model_path is not None:
if os.path.exists(self.model_path) and os.path.isdir(self.model_path):
path = self.model_path
- if file is None:
- file = "model"
-
- if os.path.isdir(path):
- time_parsed = datetime.now().strftime('%Y%m%d_%H%M%S')
- classifier_file = os.path.join(path, "{}-{}".format(file, time_parsed))
- config = {'classifier_file': classifier_file,
- 'params': params
- }
-
- log.debug("dump_model | Dumping model ... | Path: {} | File: {}".format(path, classifier_file))
- # TODO: Save every model in a different folder
- with open(classifier_file + ".clf", 'wb') as f:
- pickle.dump(self.classifier, f)
- with open(classifier_file + ".json", 'w') as f:
- json.dump(config, f)
- log.info('dump_model | Configuration saved to {0}'.format(classifier_file))
-
- return config
+ config = {'classifier_file': os.path.join(timestamp, "model.clf"),
+ 'params': params
+ }
+ if not os.path.isdir(path):
+ os.makedirs(path)
+ classifier_folder = os.path.join(path, timestamp)
+ classifier_file = os.path.join(classifier_folder, "model")
+
+ log.debug("dump_model | Dumping model ... | Path: {} | Model folder: {}".format(path, timestamp))
+ # TODO: Save every model in a different folder
+ if not os.path.exists(classifier_folder):
+ os.makedirs(classifier_folder)
+
+ with open(classifier_file + ".clf", 'wb') as f:
+ pickle.dump(classifier, f)
+ log.info('dump_model | Model saved to {0}.clf'.format(classifier_file))
+
+ with open(classifier_file + ".json", 'w') as f:
+ json.dump(config, f)
+ log.info('dump_model | Configuration saved to {0}.json'.format(classifier_file))
+
+ return config
def init_peoples_list(self, peoples_path=None):
"""
@@ -230,9 +257,13 @@ def init_peoples_list(self, peoples_path=None):
log.debug("init_peoples_list | Initalizing people ...")
if peoples_path is not None and os.path.isdir(peoples_path):
self.training_dir = peoples_path
- # freq_list = pool.map(partial(get_frequency, nlp=nlp_en, client=mongo_client), fileList)
- pool = ThreadPool(3)
- self.peoples_list = pool.map(self.init_peoples_list_core, os.listdir(self.training_dir))
+ # pool = ThreadPool(3)
+ # self.peoples_list = pool.map(self.init_peoples_list_core, os.listdir(self.training_dir))
+
+ for people_name in tqdm(os.listdir(self.training_dir),
+ total=len(os.listdir(self.training_dir)), desc="Init people list ..."):
+ self.peoples_list.append(self.init_peoples_list_core(people_name))
+
self.peoples_list = list(filter(None.__ne__, self.peoples_list)) # Remove None
# TODO: Add a method to dump the datastructure, to avoid reloading the same data for tests
@@ -272,11 +303,10 @@ def init_dataset(self):
DATASET["X"].append(item)
for item in people.dataset["Y"]:
DATASET["Y"].append(item)
- dump_dataset(DATASET, self.model_path)
return DATASET
# TODO: Add configuration parameter for choose the distance_threshold
- def predict(self, X_img_path, distance_threshold=0.45):
+ def predict(self, X_img_path, distance_threshold=0.54):
"""
Recognizes faces in given image using a trained KNN classifier
@@ -298,29 +328,33 @@ def predict(self, X_img_path, distance_threshold=0.45):
except OSError:
log.error("predict | What have you uploaded ???")
return -2
+ # TODO: Manage multiple faces
+ log.debug("predict | Extracting faces locations ...")
X_face_locations = face_recognition.face_locations(X_img)
+ log.debug("predict | Found {} face(s) for the given image".format(len(X_face_locations)))
# If no faces are found in the image, return an empty result.
- if len(X_face_locations) != 1:
+ if len(X_face_locations) == 0:
+ log.warning("predict | Seems that no faces was found :( ")
return []
+ log.debug("predict | Found more than one face, encoding the faces ...")
# Find encodings for faces in the test image
faces_encodings = face_recognition.face_encodings(X_img, known_face_locations=X_face_locations)
-
+ log.debug("predict | Face encoded! Let's ask to the neural network ...")
# Use the KNN model to find the best matches for the test face
- closest_distances = self.classifier.kneighbors(faces_encodings, n_neighbors=1)
+ closest_distances = self.classifier.kneighbors(faces_encodings)
log.debug("predict | Closest distances: {}".format(closest_distances))
- are_matches = [closest_distances[0][i][0] <= distance_threshold for i in range(len(X_face_locations))]
- log.debug("predict | are_matches: {}".format(are_matches))
-
- prediction = []
- for pred, loc, rec in zip(self.classifier.predict(faces_encodings), X_face_locations, are_matches):
- log.debug("predict_folder | Pred: {} | Loc: {} | Rec: {}".format(pred, loc, rec))
- if rec: # Face recognized !
- prediction.append((pred, loc))
- else:
- log.debug("predict | Face {} not recognized :/".format(pred))
- prediction = -1
- log.debug("predict_folder | Prediction: {}".format(prediction))
+ min_distance = min(closest_distances[0][0])
+ log.debug("predict | Min: {}".format(min_distance))
+ predictions = []
+ if min_distance < distance_threshold:
+ for pred, loc in zip(self.classifier.predict(faces_encodings), X_face_locations):
+ log.debug("predict_folder | Pred: {} | Loc: {}".format(pred, loc))
+ predictions.append((pred, loc))
+ log.debug("predict_folder | Prediction: {}".format(predictions))
+ else:
+ log.debug("predict | Face not recognized :/")
+ predictions = -1
- return prediction
+ return {"predictions": predictions, "score": min_distance}
diff --git a/datastructure/Person.py b/datastructure/Person.py
index 0df3ae1..6b7239e 100644
--- a/datastructure/Person.py
+++ b/datastructure/Person.py
@@ -3,7 +3,6 @@
Common structure for define how to manage a person
"""
from logging import getLogger
-from multiprocessing.pool import ThreadPool
from os.path import isdir
from face_recognition import face_encodings, face_locations, load_image_file
@@ -42,8 +41,10 @@ def init_dataset(self):
if self.path != "" and isdir(self.path):
log.debug("initDataset | Paramater provided, iterating images ..")
# Iterating the images sequentially (ThreadPool disabled, see below)
- pool = ThreadPool(1)
- self.dataset["X"] = pool.map(self.init_dataset_core, image_files_in_folder(self.path))
+ # pool = ThreadPool(2)
+ # self.dataset["X"] = pool.map(self.init_dataset_core, image_files_in_folder(self.path))
+ for image_path in image_files_in_folder(self.path):
+ self.dataset["X"].append(self.init_dataset_core(image_path))
self.dataset["X"] = list(filter(None.__ne__, self.dataset["X"])) # Remove None
# Loading the Y [target]
for i in range(len(self.dataset["X"])):
@@ -65,12 +66,12 @@ def init_dataset_core(img_path=None):
return None
# log.debug("initDataset | Image loaded! | Searching for face ...")
# Array of w,x,y,z coordinates
- face_bounding_boxes = face_locations(image)
+ face_bounding_boxes = face_locations(image, model="hog")
face_data = None
if len(face_bounding_boxes) == 1:
- log.info("initDataset | Seems that {0} is valid, loading for future training ...".format(img_path))
+ log.info("initDataset | Image {0} have only 1 face, loading for future training ...".format(img_path))
# Loading the X [data]
- face_data = face_encodings(image, known_face_locations=face_bounding_boxes)[0]
+ face_data = face_encodings(image, known_face_locations=face_bounding_boxes, num_jitters=1)[0]
else:
log.error("initDataset | Image {0} not suitable for training!".format(img_path))
if len(face_bounding_boxes) == 0:
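
With ThreadPool swapped for a plain sequential loop, the per-image pipeline in
init_dataset_core now amounts to the following sketch (the path is hypothetical):

    import face_recognition

    image = face_recognition.load_image_file("dataset/images/john_doe/1.jpg")
    # HOG is the CPU-friendly detector; "cnn" is more accurate but much slower
    boxes = face_recognition.face_locations(image, model="hog")
    if len(boxes) == 1:  # only unambiguous single-face images are kept for training
        encoding = face_recognition.face_encodings(
            image, known_face_locations=boxes, num_jitters=1)[0]  # 128-d vector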
diff --git a/datastructure/Response.py b/datastructure/Response.py
index 82cbaa8..c028edf 100644
--- a/datastructure/Response.py
+++ b/datastructure/Response.py
@@ -12,9 +12,9 @@ class Response(object):
external tools
"""
- def __init__(self):
- self.status = "KO"
- self.description = None
- self.error = None
- self.data = None
+ def __init__(self, status="KO", description=None, error=None, data=None):
+ self.status = status
+ self.description = description
+ self.error = error
+ self.data = data
self.date = str(datetime.now())
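
The keyword constructor removes the attribute-by-attribute assignment at every call site;
for example (illustrative values):

    ok = Response(status="OK", description="model.clf", data={"name": "john_doe"}).__dict__
    ko = Response(error="FILE_NOT_VALID", description="Unable to decode the image").__dict__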
diff --git a/main.py b/main.py
index 79617a2..e36b518 100644
--- a/main.py
+++ b/main.py
@@ -6,12 +6,13 @@
import os
import flask_monitoringdashboard as dashboard
-from flask import Flask, flash, jsonify, render_template, request, send_from_directory
+from flask import Flask, flash, jsonify, render_template, request, send_from_directory, session
+from werkzeug.exceptions import abort
from werkzeug.utils import redirect, secure_filename
from api.Api import predict_image, train_network, tune_network
from datastructure.Classifier import Classifier
-from utils.util import init_main_data
+from utils.util import init_main_data, random_string, secure_request
# ===== LOAD CONFIGURATION FILE =====
# TODO: Add argument parser for manage configuration file
@@ -37,9 +38,13 @@
# ===== CLASSIFIER CONFIGURATION =====
log.debug("Init classifier ...")
+
+PUB_KEY = CFG["network"]["SSL"]["cert.pub"]
+PRIV_KEY = CFG["network"]["SSL"]["cert.priv"]
+
clf = Classifier()
clf.model_path = CFG["classifier"]["model_path"]
-clf.load_classifier_from_file(CFG["classifier"]["model"])
+clf.load_classifier_from_file(CFG["classifier"]["timestamp"])
# TODO Add check on extension
allowed_ext = ["jpg", "jpeg", "png"]
@@ -126,5 +131,41 @@ def uploaded_file(filename):
return send_from_directory(TMP_UPLOAD_PREDICTION, filename)
+@app.before_request
+def csrf_protect():
+ """
+ Validate the CSRF token for every POST request outside the dashboard routes.
+ :return:
+ """
+ if "dashboard" not in str(request.url_rule):
+ if request.method == "POST":
+ token = session.pop('_csrf_token', None)
+ if not token or token != request.form.get('_csrf_token'):
+ abort(403)
+
+
+# secure_request(request)
+
+
+def generate_csrf_token():
+ """
+ Generate the CSRF token once per session and reuse it afterwards.
+ :return:
+ """
+ if '_csrf_token' not in session:
+ session['_csrf_token'] = random_string()
+ return session['_csrf_token']
+
+
+
+app.jinja_env.globals['csrf_token'] = generate_csrf_token
+
if __name__ == '__main__':
- app.run(host=CFG["network"]["host"], port=CFG["network"]["port"], threaded=True, debug=True)
+ app.jinja_env.autoescape = True
+ if CFG["network"]["SSL"]["enabled"] is True:
+ log.debug("main | RUNNING OVER SSL")
+ app.run(host=CFG["network"]["host"], port=CFG["network"]["port"], threaded=False, debug=True, ssl_context=(
+ PUB_KEY, PRIV_KEY))
+ else:
+ log.debug("main | HTTPS DISABLED | RUNNING OVER HTTP")
+ app.run(host=CFG["network"]["host"], port=CFG["network"]["port"], threaded=False, debug=True)
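
The CSRF check only passes if every POST form carries the token under the exact name
'_csrf_token'. The template hunks above are truncated, so the expected wiring is sketched
here, together with two assumptions this flow relies on:

    # In a Jinja template (e.g. api/templates/upload.html):
    #   <input type="hidden" name="_csrf_token" value="{{ csrf_token() }}">
    #
    # 1. app.secret_key must be set, otherwise Flask's session (and the token) won't persist.
    # 2. random_string() must produce a value long and random enough to be unguessable.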
diff --git a/utils/util.py b/utils/util.py
index 1db6639..f67448e 100644
--- a/utils/util.py
+++ b/utils/util.py
@@ -1,6 +1,15 @@
# -*- coding: utf-8 -*-
"""
Common method for reuse code
+
+Generate certificate
+
+openssl req -x509 -out localhost.crt -keyout localhost.key \
+ -newkey rsa:2048 -nodes -sha256 \
+ -subj '/CN=localhost' -extensions EXT -config <( \
+ printf "[dn]\nCN=localhost\n[req]\ndistinguished_name = dn\n[EXT]\nsubjectAltName=DNS:localhost\nkeyUsage=digitalSignature\nextendedKeyUsage=serverAuth")
+
+
"""
import json
import logging
@@ -10,7 +19,6 @@
import shutil
import string
import zipfile
-from datetime import datetime
from logging.handlers import TimedRotatingFileHandler
from PIL import Image, ImageDraw
@@ -132,7 +140,7 @@ def unzip_data(unzipped_folder, zip_file):
Unzip the zip file in input in the given 'unzipped_folder'
:param unzipped_folder:
:param zip_file:
- :return:
+ :return: The name of the folder containing the unzipped data
"""
log = logging.getLogger()
folder_name = os.path.join(unzipped_folder, random_string())
@@ -144,23 +152,28 @@ def unzip_data(unzipped_folder, zip_file):
return folder_name
-def dump_dataset(dataset, path, dataset_name=None):
+def dump_dataset(X, Y, path):
"""
- :param dataset:
+ :param X:
+ :param Y:
:param path:
- :param dataset_name:
:return:
"""
log = logging.getLogger()
- log.debug("dump_dataset | Dumping {} {}".format(path, dataset_name))
- if os.path.exists(path) and os.path.isdir(path):
- if dataset_name is None:
- dataset_name = "image_dataset"
- time_parsed = datetime.now().strftime('%Y%m%d_%H%M%S')
- dataset_name = os.path.join(path, "{}-{}".format(dataset_name, time_parsed))
- with open(dataset_name + ".dat", 'wb') as f:
+ dataset = {
+ 'X': X,
+ 'Y': Y
+ }
+ log.debug("dump_dataset | Dumping dataset int {}".format(path))
+ if not os.path.exists(path):
+ os.makedirs(path)
+ log.debug("dump_dataset | Path {} exist".format(path))
+ dataset_name = os.path.join(path, "model.dat")
+ with open(dataset_name, 'wb') as f:
pickle.dump(dataset, f)
+ else:
+ log.error("dump_dataset | Path {} ALREDY EXIST exist".format(path))
def remove_dir(directory):
@@ -173,3 +186,62 @@ def remove_dir(directory):
log.debug("remove_dir | Removing directory {}".format(directory))
if os.path.isdir(directory):
shutil.rmtree(directory)
+
+
+def verify_extension(file):
+ """
+ Wrapper for validate file
+ :param file:
+ :return:
+ """
+ log = logging.getLogger()
+ extension = os.path.splitext(file)[1]
+ log.debug("verify_extension | File: {} | Ext: {}".format(file, extension))
+ if extension == ".zip":
+ # In this case we have to analyze the photos
+ return "zip"
+ elif extension == ".dat":
+ # Photos have already been analyzed, dataset is ready!
+ return "dat"
+ return None
+
+
+def retrieve_dataset(folder_uncompress, zip_file, clf):
+ """
+ Retrieve the dataset from the uploaded file: either unzip and encode the images (.zip) or load a pre-encoded pickle (.dat).
+ :param folder_uncompress:
+ :param zip_file:
+ :param clf:
+ :return:
+ """
+ log = logging.getLogger()
+ log.debug("retrieve_dataset | Parsing dataset ...")
+ check = verify_extension(zip_file.filename)
+ if check == "zip": # Image provided
+ log.debug("retrieve_dataset | Zip file uploaded")
+ folder_name = unzip_data(folder_uncompress, zip_file)
+ log.debug("retrieve_dataset | zip file uncompressed!")
+ clf.init_peoples_list(peoples_path=folder_name)
+ dataset = clf.init_dataset()
+ log.debug("retrieve_dataset | Removing [{}]".format(folder_name))
+ remove_dir(folder_name)
+ elif check == "dat":
+ log.debug("retrieve_dataset | Pickle data uploaded")
+ dataset = pickle.load(zip_file)
+ else:
+ dataset = None
+ log.debug("tune_network | Dataset parsed!")
+ return dataset
+
+
+def secure_request(request):
+ """
+ Add security-related headers to the given request.
+ :param request:
+ :return:
+ """
+ request.headers['Content-Security-Policy'] = "default-src 'self'"
+ request.headers['X-Content-Type-Options'] = 'nosniff'
+ request.headers['X-XSS-Protection'] = '1; mode=block'
+ return request
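
Note that secure_request mutates the incoming request's headers, which the client never
sees; it is also only referenced from a commented-out call in main.py. A sketch of the
conventional Flask pattern, setting the headers on the response instead (hypothetical, not
wired up in this PR):

    @app.after_request
    def apply_security_headers(response):
        response.headers['Content-Security-Policy'] = "default-src 'self'"
        response.headers['X-Content-Type-Options'] = 'nosniff'
        response.headers['X-XSS-Protection'] = '1; mode=block'
        return response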