diff --git a/.gitignore b/.gitignore index ae1f867..329e41d 100644 --- a/.gitignore +++ b/.gitignore @@ -76,3 +76,4 @@ dataset *.pyc flask_monitoringdashboard.db uploads +conf diff --git a/api/Api.py b/api/Api.py index ad782af..0336a02 100644 --- a/api/Api.py +++ b/api/Api.py @@ -2,15 +2,13 @@ """ Custom function that will be wrapped for be HTTP compliant """ -import os -import pickle + import time -import zipfile +from datetime import datetime from logging import getLogger -from os.path import join as path_join from datastructure.Response import Response -from utils.util import print_prediction_on_image, random_string, remove_dir, unzip_data +from utils.util import print_prediction_on_image, random_string, retrieve_dataset log = getLogger() @@ -24,21 +22,22 @@ def predict_image(img_path, clf, PREDICTION_PATH): :return: Response dictionary jsonizable """ response = Response() - log.debug("predict_image | Predicting {}".format(img_path)) if clf is None: + log.error("predict_image | FATAL | Classifier is None!") prediction = None else: + log.debug("predict_image | Predicting {}".format(img_path)) prediction = clf.predict(img_path) - log.debug("predict_image | Image analyzed!") + log.debug("predict_image | Result: {}".format(prediction)) # Manage success - if prediction is not None and isinstance(prediction, list) and len(prediction) == 1: + if prediction and isinstance(prediction["predictions"], list): img_name = random_string() + ".png" - log.debug("predict_image | Generated a random name: {}".format(img_path)) + log.debug("predict_image | Generated a random name: {}".format(img_name)) log.debug("predict_image | Visualizing face recognition ...") - print_prediction_on_image(img_path, prediction, PREDICTION_PATH, img_name) - response.status = "OK" - response.description = img_name - response.data = prediction[0][0] + print_prediction_on_image(img_path, prediction["predictions"], PREDICTION_PATH, img_name) + return Response(status="OK", description=img_name, data={"name": prediction["predictions"][0][0], + "distance": prediction[ + "score"]}).__dict__ # Manage error elif prediction is None: @@ -61,7 +60,7 @@ def predict_image(img_path, clf, PREDICTION_PATH): # TODO: Add custom algorithm that "try to understand" who has never been recognized response.error = "FACE_NOT_RECOGNIZED" response.description = "Seems that this face is related to nobody that i've seen before ..." - log.error("predict_image | Seems that this face is lated to nobody that i've seen before ...") + log.error("predict_image | Seems that this face is related to nobody that i've seen before ...") elif prediction == -2: response.error = "FILE_NOT_VALID" @@ -79,23 +78,23 @@ def train_network(folder_uncompress, zip_file, clf): :param clf: :return: """ - log.debug("train_network | uncompressing zip file ...") - folder_name = path_join(folder_uncompress, random_string()) - zip_ref = zipfile.ZipFile(zip_file) - zip_ref.extractall(folder_name) - zip_ref.close() - log.debug("train_network | zip file uncompressed!") - clf.init_peoples_list(peoples_path=folder_name) - dataset = clf.init_dataset() - neural_model_file = clf.train(dataset["X"], dataset["Y"]) - log.debug("train_network | Removing unzipped files") - remove_dir(folder_name) - response = Response() - response.status = "OK" - response.data = neural_model_file - response.description = "Model succesfully trained!" 
-    return response.__dict__
+    log.debug("train_network | Starting training phase ...")
+    dataset = retrieve_dataset(folder_uncompress, zip_file, clf)
+
+    if dataset is None:
+        return Response(error="ERROR DURING LOADING DAT", description="Seems that the dataset is not valid").__dict__
+
+    else:
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        neural_model_file, elapsed_time = clf.train(dataset["X"], dataset["Y"], timestamp)
+
+        response = Response(status="OK", data=neural_model_file)
+        response.description = "Model successfully trained! | {}".format(
+            time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
+        log.debug("train_network | Training phase finished! | {}".format(response.description))
+
+        return response.__dict__


 def tune_network(folder_uncompress, zip_file, clf):
     """
@@ -106,50 +105,19 @@
     :param clf:
     :return:
     """
-    log.debug("tune_network | uncompressing zip file ...")
-    check = verify_extension(zip_file.filename)
-    if check == "zip":  # Image provided
-        folder_name = unzip_data(folder_uncompress, zip_file)
-        log.debug("tune_network | zip file uncompressed!")
-        clf.init_peoples_list(peoples_path=folder_name)
-        dataset = clf.init_dataset()
-    elif check == "dat":
-        dataset = pickle.load(zip_file)
+    log.debug("tune_network | Starting tuning phase ...")
+    dataset = retrieve_dataset(folder_uncompress, zip_file, clf)
+
+    if dataset is None:
+        return Response(error="ERROR DURING LOADING DAT", description="Seems that the dataset is not valid").__dict__
+
     else:
-        dataset = None
-
-    if dataset is not None:
-        start_time = time.time()
-        neural_model_file = clf.tuning(dataset["X"], dataset["Y"])
-        elapsed_time = time.time() - start_time
-
-        log.debug("tune_network | Removing unzipped files")
-        if check == "zip":
-            # TODO: Refactor this method :/
-            remove_dir(folder_name)
-        response = Response()
-        response.status = "OK"
-        response.data = neural_model_file
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        neural_model_file, elapsed_time = clf.tuning(dataset["X"], dataset["Y"], timestamp)
+
+        response = Response(status="OK", data=neural_model_file)
         response.description = "Model succesfully trained! | {}".format(
             time.strftime("%H:%M:%S.%f", time.gmtime(elapsed_time)))
-    else:
-        response = Response()
-        response.error = "ERROR DURING LOADING DAT"
-    return response.__dict__
+        log.debug("tune_network | Tuning phase finished! | {}".format(response.description))

-
-def verify_extension(file):
-    """
-    Wrapper for validate file
-    :param file:
-    :return:
-    """
-    extension = os.path.splitext(file)[1]
-    log.debug("verify_extension | File: {} | Ext: {}".format(file, extension))
-    if extension == ".zip":
-        # In this case we have to analyze the photos
-        return "zip"
-    elif extension == ".dat":
-        # Photos have been alredy analyzed, dataset is ready!
-        return "dat"
-    return None
+
+    return response.__dict__
diff --git a/api/templates/train.html b/api/templates/train.html
index 619e7be..ab9a3e7 100644
--- a/api/templates/train.html
+++ b/api/templates/train.html
@@ -10,6 +10,7 @@

Upload a zip file with all person that you want to save!

+<input type="hidden" name="_csrf_token" value="{{ csrf_token() }}"/>
{% with messages = get_flashed_messages() %} {% if messages %} diff --git a/api/templates/upload.html b/api/templates/upload.html index a38c97d..0fc7ce8 100644 --- a/api/templates/upload.html +++ b/api/templates/upload.html @@ -10,6 +10,7 @@

Upload new File

+<input type="hidden" name="_csrf_token" value="{{ csrf_token() }}"/>
{% with messages = get_flashed_messages() %} {% if messages %} diff --git a/conf/dashboard.ini b/conf/dashboard.ini index 33dd9e4..00b35e2 100644 --- a/conf/dashboard.ini +++ b/conf/dashboard.ini @@ -15,4 +15,4 @@ GUEST_PASSWORD = ['guest', 'password'] [database] TABLE_PREFIX = fmd -DATABASE = sqlite:///log/flask_monitoringdashboard.db \ No newline at end of file +DATABASE = sqlite:///conf/flask_monitoringdashboard.db \ No newline at end of file diff --git a/conf/ssl/localhost.crt b/conf/ssl/localhost.crt new file mode 100644 index 0000000..41bbfc1 --- /dev/null +++ b/conf/ssl/localhost.crt @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC5TCCAc2gAwIBAgIJAJPNi4jjHSy3MA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV +BAMMCWxvY2FsaG9zdDAeFw0xOTA1MjIxNjMxMDJaFw0xOTA2MjExNjMxMDJaMBQx +EjAQBgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBANskAjz6LENzhnpGkyJHztmIf3Pno8h/k70fjEI13osonv7W5alA3vgQ9az3 +ivD7cp6YPXkv5lK+mTx6dKccrdAPQLWQDZBqaotasTX1hBxaqILqNvh25QY5gjbz +jdfK27E+82QDZUzdYsFDyZQ4ORQ8qVUz0k42ulS4WMpluBEaLk8rHkDIyZSM4psv +EK+IcI7mN8z1YI8mS3jOW2ouQQVwRb60ZOe4b9wcFPYR7+NdNQM7rCR9UQU9ymjC +U4VmTUrIonmXML1gRPHs0Z694AsQe+Mr5O3OxeYhbsFb7d1Ry4WcZiPM+ugJJiNS +Fkpf4SDT7nHAcHbqFzibpSJPP7cCAwEAAaM6MDgwFAYDVR0RBA0wC4IJbG9jYWxo +b3N0MAsGA1UdDwQEAwIHgDATBgNVHSUEDDAKBggrBgEFBQcDATANBgkqhkiG9w0B +AQsFAAOCAQEADz/YL1DOV8n/15/ApaWCQhzcFGOPSv1DcnI6sY46I4zRKyG9yuHE +N11XqkCmQuKF9UnowhFFMLIfxzlqkUTWjKtaWKasnOdAd/LOqO9Eh4cnsyC4yEBB +aMO00YdUAdFb0eV3bR/UY3srji6LjRy9215Ad3eXYxjdTTB/btIsN75XTTsZLnbR +F0V3TRkZlxCQXcYh/lpfPHG9xWLxPZ8g8e+hrwJhsmW3a0BMzYNF8nJdzhZi7Dls +ldR2V8IqVP/Ip6dpsygn/CzbDlZVcZVV4jqhec8bbijsXdSizwm8bfc57TssRA1C +HlvLlwAsoiDj6PZ4PwRCvc5k6ydDbXNftw== +-----END CERTIFICATE----- diff --git a/conf/ssl/localhost.key b/conf/ssl/localhost.key new file mode 100644 index 0000000..8753f14 --- /dev/null +++ b/conf/ssl/localhost.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDbJAI8+ixDc4Z6 +RpMiR87ZiH9z56PIf5O9H4xCNd6LKJ7+1uWpQN74EPWs94rw+3KemD15L+ZSvpk8 +enSnHK3QD0C1kA2QamqLWrE19YQcWqiC6jb4duUGOYI2843XytuxPvNkA2VM3WLB +Q8mUODkUPKlVM9JONrpUuFjKZbgRGi5PKx5AyMmUjOKbLxCviHCO5jfM9WCPJkt4 +zltqLkEFcEW+tGTnuG/cHBT2Ee/jXTUDO6wkfVEFPcpowlOFZk1KyKJ5lzC9YETx +7NGeveALEHvjK+TtzsXmIW7BW+3dUcuFnGYjzProCSYjUhZKX+Eg0+5xwHB26hc4 +m6UiTz+3AgMBAAECggEBAIMpqFVK/9dXfDQPrd0k0cAOHQsIqFVHVuwpx8+RYqQ0 +KgYqJcgKVepwbDuc5oKaXd5jDNhOPTNldV5nhQ7I8ZfIqViC4juAFklWfR7o1qwJ +7zZ8bW6F60qwfSna2RlCCACsxw0joyxAje1TX4HhrPhZ3phqrgO2agxvUmXCQEur +HmZXEXP2grR0XdWiXazWI5jlG0MsX6J+qsMHFCApGR/9KcsB8Lwe8RAiszc1SPPp +TNGZopojkH1GK8DAXMFvODmTdwlStpDh1g711cX5KoINKlX5ppJjsoqcGOLhbEee +uCsfckXGrHJm51GbJePPZ16x7Op/BUdyKjYvSL31fuECgYEA7mumpBMDq4NQ1gju +n7kmU75k2ddrXSycvFJ5yxKCCec+hdJBtKm6WrGGD+uchjxFhZP37JRTimV/F5RL +Ps6xVwgwX3DtSLpwyOelLR8Zo2wT1cDFKp6EfD4ltDVbTsOW2X8yyKeJHac23/wT +HIRyv+8DUUo0GU4JMl4VAW9PwWkCgYEA60xv/8c0AfjOZIGlxdk2RCKWnZas6Rdk +STChPXoIOj5T75B7OfxJukY4R8d7jzXOwX5WX3wS/rtEuom5tFW5+fLl16HWUyz5 +pXa7/QW5dQa7GLB3K6HBKhfTm7/fDkaFKDu/c+sF46RWoP7vxqct1ir0L0Z1BFnk +/qSpSbhBtB8CgYA1/ajR9QBawbT3kzQ+dVYplq8N6cuFYQnpV5//DaTnCzfMZC2+ +9MSfrx3V0xwyBcoUksqNB5XXfF6If2t+wJ3GQLN7mX4Sfy31QQfVrPpIWLwxJqM/ +oIAOBqDRK1gPARnTDQv6Bn51eZ1ioZnOVmwJ7N1KdkxQAqzwe/+zwHpGKQKBgQCH +e/Pha2pe2Ey/QoeZbID6qo/fHatia72rBv1Q0Lt8Dfd2sdLCiKpLP7OYYRycUXdD +ouNJB8BIPLxOTI9JbzMu4NXHW8B1FCiLRdrozisDX2TLypBT50e6XQ3TWJ+vMJvr +lruem21ArpfTC/g0gn66GvGPZxpp7vkURuvTLu1mMQKBgQDI0yvH+FqxiXmnZjY6 +4rqoq7shenmrHxbywHOCJbXMVlFMhFovZUCKZtJ0G14e3yGystA3wkNj8CJtBYj4 +/R1ucQIXBeiGJHKY9lVuRuJI258jUrIQ8z6hNv8zXVW/2oM0R58dJXL2UJVFHDpU 
+ETwkYWrY5QeX4J4mxX2AfsrZ8Q==
+-----END PRIVATE KEY-----
diff --git a/conf/test.json b/conf/test.json
index 5fcf24f..179c272 100644
--- a/conf/test.json
+++ b/conf/test.json
@@ -1,6 +1,6 @@
 {
   "PyRecognizer": {
-    "Version": "0.0.1",
+    "Version": "0.1.2",
     "temp_upload_training": "uploads/training/",
     "temp_upload_predict": "uploads/predict/",
     "temp_upload": "uploads/upload"
@@ -11,21 +11,26 @@
     "level": "debug"
   },
   "network": {
-    "host": "locahost",
+    "host": "0.0.0.0",
     "port": 11001,
     "templates": "api/templates/",
     "SSL": {
-      "enabled": false,
-      "cert.pub": "/dev/null",
-      "cert.priv": "/dev/null"
+      "enabled": true,
+      "cert.pub": "conf/ssl/localhost.crt",
+      "cert.priv": "conf/ssl/localhost.key"
     }
   },
   "classifier": {
     "trainin_dir": "dataset/images/",
     "model_path": "dataset/model/",
-    "model": "model-20190518_191827.clf",
-    "n_neighbors": "",
-    "knn_algo": ""
+    "timestamp": "20190522_170246",
+    "params": {
+      "algorithm": "ball_tree",
+      "metric": "minkowski",
+      "n_neighbors": 80,
+      "p": 2,
+      "weights": "distance"
+    }
   },
   "data": {
     "test_data": "/tmp/test_data/"
diff --git a/dataset/model/model-20190519_210950.json b/dataset/model/20190522_170246/model.json
similarity index 56%
rename from dataset/model/model-20190519_210950.json
rename to dataset/model/20190522_170246/model.json
index 2cee954..b313bc7 100644
--- a/dataset/model/model-20190519_210950.json
+++ b/dataset/model/20190522_170246/model.json
@@ -1,9 +1,9 @@
 {
-    "classifier_file": "dataset/model/model-20190519_210950",
+    "classifier_file": "20190522_170246/model.clf",
     "params": {
         "algorithm": "ball_tree",
         "metric": "minkowski",
-        "n_neighbors": 78,
+        "n_neighbors": 80,
         "p": 2,
         "weights": "distance"
     }
diff --git a/dataset/model/model-20190519_210950.clf b/dataset/model/model-20190519_210950.clf
deleted file mode 100644
index d0198cd..0000000
Binary files a/dataset/model/model-20190519_210950.clf and /dev/null differ
diff --git a/datastructure/Classifier.py b/datastructure/Classifier.py
index 5841615..1e01c89 100644
--- a/datastructure/Classifier.py
+++ b/datastructure/Classifier.py
@@ -6,9 +6,8 @@
 import logging
 import os
 import pickle
-from datetime import datetime
+import time
 from math import sqrt
-from multiprocessing.pool import ThreadPool
 from pprint import pformat

 import face_recognition
@@ -16,6 +15,7 @@ precision_score
 from sklearn.model_selection import GridSearchCV, train_test_split
 from sklearn.neighbors import KNeighborsClassifier
+from tqdm import tqdm

 from datastructure.Person import Person
 from utils.util import dump_dataset
@@ -32,19 +32,22 @@ def __init__(self):
         self.training_dir = None
         self.model_path = None
         self.n_neighbors = None
-        self.knn_algo = None
+        self.algorithm = None
+        self.metric = None
+        self.p = None
+        self.weights = None
         self.peoples_list = []
         self.classifier = None

-    def init_knn_algo(self, knn_algo):
+    def init_algorithm(self, algorithm):
         """
-        Initialize the knn_algorithm for the neural network. If not provided the 'ball_tree' will
+        Initialize the algorithm for the neural network. If not provided the 'ball_tree' will
         be used as default
-        :param knn_algo: 'ball_tree' as default
+        :param algorithm: 'ball_tree' as default
         """
-        log.debug("init_knn_algo | Initializing knn algorithm ...")
-        if self.knn_algo is None:
-            self.knn_algo = knn_algo
+        log.debug("init_algorithm | Initializing knn algorithm ...")
+        if self.algorithm is None:
+            self.algorithm = algorithm

     def init_n_neighbors(self, X_len=10):
         """
@@ -63,45 +66,51 @@ def init_classifier(self):
         """
         if self.classifier is None:
             log.debug("init_classifier | START!")
-            if self.knn_algo is not None and self.n_neighbors is not None:
+            if self.algorithm is not None and self.n_neighbors is not None:
                 log.debug("init_classifier | Initializing a new classifier ... | {0}".format(pformat(self.__dict__)))
                 self.classifier = KNeighborsClassifier(
-                    n_neighbors=self.n_neighbors, algorithm=self.knn_algo, weights='distance')
+                    n_neighbors=self.n_neighbors, algorithm=self.algorithm, weights='distance')
             else:
-                log.error("init_classifier | Mandatory parameter not provided :/")
-                self.classifier = None
+                log.error("init_classifier | Mandatory parameters not provided | Initializing a new KNN Classifier")
+                self.classifier = KNeighborsClassifier()

-    def init_specs(self, X_len, knn_algo='ball_tree'):
+    def load_classifier_from_file(self, timestamp):
         """
-        Initalize the classifier
-        :param knn_algo:
-        :param X_len:
-        """
-        log.debug("init_specs | Init knn algorithm ...")
-        self.init_knn_algo(knn_algo)
-        self.init_n_neighbors(X_len)
-        self.init_classifier()
-
-    def load_classifier_from_file(self, classifier_file):
-        """
-        Initalize the classifier from file
-        :param classifier_file:
+        Initialize the classifier from file.
+        The classifier file represents the name of the directory related to the classifier that we want to load.
+
+        The tree structure of the model folder will be something like this
+
+        Structure:
+        model/
+        ├── <20190520_095119>/ --> Timestamp in which the model was created
+        │   ├── model.dat --> Dataset generated by encoding the faces and pickling them
+        │   ├── model.clf --> Classifier delegated to recognize a given face
+        │   ├── model.json --> Hyperparameters related to the current classifier
+        ├── <20190519_210950>/
+        │   ├── model.dat
+        │   ├── model.clf
+        │   ├── model.json
+        └── ...
+
+        :param timestamp:
         :return:
         """
-        log.debug("load_classifier_from_file | Loading classifier from file ... | File: {}".format(classifier_file))
+        log.debug("load_classifier_from_file | Loading classifier from file ... | File: {}".format(timestamp))
         # Load a trained KNN model (if one was passed in)
         err = None
         if self.classifier is None:
             if self.model_path is None or not os.path.isdir(self.model_path):
                 raise Exception("Model folder not provided!")
-            log.debug("load_classifier_from_file | Loading classifier from file ...")
-            log.debug("load_classifier_from_file | Path {} exist ...".format(self.model_path))
-            filename = os.path.join(self.model_path, classifier_file)
+            # Adding the conventional name used for the classifier -> 'model.clf'
+            filename = os.path.join(self.model_path, timestamp, "model.clf")
+            log.debug("load_classifier_from_file | Loading classifier from file: {}".format(filename))
             if os.path.isfile(filename):
-                log.debug("load_classifier_from_file | File {} exist ...".format(filename))
+                log.debug("load_classifier_from_file | File {} exist!".format(filename))
                 with open(filename, 'rb') as f:
                     self.classifier = pickle.load(f)
+                log.debug("load_classifier_from_file | Classifier loaded!")
             else:
                 err = "load_classifier_from_file | FATAL | File {} DOES NOT EXIST ...".format(filename)
         else:
@@ -113,32 +122,44 @@
             self.classifier = None
         return

-    def train(self, X, Y):
+    def train(self, X, Y, timestamp):
         """
         Train a new model by the given data [X] related to the given target [Y]
         :param X:
         :param Y:
+        :param timestamp:
         """
         log.debug("train | START")
-        if self.classifier is not None:
-            log.debug("train | Training ...")
-            X_train, x_test, Y_train, y_test = train_test_split(X, Y, test_size=0.25)
-            self.classifier.fit(X_train, Y_train)
-            log.debug("train | Model Trained!")
-            log.debug("train | Checking performance ...")
-            y_pred = self.classifier.predict(x_test)
-            # Static method
-            self.verify_performance(y_test, y_pred)
-            return self.dump_model(self.model_path, "model")
-
-    def tuning(self, X, Y):
+        if self.classifier is None:
+            self.init_classifier()
+
+        dump_dataset(X, Y, os.path.join(self.model_path, timestamp))
+
+        start_time = time.time()
+
+        X_train, x_test, Y_train, y_test = train_test_split(X, Y, test_size=0.25)
+        log.debug("train | Training ...")
+        self.classifier.fit(X_train, Y_train)
+        log.debug("train | Model Trained!")
+        log.debug("train | Checking performance ...")
+        y_pred = self.classifier.predict(x_test)
+        # Static method
+        self.verify_performance(y_test, y_pred)
+
+        return self.dump_model(timestamp=timestamp, classifier=self.classifier), time.time() - start_time
+
+    def tuning(self, X, Y, timestamp):
         """
         Tune the hyperparameter of a new model by the given data [X] related to the given target [Y]
         :param X:
         :param Y:
+        :param timestamp:
         :return:
         """
+        start_time = time.time()
+        dump_dataset(X, Y, os.path.join(self.model_path, timestamp))
+
         X_train, x_test, Y_train, y_test = train_test_split(X, Y, test_size=0.25)
         self.classifier = KNeighborsClassifier()
         # Hyperparameter of the neural network (KKN)
@@ -159,7 +180,7 @@
             'p': power_range,
         }
         log.debug("tuning | Parameter -> {}".format(pformat(parameter_space)))
-        grid = GridSearchCV(self.classifier, parameter_space, cv=3, scoring='accuracy', verbose=10, n_jobs=3)
+        grid = GridSearchCV(self.classifier, parameter_space, cv=3, scoring='accuracy', verbose=20, n_jobs=2)
         grid.fit(X_train, Y_train)
         log.info("TUNING COMPLETE | DUMPING DATA!")
         # log.info("tuning | Grid Scores: {}".format(pformat(grid.grid_scores_)))
@@ -171,7 +192,8 @@
         self.verify_performance(y_test, y_pred)

-        return self.dump_model(params=grid.best_params_)
+        return self.dump_model(timestamp=timestamp, params=grid.best_params_,
+                               classifier=grid.best_estimator_), time.time() - start_time

     @staticmethod
     def verify_performance(y_test, y_pred):
         """
@@ -183,42 +205,47 @@
         """
         log.debug("verify_performance | Analyzing performance ...")

-        # log.info("Computing classifier score --> {}".format(pformat(clf.score(y_test,y_pred))))
         log.info("Classification Report: {}".format(pformat(classification_report(y_test, y_pred))))
         log.info("balanced_accuracy_score: {}".format(pformat(balanced_accuracy_score(y_test, y_pred))))
         log.info("accuracy_score: {}".format(pformat(accuracy_score(y_test, y_pred))))
         log.info("precision_score: {}".format(pformat(precision_score(y_test, y_pred, average='weighted'))))

-    def dump_model(self, params, path=None, file=None):
+    def dump_model(self, timestamp, classifier, params=None, path=None):
         """
         Dump the model to the given path, file
         :param params:
+        :param timestamp:
+        :param classifier:
         :param path:
-        :param file:
+
         """
+        log.debug("dump_model | Dumping model ...")
         if path is None:
             if self.model_path is not None:
                 if os.path.exists(self.model_path) and os.path.isdir(self.model_path):
                     path = self.model_path
-        if file is None:
-            file = "model"
-
-        if os.path.isdir(path):
-            time_parsed = datetime.now().strftime('%Y%m%d_%H%M%S')
-            classifier_file = os.path.join(path, "{}-{}".format(file, time_parsed))
-            config = {'classifier_file': classifier_file,
-                      'params': params
-                      }
-
-            log.debug("dump_model | Dumping model ... | Path: {} | File: {}".format(path, classifier_file))
-            # TODO: Save every model in a different folder
-            with open(classifier_file + ".clf", 'wb') as f:
-                pickle.dump(self.classifier, f)
-            with open(classifier_file + ".json", 'w') as f:
-                json.dump(config, f)
-            log.info('dump_model | Configuration saved to {0}'.format(classifier_file))
-
-            return config
+        config = {'classifier_file': os.path.join(timestamp, "model.clf"),
+                  'params': params
+                  }
+        if not os.path.isdir(path):
+            os.makedirs(path)
+        classifier_folder = os.path.join(path, timestamp)
+        classifier_file = os.path.join(classifier_folder, "model")
+
+        log.debug("dump_model | Dumping model ... | Path: {} | Model folder: {}".format(path, timestamp))
+        # TODO: Save every model in a different folder
+        if not os.path.exists(classifier_folder):
+            os.makedirs(classifier_folder)
+
+        with open(classifier_file + ".clf", 'wb') as f:
+            pickle.dump(classifier, f)
+        log.info('dump_model | Model saved to {0}.clf'.format(classifier_file))
+
+        with open(classifier_file + ".json", 'w') as f:
+            json.dump(config, f)
+        log.info('dump_model | Configuration saved to {0}.json'.format(classifier_file))
+
+        return config

     def init_peoples_list(self, peoples_path=None):
         """
@@ -230,9 +257,13 @@
         log.debug("init_peoples_list | Initalizing people ...")
         if peoples_path is not None and os.path.isdir(peoples_path):
             self.training_dir = peoples_path
-            # freq_list = pool.map(partial(get_frequency, nlp=nlp_en, client=mongo_client), fileList)
-            pool = ThreadPool(3)
-            self.peoples_list = pool.map(self.init_peoples_list_core, os.listdir(self.training_dir))
+            # pool = ThreadPool(3)
+            # self.peoples_list = pool.map(self.init_peoples_list_core, os.listdir(self.training_dir))
+
+            for people_name in tqdm(os.listdir(self.training_dir),
+                                    total=len(os.listdir(self.training_dir)), desc="Init people list ..."):
+                self.peoples_list.append(self.init_peoples_list_core(people_name))
+
             self.peoples_list = list(filter(None.__ne__, self.peoples_list))  # Remove None

     # TODO: Add method for dump datastructure in order to don't wait to load same data for test
@@ -272,11 +303,10 @@
                 DATASET["X"].append(item)
             for item in people.dataset["Y"]:
                 DATASET["Y"].append(item)
-        dump_dataset(DATASET, self.model_path)
         return DATASET

     # TODO: Add configuration parameter for choose the distance_threshold
-    def predict(self, X_img_path, distance_threshold=0.45):
+    def predict(self, X_img_path, distance_threshold=0.54):
         """
         Recognizes faces in given image using a trained KNN classifier
@@ -298,29 +328,33 @@
         except OSError:
             log.error("predict | What have you uploaded ???")
             return -2
+        # TODO: Manage multiple faces
+        log.debug("predict | Extracting faces locations ...")
         X_face_locations = face_recognition.face_locations(X_img)
+        log.debug("predict | Found {} face(s) for the given image".format(len(X_face_locations)))

         # If no faces are found in the image, or more than one face are found, return an empty result.
-        if len(X_face_locations) != 1:
+        if len(X_face_locations) == 0:
+            log.warning("predict | Seems that no faces were found :( ")
             return []

+        log.debug("predict | Encoding the faces ...")
         # Find encodings for faces in the test iamge
         faces_encodings = face_recognition.face_encodings(X_img, known_face_locations=X_face_locations)
-
+        log.debug("predict | Face encoded! Let's ask to the neural network ...")
         # Use the KNN model to find the best matches for the test face
-        closest_distances = self.classifier.kneighbors(faces_encodings, n_neighbors=1)
+        closest_distances = self.classifier.kneighbors(faces_encodings)
         log.debug("predict | Closest distances: {}".format(closest_distances))
-        are_matches = [closest_distances[0][i][0] <= distance_threshold for i in range(len(X_face_locations))]
-        log.debug("predict | are_matches: {}".format(are_matches))
-
-        prediction = []
-        for pred, loc, rec in zip(self.classifier.predict(faces_encodings), X_face_locations, are_matches):
-            log.debug("predict_folder | Pred: {} | Loc: {} | Rec: {}".format(pred, loc, rec))
-            if rec:  # Face recognized !
-                prediction.append((pred, loc))
-            else:
-                log.debug("predict | Face {} not recognized :/".format(pred))
-                prediction = -1
-        log.debug("predict_folder | Prediction: {}".format(prediction))
+        min_distance = min(closest_distances[0][0])
+        log.debug("predict | Min: {}".format(min_distance))
+        predictions = []
+        if min_distance < distance_threshold:
+            for pred, loc in zip(self.classifier.predict(faces_encodings), X_face_locations):
+                log.debug("predict | Pred: {} | Loc: {}".format(pred, loc))
+                predictions.append((pred, loc))
+            log.debug("predict | Prediction: {}".format(predictions))
+        else:
+            log.debug("predict | Face not recognized :/")
+            predictions = -1

-        return prediction
+        return {"predictions": predictions, "score": min_distance}
diff --git a/datastructure/Person.py b/datastructure/Person.py
index 0df3ae1..6b7239e 100644
--- a/datastructure/Person.py
+++ b/datastructure/Person.py
@@ -3,7 +3,6 @@
 Common structure for define how to manage a person
 """
 from logging import getLogger
-from multiprocessing.pool import ThreadPool
 from os.path import isdir

 from face_recognition import face_encodings, face_locations, load_image_file
@@ -42,8 +41,10 @@ def init_dataset(self):
         if self.path != "" and isdir(self.path):
             log.debug("initDataset | Paramater provided, iterating images ..")
             # Iterating the images in parallel
-            pool = ThreadPool(1)
-            self.dataset["X"] = pool.map(self.init_dataset_core, image_files_in_folder(self.path))
+            # pool = ThreadPool(2)
+            # self.dataset["X"] = pool.map(self.init_dataset_core, image_files_in_folder(self.path))
+            for image_path in image_files_in_folder(self.path):
+                self.dataset["X"].append(self.init_dataset_core(image_path))
             self.dataset["X"] = list(filter(None.__ne__, self.dataset["X"]))  # Remove None
             # Loading the Y [target]
             for i in range(len(self.dataset["X"])):
@@ -65,12 +66,12 @@ def init_dataset_core(img_path=None):
             return None
         # log.debug("initDataset | Image loaded! | Searching for face ...")
| Searching for face ...") # Array of w,x,y,z coordinates - face_bounding_boxes = face_locations(image) + face_bounding_boxes = face_locations(image, model="hog") face_data = None if len(face_bounding_boxes) == 1: - log.info("initDataset | Seems that {0} is valid, loading for future training ...".format(img_path)) + log.info("initDataset | Image {0} have only 1 face, loading for future training ...".format(img_path)) # Loading the X [data] - face_data = face_encodings(image, known_face_locations=face_bounding_boxes)[0] + face_data = face_encodings(image, known_face_locations=face_bounding_boxes, num_jitters=1)[0] else: log.error("initDataset | Image {0} not suitable for training!".format(img_path)) if len(face_bounding_boxes) == 0: diff --git a/datastructure/Response.py b/datastructure/Response.py index 82cbaa8..c028edf 100644 --- a/datastructure/Response.py +++ b/datastructure/Response.py @@ -12,9 +12,9 @@ class Response(object): external tools """ - def __init__(self): - self.status = "KO" - self.description = None - self.error = None - self.data = None + def __init__(self, status="KO", description=None, error=None, data=None): + self.status = status + self.description = description + self.error = error + self.data = data self.date = str(datetime.now()) diff --git a/main.py b/main.py index 79617a2..e36b518 100644 --- a/main.py +++ b/main.py @@ -6,12 +6,13 @@ import os import flask_monitoringdashboard as dashboard -from flask import Flask, flash, jsonify, render_template, request, send_from_directory +from flask import Flask, flash, jsonify, render_template, request, send_from_directory, session +from werkzeug.exceptions import abort from werkzeug.utils import redirect, secure_filename from api.Api import predict_image, train_network, tune_network from datastructure.Classifier import Classifier -from utils.util import init_main_data +from utils.util import init_main_data, random_string, secure_request # ===== LOAD CONFIGURATION FILE ===== # TODO: Add argument parser for manage configuration file @@ -37,9 +38,13 @@ # ===== CLASSIFIER CONFIGURATION ===== log.debug("Init classifier ...") + +PUB_KEY = CFG["network"]["SSL"]["cert.pub"] +PRIV_KEY = CFG["network"]["SSL"]["cert.priv"] + clf = Classifier() clf.model_path = CFG["classifier"]["model_path"] -clf.load_classifier_from_file(CFG["classifier"]["model"]) +clf.load_classifier_from_file(CFG["classifier"]["timestamp"]) # TODO Add check on extension allowed_ext = ["jpg", "jpeg", "png"] @@ -126,5 +131,41 @@ def uploaded_file(filename): return send_from_directory(TMP_UPLOAD_PREDICTION, filename) +@app.before_request +def csrf_protect(): + """ + + :return: + """ + if "dashboard" not in str(request.url_rule): + if request.method == "POST": + token = session.pop('_csrf_token', None) + if not token or token != request.form.get('_csrf_token'): + abort(403) + + +# secure_request(request) + + +def generate_csrf_token(): + """ + + :return: + """ + if '_csrf_token' not in session: + session['_csrf_token'] = random_string() + return session['_csrf_token'] + + + +app.jinja_env.globals['csrf_token'] = generate_csrf_token + if __name__ == '__main__': - app.run(host=CFG["network"]["host"], port=CFG["network"]["port"], threaded=True, debug=True) + app.jinja_env.autoescape = True + if CFG["network"]["SSL"]["enabled"] is True: + log.debug("main | RUNNING OVER SSL") + app.run(host=CFG["network"]["host"], port=CFG["network"]["port"], threaded=False, debug=True, ssl_context=( + PUB_KEY, PRIV_KEY)) + else: + log.debug("main | HTTPS DISABLED | RUNNING OVER HTTP") + 
+        app.run(host=CFG["network"]["host"], port=CFG["network"]["port"], threaded=False, debug=True)
diff --git a/utils/util.py b/utils/util.py
index 1db6639..f67448e 100644
--- a/utils/util.py
+++ b/utils/util.py
@@ -1,6 +1,15 @@
 # -*- coding: utf-8 -*-
 """
 Common method for reuse code
+
+Generate certificate
+
+openssl req -x509 -out localhost.crt -keyout localhost.key \
+  -newkey rsa:2048 -nodes -sha256 \
+  -subj '/CN=localhost' -extensions EXT -config <( \
+   printf "[dn]\nCN=localhost\n[req]\ndistinguished_name = dn\n[EXT]\nsubjectAltName=DNS:localhost\nkeyUsage=digitalSignature\nextendedKeyUsage=serverAuth")
+
+
 """
 import json
 import logging
@@ -10,7 +19,6 @@
 import shutil
 import string
 import zipfile
-from datetime import datetime
 from logging.handlers import TimedRotatingFileHandler

 from PIL import Image, ImageDraw
@@ -132,7 +140,7 @@
     Unzip the zip file in input in the given 'unzipped_folder'
     :param unzipped_folder:
     :param zip_file:
-    :return:
+    :return: The name of the folder in which to find the unzipped data
     """
     log = logging.getLogger()
     folder_name = os.path.join(unzipped_folder, random_string())
@@ -144,23 +152,28 @@
     return folder_name


-def dump_dataset(dataset, path, dataset_name=None):
+def dump_dataset(X, Y, path):
     """

-    :param dataset:
+    :param X:
+    :param Y:
     :param path:
-    :param dataset_name:
     :return:
     """
     log = logging.getLogger()
-    log.debug("dump_dataset | Dumping {} {}".format(path, dataset_name))
-    if os.path.exists(path) and os.path.isdir(path):
-        if dataset_name is None:
-            dataset_name = "image_dataset"
-        time_parsed = datetime.now().strftime('%Y%m%d_%H%M%S')
-        dataset_name = os.path.join(path, "{}-{}".format(dataset_name, time_parsed))
-        with open(dataset_name + ".dat", 'wb') as f:
+    dataset = {
+        'X': X,
+        'Y': Y
+    }
+    log.debug("dump_dataset | Dumping dataset into {}".format(path))
+    if not os.path.exists(path):
+        os.makedirs(path)
+        log.debug("dump_dataset | Path {} created".format(path))
+        dataset_name = os.path.join(path, "model.dat")
+        with open(dataset_name, 'wb') as f:
             pickle.dump(dataset, f)
+    else:
+        log.error("dump_dataset | Path {} ALREADY EXISTS".format(path))


 def remove_dir(directory):
@@ -173,3 +186,62 @@
     log.debug("remove_dir | Removing directory {}".format(directory))
     if os.path.isdir(directory):
         shutil.rmtree(directory)
+
+
+def verify_extension(file):
+    """
+    Wrapper for validating the file extension
+    :param file:
+    :return:
+    """
+    log = logging.getLogger()
+    extension = os.path.splitext(file)[1]
+    log.debug("verify_extension | File: {} | Ext: {}".format(file, extension))
+    if extension == ".zip":
+        # In this case we have to analyze the photos
+        return "zip"
+    elif extension == ".dat":
+        # Photos have been already analyzed, dataset is ready!
+ return "dat" + return None + + +def retrieve_dataset(folder_uncompress, zip_file, clf): + """ + + :param folder_uncompress: + :param zip_file: + :param clf: + :return: + """ + log = logging.getLogger() + log.debug("retrieve_dataset | Parsing dataset ...") + check = verify_extension(zip_file.filename) + if check == "zip": # Image provided + log.debug("retrieve_dataset | Zip file uploaded") + folder_name = unzip_data(folder_uncompress, zip_file) + log.debug("retrieve_dataset | zip file uncompressed!") + clf.init_peoples_list(peoples_path=folder_name) + dataset = clf.init_dataset() + log.debug("retrieve_dataset | Removing [{}]".format(folder_name)) + remove_dir(folder_name) + elif check == "dat": + log.debug("retrieve_dataset | Pickle data uploaded") + dataset = pickle.load(zip_file) + else: + dataset = None + log.debug("tune_network | Dataset parsed!") + return dataset + + +def secure_request(request): + """ + + :param request: + :return: + """ + request.headers['Content-Security-Policy'] = "default-src 'self'" + request.headers['X-Content-Type-Options'] = 'nosniff' + request.headers['X-Content-Type-Options'] = 'nosniff' + request.headers['X-XSS-Protection'] = '1; mode=block' + return request