diff --git a/README.md b/README.md index 36e792a..1957ca7 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,20 @@ * **git Actions**: Ο Βασιλής ασχολήθηκε με την δημιουργία και την σωστή λειτουργία των git actions. Παρέχοντας αυτοματοποιημένα σενάρια που εκτελούνται με βάση συγκεκριμένες συνθήκες. Αυτά τα σενάρια εκτελούνται κάθε φορά που γίνεται η υποβολή νέου κώδικα. -- Unit tests: Δημιουργήθηκαν unit tests για την εξέταση του καινούργιου κώδικά. +- Unit tests: Δημιουργήθηκαν unit tests για την εξέταση του καινούργιου κώδικα (από την Άννα και τον Βασίλη). + +### Κύρια Branches +- main και K23-NNS-Project-v2: Σε αυτά τα branch υπάρχουν όλα τα αρχεία για τις δύο πρώτες εργασίες (ΚΝΝ και ΚΝΝ με βελτιώσεις). + +- K23-NNS-Project-v1: Σε αυτό το branch υπάρχουν όλα τα αρχεία για την πρώτη εργασία, τα οποία τροποποιήθηκαν σύμφωνα με τις οδηγίες που μας δόθηκαν. + +### Makefiles +Οι εντολές είναι ίδιες με αυτές στο 1ο παραδοτέο και μόνο στο Makefile στον φάκελο tests προστέθηκε έξτρα: +- Με make runKNN_imp: τρέχουν τα τεστ για τις συναρτήσεις του knn_improvements. + +### Extra +1. Για να τρέξει ο αλγόριθμος της 1ης εργασίας, πρέπει απλά στο αρχείο main.c να βγει από το σχόλιο η γραμμή 25 και να μπει σε σχόλιο η γραμμή 24. +2. Υπάρχει και ένα αρχείο times.txt, όπου αναφέρονται οι χρόνοι αλλά και η ακρίβεια των αλγορίθμων ΚΝΝ και ΚΝΝ με βελτιώσεις για Ευκλείδεια και για Manhattan αποστάσεις. ---------------------- @@ -85,5 +98,8 @@ 7. Με make run-all: τρέχουν τα τεστ για όλε τις συναρτήσεις * Γενικο Makefile - Όπως και παραπάνω οι make εντολές είναι ίδιες, απλά με make run-all τρέχει και ο αλγόριθμος αλλά και τα τεστ. + Όπως και παραπάνω οι make εντολές είναι ίδιες, και υπάρχουν έξτρα οι εντολές: + 1. make all: τρέχουν και ο αλγόριθμος αλλά και τα τεστ. + 2. make main-all: τρέχει ο αλγόριθμος με Ευκλείδεια απόσταση αλλά και Manhattan. + 3. make run-all-tests: τρέχουν μόνο τα τεστ. 
diff --git a/include/MathematicalFunctions.h b/include/MathematicalFunctions.h index 925a8a3..59042f4 100644 --- a/include/MathematicalFunctions.h +++ b/include/MathematicalFunctions.h @@ -1,9 +1,21 @@ -#include "Dimension.h" +#ifndef MATHEMATICAL_FUNCTIONS_H +#define MATHEMATICAL_FUNCTIONS_H + +#include +#include +#include +#include "Node.h" #include "common_types.h" +#include double euclidean_distance(Dimension* nodeDimension1 , Dimension* nodeDimension2); double manhattan_distance(Dimension* nodeDimension1 , Dimension* nodeDimension2); -double distance(Dimension* nodeDimension1 , Dimension* nodeDimension2, String distance_function); +double calculate_norm(Dimension* head); + +double euclidean_distance_improved(Node* node1, Node* node2); + +double distance(Node* node1, Node* node2, String distance_function); +#endif \ No newline at end of file diff --git a/include/Node.h b/include/Node.h index 12ece01..71b725f 100644 --- a/include/Node.h +++ b/include/Node.h @@ -13,6 +13,7 @@ typedef struct NodeNeighborsLinkedList NodeNeighborsLinkedList; typedef struct Node { int nodeNameInt; + double norm; Dimension* dimension; NodeNeighborsLinkedList* neighbors; NodeNeighborsLinkedList* reversedNeighbors; @@ -26,4 +27,4 @@ void addNode(Node** headNode, Dimension* headDimension); void freeNode(Node* node); -#endif +#endif \ No newline at end of file diff --git a/main/Makefile b/main/Makefile index aae02c2..817ba18 100644 --- a/main/Makefile +++ b/main/Makefile @@ -8,7 +8,7 @@ CC = gcc # Compile options. 
Το -I tells to compiler where to find the include files # -Werror when complite code add -Werror -CFLAGS = -Wall -Werror -g -I$(INCLUDE) +CFLAGS = -Wall -g -I$(INCLUDE) LDFLAGS = -lm -fsanitize=address # .o files @@ -22,6 +22,8 @@ ARGSEUCL = $(DATASETS)/00000020.bin 10 65 euclidean 0.8 0.001 ARGSMAN = $(DATASETS)/00001000-4.bin 100 399 manhattan 0.1 0.001 +ARGSIMP = $(DATASETS)/asciiData3.bin 10 50 improved 0.1 0.001 + $(EXEC): $(OBJS) $(CC) $(OBJS) -o $(EXEC) $(LDFLAGS) @if [ -f $(EXEC).exe ]; then ln -sf $(EXEC).exe $(EXEC); fi @@ -36,5 +38,9 @@ run-euclidean: $(EXEC) run-manhattan: $(EXEC) ./$(EXEC) $(ARGSMAN) +run-euclidean-improved: $(EXEC) + ./$(EXEC) $(ARGSIMP) + + valgrind: $(EXEC) valgrind --error-exitcode=1 --leak-check=full --show-leak-kinds=all ./$(EXEC) $(ARGSEUCL) \ No newline at end of file diff --git a/modules/FindAllRightNeighborsAlgorithm.c b/modules/FindAllRightNeighborsAlgorithm.c index 2446660..3b1dec5 100644 --- a/modules/FindAllRightNeighborsAlgorithm.c +++ b/modules/FindAllRightNeighborsAlgorithm.c @@ -38,6 +38,6 @@ void FindAllRightNeighbors(Graph * graph, String distance_function) void OrderNodesByNeighbor(Node *dest , Node *src, String distance_function){ - addNeighbor(&dest->neighbors,src, distance(dest->dimension,src->dimension, distance_function)); + addNeighbor(&dest->neighbors,src, distance(dest, src, distance_function)); //Here or in the addNeigbor i would like to add a sorting method base on distance } \ No newline at end of file diff --git a/modules/Graph.c b/modules/Graph.c index 4e0de9b..fe8b33d 100644 --- a/modules/Graph.c +++ b/modules/Graph.c @@ -62,6 +62,7 @@ Graph* createGraphFromBinaryFile(String filename, int dimensions) { flag = 1; break; } + // printf("coordinate-%f\n",coordinate); addDimension(&headDimension, i, coordinate); } diff --git a/modules/MathematicalFunctions.c b/modules/MathematicalFunctions.c index 1c7572b..fc83f99 100644 --- a/modules/MathematicalFunctions.c +++ b/modules/MathematicalFunctions.c @@ -1,9 
+1,4 @@ -#include -#include -#include -#include "Dimension.h" -#include "common_types.h" -#include +#include "../include/MathematicalFunctions.h" /// @brief Basic use of euclidian distance theorym the distance as cost /// @param nodeDimension1 @@ -38,7 +33,7 @@ double manhattan_distance(Dimension* nodeDimension1 , Dimension* nodeDimension2) Dimension* tempDimension1 = nodeDimension1; Dimension* tempDimension2 = nodeDimension2; - double distance = 0; + double distance = 0.0; while (tempDimension1 != NULL && tempDimension2 != NULL) { @@ -51,19 +46,59 @@ double manhattan_distance(Dimension* nodeDimension1 , Dimension* nodeDimension2) return distance; } +/// @brief Sum of the squared coordinates of a Dimension list (the squared norm; no sqrt is taken) +/// @note (original author's remark) taking node1, node2 might be more convenient +/// @param head first element of the list; NULL gives 0.0 +/// @return x1*x1 + x2*x2 + ... +double calculate_norm(Dimension* head) { + double norm = 0.0; + + while (head != NULL) { + norm += head->value*head->value; + head = head->next; + } + return norm; +} + +/// @brief Squared euclidean distance through |x|^2 + |y|^2 - 2*(x.y), reusing the norm stored in each node +/// @param node1 first node; its norm and dimension fields are read +/// @param node2 second node; its norm and dimension fields are read +/// @return norm1 + norm2 - 2*(dot product); no sqrt is applied +/// NOTE(review): confirm callers never mix this squared cost with "euclidean" costs +double euclidean_distance_improved(Node* node1, Node* node2){ + double norm1 = node1->norm; + double norm2 = node2->norm; + + Dimension* tempDimension1 = node1->dimension; + Dimension* tempDimension2 = node2->dimension; + + double mult = 0.0; + + // Stop at the shorter list (as manhattan_distance does) so unequal lengths cannot dereference NULL + while (tempDimension1 != NULL && tempDimension2 != NULL) { + mult += tempDimension1->value*tempDimension2->value; + tempDimension1 = tempDimension1->next; + tempDimension2 = tempDimension2->next; + } + return norm1+norm2-2.0*mult; +} + /// @brief This function decides with mathematical therym (formula ) we are going to use /// and returns it cost -/// @param nodeDimension1 -/// @param nodeDimension2 -/// @param distance_function -/// @return -double distance(Dimension* nodeDimension1 , Dimension* nodeDimension2, String distance_function){ +/// @param node1 first node +/// @param node2 second node +/// @param distance_function "euclidean", "manhattan" or "improved"; any other value leaves the cost at 0.0 +/// @return the cost computed by the selected formula +double distance(Node* node1 , Node* node2, String distance_function){ double cost = 0.0; - if(strcmp(distance_function, "euclidean") == 0){ - cost = euclidean_distance(nodeDimension1, 
nodeDimension2); + if(strcmp(distance_function, "euclidean") == 0) { + cost = euclidean_distance(node1->dimension, node2->dimension); + } + else if(strcmp(distance_function, "manhattan") == 0) { + cost = manhattan_distance(node1->dimension, node2->dimension); } - else if(strcmp(distance_function, "manhattan") == 0){ - cost = manhattan_distance(nodeDimension1, nodeDimension2); + else if(strcmp(distance_function, "improved") == 0) { + cost = euclidean_distance_improved(node1, node2); } return cost; } diff --git a/modules/Node.c b/modules/Node.c index 479136a..d57ed5e 100644 --- a/modules/Node.c +++ b/modules/Node.c @@ -1,5 +1,5 @@ #include "../include/Node.h" - +#include "../include/MathematicalFunctions.h" /// @brief Initialzation of a node /// @return @@ -24,6 +24,9 @@ Node* initNode() { void addNode(Node** headNode, Dimension* headDimension) { Node* newNode = initNode(); newNode->dimension = headDimension; + // printf("head->value-%f\n", headDimension->value); + newNode->norm = calculate_norm(headDimension); + if (*headNode == NULL) { newNode->nodeNameInt = 0; *headNode = newNode; diff --git a/modules/knn.c b/modules/knn.c index f727712..cdc3df3 100644 --- a/modules/knn.c +++ b/modules/knn.c @@ -73,7 +73,7 @@ void checkNeighborofNeighbors(Graph** graph, Node** sourceNode, NodeNeighborsLin double cost = 0.0; if(matrixNodes[neighborName][sourceName] == -1.00){ - cost = distance((*sourceNode)->dimension, tempNeighbors->node->dimension, distance_function); + cost = distance(*sourceNode, tempNeighbors->node, distance_function); matrixNodes[neighborName][sourceName] = cost; } else{ @@ -133,7 +133,7 @@ void KRandomNodes(Graph** graph, int K, String distance_function) { neighborNode = neighborNode->next; } - double cost = distance(currentNode->dimension, neighborNode->dimension, distance_function); + double cost = distance(currentNode, neighborNode, distance_function); addNeighbor(&(currentNode->neighbors), neighborNode, cost); 
addNeighbor(&(neighborNode->reversedNeighbors), currentNode, cost); diff --git a/modules/knn_improvements.c b/modules/knn_improvements.c index a493506..72f6114 100644 --- a/modules/knn_improvements.c +++ b/modules/knn_improvements.c @@ -118,7 +118,7 @@ void localJoin(Node** node, String distance_function, int pK) { int nodeName2 = tempNeig->node->nodeNameInt; if ( nodeName1 != nodeName2 && incrementalSearch(tempNeig, temp) == 1 ) { - double cost = distance(temp->node->dimension, tempNeig->node->dimension, distance_function); + double cost = distance(temp->node, tempNeig->node, distance_function); addCost(&((*node)->cost), nodeName1, nodeName2, cost); // Update the flags @@ -133,7 +133,7 @@ void localJoin(Node** node, String distance_function, int pK) { int nodeName4 = tempRev->node->nodeNameInt; if (nodeName3 != nodeName4 && incrementalSearch(tempRev, temp) == 1) { - double cost = distance(temp->node->dimension, tempRev->node->dimension, distance_function); + double cost = distance(temp->node, tempRev->node, distance_function); addCost(&((*node)->cost), nodeName3, nodeName4, cost); // Update the flags diff --git a/times.txt b/times.txt index 6a52c13..8c00e57 100644 --- a/times.txt +++ b/times.txt @@ -1,10 +1,38 @@ +------------------------------------------------------- + + +/* FOR KNN */ + +datasets/00001000-4.bin 100 399 euclidean 0.2 0.001 +Time for KNN algorithm: 717.410187 sec +~ Acurate by 100.000000 % ~ +~ Acurate Supper by 100.000000 % ~ + + +-------------------------------------------------------- + + +datasets/00001000-4.bin 100 399 manhattan 0.1 0.001 +Time for KNN algorithm: 711.582880 sec +~ Acurate by 100.000000 % ~ +~ Acurate Supper by 100.000000 % ~ + + +--------------------------------------------------------- +--------------------------------------------------------- + + +/* FOR KNN IMPROVEMENTS */ + + datasets/00001000-4.bin 100 399 euclidean 0.2 0.001 Time for KNN algorithm: 10.568234 sec ~ Acurate by 97.468672 % ~ ~ Acurate Supper by 55.828070 
% ~ -------------------------------------------------------- +-------------------------------------------------------- + datasets/00001000-4.bin 100 399 manhattan 0.1 0.001