Skip to content

Commit

Permalink
Fix bug in calculate_number_words (dict iteration yielded keys, not key/value pairs)
Browse files Browse the repository at this point in the history
  • Loading branch information
arminZolfaghari committed Jul 18, 2021
1 parent 852d0ab commit a2799b6
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 16 deletions.
13 changes: 9 additions & 4 deletions BigramModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ def create_unary_words_dict(self):
else:
self.count_unary_train_neg_dict[word] = 1

self.do_alpha_cut()
self.remove_from_above()
# self.do_alpha_cut()
# self.remove_from_above()
self.calculate_number_words() # to calculate numbers of all words

def create_binary_words_dict(self):
Expand Down Expand Up @@ -113,14 +113,16 @@ def calculate_simple_conditional_probability(self, word1, word2, dataset_mode):
# calculate number of all words in dictionary
# calculate number of all words in dictionary
def calculate_number_words(self):
    """Total the word-occurrence counts of both training dictionaries.

    Sums the counts stored in ``count_unary_train_pos_dict`` and
    ``count_unary_train_neg_dict`` and caches the totals on the instance
    as ``number_words_in_pos`` / ``number_words_in_neg`` — used as the
    denominator M in the unary probability p(w) = count(w) / M.

    Returns:
        None. Results are stored as instance attributes.
    """
    # Iterate .values() explicitly: iterating the dict directly yields
    # keys only, which was the original bug (`for key, value in d:`).
    # sum() over the values view replaces the manual accumulator loops.
    self.number_words_in_pos = sum(self.count_unary_train_pos_dict.values())
    self.number_words_in_neg = sum(self.count_unary_train_neg_dict.values())

# calculate p(w) = count(w)/M (M: all words in dictionary)
def calculate_unary_probability(self, word, dataset_mode):
Expand All @@ -143,12 +145,15 @@ def calculate_conditional_probability(self, word1, word2, dataset_mode):
res = h2 * self.calculate_simple_conditional_probability(word1, word2,
dataset_mode) + h1 * self.calculate_unary_probability(
word2, dataset_mode) + h0 * self.epsilon

print(res)
return res

def calculate_sentence_probability(self, sentence, dataset_mode):
    """Return the bigram-model probability of *sentence* under *dataset_mode*.

    Starts from the unary probability of the first word, then multiplies
    in the smoothed conditional probability of each subsequent word given
    its predecessor.

    Args:
        sentence: raw sentence text; tokenized via ``get_words_array``.
        dataset_mode: selects which dataset's statistics to use
            (passed through to the probability helpers).

    Returns:
        The product of the per-word probabilities (a float).
    """
    words_array = get_words_array(sentence)
    # NOTE(review): assumes the sentence tokenizes to at least one word —
    # an empty words_array would raise IndexError here; confirm callers.
    probability = self.calculate_unary_probability(words_array[0], dataset_mode)
    for i in range(1, len(words_array)):
        # The original printed the conditional probability before
        # multiplying it in, computing it twice per word pair; compute it
        # once and drop the debug output.
        probability *= self.calculate_conditional_probability(words_array[i - 1], words_array[i], dataset_mode)
    return probability
Expand Down
14 changes: 7 additions & 7 deletions Dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ def pre_process(set):
# return set


sett = store_positive_comments_from_file()
print(sett[0])
print(sett[1])

pre_process(sett)
print(sett[0])
print(sett[1])
# sett = store_positive_comments_from_file()
# print(sett[0])
# print(sett[1])
#
# pre_process(sett)
# print(sett[0])
# print(sett[1])
12 changes: 7 additions & 5 deletions Main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def do_test(test_set, label, model):
if model_response == label or model_response == "equal":
correct_answer_count += 1

print(correct_answer_count / len(test_set))
return correct_answer_count/len(test_set)


Expand All @@ -29,13 +30,14 @@ def do_test(test_set, label, model):
cut_above= 10
bigram_model = BigramModel(positive_train_set, negative_train_set, lambda_arr, epsilon, cut_down, cut_above)
bigram_model.learning() # start learning
# print(bigram_model.count_unary_train_pos_dict)


arr = ['effective but too-tepid biopic']
# analyse
accuracy_pos_test = do_test(positive_test_set, "positive", bigram_model)
print("Accuracy in positive test set : ".format(accuracy_pos_test * 100))
accuracy_neg_test = do_test(negative_test_set, "negative", bigram_model)
print("Accuracy in negative test set : ".format(accuracy_neg_test * 100))
accuracy_pos_test = do_test(arr, "positive", bigram_model)
print("Accuracy in positive test set : ", accuracy_pos_test * 100)
# accuracy_neg_test = do_test(negative_test_set, "negative", bigram_model)
# print("Accuracy in negative test set : ".format(accuracy_neg_test * 100))



Expand Down

0 comments on commit a2799b6

Please sign in to comment.