From a2799b6f81336cec1a32f8bd1e7e7bb4f23c2e97 Mon Sep 17 00:00:00 2001
From: Armin Zolfaghari Daryani
Date: Sun, 18 Jul 2021 11:08:10 +0430
Subject: [PATCH] fix bug in calculate number words

---
 BigramModel.py | 13 +++++++++----
 Dataset.py     | 14 +++++++-------
 Main.py        | 12 +++++++-----
 3 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/BigramModel.py b/BigramModel.py
index bed2d05..2c2e7f7 100644
--- a/BigramModel.py
+++ b/BigramModel.py
@@ -66,8 +66,8 @@ def create_unary_words_dict(self):
             else:
                 self.count_unary_train_neg_dict[word] = 1
 
-        self.do_alpha_cut()
-        self.remove_from_above()
+        # self.do_alpha_cut()
+        # self.remove_from_above()
         self.calculate_number_words()  # to calculate numbers of all words
 
     def create_binary_words_dict(self):
@@ -113,14 +113,16 @@ def calculate_simple_conditional_probability(self, word1, word2, dataset_mode):
     # calculate number of all words in dictionary
     def calculate_number_words(self):
         sum_in_pos = 0
-        for key, value in self.count_unary_train_pos_dict:
+        for value in self.count_unary_train_pos_dict.values():
             sum_in_pos += value
 
         sum_in_neg = 0
-        for key, value in self.count_unary_train_neg_dict:
+        for value in self.count_unary_train_neg_dict.values():
             sum_in_neg += value
         self.number_words_in_pos = sum_in_pos
         self.number_words_in_neg = sum_in_neg
+        # print(self.number_words_in_neg)
+        # print(self.number_words_in_pos)
 
     # calculate p(w) = count(w)/M (M: all words in dictionary)
     def calculate_unary_probability(self, word, dataset_mode):
@@ -143,12 +145,15 @@ def calculate_conditional_probability(self, word1, word2, dataset_mode):
         res = h2 * self.calculate_simple_conditional_probability(word1, word2, dataset_mode) + h1 * self.calculate_unary_probability(
             word2, dataset_mode) + h0 * self.epsilon
+
+        print(res)
         return res
 
     def calculate_sentence_probability(self, sentence, dataset_mode):
         words_array = get_words_array(sentence)
         PI = self.calculate_unary_probability(words_array[0], dataset_mode)
         for i in range(1, len(words_array)):
+            print(self.calculate_conditional_probability(words_array[i - 1], words_array[i], dataset_mode))
             PI *= self.calculate_conditional_probability(words_array[i - 1], words_array[i], dataset_mode)
         return PI
diff --git a/Dataset.py b/Dataset.py
index 5da002a..113170f 100644
--- a/Dataset.py
+++ b/Dataset.py
@@ -58,10 +58,10 @@ def pre_process(set):
     # return set
 
 
-sett = store_positive_comments_from_file()
-print(sett[0])
-print(sett[1])
-
-pre_process(sett)
-print(sett[0])
-print(sett[1])
+# sett = store_positive_comments_from_file()
+# print(sett[0])
+# print(sett[1])
+#
+# pre_process(sett)
+# print(sett[0])
+# print(sett[1])
diff --git a/Main.py b/Main.py
index 8415c54..014cb5a 100644
--- a/Main.py
+++ b/Main.py
@@ -9,6 +9,7 @@ def do_test(test_set, label, model):
         if model_response == label or model_response == "equal":
             correct_answer_count += 1
 
+    print(correct_answer_count / len(test_set))
    return correct_answer_count/len(test_set)
 
 
@@ -29,13 +30,14 @@
     cut_above= 10
     bigram_model = BigramModel(positive_train_set, negative_train_set, lambda_arr, epsilon, cut_down, cut_above)
     bigram_model.learning()  # start learning
+    # print(bigram_model.count_unary_train_pos_dict)
 
-
+    arr = ['effective but too-tepid biopic']
     # analyse
-    accuracy_pos_test = do_test(positive_test_set, "positive", bigram_model)
-    print("Accuracy in positive test set : ".format(accuracy_pos_test * 100))
-    accuracy_neg_test = do_test(negative_test_set, "negative", bigram_model)
-    print("Accuracy in negative test set : ".format(accuracy_neg_test * 100))
+    accuracy_pos_test = do_test(arr, "positive", bigram_model)
+    print("Accuracy in positive test set : ", accuracy_pos_test * 100)
+    # accuracy_neg_test = do_test(negative_test_set, "negative", bigram_model)
+    # print("Accuracy in negative test set : ".format(accuracy_neg_test * 100))