Skip to content

Commit

Permalink
Fix bug in calculate_number_words (dict iteration yielded keys, not key/value pairs)
Browse files Browse the repository at this point in the history
  • Loading branch information
arminZolfaghari committed Jul 18, 2021
1 parent 852d0ab commit a2799b6
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 16 deletions.
13 changes: 9 additions & 4 deletions BigramModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ def create_unary_words_dict(self):
else:
self.count_unary_train_neg_dict[word] = 1

self.do_alpha_cut()
self.remove_from_above()
# self.do_alpha_cut()
# self.remove_from_above()
self.calculate_number_words() # to calculate numbers of all words

def create_binary_words_dict(self):
Expand Down Expand Up @@ -113,14 +113,16 @@ def calculate_simple_conditional_probability(self, word1, word2, dataset_mode):
# calculate number of all words in dictionary
# calculate number of all words in dictionary
def calculate_number_words(self):
    """Total the word-occurrence counts of both training dictionaries.

    Sums the counts stored in ``count_unary_train_pos_dict`` and
    ``count_unary_train_neg_dict`` and caches the totals on the instance
    as ``number_words_in_pos`` / ``number_words_in_neg`` — used as the
    denominator M in the unary probability p(w) = count(w) / M.

    Returns:
        None. Results are stored as instance attributes.
    """
    # Iterate .values() explicitly: iterating the dict directly yields
    # keys only, which was the original bug (`for key, value in d:`).
    # sum() over the values view replaces the manual accumulator loops.
    self.number_words_in_pos = sum(self.count_unary_train_pos_dict.values())
    self.number_words_in_neg = sum(self.count_unary_train_neg_dict.values())

# calculate p(w) = count(w)/M (M: all words in dictionary)
def calculate_unary_probability(self, word, dataset_mode):
Expand All @@ -143,12 +145,15 @@ def calculate_conditional_probability(self, word1, word2, dataset_mode):
res = h2 * self.calculate_simple_conditional_probability(word1, word2,
dataset_mode) + h1 * self.calculate_unary_probability(
word2, dataset_mode) + h0 * self.epsilon

print(res)
return res

def calculate_sentence_probability(self, sentence, dataset_mode):
    """Return the bigram-model probability of *sentence* under *dataset_mode*.

    Starts from the unary probability of the first word, then multiplies
    in the smoothed conditional probability of each subsequent word given
    its predecessor.

    Args:
        sentence: raw sentence text; tokenized via ``get_words_array``.
        dataset_mode: selects which dataset's statistics to use
            (passed through to the probability helpers).

    Returns:
        The product of the per-word probabilities (a float).
    """
    words_array = get_words_array(sentence)
    # NOTE(review): assumes the sentence tokenizes to at least one word —
    # an empty words_array would raise IndexError here; confirm callers.
    probability = self.calculate_unary_probability(words_array[0], dataset_mode)
    for i in range(1, len(words_array)):
        # The original printed the conditional probability before
        # multiplying it in, computing it twice per word pair; compute it
        # once and drop the debug output.
        probability *= self.calculate_conditional_probability(words_array[i - 1], words_array[i], dataset_mode)
    return probability
Expand Down
14 changes: 7 additions & 7 deletions Dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ def pre_process(set):
# return set


sett = store_positive_comments_from_file()
print(sett[0])
print(sett[1])

pre_process(sett)
print(sett[0])
print(sett[1])
# sett = store_positive_comments_from_file()
# print(sett[0])
# print(sett[1])
#
# pre_process(sett)
# print(sett[0])
# print(sett[1])
12 changes: 7 additions & 5 deletions Main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def do_test(test_set, label, model):
if model_response == label or model_response == "equal":
correct_answer_count += 1

print(correct_answer_count / len(test_set))
return correct_answer_count/len(test_set)


Expand All @@ -29,13 +30,14 @@ def do_test(test_set, label, model):
cut_above= 10
bigram_model = BigramModel(positive_train_set, negative_train_set, lambda_arr, epsilon, cut_down, cut_above)
bigram_model.learning() # start learning
# print(bigram_model.count_unary_train_pos_dict)


arr = ['effective but too-tepid biopic']
# analyse
accuracy_pos_test = do_test(positive_test_set, "positive", bigram_model)
print("Accuracy in positive test set : ".format(accuracy_pos_test * 100))
accuracy_neg_test = do_test(negative_test_set, "negative", bigram_model)
print("Accuracy in negative test set : ".format(accuracy_neg_test * 100))
accuracy_pos_test = do_test(arr, "positive", bigram_model)
print("Accuracy in positive test set : ", accuracy_pos_test * 100)
# accuracy_neg_test = do_test(negative_test_set, "negative", bigram_model)
# print("Accuracy in negative test set : ".format(accuracy_neg_test * 100))



Expand Down

0 comments on commit a2799b6

Please sign in to comment.