Skip to content

Commit

Permalink
modify core function
Browse files Browse the repository at this point in the history
  • Loading branch information
vtyushkevich committed Oct 2, 2023
1 parent aa19b9a commit 84cc079
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
10 changes: 10 additions & 0 deletions find_similar/calc_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def __init__(self, # pylint: disable=too-many-arguments
:return: cos similarity
"""
self.text = text
self.key = 0
for k, val in kwargs.items():
setattr(self, k, val)
self.tokens = tokens if tokens else get_tokens(text,
Expand Down Expand Up @@ -79,3 +80,12 @@ def get_tokens(text, dictionary=None, language="russian", remove_stopwords=True)
"""
tokens = tokenize(text, language, dictionary, remove_stopwords)
return tokens


def calc_keywords_rating(text, keywords):
    """
    Sum the weights of the keywords that occur among the tokens of a text.

    Every occurrence of a token that is a key in ``keywords`` adds that
    key's weight to the rating, so a token repeated in a list counts
    once per repetition.

    :param text: object exposing a ``tokens`` attribute, or the iterable
        of tokens itself
    :param keywords: mapping of keyword token to its weight; None or an
        empty mapping yields a rating of 0
    :return: total weight of the matched tokens (0 if nothing matches)
    """
    if not keywords:
        return 0
    # core.find_similar passes ``text.tokens`` directly, so accept both the
    # token-holding object and a bare token collection instead of failing
    # with AttributeError on the latter.
    tokens = getattr(text, "tokens", text)
    # Dict lookup replaces the inner scan over every keyword per token.
    return sum(keywords.get(token, 0) for token in tokens)
7 changes: 6 additions & 1 deletion find_similar/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Core module with search functions
"""

from .calc_functions import TokenText, calc_cosine_similarity_opt
from .calc_functions import TokenText, calc_cosine_similarity_opt, calc_keywords_rating
from .tokenize import tokenize


Expand All @@ -14,6 +14,7 @@ def find_similar( # pylint: disable=too-many-arguments
count=5,
dictionary=None,
remove_stopwords=True,
keywords=None,
) -> list[TokenText]:
"""
The main function to search similar texts.
Expand All @@ -22,6 +23,7 @@ def find_similar( # pylint: disable=too-many-arguments
:param language: Language, default='russian'
:param count: Count results
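:param dictionary: default = None.
If you want to replace some words with others, you can pass a dictionary.
:param keywords: default = None.
Mapping of keyword token to its weight; when given, each text's ``key`` attribute is set to its keywords rating.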
:param remove_stopwords: default = True. Remove or not stopwords
:return: Result list sorted by similarity percent
Expand All @@ -42,6 +44,9 @@ def find_similar( # pylint: disable=too-many-arguments
remove_stopwords=remove_stopwords)
cos = calc_cosine_similarity_opt(text.tokens, text_to_check_tokens)
text.cos = cos
if keywords:
keywords_rating = calc_keywords_rating(text.tokens, keywords)
text.key = keywords_rating
token_texts.append(text)
text_rated_sorted = sorted(token_texts, key=lambda item: item.cos, reverse=True)
return text_rated_sorted[:count]

0 comments on commit 84cc079

Please sign in to comment.