From 2cf770212162d13c37353d0441e0256431c80e63 Mon Sep 17 00:00:00 2001 From: Seinu <102974491+Seinuve@users.noreply.github.com> Date: Sun, 16 Apr 2023 22:52:29 -0600 Subject: [PATCH] Final Final prep for Kijiku Release where do i even fucking start. cSpell additions Lots of documentation, like actual documentation and not my shitty comments. new imports run_kijiku updated for minimizing and maximizing windows when needed. proper dict printing. Kaiseki and Kijiku binary updates. kaiseki documentation updated. requirements.txt updated Kijiku.py updates.. a lot hold on settings for kijikuDict updated a lot added a reset to default option Changed the way results are output prompt generation changed translate() refactored redistribute refactored build messages refactored okay everything was refactored --- .vscode/settings.json | 1 + Kudasai.py | 171 +++++++++-- Models/Kaiseki.py | 58 +++- Models/Kijiku.py | 334 ++++++++++++++++----- Models/__pycache__/Kaiseki.cpython-311.pyc | Bin 20338 -> 21959 bytes Models/__pycache__/Kijiku.cpython-311.pyc | Bin 19570 -> 27569 bytes requirements.txt | 5 +- 7 files changed, 459 insertions(+), 110 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index a61ba34..d70e8d3 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,6 +2,7 @@ "cSpell.words": [ "ents", "Kōhei", + "lowkey", "sensei" ] } \ No newline at end of file diff --git a/Kudasai.py b/Kudasai.py index 9e7b2c8..450ad15 100644 --- a/Kudasai.py +++ b/Kudasai.py @@ -40,16 +40,17 @@ Step 3: Copy the path of .txt file you want to preprocess to cmd and type a space Step 4: Copy the path of replacements.json to CMD Step 5: Press enter -Step 6: Follow Instructions to use auto-translation +Step 6: Follow internal instructions to use auto-translation Any questions or bugs, please email Seinuve@gmail.com Security: -api keys are stored locally and they are obfuscated. +api keys are stored locally in ProgramData and are obfuscated. 
""" + import sys import json import os @@ -57,6 +58,7 @@ import itertools import spacy import requests +import ctypes from time import sleep from enum import Flag @@ -65,15 +67,13 @@ from Models import Kaiseki from Models import Kijiku - - -#-------------------start of globals--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +#-------------------start-of-globals--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Character = namedtuple('Character', 'japName engName') ner = spacy.load("ja_core_news_lg") # large model for japanese NER (named entity recognition) -VERBOSE = True +VERBOSE = True ## lowkey forgot what this does SINGLE_KANJI_FILTER = True ## filters out single kanji or uses specific function to deal with it when replacing names JAPANESE_NAME_SEPARATORS = ["・", ""] ## japanese names are separated by the ・ or not at all @@ -85,7 +85,7 @@ replacementJson = dict() -#-------------------start of Names()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +#-------------------start-of-Names()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- class Names(Flag): ## name markers NONE = 0 @@ -97,17 +97,25 @@ class Names(Flag): ## name markers FIRST_AND_LAST = 6 ALL_NAMES = 7 -#-------------------start of check_update()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 
+#-------------------start-of-check_update()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- def check_update(): """ + determines if Kudasai has a new latest release using requests + + Parameters: + None + + Returns: + None + """ try: - CURRENT_VERSION = "V1.3.2" + CURRENT_VERSION = "V1.4.0" ## hardcoded current vers response = requests.get("https://api.github.com/repos/Seinuve/Kudasai/releases/latest") latestVersion = response.json()["tag_name"] @@ -119,15 +127,29 @@ def check_update(): return True - except: ## used to determine if user has an internet connection + except: ## used to determine if user lacks an internet connection or posses another issue that would cause the automated mtl to fail return False -#-------------------start of output_file_names()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +#-------------------start-of-output_file_names()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -def output_file_names(): ## returns the file path for the output files +def output_file_names(): """ - spits out output file paths and creates dir for them + + spits out output file paths and creates directory for them + + Parameters: + None + + Returns: + preprocessPath (string - path) where the preprocessed text is stored + outputPath (string - path) where the output/results is stored + debugPath (string - path) where the debug text is stored + jePath (string - path) where the text for the j-e checkers is stored + translatedPath (string - path) where the text translated by Kijiku/Kaiseki is stored + errorPath (string - path) where the errors are stored (if any) + """ dirPath = str(os.getcwd()) + 
"\\Desktop\\KudasaiOutput" @@ -137,21 +159,30 @@ def output_file_names(): ## returns the file path for the output files sleep(0.1) - errorPath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\errors.txt" preprocessPath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\preprocessedText.txt" outputPath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\output.txt" debugPath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\tlDebug.txt" jePath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\jeCheck.txt" translatedPath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\translatedText.txt" + errorPath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\errors.txt" return preprocessPath,outputPath,debugPath,jePath,translatedPath,errorPath -#-------------------start of replace_single_kanji()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +#-------------------start-of-replace_single_kanji()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- def replace_single_kanji(jap, replacement): ## replaces a single kanji in the text """ + uses ner (Named Entity Recognition) from the spacy module to replace names that are composed of a single kanji in the japanese text + + Parameters: + jap (string - kanji) holds a japanese word to be replaced + replacement (string - english) holds the replacement for jap + + Returns: + nameCount (int - number) how many names were replaced + """ global japaneseText, totalReplacements @@ -178,12 +209,21 @@ def replace_single_kanji(jap, replacement): ## replaces a single kanji in the te return nameCount -#-------------------start of replace_single_word()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 
+#-------------------start-of-replace_single_word()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -def replace_single_word(word, replacement): ## replaces all single words in the japanese text with their english equivalents +def replace_single_word(word, replacement): """ + replaces single words/names in the japanese text + + Parameters: + word (string - japanese) word to be replaced + replacement (string - english) replacement for the word + + Returns: + numOccurrences (int - number) number of occurrences for word + """ global japaneseText, totalReplacements @@ -198,12 +238,22 @@ def replace_single_word(word, replacement): ## replaces all single words in the return numOccurrences -#-------------------start of loop_names()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +#-------------------start-of-loop_names()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- def loop_names(character, replace=Names.FULL_NAME, honorific=Names.ALL_NAMES): """ + generates tuples of English and Japanese names to be replaced, along with a boolean indicating whether honorifics should be kept or removed + + Parameters: + character (namedtuple - ('character', japName engName) ) represents a japanese word/name along with it's replacements + replace (name - flag) how a name should be replaced + honorific (name - flag) how a honorific should be replaced + + Returns: + unnamed tuple (tuple - name) names to be replaced along with honorific flag + """ japaneseNames = character.japName.split(" ") @@ -236,18 +286,29 @@ def loop_names(character, replace=Names.FULL_NAME, honorific=Names.ALL_NAMES): if(Names.LAST_NAME in replace): yield 
(englishNames[-1], f'{japaneseNames[-1]}', Names.LAST_NAME in honorific) -#-------------------start of replace_name()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +#-------------------start-of-replace_name()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -def replace_name(character,replace=Names.FULL_NAME,noHonorific=Names.ALL_NAMES,replacedNames=list()): +def replace_name(character,replace=Names.FULL_NAME,noHonorific=Names.ALL_NAMES,replacedNames=dict()): """ + replaces names in the japanese text based off of tuples returned by loop_names + + Parameters + character (namedtuple - ('character', japName engName) ) represents a japanese word/name along with it's replacements + replace (name - flag) how a name should be replaced + noHonorific (name - flag) if a name has honorific or not + replacedNames (dict - string) a list of replaced names and their occurrences + + Returns: + None + """ global replacementText for eng, jap, noHonor in loop_names(character, replace, noHonorific): - if jap in replacedNames: + if(jap in replacedNames): continue data = dict() @@ -269,7 +330,8 @@ def replace_name(character,replace=Names.FULL_NAME,noHonorific=Names.ALL_NAMES,r total = sum(data.values()) replacedNames[jap] = total - if not VERBOSE or total == 0: + + if(not VERBOSE or total == 0): continue print(f'{eng} : {total} (', end='') @@ -280,15 +342,24 @@ def replace_name(character,replace=Names.FULL_NAME,noHonorific=Names.ALL_NAMES,r replacementText += ', '.join([f'{key}-{value}' for key, value in data.items() if value > 0]) + ')\n' -#-------------------start of 
replace()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +#-------------------start-of-replace()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- def replace(): - global japaneseText, replacementRules,replacementText """ + handles replacements and replacement rules for names in the japanese text + + Parameters: + None + + Returns: + japaneseText (string - japanese) the text that will be modified + """ + global japaneseText, replacementRules, replacementText + ## (title, jsonKey, isName, replace_name, noHonorific) replacementRules = [ @@ -352,7 +423,15 @@ def replace(): def determine_translation_automation(preprocessPath): """ + determines which translation module the user wants to use and calls it + + Parameters: + preprocessPath (string - path) path to where the preprocessed text was stored + + Returns: + None + """ print("Please choose an auto translation model") @@ -375,7 +454,15 @@ def determine_translation_automation(preprocessPath): def run_kaiseki(preprocessPath): """ + Handles the optional auto translation using the deepL api if enabled + + Parameters: + preprocessPath (string - path) path to where the preprocessed text was stored + + Returns: + None + """ os.system('cls') @@ -393,40 +480,65 @@ def run_kaiseki(preprocessPath): def run_kijiku(preprocessPath): """ + Handles the optional auto translation using the gpt/openai api if enabled + + Parameters: + preprocessPath (string - path) path to where the preprocessed text was stored + + Returns: + None + """ + hwnd = ctypes.windll.kernel32.GetConsoleWindow() + ctypes.windll.user32.ShowWindow(hwnd, 3) + os.system('cls') japaneseText,kijikuRules = Kijiku.initialize_text(preprocessPath) print("\nAre these settings okay? 
(1 for yes or 2 for no) : \n\n") - print(kijikuRules) + for key,value in kijikuRules["open ai settings"].items(): + print(key + " : " + str(value)) - if(input() == 1): + if(input("\n") == "1"): pass else: Kijiku.change_settings(kijikuRules) - + + os.system('cls') print("Commencing Automated Translation\n") sleep(2) + hwnd = ctypes.windll.kernel32.GetConsoleWindow() + ctypes.windll.user32.ShowWindow(hwnd, 9) + Kijiku.commence_translation(japaneseText) #-------------------start of main()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- def main(inputFile, jsonFile): - connection = check_update() - """ + reads the text from `inputFile`, replaces names and honorifics in the text based on the data in `jsonFile`, and writes the results to the folder "KudasaiInput" + + Parameters: + inputFile (string - path) path to the txt file we are preprocessing + jsonFile (string - path) path to the json file whose "rules" we are following + + Returns: + None + """ global japaneseText, replacementJson, totalReplacements, replacementText + + connection = check_update() with open(inputFile, 'r', encoding='utf-8') as file: japaneseText = file.read() @@ -477,6 +589,7 @@ def main(inputFile, jsonFile): #-------------------start of sub_main()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- if(__name__ == '__main__'): # checks sys arguments and if less than 3 or called outside cmd prints usage statement + if(len(sys.argv) < 3): try: diff --git a/Models/Kaiseki.py b/Models/Kaiseki.py index 5b6ff6a..ba67938 100644 --- a/Models/Kaiseki.py +++ b/Models/Kaiseki.py @@ -15,9 +15,6 @@ Original Author: Seinu#7854 -Known issues and limitations: -capitalization can be an issue in sentences that have multiple parts -Since this is being translated one sentence at 
a time, the translation is less accurate compared to translating in bulk, however, doing it one line at a time seems to completely eliminate sentence duplications and additions. ''' #-------------------start of initialize_translator()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- @@ -25,7 +22,16 @@ def initialize_translator(textToTranslate): """ + Creates the deepL translator object and a list full of the sentences we need to translate. + + Parameters: + textToTranslate (string - path) path to the text we are translating + + Returns: + translator (object - deepL) deepL translator object + japaneseText (list - japanese) a list of japanese lines we are translating + """ try: @@ -84,7 +90,17 @@ def initialize_translator(textToTranslate): def separate(sentence): """ + Separates a sentence into parts based of punctuation. + + Parameters: + sentence (string - japanese) a sentence(line) of japanese text + + Returns: + sentenceParts (list - japanese) a list of parts of text which is derived from sentence + sentencePunctuation (list - punctuation) a list of punctuation found in sentence + specialPunctuation (list - booleans) a list of booleans indicating whether "special" punctuation exist in the sentence + """ sentenceParts = [] @@ -219,7 +235,18 @@ def separate(sentence): def translate(translator,sentenceParts,sentencePunctuation,specialPunctuation): ## for translating each part of a sentence """ + Translates individual sentence parts and quotes + + Parameters: + translator (object - deepL) a deepL translator object + sentenceParts (list - japanese) a list of parts of text which is derived from sentence + sentencePunctuation (list - punctuation) a list of punctuation found in sentence + specialPunctuation (list - booleans) a list of booleans indicating whether "special" punctuation exist in the sentence + + Returns: + finalSentence (string - 
english) a fully translated and reassembled version of sentence + """ i = 0 @@ -319,6 +346,18 @@ def translate(translator,sentenceParts,sentencePunctuation,specialPunctuation): def output_results(): + ''' + + Outputs results to several txt files + + Parameters: + None + + Returns: + None + + ''' + global debugText,jeCheckText,finalText,errorText errorPath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\errors.txt" @@ -346,6 +385,19 @@ def output_results(): def commence_translation(translator,japaneseText): + """ + + Uses all the other functions to translate the text provided + + Parameters: + translator (object - deepL) a deepL translator object + japaneseText (list - japanese) a list of japanese lines to translate + + Returns: + None + + """ + try: sentenceParts = [] sentencePunctuation = [] diff --git a/Models/Kijiku.py b/Models/Kijiku.py index f577a9d..28468c3 100644 --- a/Models/Kijiku.py +++ b/Models/Kijiku.py @@ -6,6 +6,7 @@ import tiktoken import time import json +import spacy from time import sleep @@ -19,6 +20,18 @@ #-------------------start of change_settings()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- def change_settings(kijikuRules): + + """ + + changes the settings located in the kijikuRules json + + Parameters: + kijikuRules (dict - rules) a dictionary of the rules kijiku will follow + + Returns: + None + + """ while(True): @@ -26,7 +39,7 @@ def change_settings(kijikuRules): print("See https://platform.openai.com/docs/api-reference/chat/create for further details\n") - print("model : ID of the model to use") + print("\nmodel : ID of the model to use. As of right now, Kijiku only works with 'chat' models.") print("\ntemperature : What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. 
Lower Values are typically better for translation") print("\ntop_p : An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. I generally recommend altering this or temperature but not both.") print("\nn : How many chat completion choices to generate for each input message. Do not change this.") @@ -36,41 +49,94 @@ def change_settings(kijikuRules): print("\npresence_penalty : Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.") print("\nfrequency_penalty : Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.") print("\nlogit_bias : Modify the likelihood of specified tokens appearing in the completion. Do not change this.") + print("\nsystem_message : Instructions to the model. Do not change this unless you know what you're doing.") + print("\nmessage_mode : 1 or 2. 1 means the system message will actually be treated as a system message. 2 means it'll be treating as a user message. 1 is recommend for gpt-4 otherwise either works.") + print("\nnum_lines : the number of lines to be built into a prompt at once. 
Theoretically, more lines would be more cost effective, but other complications occur with higher lines.") + print("\nsentence_fragmenter_mode : 1 or 2 (1 - via regex and other nonsense, 2 - via spacy ) the api returns a result on a single line, so this determines the way Kijiku fragments the sentences.") print("\n\nCurrent settings:\n\n") - print(kijikuRules) + for key,value in kijikuRules["open ai settings"].items(): + print(key + " : " + str(value)) - action = input("\nEnter the name of the setting you want to change, or type 'q' to quit: ").lower() + action = input("\nEnter the name of the setting you want to change, type d to reset to default, or type 'q' to continue: ").lower() if(action == "q"): break - if action not in kijikuRules["open ai settings"]: + elif(action == "d"): + reset_kijiku_rules() + + with open(r'C:\\ProgramData\\Kudasai\\Kijiku Rules.json', 'r', encoding='utf-8') as file: + kijikuRules = json.load(file) + + + elif(action not in kijikuRules["open ai settings"]): print("Invalid setting name. 
Please try again.") sleep(1) continue + else: - newValue = input("Enter a new value for " + action + " : ") + newValue = input("\nEnter a new value for " + action + " : ") - kijikuRules[action] = newValue + kijikuRules["open ai settings"][action] = newValue with open(r'C:\\ProgramData\\Kudasai\\Kijiku Rules.json', 'w+', encoding='utf-8') as file: json.dump(kijikuRules, file) +#-------------------start of reset_kijiku_rules()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +def reset_kijiku_rules(): + + """ + + resets the kijikuRules json to default + + Parameters: + None + + Returns: + None + + """ + + default = { + "open ai settings": + { + "model":"gpt-3.5-turbo", + "temp":1, + "top_p":1, + "n":1, + "stream":False, + "stop":None, + "max_tokens":9223372036854775807, + "presence_penalty":0, + "frequency_penalty":0, + "logit_bias":None, + "system_message":"You are a Japanese To English translator. Please remember that you need to translate the narration into English simple past. Try to keep the original formatting and punctuation as well. ", + "message_mode":1, + "num_lines":13, + "sentence_fragmenter_mode":1 + } + } + + with open(r'C:\\ProgramData\\Kudasai\\Kijiku Rules.json', 'w+', encoding='utf-8') as file: + json.dump(default,file) + #-------------------start of initialize_text()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- def initialize_text(textToTranslate): """ + Set the open api key and create a list full of the sentences we need to translate. 
- Parameters: - textToTranslate (string) : file path of txt file provided to Kudasai + textToTranslate (string - path) a path to the text kijiku will translate Returns: - text (list) a list of lines to be translated + text (list - japanese) a list of japanese lines we need to translate + """ try: @@ -132,25 +198,8 @@ def initialize_text(textToTranslate): return text, kijikuRules except: - - default = { - "open ai settings": - { - "model":"gpt-3.5-turbo", - "temp":1, - "top_p":1, - "n":1, - "stream":False, - "stop":None, - "max_tokens":9223372036854775807, - "presence_penalty":0, - "frequency_penalty":0, - "logit_bias":None - } - } - - with open(r'C:\\ProgramData\\Kudasai\\Kijiku Rules.json', 'w+', encoding='utf-8') as file: - json.dump(default,file) + + reset_kijiku_rules() with open(r'C:\\ProgramData\\Kudasai\\Kijiku Rules.json', 'r', encoding='utf-8') as file: kijikuRules = json.load(file) @@ -161,40 +210,57 @@ def initialize_text(textToTranslate): def output_results(): - ''' - outputs results - ''' + ''' + + Outputs results to several txt files + + Parameters: + None + + Returns: + None - global debugText,jeCheckText,resultText,errorText + ''' + + global debugText,jeCheckText,resultText,errorText - debugPath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\tlDebug.txt" - jePath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\jeCheck.txt" - errorPath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\errors.txt" - resultsPath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\translatedText.txt" + debugPath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\tlDebug.txt" + jePath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\jeCheck.txt" + errorPath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\errors.txt" + resultsPath = str(os.getcwd()) + "\\Desktop\\KudasaiOutput\\translatedText.txt" - with open(debugPath, 'w+', encoding='utf-8') as file: - file.writelines(debugText) + with open(debugPath, 'w+', encoding='utf-8') as file: + file.writelines(debugText) - with 
open(jePath, 'w+', encoding='utf-8') as file: - file.writelines(jeCheckText) + with open(jePath, 'w+', encoding='utf-8') as file: + file.writelines(jeCheckText) - with open(resultsPath, 'w+', encoding='utf-8') as file: - file.writelines(resultText) + with open(resultsPath, 'w+', encoding='utf-8') as file: + file.writelines(resultText) - with open(errorPath, 'w+', encoding='utf-8') as file: - file.writelines(errorText) + with open(errorPath, 'w+', encoding='utf-8') as file: + file.writelines(errorText) - print("\n\nDebug text have been written to : " + debugPath) - print("\nJ->E text have been written to : " + jePath) - print("\nTranslated text has been written to : " + resultsPath) - print("\nError Text has been written to : " + errorPath) + print("\n\nDebug text have been written to : " + debugPath) + print("\nJ->E text have been written to : " + jePath) + print("\nTranslated text has been written to : " + resultsPath) + print("\nError Text has been written to : " + errorPath) #-------------------start-of-generate_prompt()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -def generate_prompt(index): +def generate_prompt(index,promptSize): ''' + generates prompts but skips punctuation or plain english + + Parameters: + index (int - number) an int representing where we currently are in the text file + + Returns: + prompt (list - string) a list of japanese lines that will be assembled into messages + index (int - number) an updated int representing where we currently are in the text file + ''' global text @@ -204,13 +270,13 @@ def generate_prompt(index): while(index < len(text)): sentence = text[index] - if(len(prompt) < 13): + if(len(prompt) < promptSize): if(bool(re.match(r'^[\W_\s\n-]+$', sentence))): - debugText.append("\n-----------------------------------------------\nSentence : " + sentence + "\nSentence is punctuation... 
skipping translation\n-----------------------------------------------\n\n") + debugText.append("\n-----------------------------------------------\nSentence : " + sentence + "\nSentence is punctuation... skipping\n-----------------------------------------------\n\n") elif(bool(re.match(r'^[A-Za-z0-9\s\.,\'\?!]+\n*$', sentence))): - debugText.append("\n-----------------------------------------------\nSentence : " + sentence + "\nSentence is english... skipping translation\n-----------------------------------------------\n\n") + debugText.append("\n-----------------------------------------------\nSentence : " + sentence + "\nSentence is english... skipping\n-----------------------------------------------\n\n") else: prompt.append(sentence) @@ -224,67 +290,135 @@ def generate_prompt(index): #-------------------start-of-translate()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -import backoff -import openai - @backoff.on_exception(backoff.expo, (openai.error.ServiceUnavailableError, openai.error.RateLimitError, openai.error.Timeout, openai.error.APIError, openai.error.APIConnectionError)) -def translate(systemMessage,userMessage,MODEL): +def translate(systemMessage,userMessage,MODEL,kijikuRules): ''' + translates system and user message + + Parameters: + systemMessage (string - message) a string that gives instructions to the gpt chat model + userMessage (string - message) a string that gpt will alter based on the systemMessage + MODEL (string - constant) a constant that represents which model we will be using + kijikuRules (dict - rules) a dictionary of rules that kijiku follows as it translates + + Returns: + output (string - response) a string that gpt gives to us + ''' + ## max_tokens and logit bias are currently excluded due to a lack of need, and the fact that i am lazy + + global debugText + response = openai.ChatCompletion.create( 
model=MODEL, messages=[ systemMessage, userMessage, ], - temperature=0, + + temperature = float(kijikuRules["open ai settings"]["temp"]), + top_p = float(kijikuRules["open ai settings"]["top_p"]), + n = int(kijikuRules["open ai settings"]["top_p"]), + stream = kijikuRules["open ai settings"]["stream"], + stop = kijikuRules["open ai settings"]["stop"], + presence_penalty = float(kijikuRules["open ai settings"]["presence_penalty"]), + frequency_penalty = float(kijikuRules["open ai settings"]["frequency_penalty"]), + ) output = response['choices'][0]['message']['content'] + + debugText.append("\nResponse from GPT was : \n" + output) return output - #-------------------start-of-redistribute()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -def redistribute(translatedText): +def redistribute(translatedText,sentence_fragmenter_mode): ''' + puts translated text back into text file - ''' + Parameters: + translatedText (string - response) a string that gpt gives to us + + Returns: + None + + ''' global resultText + + if(sentence_fragmenter_mode == 1): + + sentences = re.findall(r"(.+?(?:\"|\'|-|~|!|\?|%|\(|\)|\.\.\.|\.|---))(?:\s|$)", translatedText) + + patched_sentences = [] + built_string = None + + for sentence in sentences: + if(sentence.startswith("\"") and not sentence.endswith("\"") and built_string is None): + built_string = sentence + continue + elif(not sentence.startswith("\"") and sentence.endswith("\"") and built_string is not None): + built_string += f" {sentence}" + patched_sentences.append(built_string) + built_string = None + continue + elif(built_string is not None): + built_string += f" {sentence}" + continue + + resultText.append(sentence) - sentences = translatedText.split(". 
") - for sentence in sentences: - resultText.append(sentence + ".") + else: - resultText[-1] = resultText[-1][:-1] + nlp = spacy.load("en_core_web_lg") -#-------------------start-of-buildMessages()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + doc = nlp(translatedText) + sentences = [sent.text.strip() for sent in doc.sents] + + for sentence in sentences: + resultText.append(sentence) -def buildMessages(systemMessage): - global text +#-------------------start-of-buildMessages()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +def buildMessages(systemMessage,message_mode,promptSize): ''' builds messages dict for ai + + Parameters: + systemMessage (string - message) a string that gives instructions to the gpt chat model + mode (int - mode) the method of assembling the messages + + Returns: + messages (dict - messages) the assembled messages that will be given to the model ''' + global text,debugText + i = 0 messages = [] while i < len(text): - prompt, i = generate_prompt(i) + prompt, i = generate_prompt(i,promptSize) prompt = ''.join(prompt) - system_msg = {} - system_msg["role"] = "system" - system_msg["content"] = systemMessage + if(message_mode == 1): + system_msg = {} + system_msg["role"] = "system" + system_msg["content"] = systemMessage + + else: + system_msg = {} + system_msg["role"] = "user" + system_msg["content"] = systemMessage messages.append(system_msg) @@ -294,13 +428,26 @@ def buildMessages(systemMessage): messages.append(model_msg) + debugText.append("Messages : \n\n") + debugText.append(str(messages)) + return messages 
#-------------------start-of-redistribute()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- def estimate_cost(messages, model="gpt-3.5-turbo-0301"): ''' + attempts to estimate cost, (no idea how accurate) + + Parameters: + messages (dict - messages) the assembled messages that will be given to the model + model (string - constant) a constant that represents which model we will be using + + Returns: + numTokens (int - number) the estimated number of tokens in the messages + cost (double - money) the estimated cost of translating messages + ''' try: @@ -327,7 +474,7 @@ def estimate_cost(messages, model="gpt-3.5-turbo-0301"): tokensPerName = 1 else: - raise NotImplementedError(f"""numTokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""") + raise NotImplementedError(f"""estimate_cost() is not implemented for model {model}. 
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""") numTokens = 0 @@ -345,12 +492,27 @@ def estimate_cost(messages, model="gpt-3.5-turbo-0301"): numTokens += 3 # every reply is primed with <|start|>assistant<|message|> minCost = (float(numTokens) / 1000.00) * costPer1000Tokens + debugText.append("\n\n\nEstimated Tokens in Messages : " + str(numTokens)) + debugText.append("\nEstimated Minimum Cost : " + str(minCost) + '\n') + return numTokens,minCost #-------------------start-of-main()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- def commence_translation(japaneseText): + """ + + Uses all the other functions to translate the text provided + + Parameters: + japaneseText (list - japanese) a list of japanese lines that we need to translate + + Returns: + None + + """ + try: global debugText,jeCheckText,errorText,resultText,text @@ -358,14 +520,28 @@ def commence_translation(japaneseText): i = 0 - MODEL = "gpt-3.5-turbo" - systemMessage= "You are a Japanese To English translator. Please remember that you need to translate the narration into English simple past. Try to keep the original formatting and punctuation as well. 
" + debugText.append("Kijiku Activated\n\n") + debugText.append("Settings are as follows : \n\n") + + with open(r'C:\\ProgramData\\Kudasai\\Kijiku Rules.json', 'r', encoding='utf-8') as file: + kijikuRules = json.load(file) + + for key,value in kijikuRules["open ai settings"].items(): + debugText.append(key + " : " + str(value) +'\n') + + MODEL = kijikuRules["open ai settings"]["model"] + systemMessage = kijikuRules["open ai settings"]["system_message"] + message_mode = int(kijikuRules["open ai settings"]["message_mode"]) + promptSize = int(kijikuRules["open ai settings"]["num_lines"]) + sentence_fragmenter_mode = int(kijikuRules["open ai settings"]["sentence_fragmenter_mode"]) timeStart = time.time() os.system('cls') - messages = buildMessages(systemMessage) + debugText.append("\nStarting\n-------------------------\n") + + messages = buildMessages(systemMessage,message_mode,promptSize) numTokens,minCost = estimate_cost(messages,MODEL) @@ -379,9 +555,12 @@ def commence_translation(japaneseText): os.system('cls') print("Trying " + str(i+2) + " of " + str(len(messages))) - translatedText = translate(messages[i],messages[i+1],MODEL) + debugText.append("\n\nTrying " + str(i+2) + " of " + str(len(messages)) + "\n") + + translatedText = translate(messages[i],messages[i+1],MODEL,kijikuRules) + + redistribute(translatedText.strip(),sentence_fragmenter_mode) - redistribute(translatedText.strip()) i+=2 resultText = list(map(lambda x: x + '\n', resultText)) @@ -392,6 +571,7 @@ def commence_translation(japaneseText): print("\nMinutes Elapsed : " + str(round((timeEnd - timeStart)/ 60,2)) + "\n") + except Exception as e: print("\nUncaught error has been raised in Kijiku, error is as follows : " + str(e) + "\nOutputting incomplete results\n") output_results() diff --git a/Models/__pycache__/Kaiseki.cpython-311.pyc b/Models/__pycache__/Kaiseki.cpython-311.pyc index d85a597ee07c8cd493d08ee991f24d0606ba651c..f1dec3e9e4cd43783225845003b927626cc8d570 100644 GIT binary patch delta 2274 
zcmcgtO>7%Q6yEV)636S5q_LA4hlv9sD;k1SwP`3)oWv1SB5uHoM9Tpt-f_KUv%B4& zIE@lqfjA(JBg6p-ae-EfI8-ZC2@Y^W0t7j5;({u{sY0@l0CD5Z#%pgBD=G&V%d@+0 z-n{qbd*8hG`-kZ3J(RvBib(;6{QWbEymlx3MIu2=BnV0NZ9-`#aeJ&0i)>JbTK3Z& znM1c6*^n)w+Zu5wp0*uJ)vw|iY!as~`7?G5?9?gdy0}3xvEaSE19X!eV=q$2we+xc z(8_YdhJ~k%^=q{1fMtbJ^LdH=oEVckTdxt5=+ve%_c5)hHdhRU5+>Le8MTfK1fbt5 z9Pi1#ccV!5K2OR>%m6I*H&H>jDYPcMT&ggXCVz5h0#MjOZsB-{Y^pm{uZFP+2No|;-D`Wj&SAP04|TF0slNLuP9RdCHRHgI6BbF@IP=IT|) zC5~$79Vcd2CgElm-jE#E3|CjMs`s#L(yB_dUWV5VL!-c^aLxi5*izIg1OfzW)G2Vv z!Ufx;)7)4Ahkes6P66=Ei5DCs>FwtJM(9`8$p0W-;hB{aerB;(nRr`Fkc9{*{ZCKK zcM0B;^lkF?U?nD;mt8^xuEEUzn4+dn{}3KwG(+P5Dn-qsIeO3(b=Y^L7}QjqXqBUv ziRxG3azm{lxaMlw4X7h zsZsXPTpC5%Z>fR7o#;0BQBoi|*;M)~f+TL==-&5Bu4B4D8jISl22ZG>Hih_!h8-w! zHC3Z_SGyNa%Z5(78b<&PFGnzD`*R9`e}}Ix+0agi2FZbnWB9eH#)~Ad z&CgxX@u2qwz0kCbO;w>vH%CDhr9(aT!OKn1SpIDr?o41_@k3LX{gsV<`6Nfd;l^yEb^N{?1p1i_w5j2!_X ziT7h%u5D^>-I95Sw%yzs_db@fVZHM;k;Mh$-j)pT0BpaNW!Zpa&d}Awv^A0Xi}-~K znQhi|NKN;EMPE6A0!VapmdFmC0ti{v6kbQHgMP)#3o%!i<0T+kn&pq5n zW26-u1FKaW`rAsL1x5NmxR=sXIaytnNT14iWK;OLPvDTs;Tq)q4O6FfDN4!Gn)RX- zL{>&odtGvB|M6N|=kp#FZVMBj6QfllCUHg)<0NWyvy!B+5moM<<(i1pWQ_;tlaa%t zWE)`&^a%?_ly;3cjyM(usWY>Hp?>6pD?)0ekc~J!Jrh+o#N87fb$rX7ez!4`<}Z&@ zw~|apOa`#xx}O^o0d9>9F^|*B$|N46-O4nMIwMkfwucFNGpl=&rcVr>qVKcyVES+| zbQW8#<}gFNs-rG4!?a#aC16Ee?Qe+HmM;C@sXK4F_~pj^do)9nj-f5HmLXUw0y6 G4gUb%bf?Au diff --git a/Models/__pycache__/Kijiku.cpython-311.pyc b/Models/__pycache__/Kijiku.cpython-311.pyc index ea0226b67e916f5799c2ce43212f8cf489c28ceb..663d6f18af0ef7260be61d5be47a1d2a942e30e2 100644 GIT binary patch literal 27569 zcmd6Qd2k%pd1v<=K+j+XgKKbuYyczcK#CA05|l`Sf(PgZFyvqc?irBc zXdp8#R|;}5705-cC`Bs6UPqxO zyDqtS&c#XgZ*Z5~k^^Cpjnd)0JwPelI(lt}da#ItW zR9=&w;#pcFE1v9yc$B-+I)H^b#ZTF+t0Tcn6>*e}xiS?ba@G^2=l0-sTdXKkLh@&) z&lY6TGO_F)P1zH6sqz}zJPIHFHr}dIZ)alZ&5f2(L1sf?ZKpRl$4S*vja2(l-3Nfc zhiNG6=B6C6^fu?Xs|=Y@KN2~seTtLn=_h*|^IW*7ncL1q9XD*R+Anf9c%Hk+@%Ra! 
zO4zeuf1OuED?Fu!Y)GKLRT&6K>vB_P_}l*@3zF*-c7nGtE7`fHoC#-k>k0b-(prCSRNkA>Y+#By5yC8&Cq{a!gjDU92W+t~Mz+91}Z4h03&x z0g>K=k#In{DMm(Er7TG_3W+y@p^!Kd356mzjLKh>W0OiaRdZh?EE}KCC+)+bX#Ctc zSr*4)v59Ec)~yqvKx`zUjCV#Rw(ZD<{%WjCX3rT_ZeUt58#caFESaMn2e!W zw4$J3WPFQwV=NewF`qY9l-^+}9ZZ!Pi40?)5^EK3k(KdaI2eruhdagRSQQtH4g}Cb z?B+ypI1s|f(OYC?K#if@DCUEvdN-(?I~JK3oWPj7!(t$WTw$bLN8v~Y01RXnC~Og_ zTJ$13IUJHFqv@P98cZe|t1&Udk#IC9VFem1N{&v3Vr-5G9I?nXIh>jiqZ<=SWGFBc z3`XJS zUSt9+-Xp`g>hQ! zBed_gh!X)N77T;{3sw~SA`}Y{_^>mF2seb0iLE?fg&uV2#M$$m;&~IunOq`DNr70P zgKdkbtX!9s4k|9n*AYf39Z;rqgF(q+;5znSU??PGerfm645WHztU!i}hHerTLA71t z<)hE{9lK(76q5*KK!u*i{_pM;dm@o*L-@xQr2H!FGnyYMKQ!WB0VpE0MEfTQqPsBC zuTB!iz}VBvt@~`R0ST7Zm|jAaX)Izaw!H9;&$-6~uMVb`W0xqNM_=(JI6gU!H9kIs zy+b(BfTVR&Q|LEm{sDBh8e^EOkQ^Qb$WYvv_f#c>HBz0#K1zMWstX0jgM`-sOad6& zM6FR;yfOLJSgJN!z&9e3At~G%v#ejXgv|9D|JXgD$e7e&d62Mf2s^4v>@x;qVy})J zohAa?)+wHiM1ySar;r#c7L3bcSiWI)aVS982TL4~M+dP-Xq^NmCggx(5+l~rXha+d zC|mH2FnW56nggO`8VZg@A~bjrfVw6IhJYSZZ2@rsE&rW+L@}oN=0D3A(D$Gs%C8av zrIlhfmUkeM{1gLGWB?*&Y!HxMOlMdYvAc$VRL48xWA0F7G#DEk3Sx(KiDx2GaKxBs z3p}t?q7(9Pa3q*n83ur(FE{6a(P#79iR(J`ocEj0m2lUFUk`{K_W^}j;0|994CkH%HY^JaWm;NW^B-02g(N2OtTdwbP z9$pgMw_P(EHKAG;s#T$SN${%0k|xNyAghARUdJ>cs0%?=2rdZTWsdWmaD2*fu9J>` zWFfvFZYDWdl9MGlS&|b;YH?T-BDxS!g-9yrWmd@mt7O1IIR~f`wfL)=py-043Q8)c zWT!f1JJl)MDW_~tC7okQ=ULKumUP}s3b3RhmNdkYhA2rbzNrauU5KkfoV`XhA*Ks4 zRfwhPJj)8czzV*=3cg_0`MQmI@--Xv0TRs#0=9TFy3!ZhFN7Ox{>JgVp{c7dWhr{am%WCO>Ryv@U4xl^UPW3C&!n&sS zg7+NnMBaVrB=6^Cfh(@K`;6MUU$5&@z4W`_DOJm! 
z(L4uq&jHnQ0IxHRZ=Ae+a{AaMcZJ!`=riTEnvscWVtNwW?mds(1S2;+{j( zr`76Bx@YsfAoZ z(}WGWut60zV0g}rESUuuo^vCHXZ@B*&50RfTqA3gw%$)G{m1v(f4bLk{F%a^9^w%` zYOL&a*#Fw$=oJe8x`;*1Tn%UpM8V?I%{6CNp|+*kepJ1REQs^B;bn+|J?5t z2l=TiWvf&qc_i;g#VZs|e2nT%Jy+A3V5n616)F`c+Q`uuL($?~Ri%QIc3*thP*O0A zX%Hz3CS;luA`mIUkza|DQFW}M0Z|B9e&HqFV`t~?4v^KMh(bEJ<9#7gCI8dcjGsbI zp&`cq^dfjHl28NUslY@4+=vXJO*{t58;nBON(t-{B~48fd0b{fNi4;bfr>C*F`d~E zx1h{{N`nMa#=Mv{MuSY?ng~Q=;9o$QFyw1cmopj*>^&NUxIpA+JixR~jE|U@3=`MJ zY6J6iLk@*H#ZSB#Kzni0$=Ld&g9PTJlZo<4J`57ZMA)Q*WU{11#!8mt$|lLuH1}zQ<=8M$P&$#o2x%t(q$Qb=*B(Bf4{S*NhB+ZEFMDzy~UBWSlJ z3sbYNG~zu9vuxvPFWBd+s^=>z=bJYyH~5?d%NzyHGGD>16Q8%Of)T_tMrggeK@-;N!g^I$ zPZ&X@oo)n#5k$fWmT#HVoLCAYte!Pmicx(~+3m7_>~eH_3qLO55oXy@NVmQ~cNfDzjUZiZL>D>U92(*!5wSb?T&E^gHx z(V9*@SBuTqti={!euI8I$9^QaQnrKt9;?gb$`}SEw^Wpikh37L(k?_BDD_Ay?Lztc zYJrzo7JcQj=Q-Wi-_Gj6O6wBw-Khtw)F;{VtWsD}W-Yc3Ge72wR($HYT4BZ#wtv2r znLSD(ZC_@c#@2*)_yJbqFIj;bIOQZ(pLPDz23%>S{f+QO>XBC3-}2Y8!V(wjFzZ-p zsk+F8uf#et>FD1JuuSEzdF}p{$F^tc$+cpwW-nS57Ar=PaD1JYoKue19XI$JoMH1q z?;U2`YIlYO3d~cUmB>r}Z-DKYwe0>(pR=uaM{}%tQ*5O?Jx0z z&2LO4l)v`|P~xqtD^*~Z=$;6kmTz{(UX3Y4^fD}2U%U(21*lO1*GVgwf+iU>y0TO> zriLb~uK4DZzJ)2r(vt!OlUWhzGsjRhshO%*>*4WuW&YNJ{|=KLe@3CPS@lJ!&a{fg zj7y?43E)S>P+<6)IpwgO!VWh2?mP7S=y3e>YPAex4XG9SHR%6STGD*=LQ`X~wZ!qn zC%Y$OV_@Rr2CFVB3iO~%oyh8-wPG#;Bt(X;LLmv$lA)%OB9pN4K&K|7-vE`8h8y|- z6QrSrX4>N+8A8Qy1UkLpSf?9WssOaH@e}=F(iVrsr)>8;7O9aHdPTHh7?`7CyxGl? 
zULBSvn8?A5T%?CayGj7MT&dHY#g-C_NTmC-JqX_BM!5ucrE-b~3dQ@%CmX%IX}eu@ zj{82xJcqEMCLjmcI=3YqO!sP$0lsh_pq)UbzC-b5m|n>s2NTI4ZJf>nWR(v^4wd@co+thBGwLIgQQ8}rP?9VlTg#qZN!QSxT`)dC zkCt>`8A!zC0Jg*dgO1Ie+Z z>zFxb3h@g`7j>C|(U7u;a4<=#y)fy^!DdY4f)0}524x);=P_u?r8M9zO8hn{A@MMg zEa}Uc0_7#-xJ6kXV?h|o#Oehv6gUSS3_m)dde3X#^SbvuE{-e78=wtusIsfTLCp7HmWc} z2x0gFDJ?$m<&|HnIILG3Rx1uKRMe=o7qyB@dc`HR;?hz@&E2Lqd++ql^e$D^ z+_~}g>)(1^Z8@t|y`WdUp!Q#ytGc9CU0Q$&Y+YEZj_B19wK}rY(5N;&_efH&NNR(u zHOP8{jM3M8g;#^2`TF(W?tQoS_eb=WJ@>Ew$ckBE%Pj3oL7^R??)G zG^r&`OC=R=7ThVAVLwad^;9Y5%?$F?tX2;G{gAAVjA}z;`p}qGIfk0Y`Sa9R6$?xI zS(E*&$^I2hE>>2}6k^OzmVIc3B2ShNW3}M#=VwU?IUTthy$2oSZq2n@ckKqKoIU)5 zig|{n?pOTDx<6j0ZSU5%cLPUhRY&xyBQp+CKJ?Hmq(jm_^w2D%zgSaO#;F^tWiBXo zHZIX-efD9qdiEvN`?BVJS@*t-0H?`@qvO}lUtOJM#v-4iVHl&U1b!n<5A+T>bq-Bl{z2ow&CT`UKiqj&2@WDd7MA?g`!sfE)HgbQn+y7jp{pW5E;ywsrZfBPe_QB3)8_o!dJo0h9A~z;{&tI<;#+x&Z!1K67{g9KP(B%Is9)HL z-dn|kEMYyZMb4)9*+PrNlh~K7rT!Iav9VhCU$ZaFWf6|07J=ti=!=8(#U4wmsNk?k zN`!)s3RfuS90^B$MMf;`?77lu6&im01LEX&#rHMBU9F?QjV3;sX z)kb3jvCvU@XmXT9(^d0am3v4;VtE=@sfY2}QI@5Bl`N(pCQIi@d>~<%Ce|tpZ*aR= zcMMUUNv=cWcS+$7K*vCKfG9Ed_cpEHotbS|oyE!^d_ol}{m}@t^Q7`for9ZgHQBTxrEyNAC2#dHT-j z6erhwN-H^at9QX$@>bcMnm6n2)M?%Z-P@pwCp7PgTgMkl%5zKfY9+lWQRaVZ=uYs> zYj>_`Wlefnle+PoR(9^z=_QwUI(WC@UZLvRrn$E1u5DmIoXz)-&I_L3^?lR#zDpCD zbfHNVnuw!nW~nS7cB+}UDa*G^Y9^LqqjGbBkp*KnG9%4`vFp%W9Xhk(J2&31Fqefc1>1%g)-D_1{J2cl0-L-=fHr$uYrrny*tP9Pm&`hJ=z*1R2quxNH zwtUN^W@4$Pb8{I@|4RYf=~mr4?qAg0hcw4w-EmlT9A0p^RQJ~VEt-3u=Gd=0_N$Kl z?AxyUuW9ZhnxjW|^r()WRK9l2{fy=~pgRtz=5O>Q*6d%buj?tW|G2=><1YNMm#46~ z?x@@Tmu|<=;=;cyaFoJcXO_}jyNLLO4zLmIX8kTz&SB` zEjWQM7PCL}g*eFpRV-YZq2Lf97s$_-;f0 z4Tnea41_N}c|oKcTw_Fih`<}IWNE2WKA5JS+E9V7p`~VS5~iCxP8Gm;f=(|mr-+mv zL6j-PSL^%a1pL~K62Hb6;@(#-54|~QS0bTExZowU|d*yO>$E83=d|StUbhUHKKzAT~UvT&Ay33qs8)wL8fD5lbuy|8{c4JqPt` zvcQNZ1=IT@N~na`f|Q=nT;qdAMkS2o|BZjqR%`?437fd$l3T|Y-M+V)-;&;JztcYF zu2bE03*}XJop+v@7M6;Nr?0>9%J?psls#dX2uh^&g_Upd= zq||9!D5-f|BF;fe)EH2ukk%N2@@9g6j-2tDSXg?7T=EbiAsxc$4gAEIy8hsUSGCHM 
zdgV#2v{x_fUFICC)_IcE$->fJR%b7(GaVLirB!cxzvaCJU!Uq`9hv`vGL5P=JncR}w5-+mEJ(>8F^C_sPH<6ceCCH(! zIPhtjill`Z90y}Z510wm>Y|^YUKp9rq|P*D?3pRVOlUo2<78-j6yEc&Mddlm0Eau{ zAQp2*WB8m}fBKqHo)HOl4opm+bRk1(Rwr52XP!TL>^Y11m5!Ii;FC`MHRCWq2IzoJ zis;dCr63M~vHp?a*Ca`k1zRW}-wkCe1=|p`yOcKkNeU)?O%_>B(;3A! zQ|3HnlZC1CbId<7S(vHX8UU^6>Ijol)Uhw#4iq!f}y=3eCLcL_4 z=Gm`%_Rp8rs|{QA(rx#v_0q0+9}Gery07!zb=|jTp-!BwQ#%gpjnAsx=k&(&T3x?h z*S}ETIJ;Txd{%GjR*&@SO&7HKm-PCVmg~5ZEuV6}l7fGv;MPf^kCih)-7C(P>fYwr zUftVy@22kEf9v>?!~57#G3TgI{X6fEX#NA5D5HRvwwlFYdgV+L}V<0^n4ɒs+)xGg8hWb)^(+n_NT2qy7y^oSDF%z-x7hc(evNq-nTJ!9^%HVmAt0=nCqkjttZBQdkS^9 za_hJ~orTo%*SF#-irfEMvY+E(Y;3mbbI8uq_OO9^*e=AsW8gEKfPo`|=?IfjX$_IX1>@^;4*m*q9h-OANFp20G~ne-aP^ z+S@5(G_j$*?+d;u$&1NiIXnpW3VHB`JTw>@&F|~*EI^;~b*Lj>hpVZ%;JDfJyre%N zl5GQl*8?sZP`Z%NZey&s@{AEbfIsls2CmI=fIfuJanLgf-VI6+LZY0{<>7V0cpH!v zE)Nw4jo*8_X3pQa`1atp1~q?+?r%vaF!TUoF-k2Kj)1erIl3ec{Nk*0l-yS6jFrS< z3{@*vD5H~|UQ@`I0CDPOyO1o-o>PU~)sn8XK#(k*AYnq5aPlbSe3kUDqkV%0f1!+1 z0ed(!k+jq81hkGBUQeTTk9FwE4ARfjSSJubgjmN3;_UXh!VPNSh9y|mPpRu)QZJ5b z6=QnE*tBc0ew`}R%vaS-dlsu3mpQMijRn&u=Y3U=eNA(|rrBMZuTA&iyW3lHzxwg^ zBXir2Jlv&iKdEm&$v!?BP|pPB*SF4i=1Z#Q{SA-(ZFBy%dlj0$Q}^S@OCiebd%W}L z+|Hv9&ucqR={rxckB<`S^P}@6^|QsZC-JXXE9s;v8XuRm&6Tv>tJg~Q=p}p9l0Eb5 zn~l0l8`RSFl_jc|Je7|<4Rf9b$P=Dc-P5YF-{m@UJfAmlzKYvd)tYv#c#~edY1+P6 zTsl2G)2S6V>cx%I_Ib7_?(h7;eyjqm;-Fq}5YQoz@B|VB(KW?H%tc|OPR8RNC2f;iOMh2kSK3iwL-X`#FkBX*sU*4kRDrpB9h2KtRPIhWKWSd zZ;*p`q$2?jN!nnLe+r}miiJHarEq1+YKFN)4oc=Z)BpsWz|k94w-Yf|_o_AI_>%M7 zmxI*(w|+U9$@MFT7pKLbn4i>HNl-4?%O*JJu>%Cb71)6ZXWTRGW+`f(2*yEGy5fY+ z0SAKr?`03-@=Y}u#f_u3xQ=BEXWrpSnbJcshdf9aCZC*#SvQkJQz9@wq)zXq6If8*XWhEzt!qus<>%SbZ_1?f+4 zz7z++4YW)u8fFt}7nGY6-$?ty`+z&M@N$`)|tWa=x82X>t&N^c0hRt zg@2ontu!yCcd!Ebg2jnSADDMZsnWn@2sc8hgEmC&lT0HMfLMFb*hCXN9?BNX;eLp zP?6NuQ)=sq)J36$%s6*5293pv%4yepebcmSrbPpqU%wGCoUyDovsx@Zo4F#7wa1@# za?WCthi=gvt-7OCb+j&+jP+aR?pD8f<<1q&(X2a~RY&t;QTeUs83z2KEbEwA$F`l@ zwr6SkiHI?*SdB3Ev`&V_Ug5=U3-81k0SjvefBP2f-!xOW@+3yKkG4gZpK}A&6#S?& 
z-tZRuC}!23!?>gNB8(;VkSoJS1=+Y6dU0MVOySz>x^m99tf9beZs-m{(?l| ztOc&-7<1V*l-kYck1*v*6j)EYz=n|417%8s!sOiimW@ACZ3|LaYMk)Ihpvb03I(_K z+C@aBJqXb2-BIP-{8j3iq`L>U)@Km|>g(?sDV-4>3BxXdUZ zj=>Q*Ko>P2tvx?!N#o+*DAHya(;}^ynx%_*KCC9*KbKbb=CdY_F^uvS*-Tjnv zFHXvEV-B2E%x=ACOSzSIfck{xn+Da&RK)Voxd`)SD!8bpD`B<7cfS-+!l0C0Dcu=e z#56*F&7ERDyIN*Aw0+yQYlEb-gLTTdlc6(NWcjSLU>U?;NmrDGH*whyyTl3itcel$ zmx!2lT;5=Sg(x%zi}k>6s{-Ag#HCxK>~fLx0bhoBJC%3QNITC|UvVcju`Aw})#R>U zxv5=fV*9SnxSLhWUPk}V_dlIGGxU|ipAe${`aCyfg&)I;|yHE>At2Ox@d*2_Hg51vJeZx#Rv12EHjtIAo|CyO$j9h96>R> z-^mLq>4785pqVY{%>rMt9Io`ebZ**!oKdU-#SEsXhhjG!?|yvyiPCgyf>+p@7BwAPYct+lZB%4WzOc@mkHQC6vs5- zxGo%5h2yMD+r0*@e6L=}KxLz$>&&pXY*6`KeeOc}3R((e_ z-x1w+L=}!Kx{7C7=Ug?atA^!%MH2>fVNewY7by3=0oB)~`MPvpmnw82vs%_N=W0=1 zE%Wt_Bt_T3DYtsQx^~(-?WMZ5%(>R7u65@38u|{e5ML9_Yt1j))$UJvCoabN7l?vT4Tk zWVwiHCviAy*;dWhu67*KiVxj7VNtIh`19z){XcsB&tBL3 zXEaBj?&wn;eQ7RH_3!xMxd&B$H1LA~&ENg-sOCSZIeK+Ruj=SsEUK7!3GA-wsE^VK z;ICIyac_5Y7ur8A;1Omimxz;K9ODnblMu%^n)-q7fpY*0?EXns3tvG_n%83eE41FGnv<_M zlP{ZVoD$Y3qK^W)=cM|htD_1S*0toO6Qz( zUwxgd0;QbWhM`sA4Q@HM(K@c|Rd5aT1b5}~igT4(SDpcH&eSGVWsM-!UUmy?9Mu$B z^RCsWmF2U)JQd!lv9@MHBDxfe}`c#TSC!q*`h@vsWO!*gpW{_hDK} zpYp}pEj9RNEpl7td-m>0xU<)5YW_(IghAoySHgX#_4Iy>48f^Ks`~~PD=^<6)n@IF zY}f|u*0cH9J;nTG2G6aE4N9{1O1*VHW2BTyJ&BS;Q5@Pxse$BIsdBAeu^G=hW8|(5 zJ;^S!R*r+7_92n8Gg#g0Da?ihM!P*zYc3=%;@HDf>FcG`z7fmZHP9}` zf5F5iVh35IKMI!#*j0=J(GcV4Ze?8O!!8^oHr%wLu?);|1~;ke5Ot;e!mW7<+_0Uo zv*Ay5T-+r{pcvCw(o8bTkpfPKi*InBBTE!oSlyG(9dEISA$@7F z*kIVnpOJwHE_-dONGlM2&zr(8WG3X?$Bt8rThEG3XW?;x;o+8Hx>f+QMP3keeW7ux z+nhVhCH(N3K4G9L#H~&wqQ)JhX^y+x@cm~Wm}Hp6?oob{em*1W3Tr$JC$t!$6tVRg z5bDA!4o35@MhWQ~4s~A$2SS1IAt`Vu-h#u>kcn{3thFnaqtHgaJ!mLO+W%YJJ^tR& z?+dp$rooJtyCJ#3lo%C{g#r_F^uJ5|glrh`Zg+opI50_fd9zajnZpRi32*|fu_m_U zJwy&Tu6G-!I+(*LomXd9G~)=laiGNAUT4T_187fqg#xlK8Y*rk!x6@@99*uTSHh~w zs|av38F~VC3W90SnU>lRx(K9(WJpdR%vQ*Zt8}D13jdSwzyv#*kaQ}f$538CuJ*E| z$GWqWe?>)!yI{U28L3ZsjlSKafGyf<6f?}7)FHNH4YBh>O8JO_Zz4z*Qt~l0*N?X- 
zo!TZGk^LABlNI@9Rrynt_@7t>Q4TZ*qGbI+XARWk&m4U4;zQfR9S=il*Nd947pm{g zcZ2ub_jfINywm)WQ2JP?p2J0sHCr`dn=Wiqg>4I&kNY*DOBYDe`3QluimTfJE>RqPJ0)sYRy{)9~{!_PH9!A^{Ug;o&|sP-8!Qy59;)qW19cC?ms^5 zUTAKaeNEkYOm97|o(SlzLt68&-aL#0p-UCj#%Y#m*UDQn(||vs2|c>dqY6Ds<>G9^ zy~Kl4TKNgR`~+NE>)Y?O-QTU%@7L=w6*ZW>R(AM&@1Hk62>z(^&tSPdsR_Nh(5ni) zOkCgh=Q|(V_|btsJD?Sxc(g$)#_9Cub>Vqcc%F4ZEqhKA&gjA!RXDR)?3>}=7&dJf zbHz?+_r|ocmV1?2+14L6|8V$0 z+aHbpU|idARNrw_D?6%rj_ICbs^{23dOEKB;ko;z_qYF1`S)M`=w(dDg95GL&`-*L za_&*}$5;OHiUxKd9xvYCGtDehC4K1^;-sivf+WmbGx*r67)bj(J zjDPqVGU^TIru&vW)pyrDuIZSo>CkGn>NQ){Z6`HPukPtpJ-xKBzxu|}>F(t!cu1`r z;peqnW$pXT>bh`gu}S-1jLqoV;+76a({<R4DGi|JzttgEl_$7qp#jX!PtIZuo2f^C4_F4%^Q zKa=!v<8@k@wj1;b;j(QLU$)F4_|%^Jb%5{S;S7=vBz_BDvCO4|SNMZ`!!nm09I>6| LX$bj4He>%6a*{zM delta 7189 zcmcIJTTmQVc73Pk(eq>m=KWv>G&}|vuq+Am0LcPL2w{*dKu?e~&;uF-Gq|T&Nq8{G z<;@~LDo9t3unT9Y-K;G?c8yp0P&P@elPbSfwyT?TO4Mv{wAmt6-rCBJVyj9~rOG+C zho|KIOb>JB+;dK!bMLwLo_o*XgWKr8r;zC_qftk}bLFEg?6Y;3O-@vQxn%#CVn98} z9A^-vrC9m}>bRCw0@Sf8fO=L9(7;kG8c-fr%D?n+T0upX{YIjTJb;_B7~ph&&NAL_ zUdGl@E$gY9v(5h8_2eaL-uF_iIDlN6G%rwZ0{h)%5$d>&WuUuuRtwO<8d?1d=(v+L zK+DCVHY#Mo1IVjW!wWQgzsbcDt-c%YqrX7b6!j(oRq)L|h~hQ4oOWYZvmL*@&txA( zDa4x3DhQiH@qHs0sKX7yVV1z4Zg_=aNIbRQ5L95@Rq!wu2N*GB&JGWs&G%9 zbTA|`WfD^+FlCF3apJ7VxFyCdFz!W*U2t@ZmLAE{bK4S|v&2NpS;=yCLVZVXnQVKp zZlZ49U;%u}BNWw0uC0Qe{NaDH<(I1|t4nZgoZTn7w%_a!UG-vKgOt}W!Q9C&p3qP3 z6B*Cl1_T|tw-1}`FISR{@{!BFx0e2>R=JPTd`u&NJu6EW*ktJt-+WWAx$#?f2x(FL z4;x@9@OgN*x5j7%*3#3j$9JFdz&rc z(5QxAGYPO~bMRfa18u^I0-IKoLfI{d>+-kZ{RPF}YhVlhtiZ_lMp#_{9{g3o3H(iA zSeHf|r-%M6TK?}{onX$hNzng__99YO>$&uq?H4S2+NUK3Nh&%K6;*SPqcH9cyb^D+uub* z)RCcBbM!)*J_`WBE+kT%ov<`)v^O$5z$Zy>$&#Hd(uIW6?1FEDdZ=AJ&MK%~EBv=zCJ~J$c*LHs@=D*F|5aS@!W$2VQ` z;2Cku!$}@a@NkQPpip#Bc)CM4-6sUjh=DUw;0z4Sdj<&uiTPm3TTMS{`gyNZx%1`^ zem(lD(NB+vyE>#@9b)BSsq*mc%C5P}F5zTUtOSu>G1w;s`-EU0co-C9)np+!7%1T2 zEPd=D_FB;_p7rqeirrIFuKeY(PHFEiI(%1ClA|U=X)OL#u;_G8YiEi?XQ|{Y6`ZBu zpbSO%k0|a^)R^n2;#-OW2qb_DvTTXyER&pNg0pPV>3vN-t)7xUi;h5sQd`!5toSB*UT8dQ^G?2hN&~~bpD}_lf{^E) 
znKI>0R?TdhbqL07qH&vK+y)-fHlUmBc(yEX3b1TD3&GJq0Y^VXUn{c~2NP?X>u|MS zTZF4^=f+@7Tb*#gzjAF=6!|p9{+s7=J}G+`O?sQ zK~cu0x8!Ch0#qi4X`=C7fXcNC_B|i6LPw`y=@KnnlBEkw8cT&pbir)Bs+}sC35wbD zMR?58R7?MJNqupnLh+Gtw`pIA>SK=q8XuP^_myiuE~g1zi3nbm3-~7p!5g0_cDtHt zRG)ej(D>A+C3x{}L(}7`&mLDn%4l>CJ91X-52G1&arUd{NeFkaAPRjv~i4_3`uf4 z5$yp^j`*gml~YSbxFf)Royff--_BsM+njGGrnRr>r}d&WC|QF-NvmjWozN~=tgjxr zdhGIvt0zQDfn+HVg3Y3(c|yHlbv*n=i)d|`7}GA;U2halhh8h6E*I^^lD$|69TDwE zCiII&%VcP({d%Kd+$xZI!!?ZLMOk1YtK9PX({TZr&&wH%i8hgd3i1U$(6m%@vZlLNHel+hLh2 z17bT&Y_FlMa#yjX9XFjkAkYROnl-m176T(+$f&E=B0Trigt+Z8fb z2E=v+vAu@2%3Z~lY!j)mwX#{;|L;C09a)~4)5|E>)|v!U^-Y&(su!6CiD?j+h6Tnb zm^RJ!il$nTsgsyGfvJ;eTW{_VO>i}Dl$b_=Xgv{lHP=FF zCHuS&)N5291(_v=%8P$nQ>3L+3X(RmO1!bt zim%la(W>v^stH#acc&08*y_L!s&kx(iYrOJ+N)@zAZyi(Kz=X^7+9?^%`PmnLd-}# zeq-xSE!l}*lN+XgI9X$v4lC zCQT1S<6IMHODnq*iGj3&i>6hB5x(aPcaTuwS_lao8=^^a$41Ym)k9ojaEMPcNk~8; z9gHSPE(3ky-1y=So246Czk+YF1bhYeKQn%7M@W-9r}qeY55B!)v;Qa!=V_q~Eyrj$ z!U4j0x(sny?G}H#EF(iphb+?}%Pc|sTy0BX5m@OV#+M%wsN}@zc|fjFJcTc}S~052 zFJ#esiuLLFD4n8Nn#8y4ga5J4X=hcdgi&Qm3F)_bqyqnbXQ`2RJ$o~!xf1-~33qK; z({m;fgYl;|gVAI%(ia6vg7}0F(OfmCO`96dMEC}9*FaRho2z@^Ud~5DD*UtE8+N7{ z2%$rXcruz+FGcQ$LalarTBF<$==%!3$#wwa)IAe6)Vq&m7|LeP7%6kbT}oqCe@npl z{!A`qvQNe&V_?Q288^%{Nyf_S7vRz{z7LYz@fJ?@+}dTo;<;ad_G@JEjThPRL7&+~ zDl2=aF8`>4hwzU7r#7Hq6=V_bc{xBR%gmQ>*YuZg(a?4jXaLq_EwG& z*N!ME0}oK;*Wba@N)}skXduROPlGIX5&)1)E6!lsAvH;BR~8@_0g2UR2#vIXi?T7u zgks&pkl>sH(mf(`0svW)l*NDB9Go);XWGRv`J5PUnzzE;5~<2$Fu^P^xwjeD9OIfQ zooRawDh(lt2?_GwqRl^}xO#G?Vdl_GgJdiH;eiS6V(U139`&peEM}8^@b@)R#Y3YA z!ipk=mhw?N(NwIu$iozn$fD~6r;gI22)vwWu;ZGoX74B+qej7}DJ82U-h)Dvl2yqV zN#q^4wmHX(C=gPy>Xd3phz4w}(0?`MC=ryfNL*=~y_kY<6VkNGYdTGF*<_MS3`Dsz zU?Qy?PDZ(qiaQPsIX-aRGPyqyi)Uv<&7CC_6ysPTa~kVfJX=WoB=fm^Sul1F#|GGz z?EXk{c(6Tj zHX2W!>V=SZYH5=v@sHYP6(K!Nb%fxS^k&DFVhD{;1WRUpwHXwoRlSgK^C5lKH6=u+ z5gQISnEMFYD2vF|Hr#dC?Qs+0OUocg{PPb0z&{3j<#497n-)&?K!n`TO=92*geaQR zLt-O9plf^ZJ4ar{FC5*XcutA`{%Fw7Dn=D74P{7DomhJMRd_sXH$xF-O$D=GgJ&9? 
zusV1P4sUhmj;i>Tnhz{Da**9Uk(Z~28p@hV53AxGxcPt^=P0e_bqqByUIE7YeihO{ zu&^1|8#|$J>!4t(B?l(&!r|j?{O>2#X6~1dcB)`~y@}QFj^&|1&xnpICq1UM#3h9< zJ(Yva$~^p;->?TpfL6m1X|V=kdkyX3#QzpH;rBaL7Oi|plcMuO zWAM)Yk}p_&k>9Y2vqnsx3Zo(@jCn{!T9KWKt)tp?zFR*QvUHZ7ls2Utf$KY)GXnF; zYUP{9vWKmuEEJ13P<-jKxLj=8F{)dq+zb=b_p~2b$fU?!aTgzh`tzvnIUP}u#m=E~ z)Oq?CbqT9E1O|J8b_epmPBLA(TevldyOx zavzxM-eP-}EN7B+j8oUq^*wJh;}mR7Z_o2%Hu;9ZJq0}O5CL+u&)!A&*kDvnOXR>L z$Bpa`F%!Xh0_4avL@*NGIpTGwb3xEg(}R&AIW0-6xWsUr<#q!vq)!|ABSR6WiABlz z!5tbUuMH9)2Qn?O*~UjWKD%qL5YANsh5@8CguNG(?T1#9>?jiDmY!^8WW~Shw&rm^ z1>R9I>Q)Hlu!)uB_$xMOykquHs4@!5*-lRw@#$W#E-x_S6Z5L1yejP5ar$=5j1x@d4t<==LN=UidYRVBHqM8ji};W5GR z*u23w@yw+?6T34OD#tvDF7>?HHs!qBb+v2GR4AAV?^*Hvz68Cfx8i{_@jfSX@%hG0 zjWyIKHM;$f>!Qc@*U+EVr~rq6coEnkH`fV2*-6+)F!@TO>5lZAP4xD1KO&@Q0;~kc zag!A4xn=@LQpk1Vp_mix#Ft|2wW|{~PFODyKpZ8n8u{3iefWT>9ZazBkAa=sd%y)R zQpp#vtN*F7JCt2m|Cdp!kTR27jEXYU3TQ`15FxCAKTvqlZYZ#(2- zGj1ZEdCE90f9@dLxI;p=$-b)tGl%gX``h|02x=h!?$VUv9H|5W%qR%8!hk{riz7 diff --git a/requirements.txt b/requirements.txt index 81f555d..db86c51 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,8 @@ pip install spacy[jp] +pip install spacy[en] python -m spacy download ja_core_news_lg +python -m spacy download en_core_web_lg pip install deepl pip install openai -pip install backoff \ No newline at end of file +pip install backoff +pip install requests \ No newline at end of file