toxicity.py
from googleapiclient import discovery
# HttpError is raised by the client when the API rejects a request
from googleapiclient.errors import HttpError
# Import the Perspective API key and the toxicity attribute names/definitions from config.py
from config import perspective_api_key, toxicity_names, toxicity_definitions
# The re module is used to strip markdown formatting from messages
import re

# Create an instance of the Perspective API (Comment Analyzer) client
client = discovery.build(
    "commentanalyzer",
    "v1alpha1",
    developerKey=perspective_api_key,
    discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
    static_discovery=False,
)
# Request template for English messages: all ten attributes are requested.
# 'languages' is left empty so the API detects the language itself.
analyze_request = {
    'comment': {'text': ''},
    'requestedAttributes': {'TOXICITY': {}, 'SEVERE_TOXICITY': {}, 'IDENTITY_ATTACK': {}, 'INSULT': {}, 'PROFANITY': {}, 'THREAT': {}, 'SEXUALLY_EXPLICIT': {}, 'FLIRTATION': {}, 'OBSCENE': {}, 'SPAM': {}},
    'languages': [],
    'doNotStore': True
}
# Fallback template used when the full attribute set is rejected for the
# detected language: only the more widely supported attributes are requested.
analyze_request_not_en = {
    'comment': {'text': ''},
    'requestedAttributes': {'TOXICITY': {}, 'SEVERE_TOXICITY': {}, 'IDENTITY_ATTACK': {}, 'INSULT': {}, 'PROFANITY': {}, 'THREAT': {}},
    'languages': [],
    'doNotStore': True
}
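# For reference, the only response fields read below have this nested shape
# (an abridged sketch; 0.02 is just an illustrative number):
#   response['attributeScores']['TOXICITY']['summaryScore']['value']  ->  0.02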
def get_toxicity(message: str):
    # Strip markdown from the message first so formatting characters cannot be
    # used to sneak text past the analysis. Double markers (bold, underline,
    # strikethrough, code blocks) are handled before their single-character
    # counterparts, otherwise they would only be partially removed.
    message = re.sub(r'\*\*([^*]+)\*\*', r'\1', message)    # **bold**
    message = re.sub(r'\*([^*]+)\*', r'\1', message)        # *italic*
    message = re.sub(r'\_\_([^_]+)\_\_', r'\1', message)    # __underline__
    message = re.sub(r'\_([^_]+)\_', r'\1', message)        # _italic_
    message = re.sub(r'\|\|([^|]+)\|\|', r'\1', message)    # ||spoiler||
    message = re.sub(r'\~\~([^~]+)\~\~', r'\1', message)    # ~~strikethrough~~
    message = re.sub(r'\~([^~]+)\~', r'\1', message)        # ~strikethrough~
    message = re.sub(r'\`\`\`([^`]+)\`\`\`', r'\1', message)    # ```code block```
    message = re.sub(r'\`([^`]+)\`', r'\1', message)        # `inline code`
    message = re.sub(r'\:([^:]+)\:', r'\1', message)        # :emoji_name:
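    # Example of the stripping above (a hypothetical input, not from the project):
    #   '||secret|| **BUY** `now`'  ->  'secret BUY now'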
    # Try the full English request first; if the API rejects it (for example with
    # 'errorType': 'LANGUAGE_NOT_SUPPORTED_BY_ATTRIBUTE'), retry with the reduced
    # attribute set.
    try:
        analyze_request['comment']['text'] = message
        response = client.comments().analyze(body=analyze_request).execute()
    except HttpError:
        analyze_request_not_en['comment']['text'] = message
        response = client.comments().analyze(body=analyze_request_not_en).execute()
    # Return the summary scores in a fixed order. The fallback response only
    # contains the first six attributes, so the list has either ten or six entries.
    attribute_order = ('TOXICITY', 'SEVERE_TOXICITY', 'IDENTITY_ATTACK', 'INSULT',
                       'PROFANITY', 'THREAT', 'SEXUALLY_EXPLICIT', 'FLIRTATION',
                       'OBSCENE', 'SPAM')
    return [float(response['attributeScores'][name]['summaryScore']['value'])
            for name in attribute_order if name in response['attributeScores']]
# Test section
def test():
    labels = ('TOXICITY', 'SEVERE_TOXICITY', 'IDENTITY_ATTACK', 'INSULT',
              'PROFANITY', 'THREAT', 'SEXUALLY_EXPLICIT', 'FLIRTATION',
              'OBSCENE', 'SPAM')
    print("Testing toxicity.py...")
    print("Hello world:")
    result = get_toxicity('Hello world')
    print("; ".join(f"{label}: {score}" for label, score in zip(labels, result)))
    print("HELLO WORLD GET ABSOLUTELY BUY MY NEW MERCH OMGGGGGGG:")
    result = get_toxicity('HELLO WORLD GET ABSOLUTELY BUY MY NEW MERCH OMGGGGGGG')
    print("; ".join(f"{label}: {score}" for label, score in zip(labels, result)))
# Uncomment the following line to run the manual test (it makes real API calls)
#test()
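
# A possible usage sketch (an illustrative addition, not part of the original
# module): flag a message based on its first returned score (TOXICITY). The
# 0.8 threshold and the helper's name are arbitrary example choices, not values
# defined anywhere in this project.
def is_probably_toxic(message: str, threshold: float = 0.8) -> bool:
    scores = get_toxicity(message)
    return scores[0] >= threshold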