-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoperation.py
28 lines (22 loc) · 938 Bytes
/
operation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import re, string
from numpy import random
class processing:
    """Split a text into words or sentences and draw random samples of them."""

    # Translation table mapping every ASCII punctuation character to a space.
    # Using a table (instead of an unescaped regex character class) makes sure
    # characters that are special inside ``[...]`` -- notably the backslash --
    # are replaced as well.
    _PUNCT_TO_SPACE = str.maketrans(
        string.punctuation, ' ' * len(string.punctuation)
    )

    def __init__(self, text):
        """Store the raw text that every other method operates on."""
        self.text = text

    def random_words(self, number):
        """Return ``number`` distinct random words from the text.

        The words are sampled without replacement from ``word_list()`` using
        ``numpy.random.choice``, so the result is a NumPy array.

        Returns ``False`` when the text has fewer than ``number`` unique
        words (kept for backward compatibility with existing callers).
        """
        words = self.word_list()
        if len(words) < number:
            return False
        return random.choice(words, number, replace=False)

    def random_sentences(self, number):
        """Return ``number`` distinct random sentences from the text.

        The sentences are sampled without replacement from
        ``sentence_list()`` using ``numpy.random.choice``.

        Returns ``False`` when the text has fewer than ``number`` sentences
        (kept for backward compatibility with existing callers).
        """
        sentences = self.sentence_list()
        if len(sentences) < number:
            return False
        return random.choice(sentences, number, replace=False)

    def word_list(self):
        """Return the unique words of the text, in first-seen order.

        ASCII punctuation is treated as a separator, the text is split on
        any run of whitespace, and tokens of length 0 or 1 are dropped.
        Matching is case-sensitive: ``"The"`` and ``"the"`` are different
        words.
        """
        tokens = self.text.translate(self._PUNCT_TO_SPACE).split()
        # Build a new sequence rather than deleting from the list while
        # iterating over it, which would skip the element following each
        # removed one.  dict.fromkeys de-duplicates while keeping order, so
        # the result is deterministic across runs.
        return list(dict.fromkeys(tok for tok in tokens if len(tok) > 1))

    def sentence_list(self):
        """Return the sentences of the text as produced by NLTK's Punkt model.

        NOTE(review): requires the NLTK ``punkt`` resource to be available
        locally; ``nltk.data.load`` fails otherwise -- confirm how it is
        provisioned in deployment.
        """
        # Imported here so NLTK is only needed when sentences are requested.
        import nltk.data

        tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
        return tokenizer.tokenize(self.text)