-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquery_object.py
45 lines (36 loc) · 1.45 KB
/
query_object.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# coding=utf-8
from lib_process import superCleanSentence,superCleanSentence_lemma
from lib_process import *
from list_term_object import List_Term_Object
from config import *
from document_object import Document_Object
from nltk.util import ngrams
class Query_Object(Document_Object):
    """A query wrapped as a document, carrying its unigrams and bigrams.

    On construction the raw query text is cleaned (stemmed or lemmatized,
    depending on ``PREPROCESS_TYPE``) and split into terms.  The results are
    kept in ``self.ngrams``, a dict keyed by n-gram order:

    * ``self.ngrams[1]`` -- list of whitespace-separated terms
    * ``self.ngrams[2]`` -- list of adjacent term pairs joined by one space
    """

    # Class-level defaults.  ``ngrams`` is replaced by a per-instance dict in
    # update_terms(); the other fields are not assigned anywhere in this class.
    # NOTE: inside methods the bare name ``ngrams`` still resolves to the
    # module-level ``nltk.util.ngrams`` function, not to this attribute,
    # because class-body names are not visible from method scopes.
    contents_obj = None
    subqueries = None
    ngrams = None
    query_entities = None
    query_predicates = None
    numeric_data = None
    aggregation_node = None

    def __init__(self, query, structure, lucene_handler):
        """Build the query object and precompute its unigrams and bigrams.

        Args:
            query: Indexable record laid out as
                ``(query_id, clustered query, raw query)``; only indices 0
                and 2 are read here.
            structure: Accepted for interface compatibility; not used by
                this constructor.
            lucene_handler: Accepted for interface compatibility; not used
                by this constructor.
        """
        super().__init__()
        # setAttr is not defined in this class -- presumably inherited from
        # Document_Object and expected to expose the value as an attribute
        # (update_terms reads ``self.querystr``).
        self.setAttr('qid', query[0])
        self.setAttr('raw_query', query[2])
        if PREPROCESS_TYPE == 'STEM':
            cleaned = superCleanSentence(query[2])
        else:
            cleaned = superCleanSentence_lemma(query[2])
        self.setAttr('querystr', cleaned)
        self.update_terms()
        self.update_bigrams()

    def update_terms(self):
        """Recompute ``self.ngrams[1]`` from the current ``self.querystr``."""
        if self.ngrams is None:
            self.ngrams = {}
        # str.split() with no separator already collapses whitespace runs and
        # drops leading/trailing whitespace, so no join/re-split is needed.
        self.ngrams[1] = self.querystr.split()

    def update_bigrams(self):
        """Recompute ``self.ngrams[2]`` from the unigrams in ``self.ngrams[1]``.

        If the unigrams have not been computed yet they are built first, so
        this method can be called on its own without raising ``KeyError``.
        Queries with fewer than two terms produce an empty bigram list.
        """
        if self.ngrams is None or 1 not in self.ngrams:
            self.update_terms()
        self.ngrams[2] = [
            pair[0] + ' ' + pair[1] for pair in ngrams(self.ngrams[1], 2)
        ]