-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.ini
35 lines (27 loc) · 876 Bytes
/
config.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
; Directory names used by the application. Every value below ends with a
; trailing slash.
; NOTE(review): presumably these are relative paths that the reader joins
; directly with a file name -- confirm against the code that loads this section.
[FOLDER_NAME]
; NOTE(review): looks like the folder the input documents are read from -- confirm.
INPUT_FOLDER_NAME = DEV/
; NOTE(review): looks like the folder generated artifacts are written to -- confirm.
OUTPUT_FOLDER_NAME = output/
; NOTE(review): presumably holds intermediate (partial) index files; whether it
; lives under the output folder is not visible here -- verify.
PARTIAL_INDEX_FOLDER_NAME = partial_index/
; Bare file names (no directory component). All values use the .bin extension
; except the result database, which uses .db.
; NOTE(review): which folder each file is placed in, and the on-disk format of
; the .bin files, are not visible in this file -- confirm with the consumer.
[FILE_NAME]
DOC_IDS_FILE_NAME = doc_ids.bin
ANCHOR_TERMS_FILE_NAME = anchor_terms.bin
TERM_LINE_RELATIONSHIP_FILE_NAME = term_line_relationships.bin
INDEX_FILE_NAME = index.bin
QUERY_CACHE_FILE_NAME = query_cache.bin
STRONG_TERMS_FILE_NAME = strong_terms.bin
; NOTE(review): this key is spelled ..._FILENAME while every other key in this
; section uses ..._FILE_NAME. Renaming it would require updating the code that
; reads the key, so it is left as-is.
RESULT_DATABASE_FILENAME = result.db
; Numeric tuning parameters. INI values are plain strings; the consumer is
; responsible for converting them to int/float.
; NOTE(review): the meanings given below are inferred from the key names only
; -- confirm each against the code that reads it.
[NUMBERS]
; THRESHOLD_* keys: one integer value followed by three fractional values
; between 0 and 1.
; NOTE(review): presumably cut-offs for near-duplicate detection (sim hash) and
; for term filtering (IDF / share of documents containing a term) -- verify
; units and comparison direction.
THRESHOLD_SIM_HASH_VALUE = 3
THRESHOLD_HIGH_IDF_TERMS = 0.35
THRESHOLD_INCREASE_PERCENT = 0.2
THRESHOLD_PERCENT_OF_TERMS_IN_DOCS = 0.85
; Integer iteration count.
NUM_ITERATIONS_FOR_PAGE_RANKING = 2
; MAX_* keys: integer values, presumably upper bounds (documents per batch,
; title length, URLs per page / per query, cached query terms) -- TODO confirm
; whether each limit is inclusive.
MAX_DOCUMENTS_PER_BATCH = 500
MAX_LENGTH_FOR_TITLE = 120
MAX_NUM_URLS_PER_PAGE = 25
MAX_NUM_URLS_PER_QUERY = 10000
MAX_QUERY_CACHE_TERMS = 50
; DEFAULT_SCORE_* keys: fractional values in the range 0.3-0.5.
; NOTE(review): presumably score contributions applied during ranking for
; boolean-AND matches, positional matches, anchor text and strong terms --
; verify how they are combined.
DEFAULT_SCORE_BOOLEAN_AND = 0.3
DEFAULT_SCORE_BOOLEAN_AND_POSITION = 0.4
DEFAULT_SCORE_ANCHOR_TEXT = 0.5
DEFAULT_SCORE_STRONG_TERMS = 0.5