-
Notifications
You must be signed in to change notification settings - Fork 0
/
index
31 lines (25 loc) · 970 Bytes
/
index
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/usr/bin/env python3
import argparse
import json
import os
import subprocess
# Indexing options per supported collection name (currently only robust04).
index_options = {
    "robust04": {
        "collection": "TrecCollection",
        "generator": "JsoupGenerator",
    },
}


def build_index_command(name, path, threads=None):
    """Return the argv list that runs IndexCollection for one collection.

    Args:
        name: Collection name; must be a key of ``index_options``.
        path: Location of the raw collection, passed to ``-input``.
        threads: Value for ``-threads``. Defaults to ``os.cpu_count()``,
            falling back to 1 when the CPU count cannot be determined.

    Returns:
        A list of strings suitable for ``subprocess.run`` without a shell.

    Raises:
        KeyError: If ``name`` has no entry in ``index_options``.
    """
    try:
        options = index_options[name]
    except KeyError:
        raise KeyError(
            "no indexing options defined for collection {!r}".format(name)
        ) from None

    if threads is None:
        # os.cpu_count() may return None; never pass "None" to the indexer.
        threads = os.cpu_count() or 1

    # Build argv explicitly instead of formatting a string and splitting on
    # whitespace, so an input path containing spaces stays a single argument.
    return [
        "/bin/sh",
        "anserini/target/appassembler/bin/IndexCollection",
        "-collection", options["collection"],
        "-generator", options["generator"],
        "-threads", str(threads),
        "-input", str(path),
        "-index", "lucene-index.{}.pos+docvectors+rawdocs".format(name),
        "-storePositions",
        "-storeDocvectors",
        "-storeRawDocs",
    ]


def main():
    """Parse ``--json`` and run IndexCollection once per listed collection.

    The ``--json`` argument is decoded with ``json.loads`` and must contain a
    ``"collections"`` list whose items each have ``"name"`` and ``"path"``
    keys. Unrecognised command-line arguments are ignored.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--json', type=json.loads, required=True, help='the args')
    # parse_known_args tolerates extra arguments instead of erroring out.
    args, _ = parser.parse_known_args()

    for entry in args.json["collections"]:
        # NOTE: the indexer's exit status is not checked, as in the original.
        subprocess.run(build_index_command(entry["name"], entry["path"]))


if __name__ == "__main__":
    main()