forked from ahmia/ahmia-index
-
Notifications
You must be signed in to change notification settings - Fork 0
/
filter_onions.py
68 lines (58 loc) · 2.04 KB
/
filter_onions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# -*- coding: utf-8 -*-
""" Filter some onions from the index """
import sys
from elasticsearch import Elasticsearch, NotFoundError
import settings
def print_error_and_quit():
    """Print the usage information and terminate the script.

    Writes a short description, the usage line and an example invocation
    to standard output, then raises ``SystemExit`` with status 2 so that
    shells and calling scripts can tell the invocation was rejected.
    """
    print("Filters an onion domain from the index.\n")
    print("Usage: python filter_onions.py some.onion")
    print("Example: python filter_onions.py msydqstlz2kzerdg.onion\n")
    # A bare sys.exit() reports success (status 0) even though the command
    # line was invalid; 2 is the customary status for usage errors.
    sys.exit(2)
def filter_content(es, domain):
    """Flag every not-yet-banned document of *domain* as banned.

    Searches ``settings.ES_TOR_INDEX`` for documents whose ``domain`` field
    matches *domain* and whose ``is_banned`` field does not match ``True``,
    then issues one partial update per hit that sets ``is_banned`` to
    ``True``. Progress is printed to standard output.

    At most one batch of documents is handled per call. When the batch comes
    back full, a notice is printed because further matching documents may
    still be in the index.

    Args:
        es: Elasticsearch client used for the ``search`` and ``update`` calls.
        domain: Onion domain to match against the ``domain`` field.

    Errors are reported on standard output rather than propagated:
    ``NotFoundError`` and any other ``Exception`` raised by the search or by
    an update stop the run at that point and print a message.
    """
    # Upper bound on the number of hits fetched (and updated) in one call.
    # NOTE(review): Elasticsearch's default index.max_result_window is 10000,
    # so this size presumably relies on the index raising that setting.
    # TODO confirm against the index configuration.
    batch_size = 30000

    # Query to find documents matching the domain that are not banned yet
    query = {
        "size": batch_size,
        "query": {
            "bool": {
                "must": [
                    {"match": {"domain": domain}}
                ],
                "must_not": [
                    {"match": {"is_banned": True}}
                ]
            }
        }
    }

    try:
        response = es.search(index=settings.ES_TOR_INDEX, body=query)
        hits = response["hits"]["hits"]
        total = len(hits)
        print(f"Found {total} documents to update.")

        for position, hit in enumerate(hits, start=1):
            doc_id = hit['_id']
            # Update through the index the hit came from, as reported by
            # the search response itself.
            index_name = hit['_index']
            print(f"{position}/{total} - Updating document ID: {doc_id}")
            # Update document to set is_banned to True
            es.update(index=index_name, id=doc_id, body={"doc": {"is_banned": True}})
            print(f"Document {doc_id} updated.")

        if total >= batch_size:
            # A full batch means the search was truncated; do not let the
            # closing message suggest that the whole domain was handled.
            print(
                f"Reached the batch limit of {batch_size} documents; "
                "more matching documents may remain. Run the script again."
            )
        print("Filtered the content.")
    except NotFoundError:
        print("Index not found. Please check the index name.")
    except Exception as e:
        print(f"An error occurred: {str(e)}")
def main():
    """Validate the command line, connect to Elasticsearch and run the filter.

    Exactly one argument of at least 22 characters is expected; anything
    else prints the usage text and exits. The client is configured from the
    ``settings`` module (host, CA certificates, basic-auth credentials).
    """
    arguments = sys.argv[1:]
    if len(arguments) != 1 or len(arguments[0]) < 22:
        print_error_and_quit()
    domain = arguments[0]

    # Build the Elasticsearch client from the project settings
    hosts = [settings.ES_HOST]
    ca_bundle = settings.ES_CA_CERTS
    credentials = (settings.ES_USERNAME, settings.ES_PASSWORD)
    es = Elasticsearch(hosts, ca_certs=ca_bundle, basic_auth=credentials)

    filter_content(es, domain)
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()