-
Notifications
You must be signed in to change notification settings - Fork 7
/
main.py
234 lines (210 loc) · 6.99 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
"""
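Command-line entry point for the malicious URL blocklist generator.

Parses the command-line flags and hands them to ``process_flags``.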
"""
import os
from argparse import (
Action,
ArgumentDefaultsHelpFormatter,
ArgumentParser,
RawDescriptionHelpFormatter,
RawTextHelpFormatter,
)
from modules.process_flags import process_flags
class CustomFormatter(
    RawTextHelpFormatter,
    RawDescriptionHelpFormatter,
    ArgumentDefaultsHelpFormatter,
):
    """Help formatter that mixes three stock argparse formatters.

    The class adds no behaviour of its own; it only combines its bases:

    * ``RawTextHelpFormatter`` / ``RawDescriptionHelpFormatter`` keep the
      whitespace and line breaks of help and description text as written.
    * ``ArgumentDefaultsHelpFormatter`` appends each argument's default
      value to its help message.
    """
class MinimumOneAction(Action):
    """Store an argument's value after checking that it is at least 1.

    Meant to be passed as ``action=`` to ``add_argument`` together with a
    numeric ``type`` (the parser in this file uses ``type=int``).
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate ``values`` and store it on ``namespace``.

        Args:
            parser: The parser invoking this action; ``parser.error`` is
                called when validation fails.
            namespace: Namespace object that receives the parsed value.
            values: The converted command-line value, or a list of values
                when the argument was declared with ``nargs``.
            option_string: The option string used on the command line.
                argparse leaves it as ``None`` for positional arguments.
        """
        # Positional arguments have no option string; name them by their
        # destination so the message never reads "... for None is 1".
        label = option_string if option_string is not None else self.dest

        # With ``nargs`` argparse passes a list; check every element
        # rather than comparing the list itself against an int.
        candidates = values if isinstance(values, (list, tuple)) else [values]
        if any(candidate < 1 for candidate in candidates):
            parser.error(f"Minimum input value for {label} is 1")

        setattr(namespace, self.dest, values)
if __name__ == "__main__":
    # Script entry point: declare the command-line interface, parse the
    # arguments, and hand them to ``process_flags`` as a plain dict.
    #
    # NOTE: CustomFormatter renders help and description text verbatim, so
    # the bodies of the triple-quoted strings below are deliberately kept
    # flush-left; indenting them would indent the rendered --help output.
    parser = ArgumentParser(
        description="""
Generate malicious URL blocklists for
DNSBL applications like pfBlockerNG or Pi-hole using the
Safe Browsing API from Google and/or Yandex, with URLs
sourced from various public lists like
Tranco TOP1M, DomCop TOP10M, and Domains Project.
For example, to generate a blocklist of malicious URLs
from Tranco TOP1M using Google Safe Browsing API,
run `python3 main.py --fetch-urls --identify-malicious-urls
--sources top1m --vendors google`
""",
        formatter_class=CustomFormatter,
        # Long options must be spelled out in full; unambiguous
        # abbreviations are not accepted.
        allow_abbrev=False,
    )

    # --- Actions -----------------------------------------------------------
    parser.add_argument(
        "-f",
        "--fetch-urls",
        dest="fetch",
        action="store_true",
        help="""
Fetch URL datasets from local and/or remote sources,
and update database with URL datasets
""",
    )
    parser.add_argument(
        "-u",
        "--update-hashes",
        action="store_true",
        help="""
Download the latest Safe Browsing API malicious
URL full hashes and update database with full hashes.
(WARNING: Enabling this flag will
cost more than 5000 Safe Browsing API calls)
""",
    )

    # --identify-malicious-urls and --retrieve-known-malicious-urls
    # cannot be combined, so they share a mutually exclusive group.
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "-i",
        "--identify-malicious-urls",
        dest="identify",
        action="store_true",
        help="""
Use Safe Browsing API hashes to identify malicious URLs in database,
write the URLs to a .txt file blocklist,
and update database with these malicious URLs
(this flag cannot be enabled together
with '--retrieve-known-malicious-urls')
""",
    )
    group.add_argument(
        "-r",
        "--retrieve-known-malicious-urls",
        dest="retrieve",
        action="store_true",
        help="""
Retrieve URLs in database that have been flagged
as malicious from past scans, then create a .txt file blocklist
(this flag cannot be enabled together with '--identify-malicious-urls')
""",
    )

    # --- URL sources -------------------------------------------------------
    # Maps each accepted --sources value to the description shown in --help.
    sources = {
        "top1m": "Tranco TOP1M",
        "top10m": "DomCop TOP10M",
        "r01": "Registrar R01 (.ru, .su, .rf)",
        "cubdomain": "CubDomain.com",
        "icann": "ICANN zone files (ICANN Terms-of-Service "
        "download limit per zone file: Once every 24 hours)",
        "domainsproject": "domainsproject.org",
        "ec2": "Amazon Web Services EC2 public hostnames",
        "gce": "Google Compute Engine public hostnames",
        "openintel": "OpenINTEL.nl (.nu .se .ee .gov .fed.us)",
        "switch_ch": "Switch.ch (.ch .li)",
        "afnic": "AFNIC.fr (.fr .re .pm .tf .wf .yt)",
        "internet_ee": "Internet.ee (.ee)",
        "internetstiftelsen": "Internetstiftelsen",
        "sknic": "SK-NIC.sk (.sk)",
        "googletag": "Google Threat Analysis Group Indicators of Compromise",
        "ipv4": "ipv4 addresses",
    }
    # One "name -> description" line per source, for the help text.
    source_lines = os.linesep.join(
        f"{name} -> {description}" for name, description in sources.items()
    )
    parser.add_argument(
        "-s",
        "--sources",
        nargs="+",
        required=False,
        choices=list(sources),
        help=f"""
(OPTIONAL: Omit this flag to use all URL sources)
Choose 1 or more URL sources
----------------------------
{source_lines}
""",
        default=list(sources),
        type=str,
    )
    parser.add_argument(
        "--cubdomain-num-days",
        required=False,
        help="""
(OPTIONAL: Omit this flag to fetch and/or
analyse the entire CubDomain.com dataset)
Counting back from current date, the number of days of CubDomain.com
data to fetch and/or analyse. By default all available data
dating back to 25 June 2017 will be considered.
If 'cubdomain' is not enabled in `--sources`,
this flag will be silently ignored.
""",
        default=None,
        type=int,
        action=MinimumOneAction,
    )
    parser.add_argument(
        "--afnic-num-days",
        required=False,
        help="""
(OPTIONAL: Omit this flag to fetch only
monthly archives and not fetch AFNIC.fr daily updates)
Counting back from current date, the number of days of AFNIC.fr
daily updates to fetch and/or analyse.
By default only monthly archives will be fetched,
and no daily updates will be fetched.
If 'afnic' is not enabled in `--sources`,
this flag will be silently ignored.
""",
        default=None,
        type=int,
        action=MinimumOneAction,
    )

    # --- Safe Browsing API vendors -----------------------------------------
    # Maps each accepted --vendors value to the description shown in --help.
    choices = {
        "google": "Google Safe Browsing API",
        "yandex": "Yandex Safe Browsing API",
    }
    # One "name -> description" line per vendor, for the help text.
    vendor_lines = os.linesep.join(
        f"{name} -> {description}" for name, description in choices.items()
    )
    parser.add_argument(
        "-v",
        "--vendors",
        nargs="+",
        required=False,
        choices=list(choices),
        help=f"""
(OPTIONAL: Omit this flag to use all Safe Browsing API vendors)
Choose 1 or more Safe Browsing API vendors
------------------------------------------
{vendor_lines}
""",
        default=list(choices),
        type=str,
    )

    # --- Runtime options ---------------------------------------------------
    parser.add_argument(
        "-n",
        "--num-cpus",
        required=False,
        help="""
(OPTIONAL: Omit this flag to use all available CPUs)
Number of CPUs to use for parallel processes. By default
all available CPUs will be used.
""",
        default=None,
        type=int,
        action=MinimumOneAction,
    )
    parser.add_argument(
        "--include-dashboard",
        action="store_true",
        help="""
Whether or not to start the Ray dashboard,
which displays the status of the Ray cluster.
""",
    )
    parser.add_argument(
        "--blocklist-suffix",
        required=False,
        type=str,
        help="Suffix to be added to blocklist filenames when uploading to GitHub",
        default=None,
    )

    args = parser.parse_args()

    # Vendor names are passed on capitalised ("Google", "Yandex") and in
    # sorted order.
    args.vendors = sorted(vendor.capitalize() for vendor in args.vendors)

    # At least one of the four action flags must be given.
    if not (args.fetch or args.update_hashes or args.identify or args.retrieve):
        parser.error("No action requested, add -h for help")

    process_flags(parser_args=vars(args))