Skip to content

Commit

Permalink
[mod] comprehensive revision of the searxng_extra/update/ scripts
Browse files Browse the repository at this point in the history
- pylint all scripts
- fix some errors reported by pyright
- from searx.data import data_dir (Path.open)
- fix import from pygments.formatters.html

NOTE: no functional changes!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
  • Loading branch information
return42 committed Mar 10, 2024
1 parent 0ffec44 commit ce4aaf6
Show file tree
Hide file tree
Showing 10 changed files with 51 additions and 61 deletions.
14 changes: 5 additions & 9 deletions searxng_extra/update/update_ahmia_blacklist.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,23 @@
"""
# pylint: disable=use-dict-literal

from os.path import join

import requests
from searx import searx_dir
from searx.data import data_dir

DATA_FILE = data_dir / 'ahmia_blacklist.txt'
URL = 'https://ahmia.fi/blacklist/banned/'


def fetch_ahmia_blacklist():
    """Fetch the Ahmia blacklist and return its entries.

    Sends a GET request to ``URL`` (3 second timeout) and splits the response
    body on whitespace.

    :returns: list of the whitespace separated tokens of the response body
    :raises Exception: if the HTTP status code of the response is not 200
    """
    resp = requests.get(URL, timeout=3.0)
    if resp.status_code != 200:
        # ``status_code`` is an integer, so it is formatted into the message;
        # ``str + int`` would raise a TypeError that hides the HTTP error.
        # pylint: disable=broad-exception-raised
        raise Exception(f"Error fetching Ahmia blacklist, HTTP code {resp.status_code}")
    return resp.text.split()


def get_ahmia_blacklist_filename():
return join(join(searx_dir, "data"), "ahmia_blacklist.txt")


if __name__ == '__main__':
blacklist = fetch_ahmia_blacklist()
with open(get_ahmia_blacklist_filename(), "w", encoding='utf-8') as f:
blacklist.sort()
with DATA_FILE.open("w", encoding='utf-8') as f:
f.write('\n'.join(blacklist))
15 changes: 5 additions & 10 deletions searxng_extra/update/update_currencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,11 @@
import unicodedata
import json

# set path
from os.path import join

from searx import searx_dir
from searx.locales import LOCALE_NAMES, locales_initialize
from searx.engines import wikidata, set_loggers
from searx.data import data_dir

DATA_FILE = data_dir / 'currencies.json'

set_loggers(wikidata, 'wikidata')
locales_initialize()
Expand Down Expand Up @@ -133,10 +132,6 @@ def fetch_db():
return db


def get_filename():
return join(join(searx_dir, "data"), "currencies.json")


def main():

db = fetch_db()
Expand All @@ -156,8 +151,8 @@ def main():
if len(db['names'][name]) == 1:
db['names'][name] = db['names'][name][0]

with open(get_filename(), 'w', encoding='utf8') as f:
json.dump(db, f, ensure_ascii=False, indent=4)
with DATA_FILE.open('w', encoding='utf8') as f:
json.dump(db, f, indent=4, sort_keys=True, ensure_ascii=False)


if __name__ == '__main__':
Expand Down
7 changes: 5 additions & 2 deletions searxng_extra/update/update_engine_descriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
from searx.utils import gen_useragent, detect_language
import searx.search
import searx.network
from searx.data import data_dir

DATA_FILE = data_dir / 'engine_descriptions.json'

set_loggers(wikidata, 'wikidata')
locales_initialize()
Expand Down Expand Up @@ -362,8 +365,8 @@ def main():
fetch_website_descriptions()

output = get_output()
with open(get_engine_descriptions_filename(), 'w', encoding='utf8') as f:
f.write(json.dumps(output, indent=1, separators=(',', ':'), ensure_ascii=False))
with DATA_FILE.open('w', encoding='utf8') as f:
f.write(json.dumps(output, indent=1, separators=(',', ':'), sort_keys=True, ensure_ascii=False))


if __name__ == "__main__":
Expand Down
6 changes: 3 additions & 3 deletions searxng_extra/update/update_engine_traits.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,17 +144,17 @@ def write_languages_file(sxng_tag_list):

item = (
sxng_tag,
sxng_locale.get_language_name().title(),
sxng_locale.get_language_name().title(), # type: ignore
sxng_locale.get_territory_name() or '',
sxng_locale.english_name.split(' (')[0],
sxng_locale.english_name.split(' (')[0] if sxng_locale.english_name else '',
UnicodeEscape(flag),
)

language_codes.append(item)

language_codes = tuple(language_codes)

with open(languages_file, 'w', encoding='utf-8') as new_file:
with languages_file.open('w', encoding='utf-8') as new_file:
file_content = "{header} {language_codes}{footer}".format(
header=languages_file_header,
language_codes=pformat(language_codes, width=120, indent=4)[1:-1],
Expand Down
11 changes: 4 additions & 7 deletions searxng_extra/update/update_external_bangs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,17 @@
"""

from pathlib import Path
import json

import httpx

from searx import searx_dir
from searx.external_bang import LEAF_KEY
from searx.data import data_dir

DATA_FILE = data_dir / 'external_bangs.json'

BANGS_URL = 'https://duckduckgo.com/bang.js'
"""JSON file which contains the bangs."""

BANGS_DATA_FILE = Path(searx_dir) / 'data' / 'external_bangs.json'

HTTPS_COLON = 'https:'
HTTP_COLON = 'http:'

Expand All @@ -36,8 +33,8 @@ def main():
'version': 0,
'trie': trie,
}
with open(BANGS_DATA_FILE, 'w', encoding="utf8") as f:
json.dump(output, f, sort_keys=True, ensure_ascii=False, indent=4)
with DATA_FILE.open('w', encoding="utf8") as f:
json.dump(output, f, indent=4, sort_keys=True, ensure_ascii=False)


def merge_when_no_leaf(node):
Expand Down
15 changes: 6 additions & 9 deletions searxng_extra/update/update_firefox_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@

import json
import re
from os.path import join
from urllib.parse import urlparse, urljoin
from packaging.version import parse

import requests
from lxml import html
from searx import searx_dir
from searx.data import data_dir

DATA_FILE = data_dir / 'useragents.json'

URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
RELEASE_PATH = '/pub/firefox/releases/'
Expand All @@ -41,7 +42,7 @@ def fetch_firefox_versions():
resp = requests.get(URL, timeout=2.0)
if resp.status_code != 200:
# pylint: disable=broad-exception-raised
raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code)
raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code) # type: ignore
dom = html.fromstring(resp.text)
versions = []

Expand Down Expand Up @@ -74,11 +75,7 @@ def fetch_firefox_last_versions():
return result


def get_useragents_filename():
return join(join(searx_dir, "data"), "useragents.json")


if __name__ == '__main__':
useragents["versions"] = fetch_firefox_last_versions()
with open(get_useragents_filename(), "w", encoding='utf-8') as f:
json.dump(useragents, f, indent=4, ensure_ascii=False)
with DATA_FILE.open('w', encoding='utf-8') as f:
json.dump(useragents, f, indent=4, sort_keys=True, ensure_ascii=False)
14 changes: 7 additions & 7 deletions searxng_extra/update/update_locales.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
#!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Update locale names in :origin:`searx/data/locales.json` used by
:ref:`searx.locales`
- :py:obj:`searx.locales.RTL_LOCALES`
- :py:obj:`searx.locales.LOCALE_NAMES`
"""
# pylint: disable=invalid-name
from __future__ import annotations

from typing import Set
import json
from pathlib import Path
import os

import babel
import babel.languages
Expand Down Expand Up @@ -61,7 +62,7 @@ def main():
"RTL_LOCALES": sorted(RTL_LOCALES),
}

with open(LOCALE_DATA_FILE, 'w', encoding='utf-8') as f:
with LOCALE_DATA_FILE.open('w', encoding='utf-8') as f:
json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False)


Expand All @@ -84,11 +85,10 @@ def get_locale_descr(locale: babel.Locale, tr_locale):
return native_language
return native_language + ' (' + english_language + ')'

else:
result = native_language + ', ' + native_territory + ' (' + english_language
if english_territory:
return result + ', ' + english_territory + ')'
return result + ')'
result = native_language + ', ' + native_territory + ' (' + english_language
if english_territory:
return result + ', ' + english_territory + ')'
return result + ')'


def _get_locale_descr(locale: babel.Locale, tr_locale: str) -> tuple[str, str]:
Expand Down
13 changes: 5 additions & 8 deletions searxng_extra/update/update_osm_keys_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,14 @@

import json
import collections
from pathlib import Path

from searx import searx_dir
from searx.network import set_timeout_for_thread
from searx.engines import wikidata, set_loggers
from searx.sxng_locales import sxng_locales
from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
from searx.data import data_dir

DATA_FILE = data_dir / 'osm_keys_tags.json'

set_loggers(wikidata, 'wikidata')

Expand Down Expand Up @@ -203,16 +204,12 @@ def optimize_keys(data):
return data


def get_osm_tags_filename():
return Path(searx_dir) / "data" / "osm_keys_tags.json"


if __name__ == '__main__':

set_timeout_for_thread(60)
result = {
'keys': optimize_keys(get_keys()),
'tags': optimize_tags(get_tags()),
}
with open(get_osm_tags_filename(), 'w', encoding="utf8") as f:
json.dump(result, f, indent=4, ensure_ascii=False, sort_keys=True)
with DATA_FILE.open('w', encoding="utf8") as f:
json.dump(result, f, indent=4, sort_keys=True, ensure_ascii=False)
8 changes: 5 additions & 3 deletions searxng_extra/update/update_pygments.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
#!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Update pygments style
Call this script after each upgrade of pygments
"""
# pylint: disable=too-few-public-methods

from pathlib import Path
import pygments
from pygments.formatters import HtmlFormatter
from pygments.formatters.html import HtmlFormatter

from searx import searx_dir

Expand Down Expand Up @@ -41,7 +43,7 @@
"""


class Formatter(HtmlFormatter):
class Formatter(HtmlFormatter): # pylint: disable=missing-class-docstring
@property
def _pre_style(self):
return 'line-height: 100%;'
Expand All @@ -67,5 +69,5 @@ def generat_css(light_style, dark_style) -> str:

if __name__ == '__main__':
print("update: %s" % LESS_FILE)
with open(LESS_FILE, 'w') as f:
with LESS_FILE.open('w', encoding='utf8') as f:
f.write(generat_css('default', 'lightbulb'))
9 changes: 6 additions & 3 deletions searxng_extra/update/update_wikidata_units.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@

from searx import searx_dir
from searx.engines import wikidata, set_loggers
from searx.data import data_dir

DATA_FILE = data_dir / 'wikidata_units.json'

set_loggers(wikidata, 'wikidata')

Expand Down Expand Up @@ -58,9 +61,9 @@ def get_data():


def get_wikidata_units_filename():
    """Return the path of ``wikidata_units.json`` below ``searx_dir``.

    :returns: ``<searx_dir>/data/wikidata_units.json`` as a string
    """
    # The last component must be the file name: joining an empty string would
    # yield the data directory (with a trailing separator), not a file path.
    return join(join(searx_dir, "data"), "wikidata_units.json")


if __name__ == '__main__':
with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f:
json.dump(get_data(), f, indent=4, ensure_ascii=False)
with DATA_FILE.open('w', encoding="utf8") as f:
json.dump(get_data(), f, indent=4, sort_keys=True, ensure_ascii=False)

0 comments on commit ce4aaf6

Please sign in to comment.