Update pylint and fix linting errors

meilisearch · Dec 1, 2022 · e2b3b43
1 parent e89b9d0
commit e2b3b43
Show file tree

Hide file tree

Showing 9 changed files with 40 additions and 105 deletions.
diff --git a/.pylintrc b/.pylintrc
@@ -60,16 +60,7 @@ confidence=
 # --enable=similarities". If you want to run only the classes checker, but have
 # no Warning level messages displayed, use "--disable=all --enable=classes
 # --disable=W".
-disable=print-statement,
-        parameter-unpacking,
-        unpacking-in-except,
-        old-raise-syntax,
-        backtick,
-        long-suffix,
-        old-ne-operator,
-        old-octal-literal,
-        import-star-module-level,
-        non-ascii-bytes-literal,
+disable=
         raw-checker-failed,
         bad-inline-option,
         locally-disabled,
@@ -78,69 +69,7 @@ disable=print-statement,
         useless-suppression,
         deprecated-pragma,
         use-symbolic-message-instead,
-        apply-builtin,
-        basestring-builtin,
-        buffer-builtin,
-        cmp-builtin,
-        coerce-builtin,
-        execfile-builtin,
-        file-builtin,
-        long-builtin,
-        raw_input-builtin,
-        reduce-builtin,
-        standarderror-builtin,
-        unicode-builtin,
-        xrange-builtin,
-        coerce-method,
-        delslice-method,
-        getslice-method,
-        setslice-method,
-        no-absolute-import,
-        old-division,
-        dict-iter-method,
-        dict-view-method,
-        next-method-called,
-        metaclass-assignment,
-        indexing-exception,
-        raising-string,
-        reload-builtin,
-        oct-method,
-        hex-method,
-        nonzero-method,
-        cmp-method,
-        input-builtin,
-        round-builtin,
-        intern-builtin,
-        unichr-builtin,
-        map-builtin-not-iterating,
-        zip-builtin-not-iterating,
-        range-builtin-not-iterating,
-        filter-builtin-not-iterating,
-        using-cmp-argument,
-        eq-without-hash,
-        div-method,
-        idiv-method,
-        rdiv-method,
-        exception-message-attribute,
-        invalid-str-codec,
-        sys-max-int,
-        bad-python3-import,
-        deprecated-string-function,
-        deprecated-str-translate-call,
-        deprecated-itertools-function,
-        deprecated-types-field,
-        next-method-defined,
-        dict-items-not-iterating,
-        dict-keys-not-iterating,
-        dict-values-not-iterating,
-        deprecated-operator-function,
-        deprecated-urllib-function,
-        xreadlines-attribute,
-        deprecated-sys-function,
-        exception-escape,
-        comprehension-escape,
         arguments-differ,
-        bad-continuation,
         broad-except,
         consider-using-in,
         dangerous-default-value,
@@ -152,7 +81,6 @@ disable=print-statement,
         literal-comparison,
         missing-docstring,
         no-else-return,
-        no-self-use,
         pointless-string-statement,
         protected-access,
         redefined-builtin,
@@ -364,13 +292,6 @@ max-line-length=120
 # Maximum number of lines in a module.
 max-module-lines=1000
 
-# List of optional constructs for which whitespace checking is disabled. `dict-
-# separator` is used to allow tabulation in dicts, etc.: {1  : 1,\n222: 2}.
-# `trailing-comma` allows a space between comma and closing bracket: (a, ).
-# `empty-line` allows space-only lines.
-no-space-check=trailing-comma,
-               dict-separator
-
 # Allow the body of a class to be on the same line as the declaration if body
 # contains single statement.
 single-line-class-stmt=no

diff --git a/Pipfile b/Pipfile
@@ -15,3 +15,4 @@ python-keycloak-client = "==0.2.3"
 pylint = "==2.15.7"
 tox = "==3.27.0"
 tox-pipenv = "==1.10.1"
+wrapt = "*"
diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/scraper/src/config/browser_handler.py b/scraper/src/config/browser_handler.py
@@ -24,14 +24,13 @@ def init(config_original_content, js_render, user_agent):
             chrome_options = Options()
             chrome_options.add_argument('--no-sandbox')
             chrome_options.add_argument('--headless')
-            chrome_options.add_argument('user-agent={0}'.format(user_agent))
+            chrome_options.add_argument(f'user-agent={user_agent}')
 
             CHROMEDRIVER_PATH = os.environ.get('CHROMEDRIVER_PATH',
                                                "/usr/bin/chromedriver")
             if not os.path.isfile(CHROMEDRIVER_PATH):
                 raise Exception(
-                    "Env CHROMEDRIVER_PATH='{}' is not a path to a file".format(
-                        CHROMEDRIVER_PATH))
+                    f"Env CHROMEDRIVER_PATH='{CHROMEDRIVER_PATH}' is not a path to a file")
             driver = webdriver.Chrome(
                 CHROMEDRIVER_PATH,
                 options=chrome_options)

diff --git a/scraper/src/config/config_loader.py b/scraper/src/config/config_loader.py
@@ -42,7 +42,7 @@ class ConfigLoader:
     stop_content = []
     strategy = 'default'
     strict_redirect = True
-    strip_chars = u".,;:§¶"
+    strip_chars = ".,;:§¶"
     use_anchors = False
     user_agent = 'Meilisearch docs-scraper'
     only_content_level = False
@@ -90,7 +90,7 @@ def __init__(self, config):
     def _load_config(self, config):
         if os.path.isfile(config):
             self.config_file = config
-            with open(self.config_file, 'r') as f:
+            with open(self.config_file, mode='r', encoding='utf-8') as f:
                 config = f.read()
 
         try:

diff --git a/scraper/src/documentation_spider.py b/scraper/src/documentation_spider.py
@@ -156,8 +156,7 @@ def add_records(self, response, from_sitemap):
         # Arbitrary limit
         if self.nb_hits_max > 0 and DocumentationSpider.NB_INDEXED > self.nb_hits_max:
             DocumentationSpider.NB_INDEXED = 0
-            self.reason_to_stop = "Too much hits, Docs-Scraper only handle {} records".format(
-                int(self.nb_hits_max))
+            self.reason_to_stop = f"Too much hits, Docs-Scraper only handle {int(self.nb_hits_max)} records"
             raise ValueError(self.reason_to_stop)
 
     def parse_from_sitemap(self, response):

diff --git a/scraper/src/index.py b/scraper/src/index.py
@@ -113,7 +113,7 @@ def run_config(config):
 
     if DocumentationSpider.NB_INDEXED > 0:
         # meilisearch_helper.commit_tmp_index()
-        print('Nb hits: {}'.format(DocumentationSpider.NB_INDEXED))
+        print(f'Nb hits: {DocumentationSpider.NB_INDEXED}')
     else:
         print('Crawling issue: nbHits 0 for ' + config.index_uid)
         # meilisearch_helper.report_crawling_issue()

diff --git a/scraper/src/meilisearch_helper.py b/scraper/src/meilisearch_helper.py
@@ -123,8 +123,7 @@ def add_records(self, records, url, from_sitemap):
         color = "96" if from_sitemap else "94"
 
         print(
-            '\033[{}m> Docs-Scraper: \033[0m{}\033[93m {} records\033[0m)'.format(
-                color, url, record_count))
+            f'\033[{color}m> Docs-Scraper: \033[0m{url}\033[93m {record_count} records\033[0m)')
 
 # Algolia's settings:
     # {"minWordSizefor1Typo"=>3,

diff --git a/scraper/src/strategies/default_strategy.py b/scraper/src/strategies/default_strategy.py
@@ -48,9 +48,9 @@ def get_records_from_response(self, response):
     def _update_hierarchy_with_global_content(self, hierarchy,
                                               current_level_int):
         for index in range(0, current_level_int + 1):
-            if 'lvl{}'.format(index) in self.global_content:
-                hierarchy['lvl{}'.format(index)] = self.global_content[
-                    'lvl{}'.format(index)]
+            if f'lvl{index}' in self.global_content:
+                hierarchy[f'lvl{index}'] = self.global_content[
+                    f'lvl{index}']
 
         return hierarchy
 
@@ -100,8 +100,8 @@ def get_records_from_dom(self, current_page_url=None):
                 anchors[current_level] = Anchor.get_anchor(node)
 
                 for index in range(current_level_int + 1, 7):
-                    hierarchy['lvl{}'.format(index)] = None
-                    anchors['lvl{}'.format(index)] = None
+                    hierarchy[f'lvl{index}'] = None
+                    anchors[f'lvl{index}'] = None
                 previous_hierarchy = hierarchy
 
                 if self.config.only_content_level:
@@ -229,7 +229,7 @@ def _get_text_content_for_level(self, node, current_level, selectors):
     def _get_closest_anchor(anchors):
         # Getting the element anchor as the closest one
         for index in list(range(6, -1, -1)):
-            potential_anchor = anchors['lvl{}'.format(index)]
+            potential_anchor = anchors[f'lvl{index}']
             if potential_anchor is None:
                 continue
             return potential_anchor