Skip to content

Commit

Permalink
add notebooks linting with black (#1913)
Browse files Browse the repository at this point in the history
  • Loading branch information
eaidova authored Apr 12, 2024
1 parent 6483a24 commit cb49574
Show file tree
Hide file tree
Showing 214 changed files with 13,999 additions and 7,155 deletions.
68 changes: 39 additions & 29 deletions .ci/check_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,35 @@
NOTEBOOKS_ROOT = Path(__file__).resolve().parents[1]

# URL fragments (matched as substrings of the checked URL) for which an
# unreachable link is reported as "SKIP" instead of failing the link check.
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated into a single bogus entry otherwise.
EXCEPTIONS_URLs = [
    "medium.com",
    "https://www.paddlepaddle.org.cn/",
    "mybinder.org",
    "https://arxiv.org",
    "http://host.robots.ox.ac.uk",
    "https://gitee.com/",
]


def get_all_ast_nodes(ast_nodes):
    """Yield every node of a nested AST in depth-first, pre-order.

    Args:
        ast_nodes: Iterable of node mappings. A node may contain a
            "children" key holding a nested iterable of nodes.

    Yields:
        Each node, immediately followed by all of its descendants. Every
        node is produced exactly once.
    """
    for node in ast_nodes:
        yield node
        # Leaf nodes carry no "children" key, so only recurse when present.
        if "children" in node:
            yield from get_all_ast_nodes(node["children"])


def get_all_references_from_md(md_path):
    """Yield the target of every image and link found in a markdown file.

    Args:
        md_path: Path-like object exposing ``read_text``; the file is read
            as UTF-8 and parsed with mistune's AST renderer.

    Yields:
        The ``src`` value of each image node and the ``link`` value of each
        link node, in document order. Each reference is yielded once.
    """
    parse_markdown = mistune.create_markdown(renderer=mistune.AstRenderer())
    ast = parse_markdown(md_path.read_text(encoding="UTF-8"))

    for node in get_all_ast_nodes(ast):
        if node["type"] == "image":
            yield node["src"]
        elif node["type"] == "link":
            yield node["link"]


def validate_colab_url(url: str) -> bool:
OPENVINO_COLAB_URL_PREFIX = 'https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/latest/'
OPENVINO_COLAB_URL_PREFIX = "https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/latest/"

if not url.startswith(OPENVINO_COLAB_URL_PREFIX):
return
Expand All @@ -45,7 +47,8 @@ def validate_colab_url(url: str) -> bool:
absolute_notebook_path = NOTEBOOKS_ROOT / notebook_path

if not absolute_notebook_path.exists():
raise ValueError(f'notebook not found for colab url {url!r}')
raise ValueError(f"notebook not found for colab url {url!r}")


def main():
all_passed = True
Expand All @@ -55,45 +58,52 @@ def complain(message):
all_passed = False
print(message, file=sys.stderr)

for md_path in NOTEBOOKS_ROOT.glob('**/*README*.md'):
for md_path in NOTEBOOKS_ROOT.glob("**/*README*.md"):
for url in get_all_references_from_md(md_path):

try:
components = urllib.parse.urlparse(url)
except ValueError:
complain(f'{md_path}: invalid URL reference {url!r}')
complain(f"{md_path}: invalid URL reference {url!r}")
continue

if not components.path: # self-link
if not components.path: # self-link
continue

if not components.scheme and not components.netloc:
# check if it is relative path on file from repo
file_name = md_path.parent / components.path
if not file_name.exists():
complain(f'{md_path}: invalid URL reference {url!r}')
complain(f"{md_path}: invalid URL reference {url!r}")
continue

try:
validate_colab_url(url)
except ValueError as err:
complain(f'{md_path}: {err}')
complain(f"{md_path}: {err}")

try:
get = requests.get(url, timeout=10)
if get.status_code != 200:
if get.status_code in [500, 429, 443] and any([known_url in url for known_url in EXCEPTIONS_URLs]):
print(f'SKIP - {md_path}: URL can not be reached {url!r}, status code {get.status_code}')
if get.status_code in [500, 429, 443] and any(
[known_url in url for known_url in EXCEPTIONS_URLs]
):
print(
f"SKIP - {md_path}: URL can not be reached {url!r}, status code {get.status_code}"
)
continue
complain(f'{md_path}: URL can not be reached {url!r}, status code {get.status_code}')
complain(
f"{md_path}: URL can not be reached {url!r}, status code {get.status_code}"
)
except Exception as err:
if any([known_url in url for known_url in EXCEPTIONS_URLs]):
print(f'SKIP - {md_path}: URL can not be reached {url!r}, error {err}')
else:
complain(f'{md_path}: URL can not be reached {url!r}, error {err}')
print(
f"SKIP - {md_path}: URL can not be reached {url!r}, error {err}"
)
else:
complain(f"{md_path}: URL can not be reached {url!r}, error {err}")

sys.exit(0 if all_passed else 1)


# Run the link check only when executed as a script; main() terminates the
# process via sys.exit, so it must be invoked exactly once.
if __name__ == "__main__":
    main()
70 changes: 57 additions & 13 deletions .ci/convert_notebooks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import argparse
import shutil
import subprocess # nosec - disable B404:import-subprocess check
import subprocess # nosec - disable B404:import-subprocess check
import time
from pathlib import Path
import nbformat
Expand All @@ -18,14 +18,17 @@ def disable_gradio_debug(notebook_path):
print(f"Disabled gradio debug mode for {notebook_path}")
nbformat.write(nb, str(notebook_path), version=nbformat.NO_CONVERT)


def arguments():
    """Parse the command-line options of the notebook conversion script.

    Each option is registered exactly once; registering the same option
    string twice makes argparse raise a conflict error.

    Returns:
        argparse.Namespace with the attributes:
            exclude_execution_file: value of ``--exclude_execution_file``
                (``None`` when omitted).
            exclude_conversion_file: value of ``--exclude_conversion_file``
                (``None`` when omitted).
            timeout: float, defaults to 7200; forwarded as the ``timeout``
                of the ``subprocess.run`` calls made during conversion.
            rst_dir: ``Path`` of the rst output directory, defaults to
                ``Path("rst")``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--exclude_execution_file")
    parser.add_argument("--exclude_conversion_file")
    parser.add_argument(
        "--timeout", type=float, default=7200, help="timeout for notebook execution"
    )
    parser.add_argument(
        "--rst_dir", type=Path, help="rst files output directory", default=Path("rst")
    )

    return parser.parse_args()

Expand All @@ -48,19 +51,36 @@ def main():
ignore_execution_list = prepare_ignore_list(args.exclude_execution_file)
root = Path(__file__).parents[1]
notebooks_dir = root / "notebooks"
notebooks = sorted(list(notebooks_dir.rglob('**/*.ipynb')))
notebooks = sorted(list(notebooks_dir.rglob("**/*.ipynb")))
for notebook in notebooks:
notebook_path = notebook.relative_to(root)
if str(notebook_path) in ignore_conversion_list:
continue
disable_gradio_debug(notebook_path)
notebook_executed = notebook_path.parent / notebook_path.name.replace(".ipynb", "-with-output.ipynb")
notebook_executed = notebook_path.parent / notebook_path.name.replace(
".ipynb", "-with-output.ipynb"
)
start = time.perf_counter()
print(f"Convert {notebook_path}")
if str(notebook_path) not in ignore_execution_list:
try:
retcode = subprocess.run(["jupyter", "nbconvert", "--log-level=INFO", "--execute", "--to", "notebook", "--output",
str(notebook_executed), '--output-dir', str(root), '--ExecutePreprocessor.kernel_name=python3', str(notebook_path)], timeout=args.timeout).returncode
retcode = subprocess.run(
[
"jupyter",
"nbconvert",
"--log-level=INFO",
"--execute",
"--to",
"notebook",
"--output",
str(notebook_executed),
"--output-dir",
str(root),
"--ExecutePreprocessor.kernel_name=python3",
str(notebook_path),
],
timeout=args.timeout,
).returncode
except subprocess.TimeoutExpired:
retcode = -42
print(f"TIMEOUT: {notebook_path}")
Expand All @@ -69,11 +89,35 @@ def main():
continue
else:
shutil.copyfile(notebook_path, notebook_executed)
rst_retcode = subprocess.run(["jupyter", "nbconvert", "--to", "rst", str(notebook_executed), "--output-dir", str(args.rst_dir),
"--TagRemovePreprocessor.remove_all_outputs_tags=hide_output --TagRemovePreprocessor.enabled=True"], timeout=args.timeout).returncode
rst_retcode = subprocess.run(
[
"jupyter",
"nbconvert",
"--to",
"rst",
str(notebook_executed),
"--output-dir",
str(args.rst_dir),
"--TagRemovePreprocessor.remove_all_outputs_tags=hide_output --TagRemovePreprocessor.enabled=True",
],
timeout=args.timeout,
).returncode
notebook_rst = args.rst_dir / notebook_executed.name.replace(".ipynb", ".rst")
# remove all non-printable characters
subprocess.run(["sed", "-i", "-e", "s/\x1b\[[0-9;]*m//g", "-e", "s/\x1b\[?25h//g", "-e", "s/\x1b\[?25l//g", str(notebook_rst)], timeout=args.timeout)
subprocess.run(
[
"sed",
"-i",
"-e",
"s/\x1b\[[0-9;]*m//g",
"-e",
"s/\x1b\[?25h//g",
"-e",
"s/\x1b\[?25l//g",
str(notebook_rst),
],
timeout=args.timeout,
)

end = time.perf_counter() - start
print(f"Notebook conversion took: {end:.4f} s")
Expand All @@ -90,4 +134,4 @@ def main():


# Run the conversion only when executed as a script, and only once.
if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion .ci/dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# black==21.8 requires typing-extensions>3.10 which is incompatible
# with other packages
-r ../requirements.txt
black==24.3.0 # format Python code
black[jupyter]==24.3.0 # format Python code
isort # sort imports
jupyterlab-code-formatter # format code in notebooks in Jupyter Lab
jupyterlab-git # checkout and commit code in Jupyter Lab
Expand Down
1 change: 1 addition & 0 deletions .ci/spellcheck/.pyspelling.wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ finetuning
FLAC
floyd
Formatter
formatter
fp
FP
FPN
Expand Down
6 changes: 5 additions & 1 deletion .ci/spellcheck/ipynb_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,11 @@ def _filter(self, nb):

def sfilter(self, source):
    """Execute filter.

    Runs ``self._filter`` over ``source.text`` and wraps the result in a
    single ``filters.SourceText`` that keeps the incoming context and
    encoding and is tagged with the "ipynb" category.

    Returns:
        A one-element list containing the filtered ``SourceText``.
    """
    return [
        filters.SourceText(
            self._filter(source.text), source.context, source.encoding, "ipynb"
        )
    ]


def get_plugin():
Expand Down
4 changes: 3 additions & 1 deletion .ci/spellcheck/run_spellcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
env=dict(os.environ, PYTHONPATH=PYTHONPATH),
)

result_output = result.stdout.strip("\n") if result.stdout else result.stderr.strip("\n")
result_output = (
result.stdout.strip("\n") if result.stdout else result.stderr.strip("\n")
)

print(result_output, file=sys.stderr if result.returncode else sys.stdout, flush=True)

Expand Down
Loading

0 comments on commit cb49574

Please sign in to comment.