Skip to content

Commit

Permalink
Add id to notebook cells (#32)
Browse files Browse the repository at this point in the history
  • Loading branch information
hoxbro committed Oct 7, 2023
1 parent 2db0300 commit 7d878fc
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 19 deletions.
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,12 @@ ignore = [
"E402", # Module level import not at top of file
"E731", # Do not assign a lambda expression, use a def
"E741", # Ambiguous variable name
"PLR2004", # magic-value-comparison
]
fix = true
unfixable = [
"F401", # unused imports
"F841", # unused variables
]

[tool.ruff.per-file-ignores]
Expand Down
14 changes: 13 additions & 1 deletion src/clean_notebook/clean.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from __future__ import annotations

import json
import uuid
from pathlib import Path
from typing import Any, AnyStr, Iterator

__all__ = ("clean_notebook", "clean_single_notebook")


def clean_notebook(
paths: list[str | Path],
Expand All @@ -29,6 +32,11 @@ def find_line_ending(s: AnyStr) -> AnyStr:
return counter[max(counter)]


def _check_set_id(nb: dict[str, Any]) -> bool:
    """Tell whether the notebook's format version calls for cell ids.

    Cell ids are part of the notebook format starting with version 4.5, see
    https://jupyter.org/enhancement-proposals/62-cell-id/cell-id.html

    Parameters
    ----------
    nb
        Parsed notebook; ``nb["nbformat"]`` is always read, and
        ``nb["nbformat_minor"]`` is read only when the major version is 4.

    Returns
    -------
    bool
        True for format 4.5 and newer (including any major version >= 5).
    """
    major = nb["nbformat"]
    if major == 4:
        # Within the 4.x series only minor revision 5 and later carry ids.
        return nb["nbformat_minor"] >= 5
    # Any other major version: ids apply from major version 5 upwards.
    return major >= 5


def clean_single_notebook(
file: Path,
*,
Expand All @@ -42,6 +50,7 @@ def clean_single_notebook(
newline = find_line_ending(raw)
nb = json.loads(raw)

set_id = _check_set_id(nb)
cleaned = False
for cell in nb["cells"].copy():
cleaned |= _update_value(cell, "outputs", [])
Expand All @@ -53,6 +62,9 @@ def clean_single_notebook(
if "attachments" in cell and len(cell["attachments"]) == 0:
del cell["attachments"]
cleaned = True
if set_id and cell.get("id") is None:
cell["id"] = str(uuid.uuid4())
cleaned = True

if not nb["cells"]:
print(f"Notebook '{file}' does not have any valid cells.")
Expand All @@ -63,7 +75,7 @@ def clean_single_notebook(

if cleaned and not dryrun:
with open(file, "w", encoding="utf8", newline=newline) as f:
json.dump(nb, f, indent=1, ensure_ascii=False)
json.dump(nb, f, indent=1, ensure_ascii=False, sort_keys=True)
f.write(newline) # empty line at the end of the file
print(f"Cleaned notebook: {file}")
elif cleaned:
Expand Down
34 changes: 17 additions & 17 deletions tests/data/clean_colab.ipynb
Original file line number Diff line number Diff line change
@@ -1,12 +1,4 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"cells": [
{
"cell_type": "code",
Expand All @@ -21,21 +13,29 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"saved from colab\")"
],
"metadata": {},
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
""
],
"metadata": {},
"execution_count": null,
"outputs": []
]
}
]
],
"metadata": {
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
38 changes: 38 additions & 0 deletions tests/data/clean_id.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "3d183bd1-509f-4758-9f2d-db94b23c58f9",
"metadata": {},
"outputs": [],
"source": [
"a = 2"
]
},
{
"cell_type": "markdown",
"id": "66cdc779-4931-4306-881a-4bf30cb0fdbb",
"metadata": {},
"source": [
"Markdown"
]
},
{
"cell_type": "raw",
"id": "5cbb8154-79ee-4290-953c-89a89b4276b7",
"metadata": {},
"source": [
"Raw"
]
}
],
"metadata": {
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
42 changes: 42 additions & 0 deletions tests/data/dirty_id.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"a = 2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Markdown"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"Raw"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
29 changes: 28 additions & 1 deletion tests/test_clean_notebook.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import uuid
from pathlib import Path
from shutil import copy2, rmtree
from typing import TYPE_CHECKING, Iterator
Expand All @@ -10,6 +11,7 @@

if TYPE_CHECKING:
from _pytest.capture import CaptureFixture
from _pytest.monkeypatch import MonkeyPatch
from _pytest.tmpdir import TempPathFactory


Expand All @@ -34,7 +36,7 @@ def temp_path(tmp_path_factory: TempPathFactory) -> Iterator[Path]:
TESTS = ["ascii", "jupyterlab", "vscode", "colab", "empty_cell", "empty_multi_cell"]


@pytest.mark.parametrize("test", [*TESTS, "ignore_slideshow"])
@pytest.mark.parametrize("test", [*TESTS, "ignore_slideshow", "id"])
def test_noclean_notebook(temp_path: Path, test: str) -> None:
dirty = temp_path / f"dirty_{test}.ipynb"
clean = temp_path / f"clean_{test}.ipynb"
Expand Down Expand Up @@ -74,6 +76,31 @@ def test_ignore_metadata(temp_path: Path) -> None:
assert clean_bytes != dirty_bytes


def test_notebook_id(temp_path: Path, monkeypatch: MonkeyPatch) -> None:
    """Cleaning ``dirty_id.ipynb`` must yield the same content as ``clean_id.ipynb``.

    ``uuid.uuid4`` is patched to hand out a fixed sequence of ids, so the ids
    written into the cleaned notebook are predictable and can be compared
    against the ones stored in the reference file.
    """
    test = "id"
    dirty_file = temp_path / f"dirty_{test}.ipynb"
    reference_file = temp_path / f"clean_{test}.ipynb"

    # One id per cell of the dirty notebook, in cell order; these are the
    # same ids that appear in the clean reference notebook.
    fake_ids = iter(
        (
            "3d183bd1-509f-4758-9f2d-db94b23c58f9",
            "66cdc779-4931-4306-881a-4bf30cb0fdbb",
            "5cbb8154-79ee-4290-953c-89a89b4276b7",
        )
    )
    monkeypatch.setattr(uuid, "uuid4", lambda: next(fake_ids))

    clean_single_notebook(dirty_file)

    # After cleaning in place, the dirty file should match the reference.
    assert load_file(reference_file) == load_file(dirty_file)


def test_notebook_no_overwrite_ids(temp_path: Path) -> None:
    """A notebook whose cells already carry ids must not be reported as cleaned."""
    test = "id"
    already_clean = temp_path / f"clean_{test}.ipynb"
    # A falsy result means the cleaner made no changes, so the existing
    # ids were left untouched.
    was_cleaned = clean_single_notebook(already_clean)
    assert not was_cleaned


def test_empty_notebook(capsys: CaptureFixture[str], temp_path: Path) -> None:
dirty = temp_path / "dirty_empty.ipynb"

Expand Down

0 comments on commit 7d878fc

Please sign in to comment.