From 14d923413b9c4ea90017d85f9be4ebd67700ca48 Mon Sep 17 00:00:00 2001
From: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com>
Date: Tue, 25 Jun 2024 18:48:40 -0400
Subject: [PATCH] docs: Add tooling to check and validate build: (#447)

- Add script to check for invalid links.
- Add workflow to validate build.
- Fail on warnings.
- Fix invalid link in log-viewer-webui README.

Co-authored-by: Henry8192 <50559854+Henry8192@users.noreply.github.com>
---
 .github/workflows/clp-docs.yaml         |  38 ++++++++
 components/log-viewer-webui/README.md   |   2 +-
 docs/tasks.yml                          |   4 +
 tools/scripts/find-broken-docs-links.py | 148 ++++++++++++++++++++++++
 4 files changed, 191 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/clp-docs.yaml
 create mode 100644 tools/scripts/find-broken-docs-links.py

diff --git a/.github/workflows/clp-docs.yaml b/.github/workflows/clp-docs.yaml
new file mode 100644
index 000000000..2f0a68e77
--- /dev/null
+++ b/.github/workflows/clp-docs.yaml
@@ -0,0 +1,38 @@
+name: "clp-docs"
+
+on:
+  pull_request:
+  push:
+  workflow_dispatch:
+
+concurrency:
+  group: "${{github.workflow}}-${{github.ref}}"
+  # Cancel in-progress jobs for efficiency
+  cancel-in-progress: true
+
+jobs:
+  build:
+    strategy:
+      matrix:
+        os: ["macos-latest", "ubuntu-latest"]
+    runs-on: "${{matrix.os}}"
+    steps:
+      - uses: "actions/checkout@v4"
+        with:
+          submodules: "recursive"
+
+      - uses: "actions/setup-python@v5"
+        with:
+          python-version: "3.10"
+
+      - name: "Install task"
+        shell: "bash"
+        run: "npm install -g @go-task/cli"
+
+      - if: "matrix.os == 'macos-latest'"
+        name: "Install coreutils (for md5sum)"
+        run: "brew install coreutils"
+
+      - name: "Build docs"
+        shell: "bash"
+        run: "task docs:site"
diff --git a/components/log-viewer-webui/README.md b/components/log-viewer-webui/README.md
index 265385dc6..e052ec9d9 100644
--- a/components/log-viewer-webui/README.md
+++ b/components/log-viewer-webui/README.md
@@ -4,6 +4,6 @@ A webapp that allows us to serve the [log-viewer] and integrate it with CLP's [w
 
 See the [docs] for more details.
 
-[docs]: https://docs.yscope.com/clp/main/dev-guide/components-log-viewer-webui.md
+[docs]: https://docs.yscope.com/clp/main/dev-guide/components-log-viewer-webui
 [log-viewer]: https://github.com/y-scope/yscope-log-viewer
 [webui]: ../webui
diff --git a/docs/tasks.yml b/docs/tasks.yml
index 44f5709c5..45e2cf05e 100644
--- a/docs/tasks.yml
+++ b/docs/tasks.yml
@@ -34,12 +34,16 @@ tasks:
       # Call `clean` before building since `sphinx-build --write-all --fresh-env` isn't always
       # equivalent to building from scratch.
       - task: "clean"
+      - "python3 '{{.ROOT_DIR}}/tools/scripts/find-broken-docs-links.py'"
       - |-
         . "{{.G_DOCS_VENV_DIR}}/bin/activate"
         sphinx-build \
           --write-all \
           --fresh-env \
           --conf-dir conf \
+          --nitpicky \
+          --fail-on-warning \
+          --keep-going \
           --builder html \
           src "{{.OUTPUT_DIR}}"
       # This command must be last
diff --git a/tools/scripts/find-broken-docs-links.py b/tools/scripts/find-broken-docs-links.py
new file mode 100644
index 000000000..349e927ef
--- /dev/null
+++ b/tools/scripts/find-broken-docs-links.py
@@ -0,0 +1,148 @@
+"""Checks the repo's tracked files for links that would be broken on the docs site."""
+
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+
+def main(argv) -> int:
+    """
+    Runs every link check against the repo's tracked files.
+    :param argv: Command-line arguments (currently unused).
+    :return: 1 if any check found a violation, 0 otherwise.
+    """
+    repo_root = _get_repo_root()
+
+    # Each check is (pattern, directory to search, error message). All checks are run, even after
+    # one finds a violation, so that every violation is reported in a single run.
+    checks = [
+        # docs.yscope.com links with ".md" suffixes
+        (
+            r"docs\.yscope\.com/.+\.md",
+            repo_root,
+            "docs.yscope.com links cannot have \".md\" suffixes.",
+        ),
+        # sphinx :link: attributes that have ".md" suffixes
+        (
+            r":link:[[:space:]]*.+\.md",
+            repo_root / "docs",
+            "sphinx :link: attributes cannot have \".md\" suffixes",
+        ),
+    ]
+
+    found_violation = False
+    for pattern, dir_to_search, error_msg in checks:
+        if _check_tracked_files(pattern, repo_root, dir_to_search, error_msg):
+            found_violation = True
+
+    return 1 if found_violation else 0
+
+
+def _get_repo_root() -> Path:
+    """
+    :return: The root of the git repository that contains this script.
+    :raises subprocess.CalledProcessError: If git fails (e.g., when run outside of a repo).
+    """
+    path_str = subprocess.check_output(
+        ["git", "rev-parse", "--show-toplevel"],
+        cwd=Path(__file__).parent,
+        text=True,
+    )
+    return Path(path_str.strip())
+
+
+def _check_tracked_files(
+    pattern: str,
+    repo_root: Path,
+    dir_to_search: Path,
+    error_msg: str,
+) -> bool:
+    """
+    Check for a pattern in all tracked files in the repo (except this script).
+    :param pattern: The pattern to search for, as a grep extended regular expression.
+    :param repo_root: The root of the repository.
+    :param dir_to_search: The directory to search in. Must be inside `repo_root`.
+    :param error_msg: Error message if the pattern is found.
+    :return: Whether the pattern was found in any file.
+    """
+    # `__file__` is a `str` and may be relative, so it can't be compared to a `Path` directly
+    this_script = Path(__file__).resolve()
+    found_matches = False
+
+    # NOTE: "-z" ensures the paths won't be quoted (while delimiting them using '\0')
+    tracked_paths = subprocess.check_output(
+        [
+            "git",
+            "ls-files",
+            "--cached",
+            "--exclude-standard",
+            "-z",
+            str(dir_to_search.relative_to(repo_root)),
+        ],
+        cwd=repo_root,
+        text=True,
+    ).split("\0")
+
+    for path_str in tracked_paths:
+        # The output ends with a delimiter, so the last element is an empty string
+        if not path_str:
+            continue
+
+        # Skip directories (e.g., submodules) and this script
+        abs_path = repo_root / path_str
+        if abs_path.is_dir() or abs_path.resolve() == this_script:
+            continue
+
+        result = subprocess.run(
+            [
+                "grep",
+                "--extended-regexp",
+                "--line-number",
+                "--with-filename",
+                # Binary files produce a notice rather than "<file>:<line>:<text>" matches
+                "--binary-files=without-match",
+                "-e",
+                pattern,
+                # "--" ensures a path that starts with '-' isn't treated as an option
+                "--",
+                path_str,
+            ],
+            cwd=repo_root,
+            stdout=subprocess.PIPE,
+            text=True,
+            errors="replace",
+        )
+        # grep exits with 0 if it found a match, 1 if it didn't, and >1 if it failed
+        if result.returncode > 1:
+            print(f"warning: Failed to search '{path_str}'.", file=sys.stderr)
+            continue
+
+        for match in result.stdout.splitlines():
+            _parse_and_print_match(match, error_msg)
+            found_matches = True
+
+    return found_matches
+
+
+def _parse_and_print_match(match: str, error_msg: str):
+    """
+    Parses and prints grep matches in a format relevant to the current environment.
+    :param match: The match to parse and print, formatted as "<file>:<line>:<text>".
+    :param error_msg: Error message if the pattern is found.
+    """
+    if os.getenv("GITHUB_ACTIONS") == "true":
+        parts = match.split(":", 2)
+        if len(parts) == 3:
+            # Print a GitHub Actions error annotation
+            file, line, _ = parts
+            print(f"::error file={file},line={line}::{error_msg}")
+            return
+        # Otherwise, the match isn't in the expected format, so print it unparsed below
+
+    print(error_msg, file=sys.stderr)
+    print(match, file=sys.stderr)
+
+
+if "__main__" == __name__:
+    sys.exit(main(sys.argv))