From 2f10d670619dcc1befa8ef236f323487de17ed2a Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Mon, 4 Nov 2024 09:56:27 +0100 Subject: [PATCH] Complete automation of version replacement pre-commit for pip and uv (#43205) (#43623) The scripts to update pip and uv version were not complete - they did not replace a few of our scripts and documentation. This was especially troublesome for doc replacement, because updating versions manually led to misalignments of tables in markdown. Lack of completeness of the upgrade caused #43197 and #43135 manual PRs to bump all references. Also an earlier upgrade caused the markdown table to be broken - with UV row table offset by 1. This PR fixes it: * all the scripts and docs are updated now * when markdown is updated, the table structure is not broken (cherry picked from commit 7ede73c85a3e5815b061f9b520e999cd4b5efd52) --- dev/breeze/doc/ci/02_images.md | 63 ++++++++-------- scripts/ci/pre_commit/update_installers.py | 86 ++++++++++++++++++++-- 2 files changed, 113 insertions(+), 36 deletions(-) diff --git a/dev/breeze/doc/ci/02_images.md b/dev/breeze/doc/ci/02_images.md index 19c58ebc2d2d..f9ea4faaee7c 100644 --- a/dev/breeze/doc/ci/02_images.md +++ b/dev/breeze/doc/ci/02_images.md @@ -421,36 +421,39 @@ DOCKER_BUILDKIT=1 docker build . -f Dockerfile.ci \ The following build arguments (`--build-arg` in docker build command) can be used for CI images: -| Build argument | Default value | Description | -|-----------------------------------|-------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `PYTHON_BASE_IMAGE` | `python:3.8-slim-bookworm` | Base Python image | -| `PYTHON_MAJOR_MINOR_VERSION` | `3.8` | major/minor version of Python (should match base image) | -| `DEPENDENCIES_EPOCH_NUMBER` | `2` | increasing this number will reinstall all apt dependencies | -| `ADDITIONAL_PIP_INSTALL_FLAGS` | | additional `pip` flags passed to the installation commands (except when reinstalling `pip` itself) | -| `PIP_NO_CACHE_DIR` | `true` | if true, then no pip cache will be stored | -| `UV_NO_CACHE` | `true` | if true, then no uv cache will be stored | -| `HOME` | `/root` | Home directory of the root user (CI image has root user as default) | -| `AIRFLOW_HOME` | `/root/airflow` | Airflow's HOME (that's where logs and sqlite databases are stored) | -| `AIRFLOW_SOURCES` | `/opt/airflow` | Mounted sources of Airflow | -| `AIRFLOW_REPO` | `apache/airflow` | the repository from which PIP dependencies are pre-installed | -| `AIRFLOW_BRANCH` | `main` | the branch from which PIP dependencies are pre-installed | -| `AIRFLOW_CI_BUILD_EPOCH` | `1` | increasing this value will reinstall PIP dependencies from the repository from scratch | -| `AIRFLOW_CONSTRAINTS_LOCATION` | | If not empty, it will override the source of the constraints with the specified URL or file. | -| `AIRFLOW_CONSTRAINTS_REFERENCE` | | reference (branch or tag) from GitHub repository from which constraints are used. By default it is set to `constraints-main` but can be `constraints-2-X`. | -| `AIRFLOW_EXTRAS` | `all` | extras to install | -| `UPGRADE_INVALIDATION_STRING` | | If set to any random value the dependencies are upgraded to newer versions. In CI it is set to build id. | -| `AIRFLOW_PRE_CACHED_PIP_PACKAGES` | `true` | Allows to pre-cache airflow PIP packages from the GitHub of Apache Airflow This allows to optimize iterations for Image builds and speeds up CI jobs. | -| `ADDITIONAL_AIRFLOW_EXTRAS` | | additional extras to install | -| `ADDITIONAL_PYTHON_DEPS` | | additional Python dependencies to install | -| `DEV_APT_COMMAND` | | Dev apt command executed before dev deps are installed in the first part of image | -| `ADDITIONAL_DEV_APT_COMMAND` | | Additional Dev apt command executed before dev dep are installed in the first part of the image | -| `DEV_APT_DEPS` | Empty - install default dependencies (see `install_os_dependencies.sh`) | Dev APT dependencies installed in the first part of the image | -| `ADDITIONAL_DEV_APT_DEPS` | | Additional apt dev dependencies installed in the first part of the image | -| `ADDITIONAL_DEV_APT_ENV` | | Additional env variables defined when installing dev deps | -| `AIRFLOW_PIP_VERSION` | `24.0` | PIP version used. | -| `AIRFLOW_UV_VERSION` | `0.1.10` | UV version used. | -| `AIRFLOW_USE_UV` | `true` | Whether to use UV for installation. | -| `PIP_PROGRESS_BAR` | `on` | Progress bar for PIP installation | +| Build argument | Default value | Description | +|-----------------------------------|----------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `PYTHON_BASE_IMAGE` | `python:3.8-slim-bookworm` | Base Python image | +| `PYTHON_MAJOR_MINOR_VERSION` | `3.8` | major/minor version of Python (should match base image) | +| `DEPENDENCIES_EPOCH_NUMBER` | `2` | increasing this number will reinstall all apt dependencies | +| `ADDITIONAL_PIP_INSTALL_FLAGS` | | additional `pip` flags passed to the installation commands (except when reinstalling `pip` itself) | +| `PIP_NO_CACHE_DIR` | `true` | if true, then no pip cache will be stored | +| `UV_NO_CACHE` | `true` | if true, then no uv cache will be stored | +| `HOME` | `/root` | Home directory of the root user (CI image has root user as default) | +| `AIRFLOW_HOME` | `/root/airflow` | Airflow's HOME (that's where logs and sqlite databases are stored) | +| `AIRFLOW_SOURCES` | `/opt/airflow` | Mounted sources of Airflow | +| `AIRFLOW_REPO` | `apache/airflow` | the repository from which PIP dependencies are pre-installed | +| `AIRFLOW_BRANCH` | `main` | the branch from which PIP dependencies are pre-installed | +| `AIRFLOW_CI_BUILD_EPOCH` | `1` | increasing this value will reinstall PIP dependencies from the repository from scratch | +| `AIRFLOW_CONSTRAINTS_LOCATION` | | If not empty, it will override the source of the constraints with the specified URL or file. | +| `AIRFLOW_CONSTRAINTS_REFERENCE` | | reference (branch or tag) from GitHub repository from which constraints are used. By default it is set to `constraints-main` but can be `constraints-2-X`. | +| `AIRFLOW_EXTRAS` | `all` | extras to install | +| `UPGRADE_INVALIDATION_STRING` | | If set to any random value the dependencies are upgraded to newer versions. In CI it is set to build id. | +| `AIRFLOW_PRE_CACHED_PIP_PACKAGES` | `true` | Allows to pre-cache airflow PIP packages from the GitHub of Apache Airflow This allows to optimize iterations for Image builds and speeds up CI jobs. | +| `ADDITIONAL_AIRFLOW_EXTRAS` | | additional extras to install | +| `ADDITIONAL_PYTHON_DEPS` | | additional Python dependencies to install | +| `DEV_APT_COMMAND` | | Dev apt command executed before dev deps are installed in the first part of image | +| `ADDITIONAL_DEV_APT_COMMAND` | | Additional Dev apt command executed before dev dep are installed in the first part of the image | +| `DEV_APT_DEPS` | | Dev APT dependencies installed in the first part of the image | +| `ADDITIONAL_DEV_APT_DEPS` | | Additional apt dev dependencies installed in the first part of the image | +| `ADDITIONAL_DEV_APT_ENV` | | Additional env variables defined when installing dev deps | +| `AIRFLOW_PIP_VERSION` | `24.0` | PIP version used. | +| `AIRFLOW_UV_VERSION` | `0.1.10` | UV version used. | +| `AIRFLOW_USE_UV` | `true` | Whether to use UV for installation. | +| `PIP_PROGRESS_BAR` | `on` | Progress bar for PIP installation | + + +The" Here are some examples of how CI images can built manually. CI is always built from local sources. diff --git a/scripts/ci/pre_commit/update_installers.py b/scripts/ci/pre_commit/update_installers.py index 1cbd38c8333a..a90e07d38c9f 100755 --- a/scripts/ci/pre_commit/update_installers.py +++ b/scripts/ci/pre_commit/update_installers.py @@ -30,8 +30,22 @@ FILES_TO_UPDATE = [ AIRFLOW_SOURCES_ROOT_PATH / "Dockerfile", AIRFLOW_SOURCES_ROOT_PATH / "Dockerfile.ci", + AIRFLOW_SOURCES_ROOT_PATH / "scripts" / "ci" / "install_breeze.sh", AIRFLOW_SOURCES_ROOT_PATH / "scripts" / "docker" / "common.sh", AIRFLOW_SOURCES_ROOT_PATH / "pyproject.toml", + AIRFLOW_SOURCES_ROOT_PATH / "dev" / "breeze" / "src" / "airflow_breeze" / "global_constants.py", + AIRFLOW_SOURCES_ROOT_PATH + / "dev" + / "breeze" + / "src" + / "airflow_breeze" + / "commands" + / "release_management_commands.py", +] + + +DOC_FILES_TO_UPDATE: list[Path] = [ + AIRFLOW_SOURCES_ROOT_PATH / "dev/" / "breeze" / "doc" / "ci" / "02_images.md" ] @@ -43,13 +57,39 @@ def get_latest_pypi_version(package_name: str) -> str: return latest_version -PIP_PATTERN = re.compile(r"AIRFLOW_PIP_VERSION=[0-9.]+") -UV_PATTERN = re.compile(r"AIRFLOW_UV_VERSION=[0-9.]+") -UV_GREATER_PATTERN = re.compile(r'"uv>=[0-9]+[0-9.]+"') +AIRFLOW_PIP_PATTERN = re.compile(r"(AIRFLOW_PIP_VERSION=)([0-9.]+)") +AIRFLOW_PIP_QUOTED_PATTERN = re.compile(r"(AIRFLOW_PIP_VERSION = )(\"[0-9.]+\")") +PIP_QUOTED_PATTERN = re.compile(r"(PIP_VERSION = )(\"[0-9.]+\")") +AIRFLOW_PIP_DOC_PATTERN = re.compile(r"(\| *`AIRFLOW_PIP_VERSION` *\| *)(`[0-9.]+`)( *\|)") +AIRFLOW_PIP_UPGRADE_PATTERN = re.compile(r"(python -m pip install --upgrade pip==)([0-9.]+)") + +AIRFLOW_UV_PATTERN = re.compile(r"(AIRFLOW_UV_VERSION=)([0-9.]+)") +AIRFLOW_UV_QUOTED_PATTERN = re.compile(r"(AIRFLOW_UV_VERSION = )(\"[0-9.]+\")") +AIRFLOW_UV_DOC_PATTERN = re.compile(r"(\| *`AIRFLOW_UV_VERSION` *\| *)(`[0-9.]+`)( *\|)") +UV_GREATER_PATTERN = re.compile(r'"(uv>=)([0-9]+)"') UPGRADE_UV: bool = os.environ.get("UPGRADE_UV", "true").lower() == "true" UPGRADE_PIP: bool = os.environ.get("UPGRADE_PIP", "true").lower() == "true" + +def replace_group_2_while_keeping_total_length(pattern: re.Pattern[str], replacement: str, text: str) -> str: + def replacer(match): + original_length = len(match.group(2)) + padding = "" + if len(match.groups()) > 2: + padding = match.group(3) + new_length = len(replacement) + diff = new_length - original_length + if diff <= 0: + padding = " " * -diff + padding + else: + padding = padding[diff:] + padded_replacement = match.group(1) + replacement + padding + return padded_replacement.strip() + + return re.sub(pattern, replacer, text) + + if __name__ == "__main__": pip_version = get_latest_pypi_version("pip") console.print(f"[bright_blue]Latest pip version: {pip_version}") @@ -62,10 +102,44 @@ def get_latest_pypi_version(package_name: str) -> str: file_content = file.read_text() new_content = file_content if UPGRADE_PIP: - new_content = re.sub(PIP_PATTERN, f"AIRFLOW_PIP_VERSION={pip_version}", new_content, re.MULTILINE) + new_content = replace_group_2_while_keeping_total_length( + AIRFLOW_PIP_PATTERN, pip_version, new_content + ) + new_content = replace_group_2_while_keeping_total_length( + AIRFLOW_PIP_UPGRADE_PATTERN, pip_version, new_content + ) + new_content = replace_group_2_while_keeping_total_length( + AIRFLOW_PIP_QUOTED_PATTERN, f'"{pip_version}"', new_content + ) + new_content = replace_group_2_while_keeping_total_length( + PIP_QUOTED_PATTERN, f'"{pip_version}"', new_content + ) + if UPGRADE_UV: + new_content = replace_group_2_while_keeping_total_length( + AIRFLOW_UV_PATTERN, uv_version, new_content + ) + new_content = replace_group_2_while_keeping_total_length( + AIRFLOW_UV_QUOTED_PATTERN, f'"{uv_version}"', new_content + ) + new_content = replace_group_2_while_keeping_total_length( + UV_GREATER_PATTERN, uv_version, new_content + ) + if new_content != file_content: + file.write_text(new_content) + console.print(f"[bright_blue]Updated {file}") + changed = True + for file in DOC_FILES_TO_UPDATE: + console.print(f"[bright_blue]Updating {file}") + file_content = file.read_text() + new_content = file_content + if UPGRADE_PIP: + new_content = replace_group_2_while_keeping_total_length( + AIRFLOW_PIP_DOC_PATTERN, f"`{pip_version}`", new_content + ) if UPGRADE_UV: - new_content = re.sub(UV_PATTERN, f"AIRFLOW_UV_VERSION={uv_version}", new_content, re.MULTILINE) - new_content = re.sub(UV_GREATER_PATTERN, f'"uv>={uv_version}"', new_content, re.MULTILINE) + new_content = replace_group_2_while_keeping_total_length( + AIRFLOW_UV_DOC_PATTERN, f"`{uv_version}`", new_content + ) if new_content != file_content: file.write_text(new_content) console.print(f"[bright_blue]Updated {file}")