diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index c95732ea69..d0519ef4f2 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -27,6 +27,7 @@
 * Project definition no longer accept extra fields. Any extra field will cause an error.
 * Changing imports in function/procedure section in `snowflake.yml` will cause the definition update on replace
 * Adding `--pattern` flag to `stage list` command for filtering out results with regex.
+* Fixed Snowpark build paths for builds run with the `--project` option (previously the build could produce an empty zip).

 # v2.1.1

diff --git a/src/snowflake/cli/plugins/snowpark/commands.py b/src/snowflake/cli/plugins/snowpark/commands.py
index 8814e28712..3966bc9191 100644
--- a/src/snowflake/cli/plugins/snowpark/commands.py
+++ b/src/snowflake/cli/plugins/snowpark/commands.py
@@ -2,7 +2,6 @@

 import logging
 from enum import Enum
-from pathlib import Path
 from typing import Dict, List, Optional, Set

 import typer
@@ -32,7 +31,7 @@
     FunctionSchema,
     ProcedureSchema,
 )
-from snowflake.cli.api.project.schemas.snowpark.snowpark import Snowpark
+from snowflake.cli.api.secure_path import SecurePath
 from snowflake.cli.plugins.object.manager import ObjectManager
 from snowflake.cli.plugins.object.stage.manager import StageManager
 from snowflake.cli.plugins.snowpark.common import (
@@ -42,6 +41,7 @@
 from snowflake.cli.plugins.snowpark.manager import FunctionManager, ProcedureManager
 from snowflake.cli.plugins.snowpark.models import PypiOption
 from snowflake.cli.plugins.snowpark.package_utils import get_snowflake_packages
+from snowflake.cli.plugins.snowpark.snowpark_package_paths import SnowparkPackagePaths
 from snowflake.cli.plugins.snowpark.snowpark_shared import (
     CheckAnacondaForPyPiDependencies,
     PackageNativeLibrariesOption,
@@ -79,6 +79,10 @@ def deploy(
     All deployed objects use the same artifact which is deployed only once.
     """
     snowpark = cli_context.project_definition
+    paths = SnowparkPackagePaths.for_snowpark_project(
+        project_root=SecurePath(cli_context.project_root),
+        snowpark_project_definition=snowpark,
+    )
     procedures = snowpark.procedures
     functions = snowpark.functions

@@ -88,9 +92,7 @@ def deploy(
             "No procedures or functions were specified in the project definition."
         )

-    build_artifact_path = _get_snowpark_artifact_path(snowpark)
-
-    if not build_artifact_path.exists():
+    if not paths.artifact_file.exists():
         raise ClickException(
             "Artifact required for deploying the project does not exist in this directory. "
             "Please use build command to create it."
@@ -125,10 +127,12 @@ def deploy(
     packages = get_snowflake_packages()

     artifact_stage_directory = get_app_stage_path(stage_name, snowpark.project_name)
-    artifact_stage_target = f"{artifact_stage_directory}/{build_artifact_path.name}"
+    artifact_stage_target = (
+        f"{artifact_stage_directory}/{paths.artifact_file.path.name}"
+    )

     stage_manager.put(
-        local_path=build_artifact_path,
+        local_path=paths.artifact_file.path,
         stage_path=artifact_stage_directory,
         overwrite=True,
     )
@@ -143,7 +147,7 @@ def deploy(
                 existing_objects=existing_procedures,
                 packages=packages,
                 stage_artifact_path=artifact_stage_target,
-                source_name=build_artifact_path.name,
+                source_name=paths.artifact_file.path.name,
             )
             deploy_status.append(operation_result)

@@ -156,7 +160,7 @@ def deploy(
                 existing_objects=existing_functions,
                 packages=packages,
                 stage_artifact_path=artifact_stage_target,
-                source_name=build_artifact_path.name,
+                source_name=paths.artifact_file.path.name,
             )
             deploy_status.append(operation_result)

@@ -304,12 +308,6 @@ def _deploy_single_object(
     }


-def _get_snowpark_artifact_path(snowpark_definition: Snowpark):
-    source = Path(snowpark_definition.src)
-    artifact_file = Path.cwd() / (source.name + ".zip")
-    return artifact_file
-
-
 @app.command("build")
 @with_project_definition("snowpark")
 def build(
@@ -322,19 +320,19 @@ def build(
     Builds the Snowpark project as a `.zip` archive that can be used by `deploy` command.
     The archive is built using only the `src` directory specified in the project file.
     """
-    snowpark = cli_context.project_definition
-    source = Path(snowpark.src)
-    artifact_file = _get_snowpark_artifact_path(snowpark)
-    log.info("Building package using sources from: %s", source.resolve())
+    paths = SnowparkPackagePaths.for_snowpark_project(
+        project_root=SecurePath(cli_context.project_root),
+        snowpark_project_definition=cli_context.project_definition,
+    )
+    log.info("Building package using sources from: %s", paths.source.path)
     snowpark_package(
-        source=source,
-        artifact_file=artifact_file,
+        paths=paths,
         pypi_download=pypi_download,  # type: ignore[arg-type]
         check_anaconda_for_pypi_deps=check_anaconda_for_pypi_deps,
         package_native_libraries=package_native_libraries,  # type: ignore[arg-type]
     )
-    return MessageResult(f"Build done. Artifact path: {artifact_file}")
+    return MessageResult(f"Build done. Artifact path: {paths.artifact_file.path}")


 class _SnowparkObject(Enum):
diff --git a/src/snowflake/cli/plugins/snowpark/package/commands.py b/src/snowflake/cli/plugins/snowpark/package/commands.py
index 2feb88510b..5f4dac0560 100644
--- a/src/snowflake/cli/plugins/snowpark/package/commands.py
+++ b/src/snowflake/cli/plugins/snowpark/package/commands.py
@@ -12,6 +12,7 @@
 )
 from snowflake.cli.api.commands.snow_typer import SnowTyper
 from snowflake.cli.api.output.types import CommandResult, MessageResult
+from snowflake.cli.api.secure_path import SecurePath
 from snowflake.cli.plugins.snowpark.models import (
     PypiOption,
     Requirement,
@@ -213,11 +214,13 @@ def package_create(
             f"Package {name} is already available in Snowflake Anaconda Channel."
         )

+    packages_dir = SecurePath(".packages")
     packages_are_downloaded, dependencies = download_packages(
         anaconda=anaconda,
         perform_anaconda_check_for_dependencies=not ignore_anaconda,
         package_name=name,
-        file_name=None,
+        requirements_file=None,
+        packages_dir=packages_dir,
         index_url=index_url,
         allow_shared_libraries=allow_shared_libraries_pypi_option,
         skip_version_check=skip_version_check,
diff --git a/src/snowflake/cli/plugins/snowpark/package_utils.py b/src/snowflake/cli/plugins/snowpark/package_utils.py
index dd2f0efe7e..86aef63647 100644
--- a/src/snowflake/cli/plugins/snowpark/package_utils.py
+++ b/src/snowflake/cli/plugins/snowpark/package_utils.py
@@ -2,7 +2,6 @@

 import logging
 import os
-from pathlib import Path
 from typing import List

 import click
@@ -26,7 +25,7 @@


 def parse_requirements(
-    requirements_file: str = "requirements.txt",
+    requirements_file: SecurePath = SecurePath("requirements.txt"),
 ) -> List[Requirement]:
     """Reads and parses a Python requirements.txt file.
@@ -38,15 +37,14 @@ def parse_requirements(
         list[str]: A flat list of package names, without versions
     """
     reqs: List[Requirement] = []
-    requirements_file_spath = SecurePath(requirements_file)
-    if requirements_file_spath.exists():
-        with requirements_file_spath.open(
+    if requirements_file.exists():
+        with requirements_file.open(
             "r", read_file_limit_mb=DEFAULT_SIZE_LIMIT_MB, encoding="utf-8"
         ) as f:
             for req in requirements.parse(f):
                 reqs.append(req)
     else:
-        log.info("No %s found", requirements_file)
+        log.info("No %s found", requirements_file.path)

     return deduplicate_and_sort_reqs(reqs)

@@ -103,7 +101,8 @@ def generate_deploy_stage_name(identifier: str) -> str:

 def download_packages(
     anaconda: AnacondaChannel | None,
-    file_name: str | None,
+    requirements_file: SecurePath | None,
+    packages_dir: SecurePath,
     perform_anaconda_check_for_dependencies: bool = True,
     package_name: str | None = None,
     index_url: str | None = None,
@@ -122,12 +121,12 @@ def download_packages(
     which are available on the Snowflake Anaconda channel. They will not be downloaded
     into '.packages' directory.
     """
-    if file_name and package_name:
+    if requirements_file and package_name:
         raise ClickException(
             "Could not use package name and requirements file simultaneously"
         )
-    if file_name and not Path(file_name).exists():
-        raise ClickException(f"File {file_name} does not exists.")
+    if requirements_file and not requirements_file.exists():
+        raise ClickException(f"File {requirements_file.path} does not exist.")
     if perform_anaconda_check_for_dependencies and not anaconda:
         raise ClickException(
             "Cannot perform anaconda checks if anaconda channel is not specified."
@@ -136,19 +135,18 @@ def download_packages(
     with Venv() as v, SecurePath.temporary_directory() as downloads_dir:
         if package_name:
             # This is a Windows workaround where use TemporaryDirectory instead of NamedTemporaryFile
-            tmp_requirements = Path(v.directory.name) / "requirements.txt"
-            tmp_requirements.write_text(str(package_name))
-            file_name = str(tmp_requirements)
+            requirements_file = SecurePath(v.directory.name) / "requirements.txt"
+            requirements_file.write_text(str(package_name))

         pip_wheel_result = v.pip_wheel(
-            file_name, download_dir=downloads_dir.path, index_url=index_url
+            requirements_file.path, download_dir=downloads_dir.path, index_url=index_url  # type: ignore
         )
         if pip_wheel_result != 0:
             log.info(pip_failed_log_msg, pip_wheel_result)
             return False, None

         dependencies = v.get_package_dependencies(
-            file_name, downloads_dir=downloads_dir.path
+            requirements_file, downloads_dir=downloads_dir.path
         )
         dependency_requirements = [d.requirement for d in dependencies]
@@ -177,10 +175,9 @@ def download_packages(
         ) and not _confirm_shared_libraries(allow_shared_libraries):
             return False, split_dependencies
         else:
-            packages_dest = SecurePath(".packages")
-            packages_dest.mkdir(exist_ok=True)
+            packages_dir.mkdir(exist_ok=True)
             for package in dependencies_to_be_packed:
-                package.extract_files(packages_dest.path)
+                package.extract_files(packages_dir.path)

             return True, split_dependencies
diff --git a/src/snowflake/cli/plugins/snowpark/snowpark_package_paths.py b/src/snowflake/cli/plugins/snowpark/snowpark_package_paths.py
new file mode 100644
index 0000000000..459bfd5367
--- /dev/null
+++ b/src/snowflake/cli/plugins/snowpark/snowpark_package_paths.py
@@ -0,0 +1,57 @@
+from dataclasses import dataclass
+
+from snowflake.cli.api.project.schemas.snowpark.snowpark import Snowpark
+from snowflake.cli.api.secure_path import SecurePath
+
+_DEFINED_REQUIREMENTS = "requirements.txt"
+_REQUIREMENTS_SNOWFLAKE = "requirements.snowflake.txt"
+_REQUIREMENTS_OTHER = "requirements.other.txt"
+_PACKAGES_DIR = ".packages"
+
+
+@dataclass
+class SnowparkPackagePaths:
+    source: SecurePath
+    artifact_file: SecurePath
+    defined_requirements_file: SecurePath = SecurePath(_DEFINED_REQUIREMENTS)
+    snowflake_requirements_file: SecurePath = SecurePath(_REQUIREMENTS_SNOWFLAKE)
+    other_requirements_file: SecurePath = SecurePath(_REQUIREMENTS_OTHER)
+    downloaded_packages_dir: SecurePath = SecurePath(_PACKAGES_DIR)
+
+    @classmethod
+    def for_snowpark_project(
+        cls, project_root: SecurePath, snowpark_project_definition: Snowpark
+    ) -> "SnowparkPackagePaths":
+        defined_source_path = SecurePath(snowpark_project_definition.src)
+        return cls(
+            source=cls._get_snowpark_project_source_absolute_path(
+                project_root=project_root,
+                defined_source_path=defined_source_path,
+            ),
+            artifact_file=cls._get_snowpark_project_artifact_absolute_path(
+                project_root=project_root,
+                defined_source_path=defined_source_path,
+            ),
+            defined_requirements_file=project_root / _DEFINED_REQUIREMENTS,
+            snowflake_requirements_file=project_root / _REQUIREMENTS_SNOWFLAKE,
+            other_requirements_file=project_root / _REQUIREMENTS_OTHER,
+            downloaded_packages_dir=project_root / _PACKAGES_DIR,
+        )
+
+    @classmethod
+    def _get_snowpark_project_source_absolute_path(
+        cls, project_root: SecurePath, defined_source_path: SecurePath
+    ) -> SecurePath:
+        if defined_source_path.path.is_absolute():
+            return defined_source_path
+        return SecurePath((project_root / defined_source_path.path).path.resolve())
+
+    @classmethod
+    def _get_snowpark_project_artifact_absolute_path(
+        cls, project_root: SecurePath, defined_source_path: SecurePath
+    ) -> SecurePath:
+        source_path = cls._get_snowpark_project_source_absolute_path(
+            project_root=project_root, defined_source_path=defined_source_path
+        )
+        artifact_file = project_root / (source_path.path.name + ".zip")
+        return artifact_file
diff --git a/src/snowflake/cli/plugins/snowpark/snowpark_shared.py b/src/snowflake/cli/plugins/snowpark/snowpark_shared.py
index 51b35248ed..62f35b8062 100644
--- a/src/snowflake/cli/plugins/snowpark/snowpark_shared.py
+++ b/src/snowflake/cli/plugins/snowpark/snowpark_shared.py
@@ -1,7 +1,6 @@
 from __future__ import annotations

 import logging
-from pathlib import Path
 from typing import List

 import click
@@ -10,6 +9,7 @@
 from snowflake.cli.plugins.snowpark import package_utils
 from snowflake.cli.plugins.snowpark.models import PypiOption, Requirement
 from snowflake.cli.plugins.snowpark.package.anaconda import AnacondaChannel
+from snowflake.cli.plugins.snowpark.snowpark_package_paths import SnowparkPackagePaths
 from snowflake.cli.plugins.snowpark.zipper import zip_dir

 PyPiDownloadOption: PypiOption = typer.Option(
@@ -44,19 +44,17 @@
 log = logging.getLogger(__name__)

-REQUIREMENTS_SNOWFLAKE = "requirements.snowflake.txt"
-REQUIREMENTS_OTHER = "requirements.other.txt"
-

 def snowpark_package(
-    source: Path,
-    artifact_file: Path,
+    paths: SnowparkPackagePaths,
     pypi_download: PypiOption,
     check_anaconda_for_pypi_deps: bool,
     package_native_libraries: PypiOption,
 ):
     log.info("Resolving any requirements from requirements.txt...")
-    requirements = package_utils.parse_requirements()
+    requirements = package_utils.parse_requirements(
+        requirements_file=paths.defined_requirements_file
+    )
     if requirements:
         anaconda = AnacondaChannel.from_snowflake()
         log.info("Comparing provided packages from Snowflake Anaconda...")
@@ -64,7 +62,9 @@ def snowpark_package(
         if not split_requirements.other:
             log.info("No packages to manually resolve")
         else:
-            _write_requirements_file(REQUIREMENTS_OTHER, split_requirements.other)
+            _write_requirements_file(
+                paths.other_requirements_file, split_requirements.other
+            )
             do_download = (
                 click.confirm(
                     "Do you want to try to download non-Anaconda packages?",
@@ -75,13 +75,15 @@ def snowpark_package(
             )
             if do_download:
                 log.info("Installing non-Anaconda packages...")
+
                 (
                     should_continue,
                     second_chance_results,
                 ) = package_utils.download_packages(
-                    anaconda,
-                    REQUIREMENTS_OTHER,
-                    check_anaconda_for_pypi_deps,
+                    anaconda=anaconda,
+                    requirements_file=paths.other_requirements_file,
+                    packages_dir=paths.downloaded_packages_dir,
+                    perform_anaconda_check_for_dependencies=check_anaconda_for_pypi_deps,
                     allow_shared_libraries=package_native_libraries,
                 )
                 # add the Anaconda packages discovered as dependencies
@@ -93,19 +95,23 @@ def snowpark_package(
     # write requirements.snowflake.txt file
     if split_requirements.snowflake:
         _write_requirements_file(
-            REQUIREMENTS_SNOWFLAKE,
+            paths.snowflake_requirements_file,
             package_utils.deduplicate_and_sort_reqs(split_requirements.snowflake),
         )

-    zip_dir(source=source, dest_zip=artifact_file)
+    zip_dir(source=paths.source.path, dest_zip=paths.artifact_file.path)

-    if Path(".packages").exists():
-        zip_dir(source=Path(".packages"), dest_zip=artifact_file, mode="a")
-    log.info("Deployment package now ready: %s", artifact_file)
+    if paths.downloaded_packages_dir.exists():
+        zip_dir(
+            source=paths.downloaded_packages_dir.path,
+            dest_zip=paths.artifact_file.path,
+            mode="a",
+        )
+    log.info("Deployment package now ready: %s", paths.artifact_file.path)


-def _write_requirements_file(file_name: str, requirements: List[Requirement]):
-    log.info("Writing %s file", file_name)
-    with SecurePath(file_name).open("w", encoding="utf-8") as f:
+def _write_requirements_file(file_path: SecurePath, requirements: List[Requirement]):
+    log.info("Writing %s file", file_path.path)
+    with file_path.open("w", encoding="utf-8") as f:
         for req in requirements:
             f.write(f"{req.line}\n")
diff --git a/src/snowflake/cli/plugins/snowpark/venv.py b/src/snowflake/cli/plugins/snowpark/venv.py
index 58394c3d62..daf67a4f2a 100644
--- a/src/snowflake/cli/plugins/snowpark/venv.py
+++ b/src/snowflake/cli/plugins/snowpark/venv.py
@@ -11,6 +11,7 @@
 from tempfile import TemporaryDirectory
 from typing import Dict, List

+from snowflake.cli.api.secure_path import SecurePath
 from snowflake.cli.plugins.snowpark.models import (
     Requirement,
     RequirementWithWheelAndDeps,
@@ -55,12 +56,12 @@ def run_python(self, args):

         return process

-    def pip_install(self, requirements_files):
-        process = self.run_python(["-m", "pip", "install", "-r", requirements_files])
+    def pip_install(self, requirements_file):
+        process = self.run_python(["-m", "pip", "install", "-r", requirements_file])
         return process.returncode

-    def pip_wheel(self, requirements_files, download_dir, index_url):
-        command = ["-m", "pip", "wheel", "-r", requirements_files, "-w", download_dir]
+    def pip_wheel(self, requirements_file, download_dir, index_url):
+        command = ["-m", "pip", "wheel", "-r", requirements_file, "-w", download_dir]
         if index_url is not None:
             command += ["-i", index_url]
         process = self.run_python(command)
@@ -81,7 +82,7 @@ def _get_library_path(self) -> Path:
         ][0]

     def get_package_dependencies(
-        self, requirements_file: str, downloads_dir: Path
+        self, requirements_file: SecurePath, downloads_dir: Path
     ) -> List[RequirementWithWheelAndDeps]:
         packages_metadata: Dict[str, WheelMetadata] = {
             meta.name: meta
@@ -113,7 +114,7 @@ def _get_dependencies(package: Requirement):
             for package in requires:
                 _get_dependencies(Requirement.parse_line(package))

-        with open(requirements_file, "r") as req_file:
+        with requirements_file.open("r", read_file_limit_mb=512) as req_file:
             for line in req_file:
                 _get_dependencies(Requirement.parse_line(line))

diff --git a/tests/snowpark/test_snowpark_shared.py b/tests/snowpark/test_snowpark_shared.py
index 450f0f4a8f..02b084b2a8 100644
--- a/tests/snowpark/test_snowpark_shared.py
+++ b/tests/snowpark/test_snowpark_shared.py
@@ -4,7 +4,9 @@
 from zipfile import ZipFile

 import snowflake.cli.plugins.snowpark.snowpark_shared as shared
+from snowflake.cli.api.secure_path import SecurePath
 from snowflake.cli.plugins.snowpark.models import Requirement, SplitRequirements
+from snowflake.cli.plugins.snowpark.snowpark_package_paths import SnowparkPackagePaths


 @mock.patch(
@@ -31,7 +33,15 @@ def test_snowpark_package(
     app_root.mkdir()
     app_root.joinpath(Path("app.py")).touch()

-    shared.snowpark_package(app_root, Path("app.zip"), "yes", False, "yes")
+    shared.snowpark_package(
+        SnowparkPackagePaths(
+            source=SecurePath(app_root),
+            artifact_file=SecurePath("app.zip"),
+        ),
+        pypi_download="yes",
+        check_anaconda_for_pypi_deps=False,
+        package_native_libraries="yes",
+    )

     zip_path = os.path.join(temp_dir, "app.zip")
     assert os.path.isfile(zip_path)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 845e7cc15f..cf3bfd319e 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -53,17 +53,19 @@ def test_prepare_app_zip_if_exception_is_raised_if_no_dst(app_zip):
     assert expected_error.type == FileNotFoundError


-def test_parse_requierements_with_correct_file(
+def test_parse_requirements_with_correct_file(
     correct_requirements_snowflake_txt: str, temp_dir
 ):
-    result = package_utils.parse_requirements(correct_requirements_snowflake_txt)
+    result = package_utils.parse_requirements(
+        SecurePath(correct_requirements_snowflake_txt)
+    )

     assert len(result) == len(test_data.requirements)


 def test_parse_requirements_with_nonexistent_file(temp_dir):
     path = os.path.join(temp_dir, "non_existent.file")
-    result = package_utils.parse_requirements(path)
+    result = package_utils.parse_requirements(SecurePath(path))

     assert result == []

@@ -129,7 +131,7 @@ def test_get_packages(contents, expected, correct_requirements_snowflake_txt):

 def test_parse_requirements(correct_requirements_txt: str):
-    result = package_utils.parse_requirements(correct_requirements_txt)
+    result = package_utils.parse_requirements(SecurePath(correct_requirements_txt))

     assert len(result) == 3
     assert result[0].name == "Django"
@@ -211,7 +213,8 @@ def test_pip_fail_message(mock_installer, correct_requirements_txt, caplog):
     with caplog.at_level(logging.INFO, "snowflake.cli.plugins.snowpark.package_utils"):
         package_utils.download_packages(
             anaconda=AnacondaChannel([]),
-            file_name=correct_requirements_txt,
+            requirements_file=SecurePath(correct_requirements_txt),
+            packages_dir=SecurePath(".packages"),
             perform_anaconda_check_for_dependencies=True,
             allow_shared_libraries=PypiOption.YES,
         )
diff --git a/tests_integration/conftest.py b/tests_integration/conftest.py
index faf02cda0c..fd0c2de507 100644
--- a/tests_integration/conftest.py
+++ b/tests_integration/conftest.py
@@ -129,18 +129,24 @@ def __init__(self, output: str):
 def project_directory(temporary_working_directory, test_root_path):
     @contextmanager
     def _temporary_project_directory(
-        project_name, merge_project_definition: Optional[dict] = None
+        project_name,
+        merge_project_definition: Optional[dict] = None,
+        subpath: Optional[Path] = None,
     ):
         test_data_file = test_root_path / "test_data" / "projects" / project_name
-        shutil.copytree(test_data_file, temporary_working_directory, dirs_exist_ok=True)
+        project_dir = temporary_working_directory
+        if subpath:
+            project_dir = temporary_working_directory / subpath
+            project_dir.mkdir(parents=True)
+        shutil.copytree(test_data_file, project_dir, dirs_exist_ok=True)

         if merge_project_definition:
             with Path("snowflake.yml").open("r") as fh:
                 project_definition = yaml.safe_load(fh)
             merge_left(project_definition, merge_project_definition)
-            with open(Path(temporary_working_directory) / "snowflake.yml", "w") as file:
+            with open(Path(project_dir) / "snowflake.yml", "w") as file:
                 yaml.dump(project_definition, file)
-        yield temporary_working_directory
+        yield project_dir

     return _temporary_project_directory
diff --git a/tests_integration/test_snowpark.py b/tests_integration/test_snowpark.py
index 38af5956f3..2865361d6f 100644
--- a/tests_integration/test_snowpark.py
+++ b/tests_integration/test_snowpark.py
@@ -343,6 +343,52 @@ def test_snowpark_with_single_requirement_having_transient_deps(
     )


+@pytest.mark.integration
+def test_snowpark_commands_executed_outside_project_dir(
+    runner, _test_steps, project_directory, alter_snowflake_yml, test_database
+):
+    project_subpath = "my_snowpark_project"
+    with project_directory(
+        "snowpark_with_single_requirements_having_transient_deps",
+        subpath=project_subpath,
+    ):
+        result = runner.invoke_json(
+            [
+                "snowpark",
+                "build",
+                "--project",
+                project_subpath,
+                "--pypi-download",
+                "yes",
+                "--check-anaconda-for-pypi-deps",
+            ]
+        )
+        assert result.exit_code == 0
+
+        packages_dir = Path(f"{project_subpath}/.packages")
+
+        assert packages_dir.exists()
+        assert (packages_dir / "dummy_pkg_for_tests_with_deps").exists()
+        assert (packages_dir / "dummy_pkg_for_tests").exists()  # as transient dep
+
+        _test_steps.snowpark_deploy_should_finish_successfully_and_return(
+            additional_arguments=["--project", project_subpath],
+            expected_result=[
+                {
+                    "object": f"{test_database.upper()}.PUBLIC.TEST_FUNC(name string)",
+                    "type": "function",
+                    "status": "created",
+                }
+            ],
+        )
+
+        _test_steps.snowpark_execute_should_return_expected_value(
+            object_type="function",
+            identifier="test_func('foo')",
+            expected_value="['We want... a shrubbery!', 'fishy, fishy, fish!']",
+        )
+
+
 @pytest.mark.integration
 def test_snowpark_default_arguments(
     _test_steps, project_directory, alter_snowflake_yml, test_database
diff --git a/tests_integration/testing_utils/snowpark_utils.py b/tests_integration/testing_utils/snowpark_utils.py
index 28bd4dbb35..4ea04965f1 100644
--- a/tests_integration/testing_utils/snowpark_utils.py
+++ b/tests_integration/testing_utils/snowpark_utils.py
@@ -192,7 +192,7 @@ def _run_deploy(
         ]

         if additional_arguments:
-            arguments.append(*additional_arguments)
+            arguments.extend(additional_arguments)

         result = self._setup.runner.invoke_with_connection_json(arguments)
         assert_that_result_is_successful(result)
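
A minimal sketch of the path resolution introduced by SnowparkPackagePaths.for_snowpark_project in this patch, assuming a hypothetical project passed via --project as my_snowpark_project/ whose snowflake.yml declares src: app; plain pathlib is used here instead of the CLI's SecurePath wrapper, and the printed paths are illustrative.

from pathlib import Path

# Hypothetical invocation: snow snowpark build --project my_snowpark_project
project_root = Path("my_snowpark_project")
defined_src = Path("app")  # value of `src` in that project's snowflake.yml

# Same resolution rule as SnowparkPackagePaths.for_snowpark_project:
# relative sources are resolved against the project root, not the CWD.
source = defined_src if defined_src.is_absolute() else (project_root / defined_src).resolve()
artifact_file = project_root / (source.name + ".zip")

print(source)         # <cwd>/my_snowpark_project/app
print(artifact_file)  # my_snowpark_project/app.zip (previously Path.cwd() / "app.zip")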