Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SNOW-1243569] Fixed snowpark build paths for builds with --project option #915

Merged
merged 1 commit into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
* Project definition no longer accepts extra fields. Any extra field will cause an error.
* Changing imports in the function/procedure section of `snowflake.yml` will cause the definition to be updated on replace.
* Adding `--pattern` flag to `stage list` command for filtering out results with regex.
* Fixed snowpark build paths for builds with the `--project` option (fixed empty zip issue).

# v2.1.1

Expand Down
42 changes: 20 additions & 22 deletions src/snowflake/cli/plugins/snowpark/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import logging
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional, Set

import typer
Expand Down Expand Up @@ -32,7 +31,7 @@
FunctionSchema,
ProcedureSchema,
)
from snowflake.cli.api.project.schemas.snowpark.snowpark import Snowpark
from snowflake.cli.api.secure_path import SecurePath
from snowflake.cli.plugins.object.manager import ObjectManager
from snowflake.cli.plugins.object.stage.manager import StageManager
from snowflake.cli.plugins.snowpark.common import (
Expand All @@ -42,6 +41,7 @@
from snowflake.cli.plugins.snowpark.manager import FunctionManager, ProcedureManager
from snowflake.cli.plugins.snowpark.models import PypiOption
from snowflake.cli.plugins.snowpark.package_utils import get_snowflake_packages
from snowflake.cli.plugins.snowpark.snowpark_package_paths import SnowparkPackagePaths
from snowflake.cli.plugins.snowpark.snowpark_shared import (
CheckAnacondaForPyPiDependencies,
PackageNativeLibrariesOption,
Expand Down Expand Up @@ -79,6 +79,10 @@ def deploy(
All deployed objects use the same artifact which is deployed only once.
"""
snowpark = cli_context.project_definition
paths = SnowparkPackagePaths.for_snowpark_project(
project_root=SecurePath(cli_context.project_root),
snowpark_project_definition=snowpark,
)

procedures = snowpark.procedures
functions = snowpark.functions
Expand All @@ -88,9 +92,7 @@ def deploy(
"No procedures or functions were specified in the project definition."
)

build_artifact_path = _get_snowpark_artifact_path(snowpark)

if not build_artifact_path.exists():
if not paths.artifact_file.exists():
raise ClickException(
"Artifact required for deploying the project does not exist in this directory. "
"Please use build command to create it."
Expand Down Expand Up @@ -125,10 +127,12 @@ def deploy(
packages = get_snowflake_packages()

artifact_stage_directory = get_app_stage_path(stage_name, snowpark.project_name)
artifact_stage_target = f"{artifact_stage_directory}/{build_artifact_path.name}"
artifact_stage_target = (
f"{artifact_stage_directory}/{paths.artifact_file.path.name}"
)

stage_manager.put(
local_path=build_artifact_path,
local_path=paths.artifact_file.path,
stage_path=artifact_stage_directory,
overwrite=True,
)
Expand All @@ -143,7 +147,7 @@ def deploy(
existing_objects=existing_procedures,
packages=packages,
stage_artifact_path=artifact_stage_target,
source_name=build_artifact_path.name,
source_name=paths.artifact_file.path.name,
)
deploy_status.append(operation_result)

Expand All @@ -156,7 +160,7 @@ def deploy(
existing_objects=existing_functions,
packages=packages,
stage_artifact_path=artifact_stage_target,
source_name=build_artifact_path.name,
source_name=paths.artifact_file.path.name,
)
deploy_status.append(operation_result)

Expand Down Expand Up @@ -304,12 +308,6 @@ def _deploy_single_object(
}


def _get_snowpark_artifact_path(snowpark_definition: Snowpark):
    """Return the expected location of the build artifact for a project.

    The artifact is a zip file placed in the current working directory and
    named after the last component of the project's ``src`` path, e.g.
    ``src: app/`` gives ``<cwd>/app.zip``.
    """
    archive_name = f"{Path(snowpark_definition.src).name}.zip"
    return Path.cwd() / archive_name


@app.command("build")
@with_project_definition("snowpark")
def build(
Expand All @@ -322,19 +320,19 @@ def build(
Builds the Snowpark project as a `.zip` archive that can be used by `deploy` command.
The archive is built using only the `src` directory specified in the project file.
"""
snowpark = cli_context.project_definition
source = Path(snowpark.src)
artifact_file = _get_snowpark_artifact_path(snowpark)
log.info("Building package using sources from: %s", source.resolve())
paths = SnowparkPackagePaths.for_snowpark_project(
project_root=SecurePath(cli_context.project_root),
snowpark_project_definition=cli_context.project_definition,
)
log.info("Building package using sources from: %s", paths.source.path)

snowpark_package(
source=source,
artifact_file=artifact_file,
paths=paths,
pypi_download=pypi_download, # type: ignore[arg-type]
check_anaconda_for_pypi_deps=check_anaconda_for_pypi_deps,
package_native_libraries=package_native_libraries, # type: ignore[arg-type]
)
return MessageResult(f"Build done. Artifact path: {artifact_file}")
return MessageResult(f"Build done. Artifact path: {paths.artifact_file.path}")


class _SnowparkObject(Enum):
Expand Down
3 changes: 2 additions & 1 deletion src/snowflake/cli/plugins/snowpark/package/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ def lookup(
anaconda=anaconda,
perform_anaconda_check=True,
package_name=name,
file_name=None,
requirements_file=None,
packages_dir=SecurePath(PACKAGES_DIR),
allow_native_libraries=allow_native_libraries,
)

Expand Down
34 changes: 20 additions & 14 deletions src/snowflake/cli/plugins/snowpark/package_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@


def parse_requirements(
requirements_file: str = "requirements.txt",
requirements_file: SecurePath = SecurePath("requirements.txt"),
) -> List[Requirement]:
"""Reads and parses a Python requirements.txt file.

Expand All @@ -39,15 +39,14 @@ def parse_requirements(
list[str]: A flat list of package names, without versions
"""
reqs: List[Requirement] = []
requirements_file_spath = SecurePath(requirements_file)
if requirements_file_spath.exists():
with requirements_file_spath.open(
if requirements_file.exists():
with requirements_file.open(
"r", read_file_limit_mb=DEFAULT_SIZE_LIMIT_MB, encoding="utf-8"
) as f:
for req in requirements.parse(f):
reqs.append(req)
else:
log.info("No %s found", requirements_file)
log.info("No %s found", requirements_file.path)

return deduplicate_and_sort_reqs(reqs)

Expand All @@ -71,7 +70,8 @@ def deduplicate_and_sort_reqs(

def install_packages(
anaconda: AnacondaChannel,
file_name: str | None,
requirements_file: SecurePath | None,
packages_dir: SecurePath,
perform_anaconda_check: bool = True,
package_name: str | None = None,
allow_native_libraries: PypiOption = PypiOption.ASK,
Expand All @@ -87,23 +87,24 @@ def install_packages(
which are available on the Snowflake Anaconda channel. These will have
been deleted from the local packages folder.
"""
if file_name and package_name:
if requirements_file and package_name:
raise ClickException(
"Could not use package name and requirements file simultaneously"
)

if file_name and not Path(file_name).exists():
raise ClickException(f"File {file_name} does not exists.")
if requirements_file and not requirements_file.exists():
raise ClickException(f"File {requirements_file.path} does not exists.")

with Venv() as v:
if package_name:
# This is a Windows workaround: we use TemporaryDirectory instead of NamedTemporaryFile
tmp_requirements = Path(v.directory.name) / "requirements.txt"
tmp_requirements = SecurePath(v.directory.name) / "requirements.txt"
tmp_requirements.write_text(str(package_name))
file_name = str(tmp_requirements)
requirements_file = tmp_requirements

pip_install_result = v.pip_install(file_name)
dependencies = v.get_package_dependencies(file_name)
assert type(requirements_file) is SecurePath
pip_install_result = v.pip_install(str(requirements_file.path))
dependencies = v.get_package_dependencies(str(requirements_file.path))

if pip_install_result != 0:
log.info(pip_failed_msg.format(pip_install_result))
Expand Down Expand Up @@ -138,7 +139,12 @@ def install_packages(
return False, second_chance_results
else:
v.copy_files_to_packages_dir(
[Path(file) for dep in dependencies_to_be_packed for file in dep.files]
files_to_be_copied=[
Path(file)
for dep in dependencies_to_be_packed
for file in dep.files
],
destination=packages_dir.path,
)
return True, second_chance_results

Expand Down
57 changes: 57 additions & 0 deletions src/snowflake/cli/plugins/snowpark/snowpark_package_paths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from dataclasses import dataclass

from snowflake.cli.api.project.schemas.snowpark.snowpark import Snowpark
from snowflake.cli.api.secure_path import SecurePath

_DEFINED_REQUIREMENTS = "requirements.txt"
_REQUIREMENTS_SNOWFLAKE = "requirements.snowflake.txt"
_REQUIREMENTS_OTHER = "requirements.other.txt"
_PACKAGES_DIR = ".packages"


@dataclass
class SnowparkPackagePaths:
source: SecurePath
artifact_file: SecurePath
defined_requirements_file: SecurePath = SecurePath(_DEFINED_REQUIREMENTS)
snowflake_requirements_file: SecurePath = SecurePath(_REQUIREMENTS_SNOWFLAKE)
other_requirements_file: SecurePath = SecurePath(_REQUIREMENTS_OTHER)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we change the name? I never know what "other" means here

downloaded_packages_dir: SecurePath = SecurePath(_PACKAGES_DIR)

@classmethod
def for_snowpark_project(
    cls, project_root: SecurePath, snowpark_project_definition: Snowpark
) -> "SnowparkPackagePaths":
    """Build the set of paths for a Snowpark project rooted at ``project_root``.

    The source directory comes from the ``src`` field of the project
    definition; the artifact file is derived from that source path. The
    requirements files and the downloaded-packages directory are placed
    directly under ``project_root``.
    """
    src_from_definition = SecurePath(snowpark_project_definition.src)
    resolved_source = cls._get_snowpark_project_source_absolute_path(
        project_root=project_root,
        defined_source_path=src_from_definition,
    )
    artifact = cls._get_snowpark_project_artifact_absolute_path(
        project_root=project_root,
        defined_source_path=src_from_definition,
    )
    return cls(
        source=resolved_source,
        artifact_file=artifact,
        defined_requirements_file=project_root / _DEFINED_REQUIREMENTS,
        snowflake_requirements_file=project_root / _REQUIREMENTS_SNOWFLAKE,
        other_requirements_file=project_root / _REQUIREMENTS_OTHER,
        downloaded_packages_dir=project_root / _PACKAGES_DIR,
    )

@classmethod
def _get_snowpark_project_source_absolute_path(
    cls, project_root: SecurePath, defined_source_path: SecurePath
) -> SecurePath:
    """Return the project's source path as an absolute path.

    A source path that is already absolute is returned unchanged; a relative
    one is joined onto ``project_root`` and resolved.
    """
    declared = defined_source_path.path
    if not declared.is_absolute():
        joined = project_root / declared
        return SecurePath(joined.path.resolve())
    return defined_source_path

@classmethod
def _get_snowpark_project_artifact_absolute_path(
    cls, project_root: SecurePath, defined_source_path: SecurePath
) -> SecurePath:
    """Return the path of the build artifact for the project.

    The artifact lives directly under ``project_root`` and is named after
    the last component of the absolute source path, with ``.zip`` appended.
    """
    absolute_source = cls._get_snowpark_project_source_absolute_path(
        project_root=project_root, defined_source_path=defined_source_path
    )
    archive_name = f"{absolute_source.path.name}.zip"
    return project_root / archive_name
43 changes: 24 additions & 19 deletions src/snowflake/cli/plugins/snowpark/snowpark_shared.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import logging
from pathlib import Path
from typing import List

import click
Expand All @@ -10,6 +9,7 @@
from snowflake.cli.plugins.snowpark import package_utils
from snowflake.cli.plugins.snowpark.models import PypiOption, Requirement
from snowflake.cli.plugins.snowpark.package.anaconda import AnacondaChannel
from snowflake.cli.plugins.snowpark.snowpark_package_paths import SnowparkPackagePaths
from snowflake.cli.plugins.snowpark.zipper import zip_dir

PyPiDownloadOption: PypiOption = typer.Option(
Expand Down Expand Up @@ -44,27 +44,27 @@

log = logging.getLogger(__name__)

REQUIREMENTS_SNOWFLAKE = "requirements.snowflake.txt"
REQUIREMENTS_OTHER = "requirements.other.txt"


def snowpark_package(
source: Path,
artifact_file: Path,
paths: SnowparkPackagePaths,
pypi_download: PypiOption,
check_anaconda_for_pypi_deps: bool,
package_native_libraries: PypiOption,
):
log.info("Resolving any requirements from requirements.txt...")
requirements = package_utils.parse_requirements()
requirements = package_utils.parse_requirements(
requirements_file=paths.defined_requirements_file
)
if requirements:
anaconda = AnacondaChannel.from_snowflake()
log.info("Comparing provided packages from Snowflake Anaconda...")
split_requirements = anaconda.parse_anaconda_packages(packages=requirements)
if not split_requirements.other:
log.info("No packages to manually resolve")
else:
_write_requirements_file(REQUIREMENTS_OTHER, split_requirements.other)
_write_requirements_file(
paths.other_requirements_file, split_requirements.other
)
do_download = (
click.confirm(
"Do you want to try to download non-Anaconda packages?",
Expand All @@ -76,9 +76,10 @@ def snowpark_package(
if do_download:
log.info("Installing non-Anaconda packages...")
should_continue, second_chance_results = package_utils.install_packages(
anaconda,
REQUIREMENTS_OTHER,
check_anaconda_for_pypi_deps,
anaconda=anaconda,
requirements_file=paths.other_requirements_file,
packages_dir=paths.downloaded_packages_dir,
perform_anaconda_check=check_anaconda_for_pypi_deps,
allow_native_libraries=package_native_libraries,
)
# add the Anaconda packages discovered as dependencies
Expand All @@ -90,19 +91,23 @@ def snowpark_package(
# write requirements.snowflake.txt file
if split_requirements.snowflake:
_write_requirements_file(
REQUIREMENTS_SNOWFLAKE,
paths.snowflake_requirements_file,
package_utils.deduplicate_and_sort_reqs(split_requirements.snowflake),
)

zip_dir(source=source, dest_zip=artifact_file)
zip_dir(source=paths.source.path, dest_zip=paths.artifact_file.path)

if Path(".packages").exists():
zip_dir(source=Path(".packages"), dest_zip=artifact_file, mode="a")
log.info("Deployment package now ready: %s", artifact_file)
if paths.downloaded_packages_dir.exists():
zip_dir(
source=paths.downloaded_packages_dir.path,
dest_zip=paths.artifact_file.path,
mode="a",
)
log.info("Deployment package now ready: %s", paths.artifact_file.path)


def _write_requirements_file(file_name: str, requirements: List[Requirement]):
log.info("Writing %s file", file_name)
with SecurePath(file_name).open("w", encoding="utf-8") as f:
def _write_requirements_file(file_path: SecurePath, requirements: List[Requirement]):
log.info("Writing %s file", file_path.path)
with file_path.open("w", encoding="utf-8") as f:
for req in requirements:
f.write(f"{req.line}\n")
Loading
Loading