Fixed version parsing in AnacondaChannel #944

Merged · 29 commits · Apr 8, 2024

Commits (29)
6299b18
fix package availability parsing in Anaconda
sfc-gh-pczajka Mar 27, 2024
ac0894f
fix non-pep508 version formats
sfc-gh-pczajka Mar 27, 2024
cb9457f
update release notes
sfc-gh-pczajka Mar 27, 2024
eba4926
fix unit tests
sfc-gh-pczajka Mar 28, 2024
0ae2fd1
fix dependency format in sql commands
sfc-gh-pczajka Mar 28, 2024
69eae89
fix calculating diff between anaconda and requirements
sfc-gh-pczajka Mar 28, 2024
23ebdbe
fix unit tests
sfc-gh-pczajka Mar 28, 2024
7f893db
fix unit test
sfc-gh-pczajka Mar 28, 2024
e479ab2
review fixes part 1
sfc-gh-pczajka Mar 29, 2024
429b2cb
run pre-commit
sfc-gh-pczajka Apr 2, 2024
4e8f374
Add pre-commit hook to use our implementation of Requirements
sfc-gh-pczajka Apr 2, 2024
9ec3d3a
standarize name while parsing a requirement
sfc-gh-pczajka Apr 2, 2024
3df8cd0
fix unit tests IN PROGRESS
sfc-gh-pczajka Apr 2, 2024
14cd3ef
Merge branch 'main' into SNOW-1250044-fix-anaconda-availability-check
sfc-gh-pczajka Apr 4, 2024
0c43754
save anaconda package names in requirements.snowflake.txt
sfc-gh-pczajka Apr 4, 2024
989e81d
fix tests
sfc-gh-pczajka Apr 4, 2024
0dbae45
Merge branch 'main' into SNOW-1250044-fix-anaconda-availability-check
sfc-gh-pczajka Apr 4, 2024
1decec9
fix unit tests
sfc-gh-pczajka Apr 4, 2024
45bd493
Lookup: return all possible versions if available versions cannot be …
sfc-gh-pczajka Apr 4, 2024
a53b156
smallfix
sfc-gh-pczajka Apr 4, 2024
071c8ec
Merge branch 'main' into SNOW-1250044-fix-anaconda-availability-check
sfc-gh-pczajka Apr 5, 2024
0dfe66f
refactor
sfc-gh-pczajka Apr 5, 2024
83a73f7
refactor anaconda
sfc-gh-pczajka Apr 5, 2024
0cd9c13
refactor
sfc-gh-pczajka Apr 5, 2024
e3b5a09
sort available versions in decreasing order
sfc-gh-pczajka Apr 5, 2024
472ea75
Merge branch 'main' into SNOW-1250044-fix-anaconda-availability-check
sfc-gh-pczajka Apr 8, 2024
a1c3ee2
refactor ignore_anaconda logic
sfc-gh-pczajka Apr 8, 2024
0549559
Merge branch 'main' into SNOW-1250044-fix-anaconda-availability-check
sfc-gh-pczajka Apr 8, 2024
b351a3a
Merge branch 'main' into SNOW-1250044-fix-anaconda-availability-check
sfc-gh-pczajka Apr 8, 2024
9 changes: 9 additions & 0 deletions .pre-commit-config.yaml
@@ -66,3 +66,12 @@ repos:
language: system
entry: python snyk/dependency-sync.py
files: ^pyproject.toml$
- id: use-local-requirements-model
language: pygrep
name: "Use snowflake.cli.plugins.snowpark.models.Requirement for requirements parsing"
files: ^src/snowflake/.*\.py$
entry: >
^from requirements.* import|
^import requirements.*
pass_filenames: true
exclude: ^src/snowflake/cli/plugins/snowpark/models.py$
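
Below is a minimal, hypothetical sketch of what the `use-local-requirements-model` hook enforces, assuming `pygrep` applies the `entry` pattern line by line: direct imports of the third-party `requirements` parser are flagged inside `src/snowflake/`, while the in-repo `Requirement` model (exempted via `exclude`) stays the allowed entry point.

```python
import re

# Roughly the alternation from the hook's `entry` above, checked per line.
HOOK_PATTERN = re.compile(r"^from requirements.* import|^import requirements.*")

flagged = "import requirements"  # third-party parser: the hook rejects this
allowed = "from snowflake.cli.plugins.snowpark.models import Requirement"  # in-repo model

assert HOOK_PATTERN.search(flagged) is not None
assert HOOK_PATTERN.search(allowed) is None
```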
1 change: 1 addition & 0 deletions RELEASE-NOTES.md
@@ -50,6 +50,7 @@
* Fixed snowpark build paths for builds with --project option (fixed empty zip issue).
* More clear error messages in `snow snowpark build` command
* Adding support for any source supported by `pip` in `snow snowpark`.
* Fixed version parsing for packages lookup on Snowflake Anaconda Channel

# v2.1.2

58 changes: 34 additions & 24 deletions src/snowflake/cli/plugins/snowpark/commands.py
@@ -17,7 +17,11 @@
)
from snowflake.cli.api.commands.project_initialisation import add_init_command
from snowflake.cli.api.commands.snow_typer import SnowTyper
from snowflake.cli.api.constants import DEPLOYMENT_STAGE, ObjectType
from snowflake.cli.api.constants import (
DEFAULT_SIZE_LIMIT_MB,
DEPLOYMENT_STAGE,
ObjectType,
)
from snowflake.cli.api.exceptions import (
SecretsWithoutExternalAccessIntegrationError,
)
@@ -41,8 +45,8 @@
check_if_replace_is_required,
)
from snowflake.cli.plugins.snowpark.manager import FunctionManager, ProcedureManager
from snowflake.cli.plugins.snowpark.models import Requirement, YesNoAsk
from snowflake.cli.plugins.snowpark.package_utils import get_snowflake_packages
from snowflake.cli.plugins.snowpark.models import YesNoAsk
from snowflake.cli.plugins.snowpark.package.anaconda import AnacondaChannel
from snowflake.cli.plugins.snowpark.snowpark_package_paths import SnowparkPackagePaths
from snowflake.cli.plugins.snowpark.snowpark_shared import (
AllowSharedLibrariesOption,
@@ -130,7 +134,9 @@ def deploy(
stage_name=stage_name, comment="deployments managed by Snowflake CLI"
)

packages = get_snowflake_packages()
snowflake_dependencies = _read_snowflake_requrements_file(
paths.snowflake_requirements_file
)

artifact_stage_directory = get_app_stage_path(stage_name, snowpark.project_name)
artifact_stage_target = (
@@ -151,9 +157,8 @@
object_type=ObjectType.PROCEDURE,
object_definition=procedure,
existing_objects=existing_procedures,
packages=packages,
snowflake_dependencies=snowflake_dependencies,
stage_artifact_path=artifact_stage_target,
source_name=paths.artifact_file.path.name,
)
deploy_status.append(operation_result)

@@ -164,9 +169,8 @@
object_type=ObjectType.FUNCTION,
object_definition=function,
existing_objects=existing_functions,
packages=packages,
snowflake_dependencies=snowflake_dependencies,
stage_artifact_path=artifact_stage_target,
source_name=paths.artifact_file.path.name,
)
deploy_status.append(operation_result)

@@ -251,9 +255,8 @@ def _deploy_single_object(
object_type: ObjectType,
object_definition: Callable,
existing_objects: Dict[str, Dict],
packages: List[str],
snowflake_dependencies: List[str],
stage_artifact_path: str,
source_name: str,
):
identifier = build_udf_sproc_identifier(
object_definition, manager, include_parameter_names=False
@@ -274,12 +277,13 @@ def _deploy_single_object(
object_exists = identifier in existing_objects
if object_exists:
replace_object = check_if_replace_is_required(
object_type,
existing_objects[identifier],
handler,
returns,
imports,
stage_artifact_path,
object_type=object_type,
current_state=existing_objects[identifier],
handler=handler,
return_type=returns,
snowflake_dependencies=snowflake_dependencies,
imports=imports,
stage_artifact_file=stage_artifact_path,
)

if object_exists and not replace_object:
@@ -294,7 +298,7 @@ def _deploy_single_object(
"handler": handler,
"return_type": returns,
"artifact_file": stage_artifact_path,
"packages": packages,
"packages": snowflake_dependencies,
"runtime": object_definition.runtime,
"external_access_integrations": object_definition.external_access_integrations,
"secrets": object_definition.secrets,
@@ -326,9 +330,10 @@ def _deploy_single_object(
)


def _write_requirements_file(file_path: SecurePath, requirements: List[Requirement]):
log.info("Writing requirements into file %s", file_path.path)
file_path.write_text("\n".join(req.line for req in requirements))
def _read_snowflake_requrements_file(file_path: SecurePath):
if not file_path.exists():
return []
return file_path.read_text(file_size_limit_mb=DEFAULT_SIZE_LIMIT_MB).splitlines()


@app.command("build")
@@ -363,11 +368,16 @@ def build(
requirements = package_utils.parse_requirements(
requirements_file=snowpark_paths.defined_requirements_file,
)
anaconda: AnacondaChannel = (
AnacondaChannel.empty()
if ignore_anaconda
else AnacondaChannel.from_snowflake()
)

download_result = package_utils.download_unavailable_packages(
requirements=requirements,
target_dir=packages_dir,
ignore_anaconda=ignore_anaconda,
anaconda=anaconda,
Inline review thread:

Contributor:

Does this method require anaconda, or packages available in Anaconda? If the latter is true, then we can do something like

anaconda_packages = [] if ignore_anaconda else AnacondaChannel.from_snowflake().packages

download_result = package_utils.download_unavailable_packages(anaconda_packages=anaconda_packages, ...)

This way we have a clear interface. WDYT?

Contributor Author:

Refactored - I've added AnacondaChannel.empty(), which ignores all packages. This way download_unavailable_packages doesn't need to handle the ignore_anaconda=True case.

Contributor:

Nice and clean 🚀

skip_version_check=skip_version_check,
pip_index_url=index_url,
)
@@ -390,9 +400,9 @@
"Try again with --allow-shared-libraries."
)
if download_result.packages_available_in_anaconda:
_write_requirements_file(
snowpark_paths.snowflake_requirements_file,
download_result.packages_available_in_anaconda,
anaconda.write_requirements_file_in_snowflake_format( # type: ignore
file_path=snowpark_paths.snowflake_requirements_file,
requirements=download_result.packages_available_in_anaconda,
)

zip_dir(
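
The inline review thread above settles on a null-object style: `AnacondaChannel.empty()` behaves like a channel that reports nothing as available, so `download_unavailable_packages` never needs to branch on `ignore_anaconda`. The sketch below is a hypothetical illustration of that idea; the `_packages` field and `is_available` helper are invented here, since the real class in `snowflake.cli.plugins.snowpark.package.anaconda` is not shown in this diff.

```python
from __future__ import annotations

from typing import Dict, List


class AnacondaChannelSketch:
    """Hypothetical stand-in for AnacondaChannel to illustrate the empty() pattern."""

    def __init__(self, packages: Dict[str, List[str]]):
        # Mapping of standardized package name -> available versions.
        self._packages = packages

    @classmethod
    def empty(cls) -> "AnacondaChannelSketch":
        # Null object: nothing is ever reported as available,
        # which is exactly the ignore_anaconda behaviour.
        return cls(packages={})

    @classmethod
    def from_snowflake(cls) -> "AnacondaChannelSketch":
        # The real implementation fetches Snowflake Anaconda channel metadata;
        # here a tiny static mapping stands in for it.
        return cls(packages={"numpy": ["1.26.4", "1.24.3"]})

    def is_available(self, package_name: str) -> bool:
        return package_name.lower() in self._packages


# Callers pick the channel once; downstream code no longer checks ignore_anaconda.
def build_channel(ignore_anaconda: bool) -> AnacondaChannelSketch:
    return (
        AnacondaChannelSketch.empty()
        if ignore_anaconda
        else AnacondaChannelSketch.from_snowflake()
    )
```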
49 changes: 21 additions & 28 deletions src/snowflake/cli/plugins/snowpark/common.py
@@ -1,13 +1,15 @@
from __future__ import annotations

import re
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Set

from snowflake.cli.api.constants import DEFAULT_SIZE_LIMIT_MB, ObjectType
from snowflake.cli.api.constants import ObjectType
from snowflake.cli.api.project.schemas.snowpark.argument import Argument
from snowflake.cli.api.secure_path import SecurePath
from snowflake.cli.api.sql_execution import SqlExecutionMixin
from snowflake.cli.plugins.snowpark.package_utils import generate_deploy_stage_name
from snowflake.cli.plugins.snowpark.models import Requirement
from snowflake.cli.plugins.snowpark.package_utils import (
generate_deploy_stage_name,
)
from snowflake.connector.cursor import SnowflakeCursor

DEFAULT_RUNTIME = "3.8"
@@ -18,28 +20,25 @@ def check_if_replace_is_required(
current_state,
handler: str,
return_type: str,
snowflake_dependencies: List[str],
imports: List[str],
stage_artifact_file: str,
) -> bool:
import logging

log = logging.getLogger(__name__)
resource_json = _convert_resource_details_to_dict(current_state)
anaconda_packages = resource_json["packages"]
old_dependencies = resource_json["packages"]
log.info(
"Found %s defined Anaconda packages in deployed %s...",
len(anaconda_packages),
"Found %d defined Anaconda packages in deployed %s...",
len(old_dependencies),
object_type,
)
log.info("Checking if app configuration has changed...")
updated_package_list = _get_snowflake_packages_delta(
anaconda_packages,
)

if updated_package_list:
diff = len(updated_package_list) - len(anaconda_packages)
if _snowflake_dependencies_differ(old_dependencies, snowflake_dependencies):
log.info(
"Found difference of %s packages. Replacing the %s.", diff, object_type
"Found difference of package requirements. Replacing the %s.", object_type
)
return True

@@ -74,21 +73,15 @@ def _convert_resource_details_to_dict(function_details: SnowflakeCursor) -> dict
return function_dict


def _get_snowflake_packages_delta(anaconda_packages) -> List[str]:
updated_package_list = []
requirements_file = SecurePath("requirements.snowflake.txt")
if requirements_file.exists():
with requirements_file.open(
"r", read_file_limit_mb=DEFAULT_SIZE_LIMIT_MB, encoding="utf-8"
) as f:
# for each line, check if it exists in anaconda_packages. If it
# doesn't, add it to the return string
for line in f:
if line.strip() not in anaconda_packages:
updated_package_list.append(line.strip())
return updated_package_list
else:
return updated_package_list
def _snowflake_dependencies_differ(
old_dependencies: List[str], new_dependencies: List[str]
) -> bool:
def _standardize(packages: List[str]) -> Set[str]:
return set(
Requirement.parse_line(package).name_and_version for package in packages
)

return _standardize(old_dependencies) != _standardize(new_dependencies)


def _sql_to_python_return_type_mapper(resource_return_type: str) -> str:
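
For reference, a standalone approximation of the comparison `_snowflake_dependencies_differ` performs. The `_standardize` helper here is a simplified stand-in for `Requirement.parse_line(...).name_and_version` and only handles `==` pins; the real code also lowercases names and converts them to wheel format via `standardize_name`.

```python
from typing import List, Set


def _standardize(packages: List[str]) -> Set[str]:
    # Simplified stand-in for Requirement.parse_line(...).name_and_version:
    # lowercase, normalize '-' vs '_', keep the specifier text attached.
    normalized = set()
    for package in packages:
        name, sep, spec = package.partition("==")
        normalized.add(name.strip().lower().replace("-", "_") + sep + spec.strip())
    return normalized


def snowflake_dependencies_differ(old: List[str], new: List[str]) -> bool:
    return _standardize(old) != _standardize(new)


# Order and name casing do not trigger a replace; a version bump does.
assert not snowflake_dependencies_differ(
    ["Snowflake-Snowpark-Python==1.12.0", "pandas==2.1.4"],
    ["pandas==2.1.4", "snowflake_snowpark_python==1.12.0"],
)
assert snowflake_dependencies_differ(
    ["pandas==2.1.4"],
    ["pandas==2.2.0"],
)
```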
21 changes: 10 additions & 11 deletions src/snowflake/cli/plugins/snowpark/models.py
@@ -32,22 +32,21 @@ def parse_line(cls, line: str) -> Requirement:

if result.uri and not result.name:
result.name = get_package_name(result.uri)
result.name = cls.standardize_name(result.name)

return result

@classmethod
def _look_for_specifier(cls, specifier: str, line: str):
return re.search(cls.specifier_pattern.format(specifier), line)

@staticmethod
def standardize_name(name: str) -> str:
return WheelMetadata.to_wheel_name_format(name.lower())

@dataclass
class SplitRequirements:
"""A dataclass to hold the results of parsing requirements files and dividing them into
snowflake-supported vs other packages.
"""
@property
def formatted_specs(self):
return ",".join(sorted(spec[0] + spec[1] for spec in self.specs))

snowflake: List[Requirement]
other: List[Requirement]
@property
def name_and_version(self):
return self.name + self.formatted_specs


@dataclass
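
Taken together, the `models.py` changes make `name` plus sorted specifiers a stable identity for a requirement, which is what the dependency diffing in `common.py` relies on. A hedged usage sketch follows, assuming the Snowflake CLI package is installed; the exact standardized spelling of `name` depends on `WheelMetadata.to_wheel_name_format`, which is not part of this diff.

```python
from snowflake.cli.plugins.snowpark.models import Requirement

req = Requirement.parse_line("Snowflake-Snowpark-Python>=1.12.0,<2.0")

# parse_line now standardizes the name (lowercased, wheel-style), so lookups
# against the Anaconda channel and requirements.snowflake.txt entries agree.
print(req.name)

# formatted_specs joins the sorted (operator, version) tuples, e.g. "<2.0,>=1.12.0",
# and name_and_version appends them to the standardized name.
print(req.formatted_specs)
print(req.name_and_version)
```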