Skip to content

Commit

Permalink
feat(telemetry): Combine Telemetry hook to send heap event once (#766)
Browse files Browse the repository at this point in the history
---------

Signed-off-by: Nok Lam Chan <nok.lam.chan@quantumblack.com>
Signed-off-by: Dmitry Sorokin <40151847+DimedS@users.noreply.github.com>
Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>
  • Loading branch information
noklam authored Jul 19, 2024
1 parent 0ea4218 commit 142342d
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 131 deletions.
173 changes: 87 additions & 86 deletions kedro-telemetry/kedro_telemetry/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import os
import sys
import uuid
from copy import deepcopy
from datetime import datetime
from pathlib import Path
from typing import Any
Expand Down Expand Up @@ -153,103 +152,119 @@ def _generate_new_uuid(full_path: str) -> str:
return ""


class KedroTelemetryCLIHooks:
class KedroTelemetryHook:
"""Hook to send CLI command data to Heap"""

def __init__(self):
self._consent = None
self._sent = False
self._event_properties = None
self._project_path = None
self._user_uuid = None

@cli_hook_impl
def before_command_run(
self, project_metadata: ProjectMetadata, command_args: list[str]
):
"""Hook implementation to send command run data to Heap"""
try:
if not project_metadata: # in package mode
return

consent = _check_for_telemetry_consent(project_metadata.project_path)
if not consent:
logger.info(
"Kedro-Telemetry is installed, but you have opted out of "
"sharing usage analytics so none will be collected.",
)
return

logger.info(
"Kedro is sending anonymous usage data with the sole purpose of improving the product. "
"No personal data or IP addresses are stored on our side. "
"If you want to opt out, set the `KEDRO_DISABLE_TELEMETRY` or `DO_NOT_TRACK` environment variables, "
"or create a `.telemetry` file in the current working directory with the contents `consent: false`. "
"Read more at https://docs.kedro.org/en/stable/configuration/telemetry.html"
)

# get KedroCLI and its structure from actual project root
cli = KedroCLI(project_path=project_metadata.project_path)
cli_struct = _get_cli_structure(cli_obj=cli, get_help=False)
masked_command_args = _mask_kedro_cli(
cli_struct=cli_struct, command_args=command_args
)
main_command = masked_command_args[0] if masked_command_args else "kedro"
if not project_metadata: # in package mode
return

user_uuid = _get_or_create_uuid()
project_properties = _get_project_properties(
user_uuid, project_metadata.project_path / PYPROJECT_CONFIG_NAME
)
cli_properties = _format_user_cli_data(
project_properties, masked_command_args
)
self._consent = _check_for_telemetry_consent(project_metadata.project_path)
if not self._consent:
self._opt_out_notification()
return

_send_heap_event(
event_name=f"Command run: {main_command}",
identity=user_uuid,
properties=cli_properties,
)
# get KedroCLI and its structure from actual project root
cli = KedroCLI(project_path=project_metadata.project_path)
cli_struct = _get_cli_structure(cli_obj=cli, get_help=False)
masked_command_args = _mask_kedro_cli(
cli_struct=cli_struct, command_args=command_args
)

# send generic event too, so it's easier in data processing
generic_properties = deepcopy(cli_properties)
generic_properties["main_command"] = main_command
_send_heap_event(
event_name="CLI command",
identity=user_uuid,
properties=generic_properties,
)
except Exception as exc:
logger.warning(
"Something went wrong in hook implementation to send command run data to Heap. "
"Exception: %s",
exc,
)
self._user_uuid = _get_or_create_uuid()

event_properties = _get_project_properties(
self._user_uuid, project_metadata.project_path / PYPROJECT_CONFIG_NAME
)
event_properties["command"] = (
f"kedro {' '.join(masked_command_args)}" if masked_command_args else "kedro"
)
event_properties["main_command"] = (
masked_command_args[0] if masked_command_args else "kedro"
)

class KedroTelemetryProjectHooks:
"""Hook to send project statistics data to Heap"""
self._event_properties = event_properties

@cli_hook_impl
def after_command_run(self):
"""Send the generic "CLI command" event if no event has been sent yet.

Nothing is sent unless ``self._consent`` is truthy and ``self._sent`` is
still falsy.
"""
if self._consent and not self._sent:
self._send_telemetry_heap_event("CLI command")

@hook_impl
def after_context_created(self, context):
"""Hook implementation to send project statistics data to Heap"""
self.consent = _check_for_telemetry_consent(context.project_path)
self.project_path = context.project_path

if self._consent is None:
self._consent = _check_for_telemetry_consent(context.project_path)
if not self._consent:
self._opt_out_notification()
self._project_path = context.project_path

@hook_impl
def after_catalog_created(self, catalog):
# The user notification message is sent only once per command during the before_command_run hook
if not self.consent:
if self._consent is False:
return

default_pipeline = pipelines.get("__default__") # __default__
user_uuid = _get_or_create_uuid()

project_properties = _get_project_properties(
user_uuid, self.project_path / PYPROJECT_CONFIG_NAME
if not self._user_uuid:
self._user_uuid = _get_or_create_uuid()

if not self._event_properties:
self._event_properties = _get_project_properties(
self._user_uuid, self._project_path / PYPROJECT_CONFIG_NAME
)

project_properties = _format_project_statistics_data(
catalog, default_pipeline, pipelines
)
self._event_properties.update(project_properties)

self._send_telemetry_heap_event("Kedro Project Statistics")

project_statistics_properties = _format_project_statistics_data(
project_properties, catalog, default_pipeline, pipelines
def _opt_out_notification(self):
"""Log an INFO message saying telemetry is installed but the user has opted out."""
logger.info(
"Kedro-Telemetry is installed, but you have opted out of "
"sharing usage analytics so none will be collected.",
)
_send_heap_event(
event_name="Kedro Project Statistics",
identity=user_uuid,
properties=project_statistics_properties,

def _send_telemetry_heap_event(self, event_name: str):
"""Log the usage-data notice and send a single event to Heap.

The event is sent through ``_send_heap_event`` with ``self._user_uuid`` as
the identity and ``self._event_properties`` as the properties.

Args:
event_name: Name of the Heap event to send.

``self._sent`` is set to ``True`` only when ``_send_heap_event`` returns
without raising. Any ``Exception`` is logged as a warning and not re-raised.
"""

# The notice is logged before the send attempt, so it is emitted even if the send fails.
logger.info(
"Kedro is sending anonymous usage data with the sole purpose of improving the product. "
"No personal data or IP addresses are stored on our side. "
"If you want to opt out, set the `KEDRO_DISABLE_TELEMETRY` or `DO_NOT_TRACK` environment variables, "
"or create a `.telemetry` file in the current working directory with the contents `consent: false`. "
"Read more at https://docs.kedro.org/en/stable/configuration/telemetry.html"
)

try:
_send_heap_event(
event_name=event_name,
identity=self._user_uuid,
properties=self._event_properties,
)
# Reached only when the call above did not raise.
self._sent = True
except Exception as exc:
# Best-effort: a telemetry failure is reported as a warning instead of being re-raised.
logger.warning(
"Something went wrong in hook implementation to send command run data to Heap. "
"Exception: %s",
exc,
)


def _is_known_ci_env(known_ci_env_var_keys: set[str]):
# Most CI tools will set the CI environment variable to true
Expand Down Expand Up @@ -281,33 +296,20 @@ def _get_project_properties(user_uuid: str, pyproject_path: Path) -> dict:
return properties


def _format_user_cli_data(
properties: dict,
command_args: list[str],
):
"""Add format CLI command data to send to Heap."""
cli_properties = properties.copy()
cli_properties["command"] = (
f"kedro {' '.join(command_args)}" if command_args else "kedro"
)
return cli_properties


def _format_project_statistics_data(
properties: dict,
catalog: DataCatalog,
default_pipeline: Pipeline,
project_pipelines: dict,
):
"""Add project statistics to send to Heap."""
project_statistics_properties = properties.copy()
project_statistics_properties = {}
project_statistics_properties["number_of_datasets"] = sum(
1
for c in catalog.list()
if not c.startswith("parameters") and not c.startswith("params:")
)
project_statistics_properties["number_of_nodes"] = (
len(default_pipeline.nodes) if default_pipeline else None
len(default_pipeline.nodes) if default_pipeline else None # type: ignore
)
project_statistics_properties["number_of_pipelines"] = len(project_pipelines.keys())
return project_statistics_properties
Expand Down Expand Up @@ -375,5 +377,4 @@ def _is_valid_syntax(telemetry: Any) -> bool:
)


cli_hooks = KedroTelemetryCLIHooks()
project_hooks = KedroTelemetryProjectHooks()
telemetry_hook = KedroTelemetryHook()
4 changes: 2 additions & 2 deletions kedro-telemetry/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ test = [
]

[project.entry-points."kedro.cli_hooks"]
kedro-telemetry = "kedro_telemetry.plugin:cli_hooks"
kedro-telemetry = "kedro_telemetry.plugin:telemetry_hook"

[project.entry-points."kedro.hooks"]
kedro-telemetry = "kedro_telemetry.plugin:project_hooks"
kedro-telemetry = "kedro_telemetry.plugin:telemetry_hook"

[tool.setuptools]
include-package-data = true
Expand Down
Loading

0 comments on commit 142342d

Please sign in to comment.