diff --git a/keepercommander/__init__.py b/keepercommander/__init__.py index 9fd3bf13a..5f625c7d6 100644 --- a/keepercommander/__init__.py +++ b/keepercommander/__init__.py @@ -10,4 +10,7 @@ # Contact: ops@keepersecurity.com # -__version__ = '17.0.0' +import keepercommander.keeper_dag as keeper_dag +import keepercommander.discovery_common as discovery_common + +__version__ = '17.0.2' diff --git a/keepercommander/__main__.py b/keepercommander/__main__.py index 85a7dae22..84713dc71 100644 --- a/keepercommander/__main__.py +++ b/keepercommander/__main__.py @@ -21,6 +21,7 @@ import re import shlex import sys + from pathlib import Path from . import __version__ @@ -28,6 +29,8 @@ from .params import KeeperParams from .config_storage import loader + + def get_params_from_config(config_filename=None, launched_with_shortcut=False): # type: (Optional[str], bool) -> KeeperParams if os.getenv("KEEPER_COMMANDER_DEBUG"): logging.getLogger().setLevel(logging.DEBUG) diff --git a/keepercommander/discovery_common/README.md b/keepercommander/discovery_common/README.md new file mode 100644 index 000000000..e670d9ffa --- /dev/null +++ b/keepercommander/discovery_common/README.md @@ -0,0 +1,5 @@ +# Discovery Common + +Python code that is used by the Gateway/KDNRM and Commander. + +This is common code to interact with the DAG. diff --git a/keepercommander/discovery_common/__init__.py b/keepercommander/discovery_common/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/keepercommander/discovery_common/__version__.py b/keepercommander/discovery_common/__version__.py new file mode 100644 index 000000000..2df0153c3 --- /dev/null +++ b/keepercommander/discovery_common/__version__.py @@ -0,0 +1 @@ +__version__ = '1.0.26' diff --git a/keepercommander/discovery_common/constants.py b/keepercommander/discovery_common/constants.py new file mode 100644 index 000000000..3811c5282 --- /dev/null +++ b/keepercommander/discovery_common/constants.py @@ -0,0 +1,33 @@ +# This should the relationship between Keeper Vault record +RECORD_LINK_GRAPH_ID = 0 + +# The rules +DIS_RULES_GRAPH_ID = 10 + +# The discovery job history +DIS_JOBS_GRAPH_ID = 11 + +# Discovery infrastructure +DIS_INFRA_GRAPH_ID = 12 + +# The user-to-services graph +USER_SERVICE_GRAPH_ID = 13 + +PAM_DIRECTORY = "pamDirectory" +PAM_DATABASE = "pamDatabase" +PAM_MACHINE = "pamMachine" +PAM_USER = "pamUser" + +LOCAL_USER = "local" + +# The record types to process. +# The order defined the order the user will be presented the new discovery objects. +# The sort defined how the discovery objects for a record type are sorted and presented. +# Cloud-based users are presented first, then directories second. +# We want to prompt about users that may appear on machines before processing the machine. 
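# NOTE (editor's illustration, not part of this changeset): the map below drives the
# presentation order described in the comments above. A minimal sketch of how
# dag_sort.sort_infra_vertices() and Process consume it -- record types are walked in
# ascending "order" and each type's vertices are handed to the sort function named in "sort":
#
#     for record_type in sorted(VERTICES_SORT_MAP, key=lambda rt: VERTICES_SORT_MAP[rt]["order"]):
#         entry = VERTICES_SORT_MAP[record_type]
#         # entry["sort"] is "sort_infra_name" or "sort_infra_host" (see dag_sort.py);
#         # entry["key"] names the field used to build cache keys (see Process.get_key_field).
#         ...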
+VERTICES_SORT_MAP = { + PAM_USER: {"order": 1, "sort": "sort_infra_name", "item": "DiscoveryUser", "key": "user"}, + PAM_DIRECTORY: {"order": 1, "sort": "sort_infra_name", "item": "DiscoveryDirectory", "key": "host_port"}, + PAM_MACHINE: {"order": 2, "sort": "sort_infra_host", "item": "DiscoveryMachine", "key": "host"}, + PAM_DATABASE: {"order": 3, "sort": "sort_infra_host", "item": "DiscoveryDatabase", "key": "host_port"}, +} diff --git a/keepercommander/discovery_common/dag_sort.py b/keepercommander/discovery_common/dag_sort.py new file mode 100644 index 000000000..467d8de16 --- /dev/null +++ b/keepercommander/discovery_common/dag_sort.py @@ -0,0 +1,121 @@ +from __future__ import annotations +from .constants import VERTICES_SORT_MAP +from .types import DiscoveryObject +import logging +import functools +import re +from typing import List, Optional, Union, TYPE_CHECKING + +Logger = Union[logging.RootLogger, logging.Logger] +if TYPE_CHECKING: + from keeper_dag.vertex import DAGVertex + + +def sort_infra_name(vertices: List[DAGVertex]) -> List[DAGVertex]: + """ + Sort the vertices by name in ascending order. + """ + + def _sort(t1: DAGVertex, t2: DAGVertex): + t1_name = t1.content_as_dict.get("name") + t2_name = t2.content_as_dict.get("name") + if t1_name < t2_name: + return -1 + elif t1_name > t2_name: + return 1 + else: + return 0 + + return sorted(vertices, key=functools.cmp_to_key(_sort)) + + +def sort_infra_host(vertices: List[DAGVertex]) -> List[DAGVertex]: + """ + Sort the vertices by host name. + + Host name should appear first in ascending order. + IP should appear second in ascending order. + + """ + + def _is_ip(host: str) -> bool: + if re.match(r'^\d+\.\d+\.\d+\.\d+', host) is not None: + return True + return False + + def _make_ip_number(ip: str) -> int: + ip_port = ip.split(":") + parts = ip_port[0].split(".") + value = "" + for part in parts: + value += part.zfill(3) + return int(value) + + def _sort(t1: DAGVertex, t2: DAGVertex): + t1_name = t1.content_as_dict.get("name") + t2_name = t2.content_as_dict.get("name") + + # Both names are ip addresses + if _is_ip(t1_name) and _is_ip(t2_name): + t1_num = _make_ip_number(t1_name) + t2_num = _make_ip_number(t2_name) + + if t1_num < t2_num: + return -1 + elif t1_num > t2_num: + return 1 + else: + return 0 + + # T1 is an IP, T2 is a host name + elif _is_ip(t1_name) and not _is_ip(t2_name): + return 1 + # T2 is not an IP and T2 is an IP + elif not _is_ip(t1_name) and _is_ip(t2_name): + return -1 + # T1 and T2 are host name + else: + if t1_name < t2_name: + return -1 + elif t1_name > t2_name: + return 1 + else: + return 0 + + return sorted(vertices, key=functools.cmp_to_key(_sort)) + + +def sort_infra_vertices(current_vertex: DAGVertex, logger: Optional[Logger] = None) -> dict: + + if logger is None: + logger = logging.getLogger() + + # Make a map, record type to list of vertices (of that record type) + record_type_to_vertices_map = {k: [] for k, v in VERTICES_SORT_MAP.items()} + + # Collate the vertices into a record type lookup. + vertices = current_vertex.has_vertices() + logger.debug(f" found {len(vertices)} vertices") + for vertex in vertices: + if vertex.active is True: + content = DiscoveryObject.get_discovery_object(vertex) + logger.debug(f" * {content.description}") + for vertex in vertices: + if vertex.active is False: + logger.debug(" vertex is not active") + continue + # We can't load into a pydantic object since Pydantic has a problem with Union type. 
+ # We only want the record type, so it is too much work to try to get into an object. + content_dict = vertex.content_as_dict + record_type = content_dict.get("record_type") + if record_type in record_type_to_vertices_map: + record_type_to_vertices_map[record_type].append(vertex) + + # Sort the vertices for each record type. + for k, v in VERTICES_SORT_MAP.items(): + if v["sort"] == "sort_infra_name": + record_type_to_vertices_map[k] = sort_infra_name(record_type_to_vertices_map[k]) + elif v["sort"] == "sort_infra_host": + record_type_to_vertices_map[k] = sort_infra_host(record_type_to_vertices_map[k]) + + return record_type_to_vertices_map diff --git a/keepercommander/discovery_common/infrastructure.py b/keepercommander/discovery_common/infrastructure.py new file mode 100644 index 000000000..5954b581d --- /dev/null +++ b/keepercommander/discovery_common/infrastructure.py @@ -0,0 +1,293 @@ +from __future__ import annotations +import logging +from .constants import DIS_INFRA_GRAPH_ID +from .utils import get_connection +from keeper_dag import DAG, EdgeType +from keeper_dag.exceptions import DAGVertexException +from keeper_dag.crypto import urlsafe_str_to_bytes +import os +import importlib +from typing import Any, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from keeper_dag.vertex import DAGVertex + + +class Infrastructure: + + """ + Create a graph of the infrastructure. + + The first run will create a full graph since the vertices do not exist. + Further discovery run will only show vertices that ... + * do not have vaults records. + * the data has changed. + * the ACL has changed. + + """ + + KEY_PATH = "infrastructure" + DELTA_PATH = "delta" + ADMIN_PATH = "ADMINS" + USER_PATH = "USERS" + + def __init__(self, record: Any, logger: Optional[Any] = None, history_level: int = 0, + debug_level: int = 0, fail_on_corrupt: bool = True, **kwargs): + + self.conn = get_connection(**kwargs) + + # This will either be a KSM Record, or Commander KeeperRecord + self.record = record + self._dag = None + if logger is None: + logger = logging.getLogger() + self.logger = logger + self.history_level = history_level + self.debug_level = debug_level + self.fail_on_corrupt = fail_on_corrupt + + self.auto_save = False + self.delta_graph = True + self.last_sync_point = -1 + + @property + def dag(self) -> DAG: + if self._dag is None: + + self.logger.debug(f"loading the dag graph {DIS_INFRA_GRAPH_ID}") + + self._dag = DAG(conn=self.conn, record=self.record, graph_id=DIS_INFRA_GRAPH_ID, auto_save=self.auto_save, + logger=self.logger, history_level=self.history_level, debug_level=self.debug_level, + name="Discovery Infrastructure", fail_on_corrupt=self.fail_on_corrupt) + # Do not load the DAG here. + # We don't know if we are using a sync point yet. + + return self._dag + + @property + def has_discovery_data(self) -> bool: + # Does the graph array have any vertices? + if self.dag.has_graph is False: + return False + + # If we at least have the root, does is have the configuration? 
+ if self.get_root.has_vertices() is False: + return False + + return True + + @property + def get_root(self) -> DAGVertex: + return self.dag.get_root + + @property + def get_configuration(self) -> DAGVertex: + try: + configuration = self.get_root.has_vertices()[0] + except (Exception,): + raise DAGVertexException("Could not find the configuration vertex for the infrastructure graph.") + return configuration + + @property + def sync_point(self): + return self._dag.load(sync_point=0) + + def load(self, sync_point: int = 0): + return self.dag.load(sync_point=sync_point) or 0 + + def save(self, delta_graph: Optional[bool] = None): + if delta_graph is None: + delta_graph = self.delta_graph + + self.logger.debug(f"current sync point {self.last_sync_point}") + if delta_graph is True: + self.logger.debug("saving delta graph of the infrastructure") + self._dag.save(delta_graph=delta_graph) + + def to_dot(self, graph_format: str = "svg", show_hex_uid: bool = False, + show_version: bool = True, show_only_active_vertices: bool = False, + show_only_active_edges: bool = False, sync_point: int = None, graph_type: str = "dot"): + + try: + mod = importlib.import_module("graphviz") + except ImportError: + raise Exception("Cannot to_dot(), graphviz module is not installed.") + + dot = getattr(mod, "Digraph")(comment=f"DAG for Discovery", format=graph_format) + + if sync_point is None: + sync_point = self.last_sync_point + + self.logger.debug(f"generating infrastructure dot starting at sync point {sync_point}") + + self.dag.load(sync_point=sync_point) + + count = 0 + if len(self.dag.get_root.has_vertices()) > 0: + config_vertex = self.dag.get_root.has_vertices()[0] + count = len(config_vertex.has_vertices()) + + if graph_type == "dot": + dot.attr(rankdir='RL') + rank_sep = 10 + if count > 10: + rank_sep += int(count * 0.10) + dot.attr(ranksep=str(rank_sep)) + elif graph_type == "twopi": + rank_sep = 20 + if count > 20: + rank_sep += int(count * 0.10) + + dot.attr(layout="twopi") + dot.attr(ranksep=str(rank_sep)) + dot.attr(ratio="auto") + else: + dot.attr(layout=graph_type) + dot.attr(ranksep=10) + + for v in self.dag.all_vertices: + if show_only_active_vertices is True and v.active is False: + continue + + shape = "ellipse" + fillcolor = "white" + color = "black" + + if v.corrupt is False: + + if v.active is False: + fillcolor = "grey" + + record_type = None + record_uid = None + name = v.name + source = None + try: + data = v.content_as_dict + record_type = data.get("record_type") + record_uid = data.get("record_uid") + name = data.get("name") + item = data.get("item") + if item is not None: + if item.get("managed", False) is True: + shape = "box" + source = item.get("source") + if record_uid is not None: + fillcolor = "#AFFFAF" + if data.get("ignore_object", False) is True: + fillcolor = "#DFDFFF" + except (Exception,): + pass + + label = f"uid={v.uid}" + if record_type is not None: + label += f"\\nrt={record_type}" + if name is not None and name != v.uid: + name = name.replace("\\", "\\\\") + label += f"\\nname={name}" + if source is not None: + label += f"\\nsource={source}" + if record_uid is not None: + label += f"\\nruid={record_uid}" + if show_hex_uid is True: + label += f"\\nhex={urlsafe_str_to_bytes(v.uid).hex()}" + if v.uid == self.dag.get_root.uid: + fillcolor = "gold" + label += f"\\nsp={sync_point}" + + tooltip = f"ACTIVE={v.active}\\n\\n" + try: + content = v.content_as_dict + for k, val in content.items(): + if k == "item": + continue + if isinstance(val, str) is True: + val = 
val.replace("\\", "\\\\") + tooltip += f"{k}={val}\\n" + + item = content.get("item") + if item is not None: + tooltip += f"------------------\\n" + for k, val in item.items(): + if isinstance(val, str) is True: + val = val.replace("\\", "\\\\") + tooltip += f"{k}={val}\\n" + except Exception as err: + tooltip += str(err) + else: + fillcolor = "red" + label = f"{v.uid} (CORRUPT)" + tooltip = "CORRUPT" + + dot.node(v.uid, label, color=color, fillcolor=fillcolor, style="filled", shape=shape, tooltip=tooltip) + + head_uids = [] + for edge in v.edges: + + # Don't show edges that reference self, DATA and data that has been DELETION + if edge.head_uid == v.uid: + continue + + if edge.head_uid not in head_uids: + head_uids.append(edge.head_uid) + + def _render_edge(e): + + color = "grey" + style = "solid" + + if e.corrupt is False: + + # To reduce the number of edges, only show the active edges + if e.active is True: + color = "black" + style = "bold" + elif show_only_active_edges is True: + return + + # If the vertex is not active, gray out the DATA edge + if e.edge_type == EdgeType.DATA and v.active is False: + color = "grey" + + if e.edge_type == EdgeType.DELETION: + style = "dotted" + + edgetip = "" + if e.edge_type == EdgeType.ACL and v.active is True: + content = e.content_as_dict + for k, val in content.items(): + edgetip += f"{k}={val}\\n" + if content.get("is_admin") is True: + color = "red" + + label = DAG.EDGE_LABEL.get(e.edge_type) + if label is None: + label = "UNK" + if e.path is not None and e.path != "": + label += f"\\npath={e.path}" + if show_version is True: + label += f"\\nv={e.version}" + else: + label = f"{e.edge_type.value} (CORRUPT)" + color = "red" + edgetip = "CORRUPT" + + # tail, head (arrow side), label, ... + dot.edge(v.uid, e.head_uid, label, style=style, fontcolor=color, color=color, tooltip=edgetip) + + for head_uid in head_uids: + version, edge = v.get_highest_edge_version(head_uid) + _render_edge(edge) + + data_edge = v.get_data() + if data_edge is not None: + _render_edge(data_edge) + + return dot + + def render(self, name: str, **kwargs): + + output_name = os.environ.get("GRAPH_DIR", os.environ.get("HOME", os.environ.get("PROFILENAME", "."))) + output_name = os.path.join(output_name, name) + dot = self.to_dot(**kwargs) + dot.render(output_name) diff --git a/keepercommander/discovery_common/jobs.py b/keepercommander/discovery_common/jobs.py new file mode 100644 index 000000000..bbacfdcb3 --- /dev/null +++ b/keepercommander/discovery_common/jobs.py @@ -0,0 +1,192 @@ +from __future__ import annotations +from .constants import DIS_JOBS_GRAPH_ID +from .utils import get_connection +from .types import JobContent, JobItem, Settings +from keeper_dag import DAG, EdgeType +import logging +import os +import base64 +from time import time +from typing import Any, Optional, List, TYPE_CHECKING + +if TYPE_CHECKING: + from .types import DiscoveryDelta + + +class Jobs: + + KEY_PATH = "jobs" + + def __init__(self, record: Any, logger: Optional[Any] = None, debug_level: int = 0, fail_on_corrupt: bool = True, + **kwargs): + + self.conn = get_connection(**kwargs) + + # This will either be a KSM Record, or Commander KeeperRecord + self.record = record + self._dag = None + if logger is None: + logger = logging.getLogger() + self.logger = logger + self.debug_level = debug_level + self.fail_on_corrupt = fail_on_corrupt + + @property + def dag(self) -> DAG: + if self._dag is None: + + self._dag = DAG(conn=self.conn, record=self.record, graph_id=DIS_JOBS_GRAPH_ID, auto_save=False, + 
logger=self.logger, debug_level=self.debug_level, name="Discovery Jobs", + fail_on_corrupt=self.fail_on_corrupt) + self._dag.load() + + # Has the status been initialized? + if self._dag.has_graph is False: + self._dag.allow_auto_save = False + status = self._dag.add_vertex() + status.belongs_to_root( + EdgeType.KEY, + path=Jobs.KEY_PATH) + status.add_data( + content=JobContent( + active_job_id=None, + history=[] + ), + ) + self._dag.allow_auto_save = True + self._dag.save() + return self._dag + + @property + def data_path(self): + return f"/{Jobs.KEY_PATH}" + + def get_jobs(self): + + self.logger.debug("loading discovery jobs from DAG") + + vertex = self.dag.walk_down_path(self.data_path) + current_json = vertex.content_as_str + if current_json is None: + vertex.add_data( + content=JobContent( + active_job_id=None, + history=[] + ), + ) + current_json = vertex.content_as_str + + return JobContent.model_validate_json(current_json) + + def set_jobs(self, jobs: JobContent): + + self.logger.debug("saving discovery jobs to DAG") + + self.dag.walk_down_path(self.data_path).add_data( + content=jobs + ) + self.dag.save() + + def start(self, settings: Optional[Settings] = None, resource_uid: Optional[str] = None, + conversation_id: Optional[str] = None) -> str: + + self.logger.debug("starting a discovery job") + + if settings is None: + settings = Settings() + + jobs = self.get_jobs() + + new_job = JobItem( + job_id="JOB" + base64.urlsafe_b64encode(os.urandom(8)).decode().rstrip('='), + start_ts=int(time()), + settings=settings, + resource_uid=resource_uid, + conversation_id=conversation_id + ) + jobs.active_job_id = new_job.job_id + jobs.job_history.append(new_job) + + self.set_jobs(jobs) + + return new_job.job_id + + def get_job(self, job_id) -> Optional[JobItem]: + jobs = self.get_jobs() + for job in jobs.job_history: + if job.job_id == job_id: + return job + return None + + def error(self, job_id: str, error: Optional[str], stacktrace: Optional[str] = None): + + self.logger.debug("flag discovery job as error") + + jobs = self.get_jobs() + for job in jobs.job_history: + if job.job_id == job_id: + logging.debug("found job to add error message") + job.end_ts = int(time()) + job.success = False + job.error = error + job.stacktrace = stacktrace + + self.set_jobs(jobs) + + def finish(self, job_id: str, sync_point: int, delta: DiscoveryDelta): + + self.logger.debug("finish discovery job") + + jobs = self.get_jobs() + for job in jobs.job_history: + if job.job_id == job_id: + self.logger.debug("found job to finish") + job.sync_point = sync_point + job.end_ts = int(time()) + job.success = True + job.delta = delta + + self.set_jobs(jobs) + + def cancel(self, job_id): + + self.logger.debug("cancel discovery job") + + jobs = self.get_jobs() + for job in jobs.job_history: + if job.job_id == job_id: + self.logger.debug("found job to cancel") + job.end_ts = int(time()) + job.success = None + jobs.active_job_id = None + self.set_jobs(jobs) + + @property + def history(self) -> List[JobItem]: + jobs = self.get_jobs() + return jobs.job_history + + @property + def job_id_list(self) -> List[str]: + return [j.job_id for j in self.history] + + @property + def current_job(self) -> Optional[JobItem]: + """ + Get the current job + + The current job is the oldest unprocessed job + """ + jobs = self.get_jobs() + if jobs.active_job_id is None: + return None + return self.get_job(jobs.active_job_id) + + def __str__(self): + def _h(i: JobItem): + return f"Job ID: {i.job_id}, {i.success}, {i.sync_point} " + + ret = "HISTORY\n" 
+ for item in self.history: + ret += _h(item) + return ret diff --git a/keepercommander/discovery_common/process.py b/keepercommander/discovery_common/process.py new file mode 100644 index 000000000..b2946dd36 --- /dev/null +++ b/keepercommander/discovery_common/process.py @@ -0,0 +1,1711 @@ +from __future__ import annotations +import logging +from .constants import PAM_DIRECTORY, PAM_USER, VERTICES_SORT_MAP, LOCAL_USER +from .jobs import Jobs +from .infrastructure import Infrastructure +from .record_link import RecordLink +from .user_service import UserService +from .rule import Rules +from .types import (DiscoveryObject, DiscoveryUser, RecordField, RuleActionEnum, UserAcl, + PromptActionEnum, PromptResult, BulkRecordAdd, BulkRecordConvert, BulkProcessResults, + DirectoryInfo, NormalizedRecord) +from .utils import value_to_boolean, split_user_and_domain +from .dag_sort import sort_infra_vertices +from keeper_dag import EdgeType +from keeper_dag.crypto import bytes_to_urlsafe_str +import hashlib +import time +import os +from typing import Any, Callable, List, Optional, Union, TYPE_CHECKING + + +if TYPE_CHECKING: + from keeper_dag.vertex import DAGVertex + DirectoryResult = Union[DirectoryInfo, List] + DirectoryUserResult = Union[NormalizedRecord, DAGVertex] + + +class QuitException(Exception): + """ + This exception used when the user wants to stop processing of the results, before the end. + """ + pass + + +class UserNotFoundException(Exception): + """ + We could not find the user. + """ + pass + + +class DirectoryNotFoundException(Exception): + """ + We could not find the directory. + """ + pass + + +class NoDiscoveryDataException(Exception): + """ + This exception is thrown when there is no discovery data. + This is not an error. + There is just nothing to do. + """ + pass + + +class Process: + + def __init__(self, record: Any, job_id: str, logger: Optional[Any] = None, debug_level: int = 0, **kwargs): + self.job_id = job_id + self.record = record + + env_debug_level = os.environ.get("PROCESS_GS_DEBUG_LEVEL") + if env_debug_level is not None: + debug_level = int(env_debug_level) + + # Remember what passed in a kwargs + self.passed_kwargs = kwargs + + self.jobs = Jobs(record=record, logger=logger, debug_level=debug_level, **kwargs) + self.job = self.jobs.get_job(self.job_id) + + # These are lazy load, so the graph is not loaded here. + self.infra = Infrastructure(record=record, logger=logger, + debug_level=debug_level, + fail_on_corrupt=False, + **kwargs) + self.record_link = RecordLink(record=record, logger=logger, debug_level=debug_level, **kwargs) + self.user_service = UserService(record=record, logger=logger, debug_level=debug_level, **kwargs) + + # This is the root UID for all graphs; get it from one of them. + self.configuration_uid = self.jobs.dag.uid + + if logger is None: + logger = logging.getLogger() + self.logger = logger + self.debug_level = debug_level + + self.logger.debug(f"discovery process is using configuration uid {self.configuration_uid}") + + @staticmethod + def get_key_field(record_type: str) -> str: + return VERTICES_SORT_MAP.get(record_type)["key"] + + @staticmethod + def set_user_based_ids(configuration_uid: str, content: DiscoveryObject, parent_vertex: Optional[DAGVertex] = None): + + if configuration_uid is None: + raise ValueError("The configuration UID is None when trying to create an id and UID for user.") + + if content.item.user is None: + raise Exception("The user name is blank. 
Cannot make an ID for the user.") + + parent_content = DiscoveryObject.get_discovery_object(parent_vertex) + object_id = content.item.user + if "\\" in content.item.user: + # Remove the domain name from the user. + # [0] will be the domain, [1] will be the user. + object_id = object_id.split("\\")[1] + if parent_content.record_type == PAM_DIRECTORY: + domain = parent_content.name + if object_id.endswith(domain) is False: + object_id += f"@{domain}" + else: + object_id += parent_content.id + + content.id = object_id + + uid = configuration_uid + content.object_type_value + object_id + m = hashlib.sha256() + m.update(uid.lower().encode()) + + content.uid = bytes_to_urlsafe_str(m.digest()[:16]) + + def populate_admin_content_ids(self, content: DiscoveryObject, parent_vertex: Optional[DAGVertex] = None): + + """ + Populate the id and uid attributes for content. + """ + + return self.set_user_based_ids(self.configuration_uid, content, parent_vertex) + + def get_keys_for_vertex(self, vertex: DAGVertex) -> List[str]: + """ + For the vertex + :param vertex: + :return: + """ + + content = DiscoveryObject.get_discovery_object(vertex) + key_field = self.get_key_field(content.record_type) + keys = [] + if key_field == "host_port": + if content.item.port is not None: + if content.item.host is not None: + keys.append(f"{content.item.host}:{content.item.port}".lower()) + if content.item.ip is not None: + keys.append(f"{content.item.ip}:{content.item.port}".lower()) + elif key_field == "host": + if content.item.host is not None: + keys.append(content.item.host.lower()) + if content.item.ip is not None: + keys.append(content.item.ip.lower()) + elif key_field == "user": + if content.parent_record_uid is not None: + if content.item.user is not None: + keys.append(f"{content.parent_record_uid}:{content.item.user}".lower()) + if content.item.dn is not None: + keys.append(f"{content.parent_record_uid}:{content.item.dn}".lower()) + return keys + + def _update_with_record_uid(self, record_cache: dict, current_vertex: DAGVertex): + + # If the current vertex is not active, then return. + # It won't have a DATA edge. + if current_vertex.active is False: + return + + for vertex in current_vertex.has_vertices(): + + # Skip if the vertex is not active. + # It won't have a DATA edge. + if vertex.active is False: + continue + + # Don't worry about "item" class type + content = DiscoveryObject.get_discovery_object(vertex) + + # If we are ignoring the object, then skip. + if content.action_rules_result == RuleActionEnum.IGNORE.value or content.ignore_object is True: + continue + elif content.record_uid is not None: + cache_keys = self.get_keys_for_vertex(vertex) + for key in cache_keys: + + # If we find an item in the cache, update the vertex with the record UID + if key in record_cache.get(content.record_type): + content.record_uid = record_cache.get(content.record_type).get(key) + vertex.add_data(content) + break + + # Process the vertices that belong to the current vertex. + self._update_with_record_uid( + record_cache=record_cache, + current_vertex=vertex, + ) + + @staticmethod + def _prepare_record(record_prepare_func: Callable, + bulk_add_records: List[BulkRecordAdd], + content: DiscoveryObject, + parent_content: DiscoveryObject, + vertex: DAGVertex, + context: Optional[Any] = None) -> DiscoveryObject: + """ + Prepare a record to be added. 
+ + :param record_prepare_func: + :param bulk_add_records: + :param content: + :param parent_content: + :param vertex: + :param context: + :return: + """ + + record_to_be_added, record_uid = record_prepare_func( + content=content, + context=context + ) + if record_to_be_added is None: + raise Exception("Did not get prepare record.") + if record_uid is None: + raise Exception("The prepared record did not contain a record UID.") + + bulk_add_records.append( + BulkRecordAdd( + title=content.title, + record=record_to_be_added, + record_type=content.record_type, + record_uid=record_uid, + parent_record_uid=parent_content.record_uid, + shared_folder_uid=content.shared_folder_uid + ) + ) + + content.record_uid = record_uid + content.parent_record_uid = parent_content.record_uid + vertex.add_data(content) + + return content + + def _default_acl(self, + discovery_vertex: DAGVertex, + content: DiscoveryObject, + discovery_parent_vertex: DAGVertex) -> UserAcl: + # Check to see if this user already belongs to another record vertex, or belongs to this one. + belongs_to = False + is_admin = False + is_iam_user = False + + parent_content = DiscoveryObject.get_discovery_object(discovery_parent_vertex) + + # User record the already exists. + # This means the vertex has a record UID, doesn't mean it exists in the vault. + # It may have been added during this processing. + if content.record_exists is False: + belongs_to = True + + # Is this user the admin for the resource? + if parent_content.access_user is not None: + # If this user record's user matches the user that was used to log into the parent resource, + # then this user is the admin for the parent resource. + if parent_content.access_user.user == content.item.user: + is_admin = True + + # User record does not exist. + else: + belongs_to_record_vertex = self.record_link.acl_has_belong_to_vertex(discovery_vertex) + + # If the user doesn't belong to any other vertex, it will be long the parent resource. + if belongs_to_record_vertex is None: + self.logger.debug(" user vertex does not belong to another resource vertex") + belongs_to = True + + else: + parent_record_vertex = self.record_link.get_record_uid(discovery_parent_vertex) + if parent_record_vertex is not None: + if belongs_to_record_vertex == parent_record_vertex: + self.logger.debug(" user vertex already belongs to the parent resource vertex") + belongs_to = True + else: + self.logger.debug(" user vertex does not belong to any other resource vertex") + + # If the parent resource is a provider, then this user is an IAM user. + if parent_content.object_type_value == "providers": + is_iam_user = True + + return UserAcl(belongs_to=belongs_to, is_admin=is_admin, is_iam_user=is_iam_user) + + def _directory_exists(self, domain: str, directory_info_func: Callable, context: Any) -> Optional[DirectoryResult]: + + """ + This method will find the directory in the Infrastructure graph or in the Vault. + + If the domain contains more than one DC, the domain will be split and the full DC will be search and then + the first DC. + For example, if EXAMPLE.COM is passed in for the domain, EXAMPLE.COM and EXAMPLE will be searched for. + + The Infrastructure graph will be searched first. + If nothing is found, the Vault will be searched. + + If the directory is found in the graph, a list if directory vertices will be returned. + If the directory is found in the Vault, a DirectoryInfo instance will be returned. + If nothing is found, None is returned. 
+ + The returned results can be passed to the _find_directory_user method. + + """ + + domains = [domain] + if "." in domains: + domains.append(domain.split(".")[0]) + + self.logger.debug(f"search for directories: {', '.join(domains)}") + + # Check the graph first. + for domain_name in domains: + directories = self.infra.dag.search_content({ + "record_type": ["pamDirectory", "pamDomainConfiguration"], + "name": domain_name + }, ignore_case=True) + + self.logger.debug(f"found {len(directories)} directories in the graph") + + # If we found directories, return the list of directory vertices. + if len(directories) > 0: + return directories + + # Check the vault secondly. + for domain_name in domains: + info = directory_info_func(domain=domain_name, skip_users=False, context=context) + if info is not None: + # If we found directories in the Vault, then return directory info + return info + + return None + + def _find_directory_user(self, + results: DirectoryResult, + record_lookup_func: Callable, + context: Any, + find_user: Optional[str] = None, + find_dn: Optional[str] = None) -> Optional[DirectoryUserResult]: + + # If the passed in results were a DirectoryInfo then check the Vault for users. + if isinstance(results, DirectoryInfo) is True: + self.logger.debug("search for directory user from vault records") + self.logger.debug(f"have {len(results.directory_user_record_uids)} users") + for user_record_id in results.directory_user_record_uids: + record = record_lookup_func(record_uid=user_record_id, context=context) # type: NormalizedRecord + if record is not None: + found = None + self.logger.debug(f"find user {find_user}, dn {find_dn}") + if find_user is not None: + found = record.find_user(find_user) + if found is None and find_dn is not None: + found = record.find_dn(find_dn) + return found + return None + + # Else it was a list of directory vertices, check its children for the users. + else: + self.logger.debug("search for directory user from the graph") + for directory_vertex in results: # type: DAGVertex + for user_vertex in directory_vertex.has_vertices(): + user_content = DiscoveryObject.get_discovery_object(user_vertex) + + # We should only have pamUser vertices. + if user_content.record_type != PAM_USER: + self.logger.debug(f"in find directory user, a vertex {user_vertex.uid} was not a pamUser, " + f"was {user_content.record_type}.") + continue + + found_vertex = None + if find_user is not None: + user, domain = split_user_and_domain(find_user) + if user_content.item.user.lower() == user.lower(): + found_vertex = user_vertex + elif user_content.item.user.lower() == find_user.lower(): + found_vertex = user_vertex + elif find_dn is not None: + if user_content.item.dn.lower() == find_dn.lower(): + found_vertex = user_vertex + + if found_vertex is not None: + return found_vertex + return None + + def _record_link_directory_users(self, + directory_vertex: DAGVertex, + directory_content: DiscoveryObject, + directory_info_func: Callable, + context: Optional[Any] = None): + + """ + Link user record to directory when adding a new directory. + + When adding a new directory, there may be other directories for the same domain. + We need to link existing directory users, of the same domain, to this new directory. + + """ + + self.logger.debug(f"resource is directory; connect users to this directory for {directory_vertex.uid}") + + record_link = context.get("record_link") # type: RecordLink + + # Get the directory user record UIDs from the vault that belong to directories using the same domain. 
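# NOTE (editor's illustration, not part of this changeset): directory_info_func is supplied
# by the caller (Commander or the gateway). Based on how it is used in this module, it takes
# domain/skip_users/context and returns a types.DirectoryInfo (or None). A hypothetical stub
# for testing might look like the following; the real model may carry additional fields:
#
#     def fake_directory_info_func(domain, skip_users=False, context=None):
#         return DirectoryInfo(
#             directory_record_uids=["<directory record UID>"],
#             directory_user_record_uids=[] if skip_users else ["<user record UID>"],
#         )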
+ directory_info = directory_info_func( + domain=directory_content.name, + context=context + ) # type: DirectoryInfo + if directory_info is None: + self.logger.debug("there were no directory record for this domain") + directory_info = DirectoryInfo() + + user_record_uids = directory_info.directory_user_record_uids + + self.logger.debug(f"found {len(directory_info.directory_user_record_uids)} users" + f"from {len(directory_info.directory_record_uids)} directories.") + + # Check our current discovery data. + # This is a delta, it will not contain discovery from prior runs. + # This will only contain objects in this run. + # Make sure the object is a directory and the domain is the same. + # Also make sure there is a record UID; it might not be added yet. + self.logger.debug("finding directories in discovery vertices") + for parent_vertex in directory_vertex.belongs_to_vertices(): + self.logger.debug(f"find directories under {parent_vertex.uid}") + for other_directory_vertex in parent_vertex.has_vertices(): + if other_directory_vertex.uid == directory_vertex.uid: + self.logger.debug(" skip this directory, it's the current one") + continue + other_directory_content = DiscoveryObject.get_discovery_object(other_directory_vertex) + self.logger.debug(f"{other_directory_content.record_type}, {other_directory_content.name}, " + f"{other_directory_content.uid}, {other_directory_content.record_uid}") + if (other_directory_content.record_type == PAM_DIRECTORY + and other_directory_content.name == directory_content.name + and other_directory_content.record_uid is not None): + self.logger.debug(f"check {other_directory_content.uid} for users") + for user_vertex in other_directory_vertex.has_vertices(): + user_content = DiscoveryObject.get_discovery_object(user_vertex) + self.logger.debug(f" * {user_vertex.uid}, {user_content.record_uid}") + if user_content.record_uid is not None and user_content.record_uid not in user_record_uids: + user_record_uids.append(user_content.record_uid) + del user_content + del other_directory_content + + self.logger.debug(f"found {len(user_record_uids)} user to connect to directory") + + # Make sure there is a link from the user record to the directory record. + # We also might need to make a KEY edge from the user to the directory if one does not exist. + for record_uid in user_record_uids: + if record_link.get_acl(record_uid, directory_content.record_uid) is None: + record_link.belongs_to(record_uid, directory_content.record_uid, acl=UserAcl.default()) + + # Check if the user vertex has a KEY edge to the directory_vertex. + found_vertices = directory_vertex.dag.search_content({"record_uid": record_uid}) + if len(found_vertices) == 1: + user_vertex = found_vertices[0] + if user_vertex.get_edge(directory_vertex, EdgeType.KEY) is None: + self.logger.debug(f"adding a KEY edge from the user {user_vertex.uid} to {directory_vertex.uid}") + user_vertex.belongs_to(directory_vertex, EdgeType.KEY) + else: + self.logger.debug("could not find user vertex") + + def _record_link_user_to_directories(self, + directory_vertex: DAGVertex, + directory_content: DiscoveryObject, + user_content: DiscoveryObject, + directory_info_func: Callable, + context: Optional[Any] = None): + + """ + Connect a user to all the directories for a domain. + + Directories may be in the vault or in the discovery graph. + The first step is to get all vault directories. 
+ + """ + + self.logger.debug("resource is directory and we are a user; handle record links to others") + + record_link = context.get("record_link") # type: RecordLink + + # Get the directory user record UIDs from the vault that belong to directories using the same domain. + # We can skip getting directory users. + directory_record_uids = [] + directory_info = directory_info_func( + domain=directory_content.name, + skip_users=True, + context=context + ) # type: DirectoryInfo + if directory_info is not None: + directory_record_uids = directory_info.directory_record_uids + + self.logger.debug(f"found {len(directory_record_uids)} directories in records.") + + # Check our current discovery data. + # This is a delta, it will not contain discovery from prior runs. + # This will only contain objects in this run. + # Make sure the object is a directory and the domain is the same. + # Also make sure there is a record UID; it might not be added yet. + for parent_vertex in directory_vertex.belongs_to_vertices(): + self.logger.debug("finding directories in discovery vertices") + for child_vertex in parent_vertex.has_vertices(): + try: + other_directory_content = DiscoveryObject.get_discovery_object(child_vertex) + self.logger.debug(f"{other_directory_content.record_type}, {other_directory_content.name}, " + f"{directory_content.name}, {other_directory_content.record_uid}") + if (other_directory_content.record_type != PAM_DIRECTORY or + other_directory_content.name != directory_content.name): + continue + if (other_directory_content.record_uid is not None and + other_directory_content.record_uid not in directory_record_uids): + self.logger.debug(f" * adding {other_directory_content.record_uid}") + directory_record_uids.append(other_directory_content.record_uid) + except Exception as err: + self.logger.debug(f"could not link user to directory {directory_content.name}: {err}") + + self.logger.debug(f"found {len(directory_record_uids)} directories in records and discovery data.") + + for directory_record_uid in directory_record_uids: + if record_link.get_acl(user_content.record_uid, directory_record_uid) is None: + record_link.belongs_to(user_content.record_uid, directory_record_uid, acl=UserAcl.default()) + + def _find_admin_directory_user(self, + domain: str, + admin_acl: UserAcl, + directory_info_func: Callable, + record_lookup_func: Callable, + context: Any, + user: Optional[str] = None, + dn: Optional[str] = None) -> Optional[str]: + + # Check any directories for the domain exist. + results = self._directory_exists(domain=domain, + directory_info_func=directory_info_func, + context=context) + + if results is not None: + # Find the user (clean of domain) or DN in the found directories. + directory_user = self._find_directory_user(results=results, + record_lookup_func=record_lookup_func, + context=context, + find_user=user, + find_dn=dn) + if directory_user is not None: + + # If we got a normalized record, then a Vault record exists. + # No need to create a record, just link, belongs_to is False + # Since we are using records, just the belongs_to method instead of + # discovery_belongs_to. + if isinstance(directory_user, NormalizedRecord) is True: + admin_acl.belongs_to = False + return directory_user.record_uid + else: + admin_content = DiscoveryObject.get_discovery_object(directory_user) + + # If not a PAM User, then this is bad. 
+ if admin_content.record_type != PAM_USER: + self.logger.warning( + f"found record type {admin_content.record_type} instead of " + f"pamUser for record UID {admin_content.record_uid}") + return None + + # If the record UID exists, then connect the directory user to the + # resource. + if admin_content.record_uid is not None: + admin_acl.belongs_to = False + return admin_content.record_uid + else: + raise UserNotFoundException(f"Could not find the directory user in domain {domain}") + else: + raise DirectoryNotFoundException(f"Could not find the directory for domain {domain}") + + def _process_auto_add_level(self, + current_vertex: DAGVertex, + bulk_add_records: List[BulkRecordAdd], + bulk_convert_records: List[BulkRecordConvert], + record_lookup_func: Callable, + record_prepare_func: Callable, + directory_info_func: Callable, + record_cache: dict, + smart_add: bool = False, + add_all: bool = False, + context: Optional[Any] = None): + + """ + This method will add items to the bulk_add_records queue to be added by the client. + + Items are added because: + * Smart Add is enabled, and the resource was logged into with credentials. + * The rule engine flagged an item as ADD + + :param current_vertex: The current/parent discovery vertex. + :param bulk_add_records: List of records to be added. + :param bulk_convert_records: List of existing records to be covert to this gateway. + :params record_lookup_func: A function to lookup records to see if they exist. + :param record_prepare_func: Function to convert content into an unsaved record. + :param directory_info_func: Function to lookup directories. + :param record_cache: + :param smart_add: Add the resource record if the admin exists. + :param add_all: Just add the record. This is not the params from Commander. + :param context: Client context; could be anything. + :return: + """ + + if current_vertex.active is False: + self.logger.debug(f"vertex {current_vertex.uid} is not active, skip") + return + + # Check if this vertex has a record. + # We cannot add child vertices to a vertex that does not have a record. + current_content = current_vertex.content_as_object(DiscoveryObject) + if current_content.record_uid is None: + self.logger.debug(f"vertex {current_content.uid} does not have a record id") + return + + self.logger.debug(f"Current Vertex: {current_content.record_type}, {current_vertex.uid}, " + f"{current_content.name}, smart add {smart_add}, add all {add_all}") + + # Sort all the vertices under the current vertex. + # Return a dictionary where the record type is the key. + # The value will be an array of vertices of the specific record type. + record_type_to_vertices_map = sort_infra_vertices(current_vertex, logger=self.logger) + + # Process the record type by their map order in ascending order. + for record_type in sorted(record_type_to_vertices_map, key=lambda i: VERTICES_SORT_MAP[i]['order']): + self.logger.debug(f" processing {record_type}") + for vertex in record_type_to_vertices_map[record_type]: + + content = DiscoveryObject.get_discovery_object(vertex) + self.logger.debug(f" child vertex {vertex.uid}, {content.name}") + + # If we are going to add an admin user, this is the default ACL + # This is for the smart add feature + admin_acl = UserAcl(is_admin=True, belongs_to=False, is_iam_user=False) + + # This ACL is None for resource, and populated for users. 
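# NOTE (editor's illustration, not part of this changeset): the ACL attached to a record
# link is a types.UserAcl for pamUser objects and None for resources. The fields used in
# this module are belongs_to, is_admin and is_iam_user, e.g.
#
#     acl = UserAcl(belongs_to=True, is_admin=False, is_iam_user=False)
#     self.record_link.belongs_to(user_record_uid, resource_record_uid, acl=acl)
#
# which is why default_acl starts out as None below and is only populated for PAM_USER.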
+ default_acl = None + if content.record_type == PAM_USER: + default_acl = self._default_acl( + discovery_vertex=vertex, + content=content, + discovery_parent_vertex=current_vertex) + + # Check for a vault record, if it exists. + # Default to the DAG content. + # Check the bulk_add_records list, to make sure it is not in the list of record we are about to add. + # We are doing this because the record might be an active directory user, that we have + # not created a record for yet, however it might have been assigned a record UID from a prior prompt. + + existing_record = content.record_exists + if record_lookup_func is not None: + check_the_vault = True + for item in bulk_add_records: + if item.record_uid == content.record_uid: + self.logger.debug(f" record is in the bulk add list, do not check the vault if exists") + check_the_vault = False + break + if check_the_vault is True: + existing_record = record_lookup_func(record_uid=content.record_uid, context=context) is not None + self.logger.debug(f" record exists in the vault: {existing_record}") + else: + self.logger.debug(f" record lookup function not defined, record existing: {existing_record}") + + # Determine if we are going to add the item. + # If the item has a record UID already, we don't need to add. + add_record = False + add_all_users = False + if content.record_exists is False: + + ################################################################################################# + # + # RULE ENGINE ADD + + if content.action_rules_result == RuleActionEnum.ADD.value: + self.logger.debug(f" vertex {vertex.uid} had an ADD result for the rule engine, auto add") + add_record = True + + ################################################################################################# + # + # SMART ADD + + # If we are using smart add and the there was an admin user, add it. + elif smart_add is True and content.access_user is not None and content.record_type != PAM_USER: + self.logger.debug(f" resource has credentials, and using smart add") + add_record = True + add_all_users = True + + ################################################################################################# + # + # ADD ALL FLAG (not Commander's) + + # If add_all is set, then add it. + # This is normally used with smart_add to add the resource's users. + elif add_all is True: + # If the current content/parent is not a Directory + # and the content is a User and the source is not 'local' user, + # then don't add the user. + # We don't want an AD user to belongs_to a machine. + if (current_content.record_type != PAM_DIRECTORY + and content.record_type == PAM_USER + and content.item.source != LOCAL_USER): + add_record = False + else: + self.logger.debug(f" items is a user, add all is True, adding record") + add_record = True + + if add_record is True: + + # If we can create an admin user record, then the admin_user_record_uid will be populated. + admin_user_record_uid = None + admin_content = None + admin_vertex = None + + # If this is a resource, then auto add the admin user if one exists. + # In this scenario ... + # There is a rule to auto add. + # A credential was passed to discovery and it worked. + # Along with the resource, auto create the admin user. + # First we need to make sure the current record type is a resource and logged in. + if smart_add is True and content.access_user is not None: + + # Get the username and DN. + # Lowercase them for the comparison. 
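# NOTE (editor's illustration, not part of this changeset): the matching below depends on
# utils.split_user_and_domain() separating a bare user name from its domain. Its exact
# behavior is defined in .utils; as a rough, hypothetical approximation it handles the two
# spellings seen in discovery data ("user@domain" and "DOMAIN\\user"):
#
#     def split_user_and_domain_sketch(value):
#         if "@" in value:
#             user, domain = value.rsplit("@", 1)
#         elif "\\" in value:
#             domain, user = value.split("\\", 1)
#         else:
#             user, domain = value, None
#         return user, domain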
+ access_username_and_domain = content.access_user.user + access_username = access_username_and_domain + access_domain = None + if access_username_and_domain is not None: + access_username_and_domain = access_username_and_domain.lower() + access_username, access_domain = split_user_and_domain(access_username_and_domain) + + # We want to pay attention to the admin source. + # The users from the user list might not contain a source. + # For example, Linux PAM that are remote users will not have a domain in their username. + admin_source = content.access_user.source + + # If the admin source is the current directory name, then it local to the resource (directory). + if content.record_type == PAM_DIRECTORY and content.name == admin_source: + self.logger.debug(" change source to local for directory user") + admin_source = LOCAL_USER + + access_dn = content.access_user.dn + if access_dn is not None: + access_dn = access_dn.lower() + + self.logger.debug(f"REMOVE ME: access_username_and_domain = {access_username_and_domain}") + self.logger.debug(f"REMOVE ME: access_username = {access_username}") + self.logger.debug(f"REMOVE ME: access_domain = {access_domain}") + self.logger.debug(f"REMOVE ME: access_dn = {access_dn}") + + # Go through the users to find the administrative user. + found_user_in_discovery_user_list = False + for user_vertex in vertex.has_vertices(): + + user_content = DiscoveryObject.get_discovery_object(user_vertex) + if user_content.record_type != PAM_USER: + continue + + # Get the user from the content. + # We want to use the full username and also one without the domain, if there is a domain. + user_and_domain = user_content.item.user + user = user_and_domain + domain = None + if user_and_domain is not None: + user_and_domain = user_and_domain.lower() + user, domain = split_user_and_domain(user_and_domain) + if user is None: + continue + + # Get the dn, if it exists. + dn = user_content.item.dn + if dn is not None: + dn = dn.lower() + + self.logger.debug(f"REMOVE ME: user_and_domain = {user_and_domain}") + self.logger.debug(f"REMOVE ME: user = {user}") + self.logger.debug(f"REMOVE ME: domain = {domain}") + self.logger.debug(f"REMOVE ME: dn = {dn}") + + if (access_username_and_domain == user_and_domain + or access_username_and_domain == user + or access_username == user + or access_dn == dn): + + self.logger.debug(" access user matches the current user") + self.logger.debug(f" access user source is {user_content.item.source}") + + # If the user has a record UID, it has already been created. + # This means the record already belongs to another resource, so belongs_to is False. + if user_content.record_uid is not None: + self.logger.debug(" user has a record uid, add this user as admin") + admin_acl.belongs_to = False + admin_user_record_uid = user_content.record_uid + found_user_in_discovery_user_list = True + break + + # Is this user a local user? + # If so prepare a record and link it. Since its local belongs_to is True + if admin_source == LOCAL_USER or admin_source is None: + + self.logger.debug(" user is new local user, add this user as admin") + admin_acl.belongs_to = True + admin_content = user_content + admin_vertex = user_vertex + found_user_in_discovery_user_list = True + break + + # The user is a remote user. 
+ else: + self.logger.debug(" check directory for remote user") + + domain = content.access_user.source + if content.record_type == PAM_DIRECTORY: + domain = content.name + + try: + admin_user_record_uid = self._find_admin_directory_user( + domain=domain, + admin_acl=admin_acl, + directory_info_func=directory_info_func, + record_lookup_func=record_lookup_func, + context=context, + user=access_username, + dn=access_dn + ) + self.logger.debug(" found directory user for admin") + found_user_in_discovery_user_list = True + except (DirectoryNotFoundException, UserNotFoundException) as err: + # Not an error. + # Just could not find the directory or directory user. + self.logger.debug(f" did not find the directory user: {err}") + + self.logger.debug("done checking user list") + + # If the user_record_uid is None, and it's a domain user, and we didn't find a user + # then there is chance that it's dirctory user not picked up while getting users in + # discovery. + # This is similar to the remote user code above, except the access user was not found in + # the user list. + if (found_user_in_discovery_user_list is False and admin_user_record_uid is None + and access_domain is not None): + self.logger.debug("could not find admin user in the user list, " + "attempt to find in directory") + try: + admin_user_record_uid = self._find_admin_directory_user( + domain=access_domain, + admin_acl=admin_acl, + directory_info_func=directory_info_func, + record_lookup_func=record_lookup_func, + context=context, + user=access_username, + dn=access_dn + ) + except (DirectoryNotFoundException, UserNotFoundException): + # Not an error. + # Just could not find the directory or directory user. + pass + + # Create the record if we are not using smart add. + # If we are using smart add, only added if we could make an admin record. + if smart_add is False or (smart_add is True + and (admin_user_record_uid is not None or admin_content is not None)): + + self.logger.debug(f"adding resource record, smart add {smart_add}") + # The record could be a resource or user record. + self._prepare_record( + record_prepare_func=record_prepare_func, + bulk_add_records=bulk_add_records, + content=content, + parent_content=current_content, + vertex=vertex, + context=context + ) + if content.record_uid is None: + raise Exception(f"the record uid is blank for {content.description} after prepare") + + # For a resource, the ACL will be None. + # It will a UserAcl if a user. + self.record_link.belongs_to(content.record_uid, current_content.record_uid, acl=default_acl) + + # user_record_uid will only be populated if using smart add. + # Link the admin user to the resource. + if admin_user_record_uid is not None or admin_content is not None: + + if admin_content is not None: + + self.logger.debug("the admin record does not exists, create it") + + # Create the local admin here since we need the resource record added. + self._prepare_record( + record_prepare_func=record_prepare_func, + bulk_add_records=bulk_add_records, + content=admin_content, + parent_content=content, + vertex=admin_vertex, + context=context + ) + if admin_content.record_uid is None: + raise Exception(f"the record uid is blank for {admin_content.description} " + "after prepare") + + admin_user_record_uid = admin_content.record_uid + + self.logger.debug("connecting admin user to resource") + self.record_link.belongs_to(admin_user_record_uid, content.record_uid, acl=admin_acl) + + # If the record type is a PAM User, we don't need to go deeper. 
+ # In the future we might need to change if PAM User becomes a branch and not a leaf. + # This is for safety reasons + if content.record_type != PAM_USER: + # Process the vertices that belong to the current vertex. + + next_smart_add = smart_add + if add_all_users is True: + add_all = True + if add_all is True: + self.logger.debug("turning off smart add since add_all is enabled") + next_smart_add = False + self.logger.debug(f"smart add = {next_smart_add}, add all = {add_all}") + + self._process_auto_add_level( + current_vertex=vertex, + bulk_add_records=bulk_add_records, + bulk_convert_records=bulk_convert_records, + record_lookup_func=record_lookup_func, + record_prepare_func=record_prepare_func, + directory_info_func=directory_info_func, + record_cache=record_cache, + + # Use the value of smart_add if add_all is False. + # If add_all is True, we don't have to run it through the logic, we are going add a record. + smart_add=next_smart_add, + + # If we could access a resource, add all it's users. + add_all=add_all_users, + context=context + ) + + self.logger.debug(f" finished auto add processing {record_type}") + self.logger.debug(f" Finished auto add current Vertex: {current_vertex.uid}, {current_content.name}") + + def _process_level(self, + current_vertex: DAGVertex, + bulk_add_records: List[BulkRecordAdd], + bulk_convert_records: List[BulkRecordConvert], + record_lookup_func: Callable, + prompt_func: Callable, + prompt_admin_func: Callable, + record_prepare_func: Callable, + directory_info_func: Callable, + record_cache: dict, + item_count: int = 0, + items_left: int = 0, + indent: int = 0, + context: Optional[Any] = None): + + """ + This method will walk the user through discovery delta objects. + + At this point, we only have the delta objects from the graph. + We do not have the full graph. + + :param current_vertex: The current/parent discovery vertex. + :param bulk_add_records: List of records to be added. + :param bulk_convert_records: List of existing records to be covert to this gateway. + :param prompt_func: Function to call for user prompt. + :param record_prepare_func: Function to convert content into an unsaved record. + :param indent: Amount to indent text. + :param context: Client context; could be anything. + :return: + """ + + if current_vertex.active is False: + self.logger.debug(f"vertex {current_vertex.uid} is not active, skip") + return + + # Check if this vertex has a record. + # We cannot add child vertices to a vertex that does not have a record. + current_content = current_vertex.content_as_object(DiscoveryObject) + if current_content.record_uid is None: + self.logger.debug(f"vertex {current_content.uid} does not have a record id") + return + + self.logger.debug(f"Current Vertex: {current_content.record_type}, {current_vertex.uid}, " + f"{current_content.name}") + + # Sort all the vertices under the current vertex. + # Return a dictionary where the record type is the key. + # The value will be an array of vertices of the specific record type. + record_type_to_vertices_map = sort_infra_vertices(current_vertex, logger=self.logger) + + # Process the record type by their map order in ascending order. 
+ for record_type in sorted(record_type_to_vertices_map, key=lambda i: VERTICES_SORT_MAP[i]['order']): + self.logger.debug(f" processing {record_type}") + for vertex in record_type_to_vertices_map[record_type]: + + content = DiscoveryObject.get_discovery_object(vertex) + self.logger.debug(f" child vertex {vertex.uid}, {content.name}") + + default_acl = None + if content.record_type == PAM_USER: + default_acl = self._default_acl( + discovery_vertex=vertex, + content=content, + discovery_parent_vertex=current_vertex) + + # Check for a vault record, if it exists. + # Default to the DAG content. + # Check the bulk_add_records list, to make sure it is not in the list of record we are about to add. + # We are doing this because the record might be an active directory user, that we have + # not created a record for yet, however it might have been assigned a record UID from a prior prompt. + + existing_record = content.record_exists + if record_lookup_func is not None: + check_the_vault = True + for item in bulk_add_records: + if item.record_uid == content.record_uid: + self.logger.debug(f" record is in the bulk add list, do not check the vault if exists") + check_the_vault = False + break + if check_the_vault is True: + existing_record = record_lookup_func(record_uid=content.record_uid, context=context) is not None + self.logger.debug(f" record exists in the vault: {existing_record}") + else: + self.logger.debug(f" record lookup function not defined, record existing: {existing_record}") + + # If we have a record UID, the record exists; we don't need to prompt the user. + # If a user, we do want to make sure an ACL exists between this user and the resource. + if existing_record is True: + self.logger.debug(f" record already exists.") + # Don't continue since we might want to recurse into its children. + + # If the rule engine result is to ignore this object, then continue. + # This normally would not happen since discovery wouldn't add the object. + # However, make sure we skip any object where the rule engine action is to ignore the object. + elif content.action_rules_result == RuleActionEnum.IGNORE.value: + self.logger.debug(f" vertex {vertex.uid} had a IGNORE result for the rule engine, " + "skip processing") + # If the rule engine result is to ignore this object, then continue. + continue + + # If this flag is set, the user set the ignore_object flag when prompted. + elif content.ignore_object is True: + self.logger.debug(f" vertex {vertex.uid} was flagged as ignore, skip processing") + # If the ignore_object flag is set, then continue. + continue + + # # If the rule engine flagged this object to be auto added, and no record exists, + # # prepare a record and add it to the bulk_add_records queue. + # # At the end of processing, the record will be added. + # elif content.action_rules_result == RuleActionEnum.ADD.value and content.record_exists is False: + # self.logger.debug(f" vertex {vertex.uid} had an ADD result for the rule engine, auto add") + # + # # The record could be a resource or user record. + # self._prepare_record( + # record_prepare_func=record_prepare_func, + # bulk_add_records=bulk_add_records, + # content=content, + # parent_content=current_content, + # vertex=vertex, + # context=context + # ) + # + # self.record_link.discovery_belongs_to(vertex, current_vertex, acl=default_acl) + + # If the record doesn't exist, then prompt the user. 
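The vault check above only relies on `record_lookup_func` returning something truthy when the record exists. A hypothetical callback showing that contract; the cache key and context layout are illustrative, not part of the module:

```python
from typing import Any, Optional

def record_lookup_func(record_uid: Optional[str], context: Optional[Any] = None) -> Optional[dict]:
    """Return the vault record for the UID, or None if it does not exist."""
    if record_uid is None:
        return None
    vault_records = (context or {}).get("vault_records", {})  # illustrative cache keyed by record UID
    return vault_records.get(record_uid)

# existing_record = record_lookup_func(record_uid=content.record_uid, context=context) is not None
```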
+ else: + self.logger.debug(f" vertex {vertex.uid} had an PROMPT result, prompt user") + + # For user record, check if the resource record has an admin. + # If not, prompt the user if they want to add this user as the admin. + # The returned ACL will have the is_admin flag set to True if they do. + resource_has_admin = False + if content.record_type == PAM_USER: + resource_has_admin = (self.record_link.get_admin_record_uid(current_content.record_uid) + is not None) + self.logger.debug(f"resource has an admin is {resource_has_admin}") + + # If the current resource does not allow an admin, then it has and admin, it's just controlled by + # us. + # This is going to be a resource record, or a configuration record. + if hasattr(current_content.item, "allows_admin") is True: + if current_content.item.allows_admin is False: + self.logger.debug(f"resource allows an admin is {current_content.item.allows_admin}") + resource_has_admin = True + else: + self.logger.debug(f"resource type {current_content.record_type} does not have " + "allows_admin attr") + + result = prompt_func( + vertex=vertex, + parent_vertex=current_vertex, + content=content, + acl=default_acl, + resource_has_admin=resource_has_admin, + indent=indent, + item_count=item_count, + items_left=items_left, + context=context) # type: PromptResult + + if result.action == PromptActionEnum.IGNORE: + self.logger.debug(f" vertex {vertex.uid} is being ignored from prompt") + result.content.ignore_object = True + + action_rule_item = Rules.make_action_rule_from_content( + content=result.content, + action=RuleActionEnum.IGNORE + ) + + # Add a rule to ignore this object when doing future discovery. + rules = Rules(record=self.record, **self.passed_kwargs) + rules.add_rule(action_rule_item) + + # Even though we are ignoring the object, we will still add it to the infrastructure graph. + # This is user selected ignored, not from the rule engine. + # vertex.belongs_to(current_vertex, EdgeType.KEY) + vertex.add_data(result.content) + + elif result.action == PromptActionEnum.ADD: + self.logger.debug(f" vertex {vertex.uid} is being added from prompt") + + # Use the content from the prompt. + # The user may have modified it. + content = result.content + acl = result.acl + + # The record could be a resource or user record. + # The content + self._prepare_record( + record_prepare_func=record_prepare_func, + bulk_add_records=bulk_add_records, + content=content, + parent_content=current_content, + vertex=vertex, + context=context + ) + + # Update the DATA edge for this vertex. + # vertex.add_data(content) + + # Make a record link. + # The acl will be None if not a pamUser. + self.record_link.discovery_belongs_to(vertex, current_vertex, acl) + + # If the object is NOT a pamUser and the resource allows an admin. + # Prompt the user to create an admin. + should_prompt_for_admin = True + self.logger.debug(f" added record type was {content.record_type}") + if (content.record_type != PAM_USER and content.item.allows_admin is True and + prompt_admin_func is not None): + + # This block checks to see if the admin is a directory user that exists. + # We don't want to prompt the user for an admin if we have one already. + if content.access_user is not None and content.access_user.user is not None: + + self.logger.debug(" for this resource, credentials were provided.") + self.logger.error(f" {content.access_user.user}, {content.access_user.dn}, " + f"{content.access_user.password}") + + # Check if this user is a directory users, first check the source. 
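The `prompt_func` call above only consumes the returned `PromptResult`. A minimal, non-interactive sketch of a callback with that signature; it accepts every object as-is, whereas a real client would prompt the user:

```python
from keepercommander.discovery_common.types import PromptActionEnum, PromptResult

def prompt_func(vertex, parent_vertex, content, acl, resource_has_admin,
                indent=0, item_count=0, items_left=0, context=None) -> PromptResult:
    # Accept the discovery object unchanged; a real implementation would let the
    # user edit fields, ignore the object, or skip it.
    return PromptResult(action=PromptActionEnum.ADD, acl=acl, content=content)
```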
+ # If local, check the username incase the domain in part of the username. + source = content.access_user.source + if content.record_type == PAM_DIRECTORY: + source = content.name + elif source == LOCAL_USER: + _, domain = split_user_and_domain(content.access_user.user) + if domain is not None: + source = domain + + if source != LOCAL_USER: + self.logger.debug(" admin was not a local user, " + f"find user in directory {source}, if exists.") + + acl = UserAcl.default() + acl.is_admin = True + admin_record_uid = None + + try: + admin_record_uid = self._find_admin_directory_user( + domain=source, + admin_acl=acl, + directory_info_func=directory_info_func, + record_lookup_func=record_lookup_func, + context=context, + user=content.access_user.user, + dn=content.access_user.dn + ) + except DirectoryNotFoundException: + self.logger.debug(f" directory {source} was not found for admin user") + except UserNotFoundException: + self.logger.debug(f" directory user was not found in directory {source}") + if admin_record_uid is not None: + self.logger.debug(" found directory user admin, connect to resource") + self.record_link.belongs_to(admin_record_uid, content.record_uid, acl=acl) + should_prompt_for_admin = False + else: + self.logger.debug(" did not find the directory user for the admin, " + "prompt the user") + + if should_prompt_for_admin is True: + self.logger.debug(f" prompt for admin user") + self._process_admin_user( + resource_vertex=vertex, + resource_content=content, + bulk_add_records=bulk_add_records, + bulk_convert_records=bulk_convert_records, + prompt_admin_func=prompt_admin_func, + record_prepare_func=record_prepare_func, + indent=indent, + context=context + ) + + # When a user is added to a directory, check to see if there are other directories with the + # same domain. + # This used needs to be added to those directories too. + if current_content.record_type == PAM_DIRECTORY and content.record_type == PAM_USER: + + self._record_link_user_to_directories( + directory_vertex=current_vertex, + directory_content=current_content, + user_content=content, + directory_info_func=directory_info_func, + context=context + ) + + # If the new record is a directory, we may need to attach more users to this record. + elif content.record_type == PAM_DIRECTORY: + + self._record_link_directory_users( + directory_vertex=vertex, + directory_content=content, + directory_info_func=directory_info_func, + context=context + ) + + items_left -= 1 + + # If the record type is a PAM User, we don't need to go deeper. + # In the future we might need to change if PAM User becomes a branch and not a leaf. + # This is for safety reasons + if content.record_type != PAM_USER: + # Process the vertices that belong to the current vertex. 
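`split_user_and_domain` comes from `discovery_common.utils`, which is not part of this hunk; based on its use here it returns a `(user, domain)` tuple with `domain` set to `None` for local accounts. A hedged sketch of how the source is derived from it, with an illustrative input format:

```python
from keepercommander.discovery_common.utils import split_user_and_domain
from keepercommander.discovery_common.constants import LOCAL_USER

user, domain = split_user_and_domain("administrator@example.com")  # illustrative input format
source = domain if domain is not None else LOCAL_USER
print(user, source)
```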
+ self._process_level( + current_vertex=vertex, + bulk_add_records=bulk_add_records, + bulk_convert_records=bulk_convert_records, + record_lookup_func=record_lookup_func, + prompt_func=prompt_func, + prompt_admin_func=prompt_admin_func, + record_prepare_func=record_prepare_func, + directory_info_func=directory_info_func, + record_cache=record_cache, + indent=indent + 1, + item_count=item_count, + items_left=items_left, + context=context + ) + self.logger.debug(f" finished processing {record_type}") + self.logger.debug(f" Finished current Vertex: {current_vertex.uid}, {current_content.name}") + + def _process_admin_user(self, + resource_vertex: DAGVertex, + resource_content: DiscoveryObject, + bulk_add_records: List[BulkRecordAdd], + bulk_convert_records: List[BulkRecordConvert], + prompt_admin_func: Callable, + record_prepare_func: Callable, + indent: int = 0, + context: Optional[Any] = None): + + # Find the record UID that admins this resource. + # If it is None, there is a user vertex that has an ACL with is_admin with a true value. + record_uid = self.record_link.get_record_uid(resource_vertex) + admin = self.record_link.get_admin_record_uid(record_uid) + if admin is None: + + # If the access_user is None, create an empty one. + # We will need this below when adding values to the fields. + if resource_content.access_user is None: + resource_content.access_user = DiscoveryUser() + + # Initialize a discovery object for the admin user. + # The PLACEHOLDER will be replaced after the admin user prompt. + + values = {} + for field in ["user", "password", "private_key", "dn", "database"]: + value = getattr(resource_content.access_user, field) + if value is None: + value = [] + else: + value = [value] + values[field] = value + + managed = [False] + if resource_content.access_user.managed is not None: + managed = [resource_content.access_user.managed] + + admin_content = DiscoveryObject( + uid="PLACEHOLDER", + added_ts=int(time.time()), + object_type_value="users", + parent_record_uid=resource_content.record_uid, + record_type=PAM_USER, + id="PLACEHOLDER", + name="PLACEHOLDER", + description=resource_content.description + ", Administrator", + title=resource_content.title + ", Administrator", + item=DiscoveryUser( + user="PLACEHOLDER" + ), + fields=[ + RecordField(type="login", label="login", value=values["user"], required=True), + RecordField(type="password", label="password", value=values["password"], required=False), + RecordField(type="secret", label="privatePEMKey", value=values["private_key"], required=False), + RecordField(type="text", label="distinguishedName", value=values["dn"], required=False), + RecordField(type="text", label="connectDatabase", value=values["database"], required=False), + RecordField(type="checkbox", label="managed", value=managed, required=False), + ] + ) + + # Prompt to add an admin user to this resource. + # We are not passing an ACL instance. + # We'll make it based on if the user is adding a new record or linking to an existing record. + admin_result = prompt_admin_func( + parent_vertex=resource_vertex, + content=admin_content, + acl=None, + indent=indent, + context=context + ) + + # If the action is to ADD, replace the PLACEHOLDER data. + if admin_result.action == PromptActionEnum.ADD: + source = "local" + if resource_content.record_type == PAM_DIRECTORY: + source = resource_content.name + + admin_record_uid = admin_result.record_uid + + # We know the ACL is for the admin, so set that to True. 
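Note that the placeholder record built above stores every field value as a list, even single values; `get_field_value()` later returns `value[0]`, or `None` for an empty list. A small illustration:

```python
from keepercommander.discovery_common.types import RecordField

# Field values are always lists; empty fields are [] rather than None so that
# get_field_value() can safely return value[0] or None.
login = RecordField(type="login", label="login", value=["admin"], required=True)
password = RecordField(type="password", label="password", value=[], required=False)
print(login.value[0], password.value)
```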
+ admin_acl = UserAcl(is_admin=True) + + if admin_record_uid is None: + logging.debug("add admin user from content") + admin_content = admin_result.content + + # With the result, we can fill in information in the object item. + admin_content.item.user = admin_content.get_field_value("login") + admin_content.item.password = admin_content.get_field_value("password") + admin_content.item.private_key = admin_content.get_field_value("privatePEMKey") + admin_content.item.dn = admin_content.get_field_value("distinguishedName") + admin_content.item.database = admin_content.get_field_value("connectDatabase") + admin_content.item.managed = value_to_boolean( + admin_content.get_field_value("managed")) or False + admin_content.item.source = source + admin_content.name = admin_content.item.user + + if admin_content.item.user is None or admin_content.item.user == "": + raise ValueError("The user name is missing or is blank. Cannot create the administrator user.") + + if admin_content.name is not None: + admin_content.description = (resource_content.description + ", User " + + admin_content.name) + + # We need to populate the id and uid of the content, now that we have data in the content. + self.populate_admin_content_ids(admin_content, resource_vertex) + + # Does an admin vertex already exist for this user? + # This most likely user on the gateway, since without a resource record users can be discovered. + # If we did find it, get the content for the admin; we really want any existing record uid. + admin_vertex = self.infra.dag.get_vertex(admin_content.uid) + if admin_vertex is not None and admin_vertex.active is True and admin_vertex.has_data is True: + found_content = DiscoveryObject.get_discovery_object(admin_vertex) + admin_record_uid = found_content.record_uid + + # If there is a record UID for the admin user, connect it. + if admin_record_uid is not None: + + # If the admin record does not belong to another resource, make this resource its owner. + if self.record_link.get_parent_record_uid(admin_record_uid) is None: + admin_acl.belongs_to = True + + admin_vertex.belongs_to(resource_vertex, edge_type=EdgeType.KEY) + self.record_link.belongs_to(admin_record_uid, resource_content.record_uid, acl=admin_acl) + else: + if admin_vertex is None: + admin_vertex = self.infra.dag.add_vertex(uid=admin_content.uid, + name=admin_content.description) + + # Since this record does not exist, it will belong to the resource, + admin_acl.belongs_to = True + + # Connect the user vertex to the resource vertex. + # We need to add a KEY edge for the admin content stored on the DATA edge. + admin_vertex.belongs_to(resource_vertex, edge_type=EdgeType.KEY) + admin_vertex.add_data(admin_content) + + # The record will be a user record; admin_acl will not be None + self._prepare_record( + record_prepare_func=record_prepare_func, + bulk_add_records=bulk_add_records, + content=admin_content, + parent_content=resource_content, + vertex=admin_vertex, + context=context + ) + + self.record_link.discovery_belongs_to(admin_vertex, resource_vertex, acl=admin_acl) + + else: + logging.debug("add admin user from existing record") + + # This is a pamUser record that may need to have the controller set. + # Add it to this queue to make sure the protobuf items are current. + bulk_convert_records.append( + BulkRecordConvert( + record_uid=admin_record_uid, + parent_record_uid=resource_content.record_uid, + ) + ) + + # If this user record does not belong to another resource, make it belong to this one. 
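The conversion queue entry above is all that is needed to re-point an existing vault record at this gateway. A sketch, assuming `BulkRecordConvert` is exported from `discovery_common.types` alongside the other processing types and the UIDs are placeholders:

```python
from keepercommander.discovery_common.types import BulkRecordConvert  # assumed export location

bulk_convert_records = []
bulk_convert_records.append(
    BulkRecordConvert(
        record_uid="existing-admin-record-uid",   # placeholder UID
        parent_record_uid="resource-record-uid",  # placeholder UID
    )
)
```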
+ record_vertex = self.record_link.acl_has_belong_to_record_uid(admin_record_uid) + if record_vertex is None: + admin_acl.belongs_to = True + + # There is _prepare_record, the record exists. + # Needs to add to records linking. + + # Link the record UIDs. + # We might not have this user in discovery data. + # It might not belong to the resource; if so, it cannot be rotated. + # It only has is_admin in the ACL. + self.record_link.belongs_to( + admin_record_uid, + record_uid, + acl=admin_acl + ) + + def _get_count(self, current_vertex: DAGVertex) -> int: + + """ + Get the number of vertices that have not been converted to record. + + This will recurse down the graph. + To be counted, the current vertex being evaluated, must ... + + * not have record UID. + * not be ignored either by flag or rule. + * not be auto added. + + To recurse down, the current vertex being evaluated, must ... + + * have a record UID + * not be ignored either by flag or rule. + + """ + + count = 0 + + for vertex in current_vertex.has_vertices(): + if vertex.active is False: + continue + content = DiscoveryObject.get_discovery_object(vertex) + + # Add this record to the count, if no record UID, not ignoring, and we are not auto adding or + # ignoring from rules. + if (content.record_uid is None + and content.ignore_object is False + and content.action_rules_result != "add" + and content.action_rules_result != "ignore"): + count += 1 + + # Go deeper if there is a record UID, and we are not ignoring, and the rule result is not to ignore. + if ( + content.record_uid is not None + and content.ignore_object is False + and content.action_rules_result != "ignore"): + count += self._get_count(vertex) + + return count + + @property + def no_items_left(self): + return self._get_count(self.infra.get_root) == 0 + + def run(self, + prompt_func: Callable, + record_prepare_func: Callable, + smart_add: bool = False, + record_lookup_func: Optional[Callable] = None, + record_create_func: Optional[Callable] = None, + record_convert_func: Optional[Callable] = None, + prompt_confirm_add_func: Optional[Callable] = None, + prompt_admin_func: Optional[Callable] = None, + auto_add_result_func: Optional[Callable] = None, + directory_info_func: Optional[Callable] = None, + context: Optional[Any] = None, + record_cache: Optional[dict] = None, + force_quit: bool = False + ) -> BulkProcessResults: + """ + Process the discovery results. + + :param record_cache: A dictionary of record types to keys to record UID. + :param prompt_func: Function to call when the user needs to make a decision about an object. + :param smart_add: If we have resource cred, add the resource and the users. + :param record_lookup_func: Function to look up a record by UID. + :param record_prepare_func: Function to call to prepare a record to be created. + :param record_create_func: Function to call to save the prepared records. + :param record_convert_func: Function to convert record to use this gateway. + :param prompt_confirm_add_func: Function to call if quiting and record have been added to queue. + :param prompt_admin_func: Function to prompt user for admin. + :param auto_add_result_func: Function to call after auto adding. Provided records to bulk add. + :param directory_info_func: Function to get users of a directory from vault records. + :param context: Context passed to the prompt and add function. These could be objects that are not in the scope + of the function. + :param force_quit: Used for testing. Throw a Quit exception after processing. 
+ :return: + """ + sync_point = self.job.sync_point + if sync_point is None: + raise Exception("The job does not have a sync point for the graph.") + + # Get the root vertex, which has nothing we care about. + # But from the root, get the configuration vertex. + # There will be only one. + self.logger.debug(f"loading the graph at sync point {sync_point}") + self.infra.load(sync_point=sync_point) + if self.infra.has_discovery_data is False: + raise NoDiscoveryDataException("There is no discovery data to process.") + + # If the graph is corrupted, delete the bad vertices. + # + if self.infra.dag.is_corrupt is True: + self.logger.debug("the graph is corrupt, deleting vertex") + for uid in self.infra.dag.corrupt_uids: + vertex = self.infra.dag.get_vertex(uid) + vertex.delete() + self.infra.dag.corrupt_uids = [] + self.logger.info("fixed the corrupted vertices") + + root = self.infra.get_root + configuration = root.has_vertices()[0] + + # If we have a record cache, attempt to find vertices where the content does not have the record UID set and + # then update them with cached records from the vault. + # This is done incase someone has manually created a record after discovery has been done. + if record_cache is not None: + self._update_with_record_uid( + record_cache=record_cache, + current_vertex=configuration, + ) + + # Store records that to be created and record where their protobuf settings need to be updated. + bulk_add_records = [] # type: List[BulkRecordAdd] + bulk_convert_records = [] # type: List[BulkRecordConvert] + + should_add_records = True + bulk_process_results = None + + # Pass an empty + if context is None: + context = {} + + # We need record linking and infra graphs in the context. + # We are adding admin users to check existing admin relationships and to see if AD user. + context["record_link"] = self.record_link + context["infra"] = self.infra + + try: + + self.logger.debug("# ####################################################################################") + self.logger.debug("# AUTO ADD ITEMS") + self.logger.debug("#") + self.logger.debug(f"smart add = {smart_add}") + + # Process the auto add entries first. + # There are no prompts. + self._process_auto_add_level( + current_vertex=configuration, + bulk_add_records=bulk_add_records, + bulk_convert_records=bulk_convert_records, + smart_add=smart_add, + record_lookup_func=record_lookup_func, + record_prepare_func=record_prepare_func, + directory_info_func=directory_info_func, + record_cache=record_cache, + context=context) + + # If set, give the client a list of record that will be added. + # Can be used for displaying how many record are auto added. + if auto_add_result_func is not None: + auto_add_result_func(bulk_add_records=bulk_add_records) + + self.logger.debug("# ####################################################################################") + self.logger.debug("# PROMPT USER ITEMS") + self.logger.debug("#") + + # This is the total number of items that processing needs to process. + # We start with items_left equal to item_count. 
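The `record_cache` passed to `run()` is a nested dictionary of record type to lookup key to record UID. The keys below are illustrative only; discovery builds them from the record fields:

```python
# Illustrative cache: record type -> lookup key -> vault record UID.
record_cache = {
    "pamMachine": {"192.168.1.50": "machine-record-uid"},
    "pamUser": {"example.com\\administrator": "user-record-uid"},
}
```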
+ item_count = self._get_count(configuration) + + self._process_level( + current_vertex=configuration, + bulk_add_records=bulk_add_records, + bulk_convert_records=bulk_convert_records, + record_lookup_func=record_lookup_func, + prompt_func=prompt_func, + prompt_admin_func=prompt_admin_func, + record_prepare_func=record_prepare_func, + directory_info_func=directory_info_func, + record_cache=record_cache, + indent=0, + item_count=item_count, + items_left=item_count, + context=context) + + # This mainly for testing. + # If throw and quit exception, so we can prompt the user. + if force_quit is True: + raise QuitException() + + except QuitException: + should_add_records = False + + # If we have record ready to be created, and the confirm prompt function was set, ask the user if they want + # to add the records. + if (len(bulk_add_records) > 0 and prompt_confirm_add_func is not None and + prompt_confirm_add_func(bulk_add_records) is True): + should_add_records = True + + modified_count = len(self.infra.dag.modified_edges) + self.logger.debug(f"quiting and there are {modified_count} modified edges.") + + # If we don't have a create function, then there is no way to add record. + if record_create_func is None: + should_add_records = False + + # We should add the record, and a method was passed in to create them; then add the records. + if should_add_records is True: + + self.logger.debug("# ####################################################################################") + self.logger.debug("# CREATE NEW RECORD") + self.logger.debug("#") + + # Save new records. + bulk_process_results = record_create_func( + bulk_add_records=bulk_add_records, + context=context + ) + self.logger.debug("# ####################################################################################") + + self.logger.debug("# ####################################################################################") + self.logger.debug("# CONVERT EXISTING RECORD") + self.logger.debug("#") + + # Update existing record to use this gateway. + record_convert_func( + bulk_convert_records=bulk_convert_records, + context=context + ) + self.logger.debug("# ####################################################################################") + else: + + self.logger.debug("# ####################################################################################") + self.logger.debug("# ROLLBACK GRAPH") + self.logger.debug("#") + + for record in bulk_add_records: + vertices = self.infra.dag.search_content({"record_uid": record.record_uid}) + for vertex in vertices: + self.logger.debug(f" * {record.title}, flagged") + vertex.skip_save = True + for record in bulk_convert_records: + vertices = self.infra.dag.search_content({"record_uid": record.record_uid}) + for vertex in vertices: + self.logger.debug(f" * {record.title}, flagged") + vertex.skip_save = True + + self.logger.debug("# ####################################################################################") + + self.logger.debug("# ####################################################################################") + self.logger.debug("# Save INFRASTRUCTURE graph") + self.logger.debug("#") + + # Disable delta save. + self.logger.debug(f"saving additions from process run") + self.infra.save(delta_graph=False) + self.logger.debug("# ####################################################################################") + + # Save the record linking, only if we added records. + # This will be the additions and any changes to ACL. 
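On quit, `prompt_confirm_add_func` decides whether the queued records are still created. A hypothetical console implementation of that callback:

```python
def prompt_confirm_add_func(bulk_add_records) -> bool:
    # Ask the user whether the records queued before quitting should still be created.
    answer = input(f"Add {len(bulk_add_records)} prepared record(s) anyway? [y/N] ")
    return answer.strip().lower() == "y"
```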
+ if should_add_records is True: + + self.logger.debug("# ####################################################################################") + self.logger.debug("# Save RECORD LINKING graph") + self.logger.debug("#") + + self.logger.debug(f"save additions from record linking ") + self.record_link.save() + self.logger.debug("# ####################################################################################") + + # Map user to service/task on a machine + self.user_service.run(infra=self.infra) + + return bulk_process_results diff --git a/keepercommander/discovery_common/record_link.py b/keepercommander/discovery_common/record_link.py new file mode 100644 index 000000000..30f59b47e --- /dev/null +++ b/keepercommander/discovery_common/record_link.py @@ -0,0 +1,439 @@ +from __future__ import annotations +import logging +from .constants import RECORD_LINK_GRAPH_ID +from .utils import get_connection +from .types import UserAcl, DiscoveryObject +from keeper_dag import DAG, EdgeType +import importlib +from typing import Any, Optional, List, TYPE_CHECKING + +if TYPE_CHECKING: + from keeper_dag.vertex import DAGVertex + + +class RecordLink: + + def __init__(self, record: Any, logger: Optional[Any] = None, debug_level: int = 0, fail_on_corrupt: bool = True, + **kwargs): + + self.conn = get_connection(**kwargs) + + # This will either be a KSM Record, or Commander KeeperRecord + self.record = record + self._dag = None + if logger is None: + logger = logging.getLogger() + self.logger = logger + self.debug_level = debug_level + + # Technically, since there is no encryption in this graph, there should be no corruption. + # Allow it to be set regardlessly. + self.fail_on_corrupt = fail_on_corrupt + + @property + def dag(self) -> DAG: + if self._dag is None: + + # Make sure this auto save is False. + # Since we don't have transactions, we want to save the record link if everything worked. + self._dag = DAG(conn=self.conn, record=self.record, graph_id=RECORD_LINK_GRAPH_ID, auto_save=False, + logger=self.logger, debug_level=self.debug_level, name="Record Linking", + fail_on_corrupt=self.fail_on_corrupt) + sync_point = self._dag.load(sync_point=0) + self.logger.debug(f"the record linking sync point is {sync_point or 0}") + if self.dag.has_graph is False: + self.dag.add_vertex(name=self.record.title, uid=self._dag.uid) + + return self._dag + + @property + def has_graph(self) -> bool: + return self.dag.has_graph + + def reload(self): + self._dag.load(sync_point=0) + + def get_record_link(self, uid: str) -> DAGVertex: + return self.dag.get_vertex(uid) + + def get_parent_uid(self, uid: str) -> Optional[str]: + """ + Get the vertex that the UID belongs to. + + This method will check the vertex ACL to see which edge has a True value for belongs_to. + If it is found, the record UID that the head points at will be returned. + If not found, None is returned. + """ + + vertex = self.dag.get_vertex(uid) + if vertex is not None: + for edge in vertex.edges: + if edge.edge_type == EdgeType.ACL: + content = edge.content_as_object(UserAcl) + if content.belongs_to is True: + return edge.head_uid + return None + + @staticmethod + def get_record_uid(discovery_vertex: DAGVertex, validate_record_type: Optional[str] = None) -> str: + """ + Get the record UID from the vertex + + """ + data = discovery_vertex.get_data() + if data is None: + raise Exception(f"The discovery vertex {discovery_vertex.uid} does not have a DATA edge. 
" + "Cannot get record UID.") + content = DiscoveryObject.get_discovery_object(discovery_vertex) + + if validate_record_type is not None: + if validate_record_type != content.record_type: + raise Exception(f"The vertex is not record type {validate_record_type}") + + if content.record_uid is not None: + return content.record_uid + raise Exception(f"The discovery vertex {discovery_vertex.uid} data does not have a populated record UID.") + + def add_configuration(self, discovery_vertex: DAGVertex): + """ + Add the configuration vertex to the DAG root. + + The configuration record UID will be the same as root UID. + + """ + + record_uid = self.get_record_uid(discovery_vertex) + record_vertex = self.dag.get_vertex(record_uid) + if record_vertex is None: + record_vertex = self.dag.add_vertex(uid=record_uid, name=discovery_vertex.name) + if self.dag.get_root.has(record_vertex) is False: + record_vertex.belongs_to_root(EdgeType.LINK) + + def discovery_belongs_to(self, discovery_vertex: DAGVertex, discovery_parent_vertex: DAGVertex, + acl: Optional[UserAcl] = None): + + """ + Link vault record using the vertices from discovery. + + If a link already exists, no additional link will be created. + """ + + try: + record_uid = self.get_record_uid(discovery_vertex) + except Exception as err: + self.logger.warning(f"The discovery vertex is missing a record uid, cannot connect record: {err}") + return + + # If the parent_vertex is the root, then don't get the record UID from the data. + # The root vertex will have no data, and the record UID is the same as the vertex UID. + if discovery_parent_vertex.uid == self.dag.uid: + parent_record_uid = discovery_parent_vertex.uid + else: + try: + parent_record_uid = self.get_record_uid(discovery_parent_vertex) + except Exception as err: + self.logger.warning("The discovery parent vertex is missing a record uid, cannot connect record: " + f"{err}") + return + + self.belongs_to( + record_uid=record_uid, + parent_record_uid=parent_record_uid, + acl=acl, + record_name=discovery_vertex.name, + parent_record_name=discovery_parent_vertex.name + ) + + def belongs_to(self, record_uid: str, parent_record_uid: str, acl: Optional[UserAcl] = None, + record_name: Optional[str] = None, parent_record_name: Optional[str] = None): + + """ + Link vault records using record UIDs. + + If a link already exists, no additional link will be created. + """ + + # Get the record's vertices. + # If a vertex does not exist, then add the vertex using the record UID + record_vertex = self.dag.get_vertex(record_uid) + if record_vertex is None: + self.logger.debug(f"adding record linking vertex for record UID {record_uid} ({record_name})") + record_vertex = self.dag.add_vertex(uid=record_uid, name=record_name) + + parent_record_vertex = self.dag.get_vertex(parent_record_uid) + if parent_record_vertex is None: + self.logger.debug(f"adding record linking vertex for parent record UID {parent_record_uid}") + parent_record_vertex = self.dag.add_vertex(uid=parent_record_uid, name=parent_record_name) + + self.logger.debug(f"record UID {record_vertex.uid} belongs to {parent_record_vertex.uid} " + f"({parent_record_name})") + + # By default, the LINK edge will link records. + # If ACL information was passed in, use the ACL edge. + edge_type = EdgeType.LINK + if acl is not None: + edge_type = EdgeType.ACL + + # Get the current edge if it exists. + # We need to create it if it does not exist and only add it if the ACL changed. 
+ # TODO: create a better ACL diff + existing_edge = record_vertex.get_edge(parent_record_vertex, edge_type=edge_type) + add_edge = True + if existing_edge is not None and existing_edge.active is True: + if edge_type == EdgeType.ACL: + # content = existing_edge.content_as_object(UserAcl) # type: UserAcl + # if content.is_admin == acl.is_admin: + add_edge = False + else: + add_edge = False + + if add_edge is True: + self.logger.debug(f" added {edge_type} edge") + record_vertex.belongs_to(parent_record_vertex, edge_type=edge_type, content=acl) + + def get_acl(self, record_uid: str, parent_record_uid: str, record_name: Optional[str] = None, + parent_record_name: Optional[str] = None) -> Optional[UserAcl]: + + # Get the record's vertices. + # If a vertex does not exist, then add the vertex using the record UID + record_vertex = self.dag.get_vertex(record_uid) + if record_vertex is None: + self.logger.debug(f"adding record linking vertex for record UID {record_uid} ({record_name})") + record_vertex = self.dag.add_vertex(uid=record_uid, name=record_name) + + parent_record_vertex = self.dag.get_vertex(parent_record_uid) + if parent_record_vertex is None: + self.logger.debug(f"adding record linking vertex for parent record UID {parent_record_uid}") + parent_record_vertex = self.dag.add_vertex(uid=parent_record_uid, name=parent_record_name) + + acl_edge = record_vertex.get_edge(parent_record_vertex, edge_type=EdgeType.ACL) + if acl_edge is None: + return None + + return acl_edge.content_as_object(UserAcl) + + def acl_has_belong_to_vertex(self, discovery_vertex: DAGVertex) -> Optional[DAGVertex]: + """ + Get the resource vertex for this user vertex that handles rotation, using the user's infrastructure vertex. + """ + + record_uid = self.get_record_uid(discovery_vertex, "pamUser") + if record_uid is None: + return None + + return self.acl_has_belong_to_record_uid(record_uid) + + def acl_has_belong_to_record_uid(self, record_uid: str) -> Optional[DAGVertex]: + + """ + Get the resource vertex for this user vertex that handles rotation. using the user's record UID. + """ + + record_vertex = self.dag.get_vertex(record_uid) + if record_vertex is None: + return None + for edge in record_vertex.edges: + if edge.edge_type != EdgeType.ACL: + continue + content = edge.content_as_object(UserAcl) + if content.belongs_to is True: + return self.dag.get_vertex(edge.head_uid) + return None + + def get_parent_record_uid(self, record_uid: str) -> Optional[str]: + """ + Get the parent record uid. + + Check the ACL edges for the one where belongs_to is True + If there is a LINK edge that leads to the parent. + """ + + record_vertex = self.dag.get_vertex(record_uid) + if record_vertex is None: + return None + for edge in record_vertex.edges: + if edge.edge_type == EdgeType.ACL: + content = edge.content_as_object(UserAcl) # type: UserAcl + if content.belongs_to is True: + return edge.head_uid + elif edge.edge_type == EdgeType.LINK: + return edge.head_uid + return None + + def get_child_record_uids(self, record_uid: str) -> List[str]: + """ + Get a list of child record for this parent. + + The list contains any parent that this record uid has a LINK or ACL edge to. 
+        """
+
+        record_vertex = self.dag.get_vertex(record_uid)
+        if record_vertex is None:
+            self.logger.debug(f"could not get the parent record for {record_uid}")
+            return []
+
+        record_uids = []
+        self.logger.debug(f"has {record_vertex.has_vertices()}")
+        for child_vertex in record_vertex.has_vertices(EdgeType.ACL):
+            record_uids.append(child_vertex.uid)
+        for child_vertex in record_vertex.has_vertices(EdgeType.LINK):
+            record_uids.append(child_vertex.uid)
+
+        return record_uids
+
+    def get_parent_record_uids(self, record_uid: str) -> List[str]:
+        """
+        Get a list of parent records this child record belongs to.
+
+        The list contains any parent that this record uid has a LINK or ACL edge to.
+        """
+
+        record_vertex = self.dag.get_vertex(record_uid)
+        if record_vertex is None:
+            self.logger.debug(f"could not get the child record for {record_uid}")
+            return []
+
+        record_uids = []
+        for vertex in record_vertex.belongs_to_vertices():
+            edge = record_vertex.get_edge(vertex, EdgeType.ACL)
+            if edge is None:
+                edge = record_vertex.get_edge(vertex, EdgeType.LINK)
+            if edge is not None:
+                record_uids.append(vertex.uid)
+        return record_uids
+
+    def get_admin_record_uid(self, record_uid: str) -> Optional[str]:
+        """
+        Get the record that administers this resource record.
+
+        """
+
+        record_vertex = self.dag.get_vertex(record_uid)
+        if record_vertex is not None:
+            for vertex in record_vertex.has_vertices():
+                for edge in vertex.edges:
+                    if edge.head_uid != record_vertex.uid:
+                        continue
+                    if edge.edge_type == EdgeType.ACL:
+                        content = edge.content_as_object(UserAcl)  # type: UserAcl
+                        if content.is_admin is True:
+                            return vertex.uid
+        return None
+
+    def discovery_disconnect_from(self, discovery_vertex: DAGVertex, discovery_parent_vertex: DAGVertex):
+        record_uid = self.get_record_uid(discovery_vertex)
+        parent_record_uid = self.get_record_uid(discovery_parent_vertex)
+        self.disconnect_from(record_uid=record_uid, parent_record_uid=parent_record_uid)
+
+    def disconnect_from(self, record_uid: str, parent_record_uid: str):
+        record_vertex = self.dag.get_vertex(record_uid)
+        parent_record_vertex = self.dag.get_vertex(parent_record_uid)
+
+        # Check if we got vertices for the record UIDs.
+        # Log info if we didn't.
+        # Since we are disconnecting, we are not going to treat this as a fatal error.
+        if record_vertex is None:
+            self.logger.info(f"for record linking, could not find the vertex for record UID {record_uid}."
+                             f" cannot disconnect from parent vertex for record UID {parent_record_uid}")
+            return
+        if parent_record_vertex is None:
+            self.logger.info(f"for record linking, could not find the parent vertex for record UID {parent_record_uid}."
+ f" cannot disconnect the child vertex for record UID {record_uid}") + return + + parent_record_vertex.disconnect_from(record_vertex) + + @staticmethod + def delete(vertex: DAGVertex): + if vertex is not None: + vertex.delete() + + def save(self): + if self.dag.has_graph is True: + self.logger.debug("saving the record linking.") + self.dag.save(delta_graph=False) + else: + self.logger.debug("the record linking graph does not contain any data, was not saved.") + + def to_dot(self, graph_format: str = "svg", show_version: bool = True, show_only_active_vertices: bool = True, + show_only_active_edges: bool = True, graph_type: str = "dot"): + + try: + mod = importlib.import_module("graphviz") + except ImportError: + raise Exception("Cannot to_dot(), graphviz module is not installed.") + + dot = getattr(mod, "Digraph")(comment=f"DAG for Discovery", format=graph_format) + + if graph_type == "dot": + dot.attr(rankdir='RL') + elif graph_type == "twopi": + dot.attr(layout="twopi") + dot.attr(ranksep="10") + dot.attr(ratio="auto") + else: + dot.attr(layout=graph_type) + + self.logger.debug(f"have {len(self.dag.all_vertices)} vertices") + for v in self.dag.all_vertices: + if show_only_active_vertices is True and v.active is False: + continue + + tooltip = "" + + for edge in v.edges: + + color = "grey" + style = "solid" + + # To reduce the number of edges, only show the active edges + if edge.active is True: + color = "black" + style = "bold" + elif show_only_active_edges is True: + continue + + # If the vertex is not active, gray out the DATA edge + if edge.edge_type == EdgeType.DATA and v.active is False: + color = "grey" + + if edge.edge_type == EdgeType.DELETION: + style = "dotted" + + edge_tip = "" + if edge.edge_type == EdgeType.ACL and v.active is True: + content = edge.content_as_dict + if content.get("is_admin") is True: + color = "red" + if content.get("belongs_to") is True: + if color == "red": + color = "purple" + else: + color = "blue" + + tooltip += f"TO {edge.head_uid}\\n" + for k, val in content.items(): + tooltip += f" * {k}={val}\\n" + tooltip += f"--------------------\\n\\n" + + label = DAG.EDGE_LABEL.get(edge.edge_type) + if label is None: + label = "UNK" + if edge.path is not None and edge.path != "": + label += f"\\npath={edge.path}" + if show_version is True: + label += f"\\nv={edge.version}" + + # tail, head (arrow side), label, ... 
+ dot.edge(v.uid, edge.head_uid, label, style=style, fontcolor=color, color=color, tooltip=edge_tip) + + shape = "ellipse" + fillcolor = "white" + color = "black" + if v.active is False: + fillcolor = "grey" + + label = f"uid={v.uid}" + dot.node(v.uid, label, color=color, fillcolor=fillcolor, style="filled", shape=shape, tooltip=tooltip) + + return dot diff --git a/keepercommander/discovery_common/rule.py b/keepercommander/discovery_common/rule.py new file mode 100644 index 000000000..b6b35b409 --- /dev/null +++ b/keepercommander/discovery_common/rule.py @@ -0,0 +1,305 @@ +from __future__ import annotations +from .constants import DIS_RULES_GRAPH_ID +from .types import (RuleTypeEnum, RuleItem, ActionRuleSet, ActionRuleItem, ScheduleRuleSet, ComplexityRuleSet, + Statement, RuleActionEnum) +from .utils import value_to_boolean, get_connection +from keeper_dag import DAG, EdgeType +from keeper_dag.exceptions import DAGException +from time import time +import base64 +import os +from typing import Any, List, Optional, Callable, TYPE_CHECKING + +if TYPE_CHECKING: + from .types import DiscoveryObject + + +class Rules: + + DATA_PATH = "rules" + RULE_ITEM_TYPE_MAP = { + "ActionRuleItem": RuleTypeEnum.ACTION, + "ScheduleRuleItem": RuleTypeEnum.SCHEDULE, + "ComplexityRuleItem": RuleTypeEnum.COMPLEXITY + } + RULE_TYPE_TO_SET_MAP = { + RuleTypeEnum.ACTION: ActionRuleSet, + RuleTypeEnum.SCHEDULE: ScheduleRuleSet, + RuleTypeEnum.COMPLEXITY: ComplexityRuleSet + } + + RULE_FIELDS = { + # Attributes the records + "recordType": {"type": str}, + "parentRecordType": {"type": str}, + "recordTitle": {"type": str}, + "recordNotes": {"type": str}, + "recordDesc": {"type": str}, + "parentUid": {"type": str}, + + # Record fields + "login": {"type": str}, + "password": {"type": str}, + "privatePEMKey": {"type": str}, + "distinguishedName": {"type": str}, + "connectDatabase": {"type": str}, + "managed": {"type": bool, "default": False}, + "hostName": {"type": str}, + "port": {"type": float, "default": 0}, + "operatingSystem": {"type": str}, + "instanceName": {"type": str}, + "instanceId": {"type": str}, + "providerGroup": {"type": str}, + "providerRegion": {"type": str}, + "databaseId": {"type": str}, + "databaseType": {"type": str}, + "useSSL": {"type": bool, "default": False}, + "domainName": {"type": str}, + "directoryId": {"type": str}, + "directoryType": {"type": str}, + } + + BREAK_OUT = { + "pamHostname": { + "hostName": "hostName", + "port": "port" + } + } + + RECORD_FIELD = { + "pamMachine": ["pamHostname"], + "pamDatabase": ["pamHostname", "databaseType"], + "pamDirectory": ["pamHostname", "directoryType"], + "pamUser": ["parentUid", "login", "distinguishedName"], + } + + OBJ_ATTR = { + "parentUid": "parent_record_uid" + } + + def __init__(self, record: Any, logger: Optional[Any] = None, debug_level: int = 0, fail_on_corrupt: bool = True, + **kwargs): + + self.conn = get_connection(**kwargs) + + # This will either be a KSM Record, or Commander KeeperRecord + self.record = record + self._dag = None + self.logger = logger + self.debug_level = debug_level + self.fail_on_corrupt = fail_on_corrupt + + @property + def dag(self) -> DAG: + if self._dag is None: + + # Turn auto_save on after the DAG has been created. + # No need to call it six times in a row to initialize it. 
+ self._dag = DAG(conn=self.conn, record=self.record, graph_id=DIS_RULES_GRAPH_ID, auto_save=False, + logger=self.logger, debug_level=self.debug_level, fail_on_corrupt=self.fail_on_corrupt) + self._dag.load() + + # Has the status been initialized? + if self._dag.has_graph is False: + for rule_type_enum in Rules.RULE_TYPE_TO_SET_MAP: + rules = self._dag.add_vertex() + rules.belongs_to_root( + EdgeType.KEY, + path=rule_type_enum.value + ) + content = Rules.RULE_TYPE_TO_SET_MAP[rule_type_enum]() + rules.add_data( + content=content, + ) + self._dag.save() + + # The graph exists now, turn on the auto_save. + self._dag.auto_save = True + return self._dag + + @staticmethod + def data_path(rule_type: RuleTypeEnum): + return f"/{rule_type.value}" + + def get_ruleset(self, rule_type: RuleTypeEnum): + path = self.data_path(rule_type) + rule_json = self.dag.walk_down_path(path).content_as_str + if rule_json is None: + raise DAGException("Could not get the status data from the DAG.") + rule_set_class = Rules.RULE_TYPE_TO_SET_MAP[rule_type] + return rule_set_class.model_validate_json(rule_json) + + def set_ruleset(self, rule_type: RuleTypeEnum, rules: List[Rules]): + path = self.data_path(rule_type) + self.dag.walk_down_path(path).add_data( + content=rules, + ) + # Auto save should save the data + + def _rule_transaction(self, func: Callable, rule: Optional[RuleItem] = None): + rule_type = rule.__class__.__name__ + rule_type_enum = Rules.RULE_ITEM_TYPE_MAP.get(rule_type) + if rule_type_enum is None: + raise ValueError("rule is not a known rule instance") + + # Get the ruleset and the rule list for the type + ruleset = self.get_ruleset(rule_type_enum) + + # Call the specialized code + rules = func( + r=rule, + rs=ruleset.rules + ) + + # Sort the rule by priority in asc order. 
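Rules are plain pydantic items appended to the ruleset through `add_rule()`, which assigns `rule_id` and `added_ts` before saving. A minimal sketch, assuming `rules` is an initialized `Rules` instance for the PAM configuration record:

```python
from keepercommander.discovery_common.types import ActionRuleItem, RuleActionEnum, Statement

rule = ActionRuleItem(
    priority=10,
    case_sensitive=False,
    statement=[
        Statement(field="recordType", operator="==", value="pamMachine"),
        Statement(field="operatingSystem", operator="==", value="linux"),
    ],
    action=RuleActionEnum.ADD,
)
rule = rules.add_rule(rule)   # assigns rule_id / added_ts and persists the ruleset
print(rule.rule_id)
```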
+ ruleset.rules = list(sorted(rules, key=lambda x: x.priority)) + self.set_ruleset(rule_type_enum, ruleset) + + def add_rule(self, rule: RuleItem) -> RuleItem: + + if rule.rule_id is None: + rule.rule_id = "RULE" + base64.urlsafe_b64encode(os.urandom(8)).decode().rstrip('=') + if rule.added_ts is None: + rule.added_ts = int(time()) + + def _add_rule(r: RuleItem, rs: List[RuleItem]): + rs.append(r) + return rs + + self._rule_transaction( + rule=rule, + func=_add_rule + ) + + return rule + + def update_rule(self, rule: RuleItem) -> RuleItem: + + def _update_rule(r: RuleItem, rs: List[RuleItem]): + new_rule_list = [] + for _r in rs: + if _r.rule_id == r.rule_id: + new_rule_list.append(r) + else: + new_rule_list.append(_r) + return new_rule_list + + self._rule_transaction( + rule=rule, + func=_update_rule + ) + + return rule + + def remove_rule(self, rule: RuleItem): + + def _remove_rule(r: RuleItem, rs: List[RuleItem]): + new_rule_list = [] + for _r in rs: + if _r.rule_id != r.rule_id: + new_rule_list.append(_r) + return new_rule_list + + self._rule_transaction( + rule=rule, + func=_remove_rule + ) + + def rule_list(self, rule_type: RuleTypeEnum, search: Optional[str] = None) -> List[RuleItem]: + rule_list = [] + for rule_item in self.get_ruleset(rule_type).rules: + if search is not None and rule_item.search(search) is False: + continue + rule_list.append(rule_item) + + return rule_list + + def get_rule_item(self, rule_type: RuleTypeEnum, rule_id: str) -> Optional[RuleItem]: + for rule_item in self.rule_list(rule_type=rule_type): + if rule_item.rule_id == rule_id: + return rule_item + return None + + @staticmethod + def make_action_rule_from_content(content: DiscoveryObject, action: RuleActionEnum, priority: Optional[int] = None, + case_sensitive: bool = True, + shared_folder_uid: Optional[str] = None) -> ActionRuleItem: + + if action == RuleActionEnum.IGNORE: + priority = -1 + + record_fields = Rules.RECORD_FIELD.get(content.record_type) + if record_fields is None: + raise ValueError(f"Record type {content.record_type} does not have fields maps.") + + statements = [ + Statement(field="recordType", operator="==", value=content.record_type) + ] + + for field_label in record_fields: + if field_label in Rules.OBJ_ATTR: + attr = Rules.OBJ_ATTR[field_label] + if hasattr(content, attr) is False: + raise Exception(f"Discovery object is missing attribute {attr}") + value = getattr(content, attr) + statements.append( + Statement(field=field_label, operator="==", value=value) + ) + else: + for field in content.fields: + label = field.label + if field_label != label: + continue + + value = field.value + if value is None or len(value) == 0: + continue + value = value[0] + + if label in Rules.BREAK_OUT: + for key in Rules.BREAK_OUT[label]: + key_value = value.get(key) + if key_value is None: + continue + statements.append( + Statement(field=key, operator="==", value=key_value) + ) + else: + statements.append( + Statement(field=label, operator="==", value=value) + ) + + return ActionRuleItem( + enabeld=True, + priority=priority, + case_sensitive=case_sensitive, + statement=statements, + action=action, + shared_folder_uid=shared_folder_uid + ) + + @staticmethod + def make_action_rule_statement_str(statement: List[Statement]) -> str: + statement_str = "" + for item in statement: + if statement_str != "": + statement_str += " and " + statement_str += item.field + " " + item.operator + " " + field_type = Rules.RULE_FIELDS.get(item.field).get("type") + if field_type is None: + raise ValueError("Unknown field in 
rule") + if field_type is str: + statement_str += f"'{item.value}'" + elif field_type is bool: + if value_to_boolean(item.value) is True: + statement_str += "true" + else: + statement_str += "false" + elif field_type is float: + if int(item.value) == item.value: + statement_str += str(int(item.value)) + else: + statement_str += str(item.value) + else: + raise ValueError("Cannot determine the field type for rule statement.") + return statement_str diff --git a/keepercommander/discovery_common/types.py b/keepercommander/discovery_common/types.py new file mode 100644 index 000000000..63aca202f --- /dev/null +++ b/keepercommander/discovery_common/types.py @@ -0,0 +1,678 @@ +from __future__ import annotations +from enum import Enum +from pydantic import BaseModel +import time +import datetime +import base64 +import json +from keeper_secrets_manager_core.crypto import CryptoUtils +from typing import Any, Union, Optional, List, TYPE_CHECKING + +if TYPE_CHECKING: + from keeper_dag.vertex import DAGVertex + +# IMPORTANT!!!!!!!!!!! +# +# Do not change attributes. +# This might cause a problem with deserializing existing data. +# It is safe to add attributes that allow blank values. +# There might be some pydantic magic that can be used to make migration with the deserializer. + + +class BaseEnum(Enum): + + @classmethod + def find_enum(cls, value: Union[Enum, str, int], default: Optional[Enum] = None): + if value is not None: + for e in cls: + if e == value or e.value == value: + return e + if hasattr(cls, str(value).upper()) is True: + return getattr(cls, value.upper()) + return default + + +class CredentialBase(BaseModel): + # Use Any because it might be a str or Secret, but Secret is defined to discover-and_rotation. + user: Optional[Any] = None + dn: Optional[Any] = None + password: Optional[Any] = None + private_key: Optional[Any] = None + database: Optional[Any] = None + + +class Settings(BaseModel): + + """ + credentials: List of Credentials used to test connections for resources. + default_shared_folder_uid: The default shared folder that should be used when adding records. + include_azure_aadds - Include Azure AD Domain Service. + skip_rules: Do not run the rule engine. + user_map: Map used to map found users to Keeper record UIDs + skip_machines: Do not discovery machines. + skip_databases: Do not discovery databases. + skip_directories: Do not discovery directoires. + skip_cloud_users - Skip cloud users like AWS IAM, or Azure Tenant users. + allow_resource_deletion - Allow discovery to remove resources. + allow_resource_deletion - Allow discovery to remove resources if missing. + allow_user_deletion - Allow discovery to remove users if missing. + resource_deletion_limit - Remove resource if not seen for # seconds; 0 will delete right away. + user_deletion_limit - Remove user right away if not seen for # seconds; 0 will delete right away. 
+ """ + + credentials: List[CredentialBase] = [] + default_shared_folder_uid: Optional[str] = None + include_azure_aadds: bool = False + skip_rules: bool = False + user_map: Optional[List[dict]] = None + skip_machines: bool = False + skip_databases: bool = False + skip_directories: bool = False + skip_cloud_users: bool = False + + allow_resource_deletion: bool = True + allow_user_deletion: bool = True + resource_deletion_limit: int = 0 + user_deletion_limit: int = 0 + + def set_user_map(self, obj): + if self.user_map is not None: + obj.user_map = self.user_map + + @property + def has_credentials(self): + return len(self.credentials) > 0 + +# STATUS + + +class JobItem(BaseModel): + job_id: str + start_ts: int + settings: Settings + end_ts: Optional[int] = None + success: Optional[bool] = None + resource_uid: Optional[str] = None + conversation_id: Optional[str] = None + error: Optional[str] = None + stacktrace: Optional[str] = None + sync_point: Optional[int] = None + delta: Optional[DiscoveryDelta] = None + + @property + def duration_sec(self) -> Optional[int]: + if self.end_ts is not None: + return self.end_ts - self.start_ts + return None + + @property + def start_ts_str(self): + return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(self.start_ts)) + + @property + def end_ts_str(self): + if self.end_ts is not None: + return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(self.end_ts)) + return "" + + @property + def duration_sec_str(self): + if self.is_running is True: + duration_sec = int(time.time()) - self.start_ts + else: + duration_sec = self.duration_sec + + if duration_sec is not None: + return str(datetime.timedelta(seconds=int(duration_sec))) + else: + return "" + + @property + def is_running(self): + # If no end timestamp, and there is a start timestamp, and the job has not been processed, and there is no + # success is running. + return self.end_ts is None and self.start_ts is not None and self.success is None + + +class JobContent(BaseModel): + active_job_id: Optional[str] = None + job_history: List[JobItem] = [] + + +class DiscoveryDeltaItem(BaseModel): + uid: str + version: int + record_uid: Optional[str] = None + changes: Optional[dict] = None + + @property + def has_record(self) -> bool: + return self.record_uid is not None + + +class DiscoveryDelta(BaseModel): + added: List[DiscoveryDeltaItem] = [] + changed: List[DiscoveryDeltaItem] = [] + deleted: List[DiscoveryDeltaItem] = [] + +# RULES + + +class RuleTypeEnum(BaseEnum): + ACTION = "action" + SCHEDULE = "schedule" + COMPLEXITY = "complexity" + + +class RuleActionEnum(BaseEnum): + PROMPT = "prompt" + ADD = "add" + IGNORE = "ignore" + + +class Statement(BaseModel): + field: str + operator: str + value: Union[str, bool, float] + + +class RuleItem(BaseModel): + added_ts: Optional[int] = None + rule_id: Optional[str] = None + enabled: bool = True + priority: int = 0 + case_sensitive: bool = True + statement: List[Statement] + + # Do not set this. + # This needs to be here for the RuleEngine. + # The RuleEngine will set this to its self. 
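A job entry in the status graph is just a `JobItem`; its helper properties derive the display values. A small, self-contained example:

```python
import time
from keepercommander.discovery_common.types import JobItem, Settings

job = JobItem(job_id="job-123", start_ts=int(time.time()), settings=Settings())
print(job.is_running)        # True: started, not ended, no success flag yet
print(job.duration_sec_str)  # elapsed time while the job is still running
```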
+ engine_rule: Optional[object] = None + + @property + def added_ts_str(self): + return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(self.added_ts)) + + def search(self, search: str) -> bool: + for item in self.statement: + if search in item.field or search in item.value: + return True + + if search in self.rule_id.lower() or search == self.rule_action.value or search == str(self.priority): + return True + + return False + + +class ActionRuleItem(RuleItem): + action: RuleActionEnum = RuleActionEnum.PROMPT + shared_folder_uid: Optional[str] = None + + +class ScheduleRuleItem(RuleItem): + tag: str + + +class ComplexityRuleItem(RuleItem): + tag: str + + +class RuleSet(BaseModel): + pass + + +class ActionRuleSet(RuleSet): + rules: List[ActionRuleItem] = [] + + +class ScheduleRuleSet(RuleSet): + rules: List[ScheduleRuleItem] = [] + + +class ComplexityRuleSet(RuleSet): + rules: List[ComplexityRuleItem] = [] + + +# INFRASTRUCTURE + +class UserRelEnum(BaseEnum): + """ + The relationship a pamUser has with a resource, or provider + + * BELONGS_TO - The pamUser's credentials are rotated on this resource. + * USER - The pamUser has access to this resource. + * ADMIN - The pamUser is the admin on this resource. + * INACTIVE - The pamUser has access to this resource, however, no longer does. + """ + + BELONGS_TO = "belongs_to" + USER = "user" + ADMIN = "admin" + INACTIVE = "inactive" + + +class RecordStatus(BaseEnum): + NONE = "none" + EXISTS = "exists" + ADD = "add" + IGNORE = "ignore" + + +class RecordField(BaseModel): + type: str + label: Optional[str] = None + value: List[Any] = [] + required: bool = False + + +class UserAclRotationSettings(BaseModel): + # Base64 JSON schedule + schedule: Optional[str] = "" + + # Base64 JSON for complexity + pwd_complexity: Optional[str] = "" + + disabled: bool = False + + # If true, do not rotate the username/password on remote system, if it exists. + noop: bool = False + + def set_pwd_complexity(self, complexity: Union[dict, str, bytes], record_key_bytes: bytes): + if isinstance(complexity, dict) is True: + complexity = json.dumps(complexity) + if isinstance(complexity, str) is True: + complexity = complexity.encode() + + if isinstance(complexity, bytes) is False: + raise ValueError("The complexity is not a dictionary, string or is bytes.") + + self.pwd_complexity = base64.b64encode(CryptoUtils.encrypt_aes(complexity, record_key_bytes)).decode() + + def get_pwd_complexity(self, record_key_bytes: bytes) -> Optional[dict]: + if self.pwd_complexity is None or self.pwd_complexity == "": + return None + complexity_enc_bytes = base64.b64decode(self.pwd_complexity.encode()) + complexity_bytes = CryptoUtils.decrypt_aes(complexity_enc_bytes, record_key_bytes) + return json.loads(complexity_bytes) + + def set_schedule(self, schedule: Union[dict, str]): + if isinstance(schedule, dict) is True: + schedule = json.dumps(schedule) + self.schedule = schedule + + def get_schedule(self) -> Optional[dict]: + if self.pwd_complexity is None or self.pwd_complexity == "": + return None + return json.loads(self.schedule) + + +class UserAcl(BaseModel): + # Is this user's password/private key managed by this resource? + # This should be unique for all the ACL edges of this user vertex; only one ACL edge should have a True value. + belongs_to: bool = False + + # Is this user an admin for the resource? + # This can be set True for multiple ACL edges; a user can be admin on multiple resources. + is_admin: bool = False + + # Is this user a cloud-based user? 
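The rotation settings store the complexity policy encrypted with the record key and base64 encoded. A sketch using a random stand-in key; the policy and schedule dictionaries are illustrative:

```python
import os
from keepercommander.discovery_common.types import UserAclRotationSettings

record_key = os.urandom(32)  # stand-in for the real record key bytes
rotation = UserAclRotationSettings()
rotation.set_pwd_complexity({"length": 32, "caps": 2, "digits": 2, "special": 2}, record_key)
rotation.set_schedule({"type": "WEEKLY", "weekday": "SUNDAY", "time": "02:00"})  # illustrative schedule
print(rotation.get_pwd_complexity(record_key))  # round-trips back to the dict
```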
+ # This will only be True if the ACL of the PAM User connects to a configuration vertex. + is_iam_user: Optional[bool] = False + + rotation_settings: Optional[UserAclRotationSettings] = None + + @staticmethod + def default(): + """ + Make an empty UserAcl that contains all the default values for the attributes. + """ + return UserAcl( + rotation_settings=UserAclRotationSettings() + ) + + +class DiscoveryItem(BaseModel): + pass + + +class DiscoveryConfiguration(DiscoveryItem): + """ + This is very general. + We are not going to make a class for each configuration/provider. + Populate a dictionary for the important information (i.e., Network CIDR) + """ + type: str + info: dict + + # Configurations never allows an admin user. + # This should always be False. + allows_admin: bool = False + + +class DiscoveryUser(DiscoveryItem): + user: Optional[str] = None + dn: Optional[str] = None + database: Optional[str] = None + managed: bool = False + + # These are for directory services. + active: bool = True + expired: bool = False + source: Optional[str] = None + + # Normally these do not get set, except for the access_user. + password: Optional[str] = None + private_key: Optional[str] = None + + +class FactsDirectory(BaseModel): + domain: str + software: Optional[str] = None + login_format: Optional[str] = None + + +class FactsId(BaseModel): + machine_id: Optional[str] = None + product_id: Optional[str] = None + board_serial: Optional[str] = None + + +class FactsNameUser(BaseModel): + name: str + user: str + + +class Facts(BaseModel): + name: Optional[str] = None + directories: List[FactsDirectory] = [] + id: Optional[FactsId] = None + services: List[FactsNameUser] = [] + tasks: List[FactsNameUser] = [] + + @property + def has_services(self): + return self.services is not None and len(self.services) > 0 + + @property + def has_tasks(self): + return self.tasks is not None and len(self.tasks) > 0 + + @property + def has_services_or_tasks(self): + return self.has_services or self.has_tasks + + +class DiscoveryMachine(DiscoveryItem): + host: str + ip: str + port: Optional[int] = None + os: Optional[str] = None + provider_region: Optional[str] = None + provider_group: Optional[str] = None + is_gateway: bool = False + allows_admin: bool = True + admin_reason: Optional[str] = None + facts: Optional[Facts] = None + + +class DiscoveryDatabase(DiscoveryItem): + host: str + ip: str + port: int + type: str + use_ssl: bool = False + database: Optional[str] = None + provider_region: Optional[str] = None + provider_group: Optional[str] = None + allows_admin: bool = True + admin_reason: Optional[str] = None + + +class DiscoveryDirectory(DiscoveryItem): + host: str + ip: str + ips: List[str] = [] + port: int + type: str + use_ssl: bool = False + provider_region: Optional[str] = None + provider_group: Optional[str] = None + allows_admin: bool = True + admin_reason: Optional[str] = None + + +class DiscoveryObject(BaseModel): + uid: str + id: str + object_type_value: str + record_uid: Optional[str] = None + parent_record_uid: Optional[str] = None + record_type: str + fields: List[RecordField] + ignore_object: bool = False + action_rules_result: Optional[str] = None + shared_folder_uid: Optional[str] = None + name: str + title: str + description: str + notes: List[str] = [] + error: Optional[str] = None + stacktrace: Optional[str] = None + + # If the object is missing, this will show a timestamp on when it went missing. + missing_since_ts: Optional[int] = None + + # This is not the official admin. 
+ # This is the user discovery used to access the resource. + # This will be used to help the user create an admin user. + access_user: Optional[DiscoveryUser] = None + + # Specific information for a record type. + item: Union[DiscoveryConfiguration, DiscoveryUser, DiscoveryMachine, DiscoveryDatabase, DiscoveryDirectory] + + @property + def record_exists(self): + return self.record_uid is not None + + def get_field_value(self, label): + for field in self.fields: + if field.label == label: + value = field.value + if len(value) == 0: + return None + return field.value[0] + return None + + def set_field_value(self, label, value): + if isinstance(value, list) is False: + value = [value] + for field in self.fields: + if field.label == label: + field.value = value + return + raise ValueError(f"Cannot find field with label {label}") + + @staticmethod + def get_discovery_object(vertex: DAGVertex) -> DiscoveryObject: + """ + Get DiscoveryObject with the correct item instance. + + Pydantic doesn't like Unions on the item attribute. + Item needs to be validated using the correct class. + + :param vertex: + :return: + """ + + mapping = { + "pamUser": DiscoveryUser, + "pamDirectory": DiscoveryDirectory, + "pamMachine": DiscoveryMachine, + "pamDatabase": DiscoveryDatabase + } + + content_dict = vertex.content_as_dict + + if content_dict is None: + raise Exception(f"The discovery vertex {vertex.uid} does not have any content data.") + record_type = content_dict.get("record_type") + if record_type in mapping: + content_dict["item"] = mapping[record_type].model_validate(content_dict["item"]) + else: + content_dict["item"] = DiscoveryConfiguration.model_validate(content_dict["item"]) + + return DiscoveryObject.model_validate(content_dict) # type: DiscoveryObject + +# PROCESS + + +class PromptActionEnum(BaseEnum): + ADD = "add" + IGNORE = "ignore" + SKIP = "skip" + + +class DirectoryInfo(BaseModel): + directory_record_uids: List[str] = [] + directory_user_record_uids: List[str] = [] + + def has_directories(self) -> bool: + return len(self.directory_record_uids) > 0 + + +class NormalizedRecord(BaseModel): + """ + This class normalizes a KeeperRecord, TypedRecord, or KSM Record into a common representation. + """ + record_uid: str + record_type: str + title: str + fields: List[RecordField] = [] + note: Optional[str] = None + + def _field(self, type, label) -> Optional[RecordField]: + for field in self.fields: + value = field.value + if value is None or len(value) == 0: + continue + if field.label == type and value[0].lower() == label.lower(): + return field + return None + + def find_user(self, user): + + from .utils import split_user_and_domain + + res = self._field("login", user) + if res is None: + user, _ = split_user_and_domain(user) + res = self._field("login", user) + + return res + + def find_dn(self, user): + return self._field("distinguishedName", user) + + +class PromptResult(BaseModel): + + # "add" and "ignore" are the only actions + action: PromptActionEnum + + # The ACL is only needed for pamUser records. + acl: Optional[UserAcl] = None + + # If the discovery object content has been modified, set it here. + content: Optional[DiscoveryObject] = None + + # Existing record that should be the admin.
+ record_uid: Optional[str] = None + + # Note to include with record + note: Optional[str] = None + + +class SummaryItem(BaseModel): + vertex_uid: str + record_type: str + + +class Summary(BaseModel): + ignored: List[SummaryItem] = [] + auto_add: List[SummaryItem] = [] + prompt: List[SummaryItem] = [] + + @property + def total_found(self): + return len(self.auto_add) + len(self.prompt) + + @property + def total_ignored(self): + return len(self.ignored) + + +class BulkRecordAdd(BaseModel): + + # The title of the record. + # This is used for debug reasons. + title: str + + # Record note + note: Optional[str] = None + + # This could be a Commander KeeperRecord, Commander RecordAdd, or KSM Record + record: Any + record_type: str + + # Normal record UID strings + record_uid: str + parent_record_uid: str + + # The shared folder UID where the record should be created. + shared_folder_uid: str + + +class BulkRecordConvert(BaseModel): + record_uid: str + parent_record_uid: str + + # Record note + note: Optional[str] = None + + +class BulkRecordSuccess(BaseModel): + title: str + record_uid: str + + +class BulkRecordFail(BaseModel): + title: str + error: str + + +class BulkProcessResults(BaseModel): + success: List[BulkRecordSuccess] = [] + failure: List[BulkRecordFail] = [] + + @property + def has_failures(self) -> bool: + return len(self.failure) > 0 + + @property + def num_results(self) -> int: + return self.failure_count + self.success_count + + @property + def failure_count(self) -> int: + return len(self.failure) + + @property + def success_count(self) -> int: + return len(self.success) + + +# Service/Schedule Task + +class ServiceAcl(BaseModel): + is_service: bool = False + is_task: bool = False diff --git a/keepercommander/discovery_common/user_service.py b/keepercommander/discovery_common/user_service.py new file mode 100644 index 000000000..255b1b2ce --- /dev/null +++ b/keepercommander/discovery_common/user_service.py @@ -0,0 +1,477 @@ +from __future__ import annotations +import logging +from .constants import USER_SERVICE_GRAPH_ID, PAM_MACHINE, PAM_USER +from .utils import get_connection, user_in_lookup, user_check_list +from .types import DiscoveryObject, ServiceAcl, FactsNameUser +from .infrastructure import Infrastructure + +from keeper_dag import DAG, EdgeType +import importlib +from typing import Any, Optional, List, TYPE_CHECKING + +if TYPE_CHECKING: + from keeper_dag.vertex import DAGVertex + from keeper_dag.edge import DAGEdge + + +class UserService: + + def __init__(self, record: Any, logger: Optional[Any] = None, history_level: int = 0, + debug_level: int = 0, fail_on_corrupt: bool = True, **kwargs): + + self.conn = get_connection(**kwargs) + + # This will either be a KSM Record, or Commander KeeperRecord + self.record = record + self._dag = None + if logger is None: + logger = logging.getLogger() + self.logger = logger + self.history_level = history_level + self.debug_level = debug_level + self.fail_on_corrupt = fail_on_corrupt + + self.auto_save = False + self.last_sync_point = -1 + + @property + def dag(self) -> DAG: + if self._dag is None: + + self._dag = DAG(conn=self.conn, record=self.record, graph_id=USER_SERVICE_GRAPH_ID, + auto_save=False, logger=self.logger, history_level=self.history_level, + debug_level=self.debug_level, name="Discovery Service/Tasks", + fail_on_corrupt=self.fail_on_corrupt) + + self._dag.load(sync_point=0) + + return self._dag + + @property + def has_graph(self) -> bool: + return self.dag.has_graph + + def reload(self): + self._dag.load(sync_point=0) 
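+ # A minimal usage sketch (illustrative only; the configuration record and Commander params shown here are assumed, not provided by this module): + # + # us = UserService(record=pam_config_record, params=params) + # us.belongs_to(resource_uid="<machine record UID>", user_uid="<user record UID>", acl=ServiceAcl(is_service=True)) + # us.save() + # + # The keyword arguments are forwarded to get_connection(), so either a KSM config ('ksm'), Commander 'params', or the USE_LOCAL_DAG environment variable is expected.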
+ + def get_record_link(self, uid: str) -> DAGVertex: + return self.dag.get_vertex(uid) + + @staticmethod + def get_record_uid(discovery_vertex: DAGVertex) -> str: + """ + Get the record UID from the vertex + + """ + data = discovery_vertex.get_data() + if data is None: + raise Exception(f"The discovery vertex {discovery_vertex.uid} does not have a DATA edge. " + "Cannot get record UID.") + content = DiscoveryObject.get_discovery_object(discovery_vertex) + if content.record_uid is not None: + return content.record_uid + raise Exception(f"The discovery vertex {discovery_vertex.uid} data does not have a populated record UID.") + + def belongs_to(self, resource_uid: str, user_uid: str, acl: Optional[ServiceAcl] = None, + resource_name: Optional[str] = None, user_name: Optional[str] = None): + + """ + Link vault records using record UIDs. + + If a link already exists, no additional link will be created. + """ + + # Get thr record vertices. + # If a vertex does not exist, then add the vertex using the record UID + resource_vertex = self.dag.get_vertex(resource_uid) + if resource_vertex is None: + self.logger.debug(f"adding resource vertex for record UID {resource_uid} ({resource_name})") + resource_vertex = self.dag.add_vertex(uid=resource_uid, name=resource_name) + + user_vertex = self.dag.get_vertex(user_uid) + if user_vertex is None: + self.logger.debug(f"adding user vertex for record UID {user_uid} ({user_name})") + user_vertex = self.dag.add_vertex(uid=user_uid, name=user_name) + + self.logger.debug(f"user {user_vertex.uid} controls services on {resource_vertex.uid}") + + edge_type = EdgeType.LINK + if acl is not None: + edge_type = EdgeType.ACL + + user_vertex.belongs_to(resource_vertex, edge_type=edge_type, content=acl) + + def disconnect_from(self, resource_uid: str, user_uid: str): + resource_vertex = self.dag.get_vertex(resource_uid) + user_vertex = self.dag.get_vertex(user_uid) + user_vertex.disconnect_from(resource_vertex) + + def get_acl(self, resource_uid, user_uid) -> Optional[ServiceAcl]: + + """ + Get the service/task ACL between a resource and the user. + + """ + + resource_vertex = self.dag.get_vertex(resource_uid) + user_vertex = self.dag.get_vertex(user_uid) + if resource_vertex is None or user_vertex is None: + self.logger.debug(f"there is no acl between {resource_uid} and {user_uid}") + return ServiceAcl() + + acl_edge = user_vertex.get_edge(resource_vertex, edge_type=EdgeType.ACL) # type: DAGEdge + if acl_edge is None: + return None + + return acl_edge.content_as_object(ServiceAcl) + + def resource_has_link(self, resource_uid) -> bool: + """ + Is this resource linked to the configuration? + """ + + resource_vertex = self.dag.get_vertex(resource_uid) + if resource_vertex is None: + return False + link_edge = resource_vertex.get_edge(self.dag.get_root, edge_type=EdgeType.LINK) # type: DAGEdge + return link_edge is not None + + def get_resource_vertices(self, user_uid: str) -> List[DAGVertex]: + + """ + Get the resource vertices where the user is used for a service or task. + + """ + + user_vertex = self.dag.get_vertex(user_uid) + if user_vertex is None: + return [] + return user_vertex.belongs_to_vertices() + + def get_user_vertices(self, resource_uid: str) -> List[DAGVertex]: + + """ + Get the user vertices that control a service or task on this machine. 
+ + """ + resource_vertex = self.dag.get_vertex(resource_uid) + if resource_vertex is None: + return [] + return resource_vertex.has_vertices() + + @staticmethod + def delete(vertex: DAGVertex): + if vertex is not None: + vertex.delete() + + def save(self): + if self.dag.has_graph is True: + self.logger.debug("saving the service user.") + self.dag.save(delta_graph=False) + else: + self.logger.debug("the service user graph does not contain any data, was not saved.") + + def to_dot(self, graph_format: str = "svg", show_version: bool = True, show_only_active_vertices: bool = True, + show_only_active_edges: bool = True, graph_type: str = "dot"): + + try: + mod = importlib.import_module("graphviz") + except ImportError: + raise Exception("Cannot to_dot(), graphviz module is not installed.") + + dot = getattr(mod, "Digraph")(comment=f"DAG for Discovery", format=graph_format) + + if graph_type == "dot": + dot.attr(rankdir='RL') + elif graph_type == "twopi": + dot.attr(layout="twopi") + dot.attr(ranksep="10") + dot.attr(ratio="auto") + else: + dot.attr(layout=graph_type) + + self.logger.debug(f"have {len(self.dag.all_vertices)} vertices") + for v in self.dag.all_vertices: + if show_only_active_vertices is True and v.active is False: + continue + + tooltip = "" + + for edge in v.edges: + + color = "grey" + style = "solid" + + # To reduce the number of edges, only show the active edges + if edge.active is True: + color = "black" + style = "bold" + elif show_only_active_edges is True: + continue + + # If the vertex is not active, gray out the DATA edge + if edge.edge_type == EdgeType.DATA and v.active is False: + color = "grey" + + if edge.edge_type == EdgeType.DELETION: + style = "dotted" + + edge_tip = "" + if edge.edge_type == EdgeType.ACL and v.active is True: + content = edge.content_as_dict + if content.get("is_service") is True: + color = "red" + if content.get("is_task") is True: + if color == "red": + color = "purple" + else: + color = "blue" + + tooltip += f"TO {edge.head_uid}\\n" + for k, val in content.items(): + tooltip += f" * {k}={val}\\n" + tooltip += f"--------------------\\n\\n" + + label = DAG.EDGE_LABEL.get(edge.edge_type) + if label is None: + label = "UNK" + if edge.path is not None and edge.path != "": + label += f"\\npath={edge.path}" + if show_version is True: + label += f"\\nv={edge.version}" + + # tail, head (arrow side), label, ... + dot.edge(v.uid, edge.head_uid, label, style=style, fontcolor=color, color=color, tooltip=edge_tip) + + shape = "ellipse" + fillcolor = "white" + color = "black" + if v.active is False: + fillcolor = "grey" + + label = f"uid={v.uid}" + dot.node(v.uid, label, color=color, fillcolor=fillcolor, style="filled", shape=shape, tooltip=tooltip) + + return dot + + def _connect_service_users(self, + infra_resource_content: DiscoveryObject, + infra_resource_vertex: DAGVertex, + services: List[FactsNameUser]): + + self.logger.debug(f"processing services for {infra_resource_content.description} ({infra_resource_vertex.uid})") + + # We don't care about the name of the service, we just need a list users. 
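+ # For example (hypothetical data), services such as [FactsNameUser(name="nginx", user="svc_web"), FactsNameUser(name="Backup", user="CORP\\svc_backup")] would yield lookup == {"svc_web": True, "corp\\svc_backup": True}.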
+ lookup = {} + for service in services: + lookup[service.user.lower()] = True + + for infra_user_vertex in infra_resource_vertex.has_vertices(): + infra_user_content = DiscoveryObject.get_discovery_object(infra_user_vertex) + if infra_user_content.record_uid is None: + continue + if user_in_lookup( + lookup=lookup, + user=infra_user_content.item.user, + name=infra_user_content.name, + source=infra_user_content.item.source): + self.logger.debug(f" * found user for service: {infra_user_content.item.user}") + acl = self.get_acl(infra_resource_content.record_uid, infra_user_content.record_uid) + if acl is None: + acl = ServiceAcl() + acl.is_service = True + self.belongs_to( + resource_uid=infra_resource_content.record_uid, + resource_name=infra_resource_content.uid, + user_uid=infra_user_content.record_uid, + user_name=infra_user_content.uid, + acl=acl) + + def _connect_task_users(self, + infra_resource_content: DiscoveryObject, + infra_resource_vertex: DAGVertex, + tasks: List[FactsNameUser]): + + self.logger.debug(f"processing tasks for {infra_resource_content.description} ({infra_resource_vertex.uid})") + + # We don't care about the name of the tasks, we just need a list users. + lookup = {} + for task in tasks: + lookup[task.user.lower()] = True + + for infra_user_vertex in infra_resource_vertex.has_vertices(): + infra_user_content = DiscoveryObject.get_discovery_object(infra_user_vertex) + if infra_user_content.record_uid is None: + continue + if user_in_lookup( + lookup=lookup, + user=infra_user_content.item.user, + name=infra_user_content.name, + source=infra_user_content.item.source): + self.logger.debug(f" * found user for task: {infra_user_content.item.user}") + acl = self.get_acl(infra_resource_content.record_uid, infra_user_content.record_uid) + if acl is None: + acl = ServiceAcl() + acl.is_task = True + self.belongs_to( + resource_uid=infra_resource_content.record_uid, + resource_name=infra_resource_content.uid, + user_uid=infra_user_content.record_uid, + user_name=infra_user_content.uid, + acl=acl) + + def _validate_users(self, + infra_resource_content: DiscoveryObject, + infra_resource_vertex: DAGVertex): + + """ + This method will check to see if a resource's users' ACL edges are still valid. + + """ + + self.logger.debug(f"validate existing user service edges to see if still valid to " + f"{infra_resource_content.name}") + + service_lookup = {} + for service in infra_resource_content.item.facts.services: + service_lookup[service.user.lower()] = True + + task_lookup = {} + for task in infra_resource_content.item.facts.tasks: + task_lookup[task.user.lower()] = True + + # Get the user service resource vertex. + # If it does not exist, then we cannot validate users. 
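+ # Note: the user service graph is keyed by record UID, so a machine that does not have a record UID yet has no vertex here and there is nothing to validate.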
+ user_service_resource_vertex = self.dag.get_vertex(infra_resource_content.record_uid) + if user_service_resource_vertex is None: + return + + infra_dag = infra_resource_vertex.dag + + for user_service_user_vertex in user_service_resource_vertex.has_vertices(): + acl_edge = user_service_user_vertex.get_edge( + user_service_resource_vertex, edge_type=EdgeType.ACL) # type: DAGEdge + if acl_edge is None: + self.logger.info(f"User record {user_service_user_vertex.uid} does not have an ACL edge to " + f"{user_service_resource_vertex.uid} for user services.") + continue + + found_service_acl = False + found_task_acl = False + changed = False + + acl = acl_edge.content_as_object(ServiceAcl) + + user = infra_dag.search_content({"record_type": PAM_USER, "record_uid": user_service_user_vertex.uid}) + infra_user_content = None + found_user = len(user) > 0 + if found_user is True: + infra_user_vertex = user[0] + if infra_user_vertex.active is False: + found_user = False + else: + infra_user_content = DiscoveryObject.get_discovery_object(infra_user_vertex) + + if found_user is False: + self.disconnect_from(user_service_resource_vertex.uid, user_service_user_vertex.uid) + continue + + check_list = user_check_list( + user=infra_user_content.item.user, + name=infra_user_content.name, + source=infra_user_content.item.source + ) + + if acl.is_service is True: + for check_user in check_list: + if check_user in service_lookup: + found_service_acl = True + break + if found_service_acl is False: + acl.is_service = False + changed = True + + if acl.is_task is True: + for check_user in check_list: + if check_user in task_lookup: + found_task_acl = True + break + if found_task_acl is False: + acl.is_task = False + changed = True + + if (found_service_acl is True or found_task_acl is True) and changed is True: + self.logger.debug(f"user {user_service_user_vertex.uid}(US) to {user_service_resource_vertex.uid} updated") + self.belongs_to(user_service_resource_vertex.uid, user_service_user_vertex.uid, acl) + elif found_service_acl is False and found_task_acl is False: + self.logger.debug(f"user {user_service_user_vertex.uid}(US) to {user_service_resource_vertex.uid} disconnected") + self.disconnect_from(user_service_resource_vertex.uid, user_service_user_vertex.uid) + + self.logger.debug("DONE validating existing user service edges") + + def run(self, infra: Optional[Infrastructure] = None, **kwargs): + + self.logger.debug("") + self.logger.debug("##########################################################################################") + self.logger.debug("# MAP USER TO MACHINE FOR SERVICE/TASKS") + self.logger.debug("") + + # If an instance of Infrastructure is not passed in, build one from the connection. + if infra is None: + + # Get ksm from the connection. + # However, this might be a local connection, so check first. + # Local connections don't need ksm. + if hasattr(self.conn, "ksm") is True: + kwargs["ksm"] = getattr(self.conn, "ksm") + + # Get the entire infrastructure graph; sync point = 0 + infra = Infrastructure(record=self.record, **kwargs) + infra.load() + + # Walk down to the configuration vertex. + infra_root_vertex = infra.get_root + infra_config_vertex = infra_root_vertex.has_vertices()[0] + + # For the user service graph, the root vertex is equivalent to the infrastructure configuration vertex. + user_service_config_vertex = self.dag.get_root + + # Find all the resources that are machines.
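+ # The resulting user service graph is, roughly: root/configuration -LINK-> machine record UID, with user record UID -ACL(ServiceAcl)-> machine record UID for each service/task account found.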
+ for infra_resource_vertex in infra_config_vertex.has_vertices(): + if infra_resource_vertex.active is False or infra_resource_vertex.has_data is False: + continue + infra_resource_content = DiscoveryObject.get_discovery_object(infra_resource_vertex) + if infra_resource_content.record_type == PAM_MACHINE: + + # Check whether the users on the resource are still part of a service or task. + self._validate_users(infra_resource_content, infra_resource_vertex) + + # Do we have services or tasks that are run as a user with a password? + if infra_resource_content.item.facts.has_services_or_tasks is True: + + # If the resource does not exist in the user service graph, add a vertex and link it to the + # user service root/configuration vertex. + user_service_resource_vertex = self.dag.get_vertex(infra_resource_content.record_uid) + if user_service_resource_vertex is None: + user_service_resource_vertex = self.dag.add_vertex(uid=infra_resource_content.record_uid, + name=infra_resource_content.description) + if user_service_config_vertex.has(user_service_resource_vertex) is False: + user_service_resource_vertex.belongs_to_root(EdgeType.LINK) + + # Do we have services that are run as a user with a password? + if infra_resource_content.item.facts.has_services is True: + self._connect_service_users( + infra_resource_content, + infra_resource_vertex, + infra_resource_content.item.facts.services) + + # Do we have tasks that are run as a user with a password? + if infra_resource_content.item.facts.has_tasks is True: + self._connect_task_users( + infra_resource_content, + infra_resource_vertex, + infra_resource_content.item.facts.tasks) + + self.save() + + diff --git a/keepercommander/discovery_common/utils.py b/keepercommander/discovery_common/utils.py new file mode 100644 index 000000000..59523c324 --- /dev/null +++ b/keepercommander/discovery_common/utils.py @@ -0,0 +1,121 @@ +from __future__ import annotations +import os +from .constants import PAM_USER +from .types import DiscoveryObject +from keeper_dag.vertex import DAGVertex +from typing import List, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from keeper_dag.dag import DAG + + +def value_to_boolean(value): + value = str(value) + if value.lower() in ['true', 'yes', 'on', '1']: + return True + elif value.lower() in ['false', 'no', 'off', '0']: + return False + else: + return None + + +def get_connection(**kwargs): + + """ + This method will return the proper connection based on the params passed in. + + If `ksm` is passed a KDNRM KSM instance, it will connect using Keeper Secrets Manager. + If `params` is passed a KeeperParams instance, it will connect using Commander. + If the env var `USE_LOCAL_DAG` is True, it will connect using the Local test DAG engine. + + It returns a child instance of the Connection class. + """ + + if value_to_boolean(os.environ.get("USE_LOCAL_DAG")) is True: + from keeper_dag.connection.local import Connection + conn = Connection() + else: + ksm = kwargs.get("ksm") + params = kwargs.get("params") + if ksm is not None: + from keeper_dag.connection.ksm import Connection + conn = Connection(config=ksm.storage_config) + elif params is not None: + from keeper_dag.connection.commander import Connection + conn = Connection(params=params) + else: + raise ValueError("Must pass 'ksm' for KSM, 'params' for Commander. 
Found neither.") + return conn + + +def split_user_and_domain(user: str) -> (str, Optional[str]): + + domain = None + + if "\\" in user: + user_parts = user.split("\\", maxsplit=1) + domain = user_parts[0] + user = user_parts[1] + elif "@" in user: + user_parts = user.split("@") + domain = user_parts.pop() + user = "@".join(user_parts) + + return user, domain + + +def user_check_list(user: str, name: Optional[str] = None, source: Optional[str] = None) -> List[str]: + user, domain = split_user_and_domain(user) + user = user.lower() + check_list = [user, f".\\{user}", ] + if name is not None: + name = name.lower() + check_list += [name, f".\\{name}"] + if source is not None: + check_list.append(f"{source.lower()}\\{user}") + domain_parts = source.split(".") + if len(domain_parts) > 1: + check_list.append(f"{domain_parts[0]}\\{user}") + if domain is not None: + domain = domain.lower() + check_list.append(f"{domain}\\{user}") + domain_parts = domain.split(".") + if len(domain_parts) > 1: + check_list.append(f"{domain_parts[0]}\\{user}") + + return check_list + + +def user_in_lookup(user: str, lookup: dict, name: Optional[str] = None, source: Optional[str] = None) -> bool: + + for check_user in user_check_list(user, name, source): + if check_user in lookup: + return True + return False + + +def find_user_vertex(graph: DAG, user: str, domain: Optional[str] = None) -> Optional[DAGVertex]: + + user_vertices = graph.search_content({"record_type": PAM_USER}) + for user_vertex in user_vertices: + + # Make sure the vertex is active, and has content data + if user_vertex.active is False or user_vertex.has_data is False: + continue + content = DiscoveryObject.get_discovery_object(user_vertex) + + current_user, current_domain = split_user_and_domain(content.item.user) + + # If we want a directory user and the current user is not one, or it does not match the domain, then skip + if domain is not None and (current_domain is None or domain.lower() != current_domain.lower()): + continue + + if current_user.lower() == user.lower(): + return user_vertex + + return None + + + + + diff --git a/keepercommander/discovery_common/verify.py b/keepercommander/discovery_common/verify.py new file mode 100644 index 000000000..e3b2d66b7 --- /dev/null +++ b/keepercommander/discovery_common/verify.py @@ -0,0 +1,388 @@ +from __future__ import annotations +import logging +from .infrastructure import Infrastructure +from .record_link import RecordLink +from .user_service import UserService +from .constants import PAM_MACHINE, PAM_DIRECTORY +from .utils import get_connection +from .types import DiscoveryObject +from keeper_dag import EdgeType +from keeper_dag.edge import DAGEdge +import re +import sys +from typing import Any, Optional, Callable, TYPE_CHECKING + +if TYPE_CHECKING: + from keeper_dag.vertex import DAGVertex + + +class Verify: + + """ + Check the graphs to make sure they are correct. + + This class will try to find problems with the graphs and, if flagged to do so, try to fix them. + Checks are: + + * Check if the user service and task graph matches what Discovery found for the machine. + * Check if the infrastructure graph has an actual record for the record UID stored in the content. 
+ + """ + + USER_SERVICE = "User Service/Task Mapping" + COLOR_RESET = "reset" + OK = "ok" + FAIL = "fail" + UNK = "unk" + TITLE = "title" + + def __init__(self, record: Any, logger: Optional[Any] = None, debug_level: int = 0, + output: Optional = None, colors: Optional[dict] = None, **kwargs): + + if output is None: + output = sys.stderr + self.output = output + + if colors is None: + colors = {} + self.colors = colors + + self.conn = get_connection(**kwargs) + + self.record = record + + # Load all the Infrastructure graph, starting at sync point 0 + self.infra = Infrastructure(record=record, logger=logger, debug_level=debug_level, fail_on_corrupt=False, + **kwargs) + self.infra.load(sync_point=0) + + self.record_link = RecordLink(record=record, logger=logger, debug_level=debug_level, fail_on_corrupt=False, + **kwargs) + self.user_service = UserService(record=record, logger=logger, debug_level=debug_level, fail_on_corrupt=False, + **kwargs) + + if logger is None: + logger = logging.getLogger() + self.logger = logger + self.debug_level = debug_level + self.logger.debug(f"configuration uid is {self.conn.get_record_uid(record)}") + + def _msg(self, msg, color_name="NONE"): + print(f"{self.colors.get(color_name, '')}{msg}{self.colors.get(Verify.COLOR_RESET, '')}", file=self.output) + + def run(self, fix: bool = False, lookup_record_func: Optional[Callable] = None): + + self.verify_infra_dag_connections(fix=fix) + self.verify_user_service(fix=fix) + if lookup_record_func is not None: + self.verify_record_exists(fix=fix, lookup_record_func=lookup_record_func) + + @staticmethod + def _split_user(user: str, hostname: Optional[str] = None, host: Optional[str] = None): + domain = None + if "\\" in user: + domain, user = user.split("\\", 1) + if domain == ".": + domain = None + elif "@" in user: + user, domain = user.split("@", 1) + if domain is not None and hostname is not None: + domain = domain.lower() + + # Don't use IP addresses + if re.match(r'\d+\.\d+.\d+\.\d+', host) is not None: + host = None + + if hostname is not None: + hostname = hostname.lower() + if domain == hostname: + domain = None + elif domain == hostname.split(".")[0]: + domain = None + + if host is not None: + host = host.lower() + if domain == host: + domain = None + elif domain == host.split(".")[0]: + domain = None + + return user, domain + + def _find_infra_user_vertex(self, resource_vertex: DAGVertex, user: str, domain: Optional[str] = None) -> ( + Optional)[DAGVertex]: + + user = user.lower() + resource_content = DiscoveryObject.get_discovery_object(resource_vertex) + + # If the domain is None, assume it a local user. 
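+ # For example (hypothetical names), a service account given as "svc_backup" carries no domain and is matched as a local user, while "CORP\\svc_sql" or "svc_sql@corp.example.com" carries a domain and is matched against a directory.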
+ if domain is None: + self.logger.debug(" no domain, assume local user") + for user_vertex in resource_vertex.has_vertices(): + content = DiscoveryObject.get_discovery_object(user_vertex) + self.logger.debug(f" * {content.name}, {content.item.user}") + if content.name.lower() == user: + self.logger.debug(" MATCH") + return user_vertex + hostname = None + if resource_content.record_type == PAM_MACHINE: + hostname = resource_content.item.facts.name + child_user, child_domain = self._split_user( + user=content.item.user, + hostname=hostname, + host=resource_content.item.host) + if user == child_user and child_domain is None: + self.logger.debug(" MATCH") + return user_vertex + return None + + self.logger.debug(" has domain, assume directory user") + + configuration_vertex = self.infra.get_configuration + for vertex in configuration_vertex.has_vertices(): + content = DiscoveryObject.get_discovery_object(vertex) + if content.record_type != PAM_DIRECTORY: + continue + if content.name.lower() == domain.lower(): + for user_vertex in vertex.has_vertices(): + user_content = DiscoveryObject.get_discovery_object(user_vertex) + if user_content.name.lower() == user or user_content.item.user.lower() == user: + return user_vertex + + return None + + def _fix_user_service_acl(self, resource_content: DiscoveryObject, user_vertex: DAGVertex, acl_type: str, + fix: bool = False) -> bool: + + user_content = DiscoveryObject.get_discovery_object(user_vertex) + user_record_uid = user_content.record_uid + if user_record_uid is not None: + acl = self.user_service.get_acl(resource_content.record_uid, user_record_uid) + if acl is not None: + flag = getattr(acl, acl_type) + if flag is False: + + self._msg(f" - user {user_content.name}, {user_record_uid} is " + f"missing an ACL type {acl_type} to " + f"machine {resource_content.name}") + if fix is True: + self._msg(f" added {acl_type} to the ACL between " + f"user {user_content.name}, {user_record_uid} and " + f"machine {resource_content.name}", color_name=Verify.OK) + setattr(acl, acl_type, True) + self.user_service.belongs_to(resource_content.record_uid, user_record_uid, acl=acl) + return True + else: + self._msg(f" not fixing user", color_name=Verify.FAIL) + else: + self.logger.debug(f"user service ACL does have is_service as True") + else: + self.logger.debug(f"there is no ACL between the user and the resource") + else: + self.logger.debug(f"use does not have a record yet") + + return False + + def _get_infra_configuration(self): + + # Check to make sure the user service graph exists. + # The "UserService" instance should do this, but we want to make sure. 
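+ # The root of the user service graph is keyed by the PAM configuration record UID, so a mismatch in the check below means the graph was built against a different configuration record.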
+ if self.user_service.dag.has_graph is False: + self.logger.debug("the user service graph contains no data") + configuration_vertex = self.user_service.dag.get_root + if configuration_vertex.uid != self.conn.get_record_uid(self.record): + raise Exception("The user service graph root/con does not match ") + + return self.infra.get_configuration + + def verify_user_service(self, fix: bool = False): + + """ + + """ + + # STEP 1 - Make sure UserService graph matches Infrastructure + + self._msg("\nChecking if user service/task graph matches infrastructure.\n\n", color_name="title") + + were_fixes = False + + infra_configuration = self._get_infra_configuration() + for resource_vertex in infra_configuration.has_vertices(): + + resource_content = DiscoveryObject.get_discovery_object(resource_vertex) + + if resource_content.record_type != PAM_MACHINE or resource_content.record_uid is None: + self._msg(f" * Machine {resource_content.name} does not have record UID, yet.", + color_name=Verify.UNK) + continue + + user_service_resource_vertex = self.user_service.dag.get_vertex(resource_content.record_uid) + if user_service_resource_vertex is None: + self._msg(f" * Machine {resource_content.name} does not have a vertex in the user service graph") + if fix is True: + user_service_resource_vertex = self.user_service.dag.add_vertex(resource_content.record_uid) + + record_uid = self.conn.get_record_uid(self.record) + self.user_service.belongs_to(record_uid, resource_content.record_uid, acl=None) + + self._msg(f" added vertex for machine {resource_content.name}, and linked " + " to configuration.", color_name=Verify.OK) + else: + self._msg(f" not fixing, skip this resource.", color_name=Verify.FAIL) + continue + + if self.user_service.resource_has_link(resource_content.record_uid) is False: + self._msg(f" * Machine {resource_content.name} is not linked to the configuration.") + + if fix is True: + user_service_resource_vertex.belongs_to(self.user_service.dag.get_root, edge_type=EdgeType.LINK) + self._msg(f" linking machine {resource_content.name} to the configuration", + color_name=Verify.OK) + else: + self._msg(f" not fixing, skip this resource.", color_name=Verify.FAIL) + continue + + self.logger.debug(f"found machine: {resource_content.name}, {resource_content.record_uid}") + + for item in resource_content.item.facts.services: + user, domain = self._split_user(item.user, + hostname=resource_content.item.facts.name, + host=resource_content.item.host) + self.logger.debug(f"found service: {item.name}, {user}, {domain}") + user_vertex = self._find_infra_user_vertex(resource_vertex, user, domain) + if user_vertex is not None: + if self._fix_user_service_acl(resource_content, user_vertex, "is_service", fix=fix) is True: + were_fixes = True + else: + self.logger.debug(f"could not find user for the service on the machine") + + for item in resource_content.item.facts.tasks: + user, domain = self._split_user(item.user, + hostname=resource_content.item.facts.name, + host=resource_content.item.host) + self.logger.debug(f" found task: {item.name}, {user}, {domain}") + user_vertex = self._find_infra_user_vertex(resource_vertex, user, domain) + if user_vertex is not None: + if self._fix_user_service_acl(resource_content, user_vertex, "is_task", fix=fix) is True: + were_fixes = True + else: + self.logger.debug(f" could not find user for the service on the machine") + + if were_fixes is True: + self._msg("\nSaving fixed user service/task graph.\n\n", color_name=Verify.OK) + self.user_service.save() + + def 
verify_record_exists(self, lookup_record_func: Callable, fix: bool = False): + """ + This will verify that a record exists for infrastructure content. + """ + + self._msg("\nChecking if infrastructure records exist.\n\n", color_name=Verify.TITLE) + + infra_configuration = self._get_infra_configuration() + + def _check(vertex: DAGVertex, indent: int = 0) -> bool: + + fixes = False + pad = "".ljust(4 * indent, ' ') + + if not vertex.active: + return False + if vertex.has_data is True: + content = DiscoveryObject.get_discovery_object(vertex) + + self._msg(f"{pad}* {content.record_uid or 'NA'}, {content.title}", color_name="title") + + if content.record_uid is not None: + record = lookup_record_func(content.record_uid) + if record is None: + self._msg(f"{pad} did not have a record.", color_name="fail") + if fix is True: + content.record_uid = None + vertex.add_data(content) + self._msg(f"{pad} remove record uid from graph.", color_name=Verify.OK) + fixes = True + else: + self._msg(f"{pad} has no record uid.", color_name="unk") + + for next_vertex in vertex.has_vertices(): + if next_vertex.uid == vertex.uid: + self._msg(f"{pad} * this vertex loops to itself!", color_name=Verify.FAIL) + continue + edge_type = next_vertex.get_highest_edge_version(vertex.uid) + if edge_type != EdgeType.DELETION: + if _check(next_vertex, indent=indent + 1) is True: + fixes = True + + return fixes + + were_fixes = _check(infra_configuration, indent=0) + + if were_fixes is True: + self._msg("\nSaving fixed record uids in infrastructure graph.\n\n", color_name=Verify.OK) + self.infra.save(delta_graph=False) + + def verify_infra_dag_connections(self, fix: bool = False): + + self._msg("\nChecking if infrastructure vertex refs loop.\n", color_name=Verify.TITLE) + + def _check(vertex: DAGVertex, indent: int = 0): + pad = "".ljust(4 * indent, ' ') + + fixes = False + + text = "" + if vertex.active is False: + text += f"(Inactive) " + elif vertex.corrupt is True: + text += f"{self.colors.get(Verify.FAIL)}Corrupt{self.colors.get(Verify.COLOR_RESET, '')}" + elif vertex.has_data is True: + content = DiscoveryObject.get_discovery_object(vertex) + text += content.title + + self._msg(f"{pad}checking {vertex.uid}; {text}") + for edge in vertex.edges: # type: DAGEdge + if edge.edge_type == EdgeType.DATA: + self._msg(f"{pad} * found data edge") + else: + self._msg(f"{pad} * edge {edge.edge_type} to {edge.head_uid} (parent/belongs_to)") + if edge.head_uid == vertex.uid: + if edge.edge_type == EdgeType.DELETION: + self._msg(f"{pad} * found DELETION of DATA edge") + continue + self._msg(f"{pad} * vertex as a non-DATA edge looping to self", color_name=Verify.FAIL) + if fix is True: + self._msg(f"{pad} * deleting key", color_name=Verify.OK) + fixes = True + edge.delete() + + # Get all the child vertices, allow self ref, so we can delete it if not already deleted. 
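+ # Without this guard, a vertex whose edge points back to itself would recurse forever; when fix=True the self-referencing edge is deleted, otherwise it is skipped.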
+ for next_vertex in vertex.has_vertices(allow_self_ref=True): + if next_vertex.uid == vertex.uid: + version, edge = next_vertex.get_highest_edge_version(vertex.uid) + if edge.edge_type == EdgeType.DELETION: + continue + else: + self._msg(f"{pad} * next vertex references itself", color_name=Verify.FAIL) + if fix is True: + self._msg(f"{pad} * delete this reference", color_name=Verify.OK) + fixes = True + edge.delete() + else: + self._msg(f"{pad} * not fixing, however skipping to prevent loop", + color_name=Verify.FAIL) + continue + + self._msg(f"{pad} next vertex is {next_vertex.uid}") + if _check(next_vertex, indent + 1) is True: + fixes = True + + return fixes + + configuration = self.infra.get_configuration + were_fixes = _check(configuration, 0) + + if were_fixes is True: + self._msg("\nSaving graph vertices references.\n\n", color_name=Verify.OK) + self.infra.save(delta_graph=False) diff --git a/keepercommander/keeper_dag/__init__.py b/keepercommander/keeper_dag/__init__.py new file mode 100644 index 000000000..b4f7d7f54 --- /dev/null +++ b/keepercommander/keeper_dag/__init__.py @@ -0,0 +1,2 @@ +from .dag import DAG +from .types import EdgeType \ No newline at end of file diff --git a/keepercommander/keeper_dag/__version__.py b/keepercommander/keeper_dag/__version__.py new file mode 100644 index 000000000..fa7c0d493 --- /dev/null +++ b/keepercommander/keeper_dag/__version__.py @@ -0,0 +1 @@ +__version__ = '1.0.20' diff --git a/keepercommander/keeper_dag/connection/__init__.py b/keepercommander/keeper_dag/connection/__init__.py new file mode 100644 index 000000000..2933d95bd --- /dev/null +++ b/keepercommander/keeper_dag/connection/__init__.py @@ -0,0 +1,89 @@ +from __future__ import annotations +import logging +from ..exceptions import DAGException, DAGConnectionException +from ..crypto import generate_random_bytes +from ..types import SyncData, SyncQuery, DataPayload +import json +import base64 +from typing import Optional, Union, TYPE_CHECKING +if TYPE_CHECKING: # pragma: no cover + Logger = Union[logging.RootLogger, logging.Logger] + + +class ConnectionBase: + + def __init__(self, is_device: bool, logger: Optional[Logger] = None): + # device is a gateway device if is_device is False then we use user authentication flow + self.is_device = is_device + + if logger is None: + logger = logging.getLogger() + self.logger = logger + + @staticmethod + def get_record_uid(record: object) -> str: + pass + + @staticmethod + def get_key_bytes(record: object) -> bytes: + pass + + def rest_call_to_router(self, http_method, endpoint, payload_json=None) -> str: + return "" + + def _endpoint(self, action: str) -> str: + if action.startswith("/") is False: + action = "/" + action + + base = "/api/device" + if not self.is_device: + base = "/api/user" + return base + action + + def add_data(self, payload: Union[DataPayload, str]): + + # if payload is DataPayload + if isinstance(payload, DataPayload): + payload_data = payload.model_dump_json() + elif isinstance(payload, str): + payload_data = payload + + # make sure it is a valid json and raise and exception if not. 
make an exception for the case of a string + # that is a valid json + if not payload_data.startswith('{') and not payload_data.endswith('}'): + raise DAGException(f'Invalid payload: {payload_data}') + + # double check if it is a valid json inside the string + json.loads(payload_data) + else: + raise DAGException(f'Unsupported payload type: {type(payload)}') + + try: + self.rest_call_to_router("POST", self._endpoint("/add_data"), payload_data) + except DAGConnectionException as err: + raise err + except Exception as err: + raise DAGException(f"Could not create a new DAG structure: {err}") + + def sync(self, stream_id: str, sync_point: Optional[int] = 0, graph_id: Optional[int] = 0) -> SyncData: + + try: + sync_query = SyncQuery( + streamId=stream_id, + deviceId=base64.urlsafe_b64encode(generate_random_bytes(16)).decode(), + syncPoint=sync_point, + graphId=graph_id + ) + sync_query_json_str = sync_query.model_dump_json() + + data_resp = self.rest_call_to_router("POST", self._endpoint("/sync"), sync_query_json_str) + sync_data_resp = SyncData.model_validate_json(data_resp) + + return sync_data_resp + except DAGConnectionException as err: + raise err + except Exception as err: + raise DAGException(f"Could not load the DAG structure: {err}") + + def debug_dump(self) -> str: + return "Connection does not allow debug dump." diff --git a/keepercommander/keeper_dag/connection/commander.py b/keepercommander/keeper_dag/connection/commander.py new file mode 100644 index 000000000..e4b36c450 --- /dev/null +++ b/keepercommander/keeper_dag/connection/commander.py @@ -0,0 +1,130 @@ +from __future__ import annotations +import logging +from . import ConnectionBase +from ..exceptions import DAGConnectionException +from ..crypto import bytes_to_base64 +from ..utils import value_to_boolean +import os +import requests +import time + +try: # pragma: no cover + from keepercommander import crypto, utils, rest_api +except ImportError: # pragma: no cover + raise Exception("Please install the keepercommander module to use the Commander connection.") + +from typing import Optional, Union, TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from keepercommander.params import KeeperParams + from keepercommander.vault import KeeperRecord + Content = Union[str, bytes, dict] + QueryValue = Union[list, dict, str, float, int, bool] + Logger = Union[logging.RootLogger, logging.Logger] + + +class Connection(ConnectionBase): + + def __init__(self, params: KeeperParams, encrypted_transmission_key: Optional[bytes] = None, + encrypted_session_token: Optional[bytes] = None, verify_ssl: bool = True, is_ws: bool = False, + logger: Optional[Logger] = None): + + super().__init__(is_device=False, logger=logger) + self.params = params + self.verify_ssl = verify_ssl + self.is_ws = is_ws + self.encrypted_transmission_key = encrypted_transmission_key if encrypted_transmission_key else None + self.encrypted_session_token = encrypted_session_token if encrypted_session_token else None + + if self.encrypted_transmission_key is None or self.encrypted_session_token is None: + self.get_keeper_tokens() + + @staticmethod + def get_record_uid(record: KeeperRecord) -> str: + return record.record_uid + + @staticmethod + def get_key_bytes(record: KeeperRecord) -> bytes: + return record.record_key + + @property + def hostname(self) -> str: + # The host is connect.keepersecurity.com, connect.dev.keepersecurity.com, etc. Append "connect" in front + # of host used for Commander. 
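+ # For example, a Commander config whose "server" value is keepersecurity.com (a hypothetical value) resolves to connect.keepersecurity.com, unless the ROUTER_HOST environment variable overrides it.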
+ return os.environ.get("ROUTER_HOST", f'connect.{self.params.config.get("server")}') + + @property + def dag_server_url(self) -> str: + + # Allow override of the URL. If not set, get the hostname from the config. + hostname = os.environ.get("KROUTER_URL", self.hostname) + if hostname.startswith('ws') or hostname.startswith('http'): + return hostname + + use_ssl = value_to_boolean(os.environ.get("USE_SSL", True)) + if self.is_ws is True: + prot_pref = 'ws' + else: + prot_pref = 'http' + if use_ssl is True: + prot_pref += "s" + + return f'{prot_pref}://{hostname}' + + def get_keeper_tokens(self): + transmission_key = utils.generate_aes_key() + server_public_key = rest_api.SERVER_PUBLIC_KEYS[self.params.rest_context.server_key_id] + + if self.params.rest_context.server_key_id < 7: + self.encrypted_transmission_key = crypto.encrypt_rsa(transmission_key, server_public_key) + else: + self.encrypted_transmission_key = crypto.encrypt_ec(transmission_key, server_public_key) + self.encrypted_session_token = crypto.encrypt_aes_v2( + utils.base64_url_decode(self.params.session_token), transmission_key) + + def rest_call_to_router(self, http_method: str, endpoint: str, + payload_json: Optional[Union[bytes, str]] = None, + retry: int = 3, + retry_wait: int = 10, + timeout: Optional[int] = None) -> str: + if payload_json is not None and isinstance(payload_json, bytes) is False: + payload_json = payload_json.encode() + + if endpoint.startswith("/") is False: + endpoint = "/" + endpoint + + url = self.dag_server_url + endpoint + + attempt = 0 + while True: + try: + attempt += 1 + self.logger.debug(f"DAG web service call to {url} [{attempt}/{retry}]") + response = requests.request( + method=http_method, + url=url, + verify=self.verify_ssl, + headers={ + 'TransmissionKey': bytes_to_base64(self.encrypted_transmission_key), + 'Authorization': f'KeeperUser {bytes_to_base64(self.encrypted_session_token)}' + }, + data=payload_json, + timeout=timeout + ) + self.logger.debug(f"response status: {response.status_code}") + response.raise_for_status() + return response.text + + except requests.exceptions.HTTPError as http_err: + err_msg = f"{http_err.response.status_code}, {http_err.response.text}" + + except Exception as err: + err_msg = str(err) + + self.logger.info(f"call to DAG web service had a problem: {err_msg}.") + if attempt >= retry: + raise DAGConnectionException(f"Call to DAG web service {url}, after {retry} " + f"attempts, failed!: {err_msg}") + + self.logger.info(f"will retry call after {retry_wait} seconds.") + time.sleep(retry_wait) diff --git a/keepercommander/keeper_dag/connection/ksm.py b/keepercommander/keeper_dag/connection/ksm.py new file mode 100644 index 000000000..80be4002b --- /dev/null +++ b/keepercommander/keeper_dag/connection/ksm.py @@ -0,0 +1,254 @@ +from __future__ import annotations +from . 
import ConnectionBase +from ..utils import value_to_boolean +from ..exceptions import DAGException, DAGConnectionException + +from cryptography.hazmat.primitives import hashes +from cryptography.hazmat.primitives.asymmetric import ec +from cryptography.hazmat.primitives.serialization import load_der_private_key + +try: # pragma: no cover + from keeper_secrets_manager_core import utils + from keeper_secrets_manager_core.configkeys import ConfigKeys + from keeper_secrets_manager_core.storage import InMemoryKeyValueStorage, KeyValueStorage + from keeper_secrets_manager_core.utils import url_safe_str_to_bytes, bytes_to_base64, generate_random_bytes +except ImportError: # pragma: no cover + raise Exception("Please install the keeper_secrets_manager_core module to use the Ksm connection.") + +import logging +import json +import os +import requests +import time + +from typing import Union, Optional, TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from keeper_secrets_manager_core.storage import KeyValueStorage + from keeper_secrets_manager_core.dto.dtos import Record + KsmConfig = Union[dict, str, KeyValueStorage] + Content = Union[str, bytes, dict] + QueryValue = Union[list, dict, str, float, int, bool] + Logger = Union[logging.RootLogger, logging.Logger] + + +class Connection(ConnectionBase): + + KEEPER_CLIENT = 'ms16.5.0' + + def __init__(self, config: Union[str, dict, KeyValueStorage], verify_ssl: bool = None, + logger: Optional[Logger] = None): + + super().__init__(is_device=True, logger=logger) + + if InMemoryKeyValueStorage.is_base64(config): + config = utils.base64_to_string(config) + if isinstance(config, str) is True: + try: + config = json.loads(config) + except json.JSONDecodeError as err: + raise DAGException(f"The configuration JSON could not be decoded: {err}") + + if isinstance(config, dict) is False and isinstance(config, KeyValueStorage) is False: + raise DAGException("The configuration is not a dictionary.") + + if verify_ssl is None: + verify_ssl = value_to_boolean(os.environ.get("VERIFY_SSL", "TRUE")) + + self.config = config + self.verify_ssl = verify_ssl + self._signature = None + self._challenge_str = None + + @staticmethod + def get_record_uid(record: Record) -> str: + return record.uid + + @staticmethod + def get_key_bytes(record: Record) -> bytes: + return record.record_key_bytes + + def get_config_value(self, key: ConfigKeys) -> str: + if isinstance(self.config, KeyValueStorage) is True: + return self.config.get(key) + else: + return self.config.get(key.value) + + @property + def hostname(self) -> str: + return os.environ.get("ROUTER_HOST", self.get_config_value(ConfigKeys.KEY_HOSTNAME)) + + @property + def client_id(self) -> str: + return self.get_config_value(ConfigKeys.KEY_CLIENT_ID) + + @property + def private_key(self) -> str: + return self.get_config_value(ConfigKeys.KEY_PRIVATE_KEY) + + @property + def app_key(self) -> str: + return self.get_config_value(ConfigKeys.KEY_APP_KEY) + + def router_url_from_ksm_config(self) -> str: + return f'connect.{self.hostname}' + + def ws_router_url_from_ksm_config(self, is_ws: bool = False) -> str: + + router_host = self.router_url_from_ksm_config() + + kpam_router_ssl_enabled_env = value_to_boolean(os.environ.get("USE_SSL", True)) + + if is_ws: + prot_pref = 'ws' + else: + prot_pref = 'http' + + if not kpam_router_ssl_enabled_env: + return f'{prot_pref}://{router_host}' + else: + return f'{prot_pref}s://{router_host}' + + def http_router_url_from_ksm_config_or_env(self) -> str: + + router_host_from_env = 
os.getenv("KROUTER_URL") + if router_host_from_env: + router_http_host = router_host_from_env + else: + router_http_host = self.ws_router_url_from_ksm_config() + + return router_http_host.replace('ws', 'http') + + def authenticate(self, + refresh: bool = False, + retry: int = 3, + retry_wait: int = 10, + timeout: Optional[int] = None) -> (str, str): + + if self._signature is None or refresh is True: + + self.logger.debug(f"signature is blank or needs to be refresh {refresh}") + + router_http_host = self.http_router_url_from_ksm_config_or_env() + url = f'{router_http_host}/api/device/get_challenge' + + self._signature = None + + attempt = 0 + while True: + try: + attempt += 1 + response = requests.get(url, + verify=self.verify_ssl, + timeout=timeout) + response.raise_for_status() + + self._challenge_str = response.text + if self._challenge_str is None or self._challenge_str == "": + raise Exception("Challenge text is blank. Cannot authenticate into the DAG web service.") + + private_key_der_bytes = url_safe_str_to_bytes(self.private_key) + client_id_bytes = url_safe_str_to_bytes(self.client_id) + + self.logger.debug('adding challenge to the signature before connecting to the router') + challenge_bytes = url_safe_str_to_bytes(self._challenge_str) + client_id_bytes = client_id_bytes + challenge_bytes + + pk = load_der_private_key(private_key_der_bytes, password=None) + sig = pk.sign(client_id_bytes, ec.ECDSA(hashes.SHA256())) + + self._signature = bytes_to_base64(sig) + break + + except requests.exceptions.HTTPError as http_err: + err_msg = f"{http_err.response.status_code}, {http_err.response.text}" + + except Exception as err: + err_msg = str(err) + + self.logger.info(f"call to challenge had a problem: {err_msg}.") + if attempt >= retry: + raise DAGConnectionException(f"Call to challenge {url}, after {retry} " + f"attempts, failed!: {err_msg}") + + self.logger.info(f"will retry call after {retry_wait} seconds.") + time.sleep(retry_wait) + + return self._signature, self._challenge_str + + def rest_call_to_router(self, http_method: str, endpoint: str, + payload_json: Optional[Union[bytes, str]] = None, + retry: int = 3, + retry_wait: int = 10, + timeout: Optional[int] = None) -> str: + + # If the timeout is set to 0, set to the default which is None. + if timeout == 0: + timeout = None + + router_host = self.http_router_url_from_ksm_config_or_env() + url = router_host + endpoint + + refresh = False + attempt = 0 + while True: + + attempt += 1 + + # Keep authenticate outside the call router try. + # This is to prevent too many retries. + # For example, 3 retry of the auth, 3 retry of the request, will be 9 retries. + signature, challenge_str = self.authenticate(refresh=refresh) + headers = dict( + Signature=signature, + ClientVersion=Connection.KEEPER_CLIENT, + Authorization=f'KeeperDevice {self.client_id}', + Challenge=challenge_str + ) + self.logger.debug(f'connecting with headers: {headers}') + + try: + self.logger.debug(f"DAG web service call to {url} [{attempt}/{retry}]") + response = requests.request( + method=http_method, + url=url, + data=payload_json if payload_json else None, + verify=self.verify_ssl, + timeout=timeout, + headers=headers, + ) + + self.logger.debug(f"response status: {response.status_code}") + + # If we get a 401 Unauthorized, and we have not yet refreshed, + # refresh the signature. + if response.status_code == 401 and refresh is False: + self.logger.debug("rest call was Unauthorized") + + # The attempt didn't count. 
+ # We get one refresh, then it becomes an exception. + refresh = True + attempt -= 1 + continue + + response.raise_for_status() + return response.text + + # Handle errors outside of requests + except requests.exceptions.HTTPError as http_err: + + err_msg = f"{http_err.response.status_code}, {http_err.response.text}" + content = http_err.response.text + + except Exception as err: + err_msg = str(err) + content = None + + self.logger.info(f"call to DAG web service had a problem: {err_msg}, {content}") + if attempt >= retry: + self.logger.info(f"payload: {payload_json}") + raise DAGConnectionException(f"Call to DAG web service {url}, after {retry} " + f"attempts, failed!: {err_msg}: {content} : {payload_json}") + + self.logger.info(f"will retry call after {retry_wait} seconds.") + time.sleep(retry_wait) diff --git a/keepercommander/keeper_dag/connection/local.py b/keepercommander/keeper_dag/connection/local.py new file mode 100644 index 000000000..0df24440c --- /dev/null +++ b/keepercommander/keeper_dag/connection/local.py @@ -0,0 +1,488 @@ +from . import ConnectionBase +import logging +from ..types import DataPayload, SyncData, EdgeType +import json +import os +from tabulate import tabulate + +try: + import sqlite3 + from contextlib import closing +except ImportError: + raise Exception("Please install the sqlite3 module to use the Local connection.") + +from typing import Optional, Union, Any, TYPE_CHECKING +if TYPE_CHECKING: + Logger = Union[logging.RootLogger, logging.Logger] + + +class Connection(ConnectionBase): + + """ + BIG TIME NOTE + + This is a fake DAG engine used for unit tests. + It tries best to emulate krouter/workflow. + This is no substitute for testing against a krouter instance. + """ + + DB_FILE = "local_dag.db" + DEBUG = 0 + + def __init__(self, limit: int = 100, db_file: Optional[str] = None, db_dir: Optional[str] = None, + logger: Optional[Any] = None): + + super().__init__(is_device=True, logger=logger) + + if db_file is None: + db_file = os.environ.get("LOCAL_DAG_DB_FILE", Connection.DB_FILE) + if db_dir is None: + db_dir = os.environ.get("LOCAL_DAG_DIR", os.environ.get("HOME", os.environ.get("USERPROFILE", "./"))) + + self.db_file = os.path.join(db_dir, db_file) + self.limit = limit + + self.create_database() + + def debug(self, msg): + if Connection.DEBUG == 1: + logging.debug(f"DAG: {msg}") + + @staticmethod + def get_record_uid(record: object) -> bytes: + if hasattr(record, "record_uid") is True: + return getattr(record, "record_uid") + elif hasattr(record, "uid") is True: + return getattr(record, "uid") + raise Exception(f"Cannot find the record uid in object type: {type(record)}.") + + @staticmethod + def get_key_bytes(record: object) -> bytes: + if hasattr(record, "record_key_bytes") is True: + return getattr(record, "record_key_bytes") + elif hasattr(record, "record_key") is True: + return getattr(record, "record_key") + raise Exception("Cannot find the record key bytes in object.") + + def clear_database(self): + try: + os.unlink(self.db_file) + except (Exception,): + pass + + def create_database(self): + + self.debug("create local dag database") + + with closing(sqlite3.connect(self.db_file))as connection: + with closing(connection.cursor()) as cursor: + + # This is based on workflow, Database.kt. + # The UIDs are stored a character instead of bytes to make them more readable for debugging. + + # The 'type' columns are stored a TEXT. + # This is because the WS wants text for the enum, but stores + # it as an INTEGER. 
+ # We are just going to store it as a TEXT and avoid the middle man. + + cursor.execute( + """ +CREATE TABLE IF NOT EXISTS dag_edges ( + graph_id INTEGER NOT NULL DEFAULT 0, + edge_id INTEGER PRIMARY KEY AUTOINCREMENT, + type TEXT NOT NULL, + head CHARACTER(22) NOT NULL, + tail CHARACTER(22) NOT NULL, + data BLOB, + origin CHARACTER(22), + path TEXT, + created timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, + creator_id BLOB(16) DEFAULT NULL, + creator_type INTEGER DEFAULT NULL, + creator_name TEXT DEFAULT NULL, + FOREIGN KEY(head) REFERENCES dag_vertices(vertex_id), + FOREIGN KEY(tail) REFERENCES dag_vertices(vertex_id) +) + """ + ) + + cursor.execute( + """ +CREATE TABLE IF NOT EXISTS dag_vertices ( + vertex_id CHARACTER(22) NOT NULL, + type TEXT NOT NULL, + name TEXT, + owner_id BLOB(16) DEFAULT NULL +) + """ + ) + + cursor.execute( + """ +CREATE TABLE IF NOT EXISTS dag_streams ( + graph_id INTEGER NOT NULL, + sync_point INTEGER PRIMARY KEY AUTOINCREMENT, + vertex_id CHARACTER(22) NOT NULL, + edge_id INTEGER NOT NULL, + count INTEGER NOT NULL DEFAULT 0, + deletion INTEGER NOT NULL DEFAULT 0, + UNIQUE(vertex_id,edge_id), + FOREIGN KEY(vertex_id) REFERENCES dag_vertices(vertex_id), + FOREIGN KEY(edge_id) REFERENCES dag_edges(edge_id) +) + """ + ) + connection.commit() + + os.chmod(self.db_file, 0o777) + + @staticmethod + def _payload_to_json(payload: Union[DataPayload, str]) -> str: + + # if payload is DataPayload + if isinstance(payload, DataPayload): + payload_data = payload.model_dump_json() + elif isinstance(payload, str): + payload_data = payload + + # make sure it is a valid json and raise and exception if not. make an exception for the case of a string + # that is a valid json + if not payload_data.startswith('{') and not payload_data.endswith('}'): + raise Exception(f'Invalid payload: {payload_data}') + + # double check if it is a valid json inside the string + json.loads(payload_data) + else: + raise Exception(f'Unsupported payload type: {type(payload)}') + + return json.loads(payload_data) + + def _find_stream_id(self, payload: DataPayload): + + data = Connection._payload_to_json(payload) + + # Find the vertex that does not belong to any other vertex. + # This is normally root for a full DAG, but will be a vertex if adding additional edges. + # 100% sure this could be written better. + # 1000% sure this could be written better. + # TODO: Only refs that are type PAM_NETWORK or PAM_USER can contain the stream id. + # Change code to ignore all other ref types. + + self.debug("finding stream id") + + # First check if we can route with existing edges in the database. + stream_id = None + with closing(sqlite3.connect(self.db_file)) as connection: + with closing(connection.cursor()) as cursor: + + graph_id = data.get("graphId") + + stream_ids = {} + + runs = 0 + for item in data.get("dataList"): + + # Get the head UID of the edge and then find an edge where the UID is the tail. + # If we find an edge, use its head to find an edge where the UID is the tail. + # Repeat until we can't find and edge, that is a stream ID + # Tally all the stream ID and take the best. + item_stream_id = item.get("ref")["value"] + current_stream_id = item_stream_id + while True: + self.debug(f" check stream id {current_stream_id}") + sql = "SELECT head, edge_id FROM dag_edges WHERE tail=? AND graph_id=? AND type != ?" 
+                        res = cursor.execute(sql, (current_stream_id, graph_id, EdgeType.DATA.value))
+                        row = res.fetchone()
+                        if row is None:
+                            self.debug("    no edge found")
+                            if current_stream_id == item_stream_id:
+                                current_stream_id = None
+                            break
+                        current_stream_id = row[0]
+                        self.debug(f"    got {current_stream_id}")
+
+                    if current_stream_id is not None:
+                        if current_stream_id not in stream_ids:
+                            stream_ids[current_stream_id] = 0
+                        stream_ids[current_stream_id] += 1
+                    else:
+                        # If we didn't find anything with the tail, check starting with the head.
+                        item_stream_id = item.get("parentRef")["value"]
+                        current_stream_id = item_stream_id
+                        while True:
+                            self.debug(f"    check stream id {current_stream_id}")
+                            sql = "SELECT head, edge_id FROM dag_edges WHERE tail=? AND graph_id=? AND type != ?"
+                            res = cursor.execute(sql, (current_stream_id, graph_id, EdgeType.DATA.value))
+                            row = res.fetchone()
+                            if row is None:
+                                self.debug("    no edge found")
+                                if current_stream_id == item_stream_id:
+                                    current_stream_id = None
+                                break
+                            current_stream_id = row[0]
+                            self.debug(f"    got {current_stream_id}")
+
+                        if current_stream_id is not None:
+                            if current_stream_id not in stream_ids:
+                                stream_ids[current_stream_id] = 0
+                            stream_ids[current_stream_id] += 1
+
+                    # Until we rewrite this, exit after we check 3 edges.
+                    # This will slow down after a bunch of edges are added.
+                    # We also fixed stuff in our code to prevent the errors we were seeing.
+                    # Might want to switch to recursion; a rough CTE sketch is included below.
+                    # https://www.sqlite.org/lang_with.html
+                    if runs > 3:
+                        break
+                    runs += 1
+
+                if len(stream_ids) > 0:
+                    sorted_stream_ids = [k for k, v in sorted(stream_ids.items(), key=lambda item: item[1])]
+                    stream_id = sorted_stream_ids.pop()
+
+        # If the stream id is None, this is the first save of the DAG.
+        # No edges existed.
+        # Compare the data list items.
+        # The one that is never used as a tail is the stream id. 
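+        # A possible replacement for the walk-up loops above is a single recursive CTE.
+        # Rough sketch only, not wired in: it assumes the dag_edges schema from
+        # create_database(), with the parameters bound to
+        # (start_uid, graph_id, EdgeType.DATA.value):
+        #
+        #   WITH RECURSIVE walk(uid) AS (
+        #       SELECT ?
+        #       UNION ALL
+        #       SELECT e.head FROM dag_edges e JOIN walk w ON e.tail = w.uid
+        #       WHERE e.graph_id = ? AND e.type != ?
+        #   )
+        #   SELECT uid FROM walk
+        #
+        # For a linear chain the last row returned is the top of the chain, which is the
+        # stream id candidate.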
+ if stream_id is None: + self.debug("stream id None, edges might be new") + # Get a starting spot + found = {} + for item in data.get("dataList"): + head_uid = item.get("parentRef")["value"] + found[head_uid] = True + for item in data.get("dataList"): + tail_uid = item.get("ref")["value"] + found.pop(tail_uid, None) + stream_ids = [uid for uid in found] + if len(stream_ids) > 0: + stream_id = stream_ids[0] + + # If we can't find stream ID, assume it's on the first item in the dataList + if stream_id is None: + item = data.get("dataList")[0] + stream_id = item.get("parentRef")["value"] or item.get("ref")["value"] + + return stream_id + + def add_data(self, payload: DataPayload): + + stream_id = self._find_stream_id(payload) + self.debug(f"STREAM ID IS {stream_id}") + + data = Connection._payload_to_json(payload) + + with closing(sqlite3.connect(self.db_file)) as connection: + with closing(connection.cursor()) as cursor: + + origin_id = data.get("origin")["value"] + graph_id = data.get("graphId") + + saved_vertex = {} + for item in data.get("dataList"): + + tail_uid = item.get("ref")["value"] + tail_type = item.get("ref")["type"] + tail_name = item.get("ref")["name"] + + head_uid = None + head_type = None + head_name = None + if item.get("parentRef") is not None: + head_uid = item.get("parentRef")["value"] + head_type = item.get("parentRef")["type"] + head_name = item.get("parentRef")["name"] + + edge_type = item.get("type") + path = item.get("path") + + sql = "INSERT INTO dag_edges (type, head, tail, data, origin, graph_id, path) " + sql += "VALUES (?,?,?,?,?,?,?)" + cursor.execute(sql, ( + edge_type, + head_uid, + tail_uid, + item.get("content"), + origin_id, + graph_id, + path + )) + edge_id = cursor.lastrowid + + sql = "INSERT INTO dag_streams (graph_id, vertex_id, edge_id, count) VALUES (?, ?, ?, ?)" + cursor.execute(sql, ( + graph_id, + stream_id, + edge_id, + 1 + )) + + if saved_vertex.get(tail_uid) is None: + # Type is RefType enum value + sql = "INSERT INTO dag_vertices (vertex_id, type, name) VALUES (?, ?, ?)" + cursor.execute(sql, ( + tail_uid, + tail_type, + tail_name + )) + saved_vertex[tail_uid] = True + if saved_vertex.get(head_uid) is None: + # Type is RefType enum value + sql = "INSERT INTO dag_vertices (vertex_id, type, name) VALUES (?, ?, ?)" + cursor.execute(sql, ( + head_uid, + head_type, + head_name + )) + saved_vertex[head_uid] = True + + connection.commit() + + def sync(self, stream_id: str, sync_point: Optional[int] = 0, graph_id: Optional[int] = 0) -> SyncData: + + self.debug(f"Sync: stream id {stream_id}, sync point {sync_point}, graph {graph_id}") + + edge_type_map = { + EdgeType.DATA.value: "data", + EdgeType.KEY.value: "key", + EdgeType.LINK.value: "link", + EdgeType.ACL.value: "acl", + EdgeType.DELETION.value: "deletion", + EdgeType.DENIAL.value: "denial", + EdgeType.UNDENIAL.value: "undenial", + } + + resp = { + "syncPoint": 0, + "data": [], + "hasMore": False + } + + with closing(sqlite3.connect(self.db_file)) as connection: + with closing(connection.cursor()) as cursor: + self.debug(f"... loading DAG, {stream_id}, {sync_point}, {self.limit + 1}") + + args = [stream_id, sync_point, graph_id] + sql = "SELECT sync_point, edge_id FROM dag_streams WHERE vertex_id = ? AND deletion = 0 "\ + "AND sync_point > ? AND graph_id=? ORDER BY sync_point ASC LIMIT ?" 
+ args.append(self.limit + 1) + res = cursor.execute(sql, tuple(args)) + rows = list(res.fetchall()) + if len(rows) > self.limit: + resp["hasMore"] = True + rows.pop() + for row in rows: + resp["syncPoint"] = row[0] + + args = [row[1], graph_id] + sql = "SELECT head, tail, data, path, type FROM dag_edges WHERE edge_id = ? AND graph_id=?" + res = cursor.execute(sql, tuple(args)) + edges = res.fetchone() + + # If the head and tail are the same (DATA edge), then parent_ref is None. + # Else include a parent_ref + parent_ref = None + if edges[1] != edges[0]: + + sql = "SELECT type FROM dag_vertices WHERE vertex_id = ?" + res = cursor.execute(sql, (edges[0],)) + head_vertex = res.fetchone() + + parent_ref = { + "type": head_vertex[0], + "value": edges[0], + "name": None + } + + sql = "SELECT type FROM dag_vertices WHERE vertex_id = ?" + res = cursor.execute(sql, (edges[1],)) + tail_vertex = res.fetchone() + + resp["data"].append({ + "type": edge_type_map.get(edges[4]), + "ref": { + "type": tail_vertex[0], + "value": edges[1], + "name": None + }, + "parentRef": parent_ref, + "content": edges[2], + "path": edges[3], + "deletion": False + }) + + sync_data_resp = SyncData.model_validate_json(json.dumps(resp)) + return sync_data_resp + + def debug_dump(self) -> str: + + ret = "" + + with closing(sqlite3.connect(self.db_file)) as connection: + with closing(connection.cursor()) as cursor: + + cols = ["graph_id", "edge_id", "type", "head", "tail", "data", "origin", "path", "created", + "creator_id", "creator_type", "creator_name"] + + sql = f"SELECT {','.join(cols) } FROM dag_edges ORDER BY edge_id DESC" + res = cursor.execute(sql,) + + ret += "dag_edges\n" + ret += "=========\n" + table = [] + for row in res.fetchall(): + table.append(list(row)) + + ret += tabulate(table, cols) + "\n\n" + + cols = ["e.graph_id", "e.edge_id", "v.vertex_id", "v.type", "v.name", "v.owner_id"] + + sql = f"SELECT {','.join(cols) } "\ + "FROM dag_vertices v "\ + "INNER JOIN dag_edges e ON e.tail = v.vertex_id "\ + "ORDER BY e.graph_id DESC, e.edge_id DESC" + res = cursor.execute(sql,) + + ret += "dag_vertices\n" + ret += "============\n" + table = [] + for row in res.fetchall(): + table.append(list(row)) + + ret += tabulate(table, cols) + "\n\n" + + cols = ["graph_id", "edge_id", "sync_point", "vertex_id", "count", "deletion"] + + sql = f"SELECT {','.join(cols) } FROM dag_streams ORDER BY edge_id DESC" + res = cursor.execute(sql,) + + ret += "dag_streams\n" + ret += "===========\n" + table = [] + for row in res.fetchall(): + table.append(list(row)) + + ret += tabulate(table, cols) + "\n\n" + + return ret + + def update_edge_content(self, graph_id: int, head_uid: str, tail_uid: str, content: str): + + with closing(sqlite3.connect(self.db_file)) as connection: + with closing(connection.cursor()) as cursor: + + sql = "UPDATE dag_edges SET data=? WHERE graph_id=? AND head=? AND tail=?" 
+ res = cursor.execute(sql, (content, graph_id, head_uid, tail_uid)) + + connection.commit() + + def clear(self): + + with closing(sqlite3.connect(self.db_file)) as connection: + with closing(connection.cursor()) as cursor: + + for table in ["dag_streams", "dag_edges", "dag_vertices"]: + sql = f"DELETE FROM {table}" + cursor.execute(sql, ) + + connection.commit() diff --git a/keepercommander/keeper_dag/crypto.py b/keepercommander/keeper_dag/crypto.py new file mode 100644 index 000000000..bf66ce558 --- /dev/null +++ b/keepercommander/keeper_dag/crypto.py @@ -0,0 +1,77 @@ +from cryptography.hazmat.primitives.ciphers.aead import AESGCM +import base64 +import os +from typing import Optional, Union + + +def encrypt_aes(data: bytes, key: bytes, iv: bytes = None) -> bytes: + aesgcm = AESGCM(key) + iv = iv or os.urandom(12) + enc = aesgcm.encrypt(iv, data, None) + return iv + enc + + +def decrypt_aes(data: bytes, key: bytes) -> bytes: + aesgcm = AESGCM(key) + return aesgcm.decrypt(data[:12], data[12:], None) + + +def bytes_to_base64(b: Union[str, bytes]) -> str: + + if isinstance(b, str) is True: + b = b.encode() + + return base64.b64encode(b).decode() + + +def urlsafe_str_to_bytes(s: str) -> bytes: + b = base64.urlsafe_b64decode(s + '==') + return b + + +def str_to_bytes(s: str) -> bytes: + b = base64.b64decode(s + '==') + return b + + +def bytes_to_urlsafe_str(b: Union[str,bytes]) -> str: + """ + Convert bytes to a URL-safe base64 encoded string. + + Args: + b (bytes): The bytes to be encoded. + + Returns: + str: The URL-safe base64 encoded representation of the input bytes. + """ + if isinstance(b, str) is True: + b = b.encode() + + return base64.urlsafe_b64encode(b).decode().rstrip('=') + + +def bytes_to_str(b: bytes) -> str: + """ + Convert bytes to a URL-safe base64 encoded string. + + Args: + b (bytes): The bytes to be encoded. + + Returns: + str: The URL-safe base64 encoded representation of the input bytes. + """ + return base64.b64encode(b).decode().rstrip('=') + + +def generate_random_bytes(length: int) -> bytes: + return os.urandom(length) + + +def generate_uid_bytes(length: int = 16) -> bytes: + return generate_random_bytes(length) + + +def generate_uid_str(uid_bytes: Optional[bytes] = None) -> str: + if uid_bytes is None: + uid_bytes = generate_uid_bytes() + return bytes_to_urlsafe_str(uid_bytes) diff --git a/keepercommander/keeper_dag/dag.py b/keepercommander/keeper_dag/dag.py new file mode 100644 index 000000000..9580d70bf --- /dev/null +++ b/keepercommander/keeper_dag/dag.py @@ -0,0 +1,1271 @@ +from __future__ import annotations +import logging +import os +import time + +from .vertex import DAGVertex +from .types import DAGData, EdgeType, RefType, Ref, DataPayload +from .crypto import encrypt_aes, decrypt_aes, generate_uid_str, bytes_to_str, str_to_bytes, urlsafe_str_to_bytes +from .exceptions import (DAGConfirmException, DAGPathException, DAGVertexAlreadyExistsException, DAGKeyException, + DAGVertexException, DAGCorruptException, DAGDataException) +from .utils import value_to_boolean +import json +import importlib +from typing import Optional, Union, List, Any, TYPE_CHECKING + +if TYPE_CHECKING: + from .connection import ConnectionBase + Content = Union[str, bytes, dict] + QueryValue = Union[list, dict, str, float, int, bool] + Logger = Union[logging.RootLogger, logging.Logger] + + +class DAG: + + # Debug level. Increase to get finer debug messages. + DEBUG_LEVEL = 0 + + UID_KEY_BYTES_SIZE = 16 + UID_KEY_STR_SIZE = 22 + + # For the dot output, enum to text. 
+ EDGE_LABEL = { + EdgeType.DATA: "DATA", + EdgeType.KEY: "KEY", + EdgeType.LINK: "LINK", + EdgeType.ACL: "ACL", + EdgeType.DELETION: "DELETION", + } + + def __init__(self, conn: ConnectionBase, record: Optional[object] = None, key_bytes: Optional[bytes] = None, + name: Optional[str] = None, graph_id: int = 0, auto_save: bool = False, + history_level: int = 0, logger: Optional[Any] = None, debug_level: int = 0, is_dev: bool = False, + vertex_type: RefType = RefType.PAM_NETWORK, decrypt: bool = True, fail_on_corrupt: bool = True, + data_requires_encryption: bool = False, log_prefix: str = "GraphSync", + save_batch_count: Optional[int] = None): + + """ + Create a GraphSync instance. + + :param conn: Connection instance + :param record: If set, the key bytes will use the key bytes in the record. Overrides key_bytes. + :param key_bytes: If set, these key bytes will be used. + :param name: Optional name for the graph. + :param graph_id: Graph ID sets which graph to load for the graph. + :param auto_save: Automatically save when modifications are performed. Default is False. + :param history_level: How much edge history to keep in memory. Default is 0, no history. + :param logger: Python logger instance to use for logging. + :param debug_level: Debug level; the higher the number will result in more debug information. + :param is_dev: Is the code running in a development environment? + :param vertex_type: The default vertex/ref type for the root vertex, if auto creating. + :param decrypt: Decrypt the graph; Default is TRUE + :param fail_on_corrupt: If unable to decrypt encrypted data, fail out. + :param data_requires_encryption: Data edges are already encrypted. Default is False. + :param log_prefix: Text prepended to the log messages. Handy if dealing with multiple graphs + :param save_batch_count: The number of edges to save at one time. + :return: Instance of GraphSync + """ + + if logger is None: + logger = logging.getLogger() + self.logger = logger + if debug_level is None: + debug_level = int(os.environ.get("GS_DEBUG_LEVEL", os.environ.get("DAG_DEBUG_LEVEL", 0))) + + self.debug_level = debug_level + self.log_prefix = log_prefix + + if save_batch_count is None or save_batch_count <= 0: + save_batch_count = 0 + self.save_batch_count = save_batch_count + self.debug(f"save batch count is set to {self.save_batch_count}") + + self.vertex_type = vertex_type + + self.data_requires_encryption = data_requires_encryption + self.decrypt = decrypt + self.fail_on_corrupt = fail_on_corrupt + + gs_is_dev = os.environ.get("GS_IS_DEV", os.environ.get("DAG_IS_DEV")) + if gs_is_dev is not None: + is_dev = value_to_boolean(gs_is_dev) + self.is_dev = is_dev + if self.is_dev is True: + self.debug("GraphSync is running in a development environment, vertex names will be included.") + + # If the record is passed in, use the UID and key bytes from the record. + self.uid = None + if record is not None: + self.uid = conn.get_record_uid(record) + key_bytes = conn.get_key_bytes(record) + + self.key = key_bytes + + if key_bytes is None: + raise ValueError("Either the record or the key_bytes needs to be passed.") + + # If the UID is blank, use the key bytes to generate a UID + if self.uid is None: + self.uid = generate_uid_str(key_bytes[:16]) + + # Graph ID allow you to select which graph to load. 
The default is 0, which will load all graph for the UID + self.graph_id = graph_id + + self.debug(f"{self.log_prefix} key {self.key}", level=1) + self.debug(f"{self.log_prefix} UID {self.uid}", level=1) + self.debug(f"{self.log_prefix} UID HEX {urlsafe_str_to_bytes(self.uid).hex()}", level=1) + + if name is None: + name = f"{self.log_prefix} ROOT" + self.name = name + + # The order of the vertices is important. + # The order creates the history. + # The web service will order edge by their edge_id + # Store in and array. + # The lookup table to make UID to DAGVertex easier. + # The integer is the index into the array. + self._vertices = [] # type: List[DAGVertex] + self._uid_lookup = {} # type: dict[str, int] + + # This is like the batch + self.origin_uid = generate_uid_str() + + # If True, any addition or changes will automatically be saved. + self.auto_save = auto_save + + # To auto save, both allow_auto_save and auto_save needs to be True. + # If the graph has not been saved before and the root vertex has not been connected, + # we want to disable auto save. + self._allow_auto_save = False + + # For big changes, we need a confirmation to save. + self.need_save_confirm = False + + # The last sync point after save. + self.last_sync_point = 0 + + # Amount of history to keep. + # The default is 0, which will keep all history. + # Setting to 1 will only keep the latest edges. + # Settings to 2 will keep the latest and prior edges. + # And so on. + self.history_level = history_level + + # If data was corrupt in the graph, the vertex UID will appear in this list. + self.corrupt_uids = [] + + self.conn = conn + + def debug(self, msg: str, level: int = 0): + """ + Debug with granularity level. + + If the debug level is greater or equal to the level on the message, the message will be displayed. + + :param msg: Text debug message + :param level: Debug level of message + :return: + """ + + if self.debug_level >= level: + + msg = f"{self.log_prefix}: {msg}" + + if self.logger is not None: + self.logger.debug(msg) + else: + logging.debug(msg) + + def __str__(self): + ret = f"GraphSync {self.uid}\n" + ret += f" python instance id: {id(self)}\n" + ret += f" name: {self.name}\n" + ret += f" key: {self.key}\n" + ret += f" vertices:\n" + for v in self.all_vertices: + ret += f" * {v.uid}, Keys: {v.keychain}, Active: {v.active}\n" + for e in v.edges: + if e.edge_type == EdgeType.DATA: + ret += f" + has a DATA edge" + if e.content is not None: + ret += ", has content" + else: + ret += f" + belongs to {e.head_uid}, {DAG.EDGE_LABEL.get(e.edge_type)}, {e.content}" + ret += "\n" + + return ret + + @property + def is_corrupt(self): + return len(self.corrupt_uids) > 0 + + @property + def allow_auto_save(self) -> bool: + """ + Return the flag indicating if auto save is allowed. + :return: + """ + + return self._allow_auto_save + + @allow_auto_save.setter + def allow_auto_save(self, value: bool): + """ + Set the ability to auto save. + :param value: True enables, False disables. + :return: + """ + + if value is True: + self.debug("ability to auto save has been ENABLED", level=2) + else: + self.debug("ability to auto save has been DISABLED", level=2) + + self._allow_auto_save = value + + @property + def origin_ref(self) -> Ref: + + """ + Return an instance of the origin reference. + :return: + """ + + return Ref( + type=RefType.DEVICE, + value=self.origin_uid, + name=self.name if self.is_dev is True else None + ) + + @property + def has_graph(self) -> bool: + """ + Do we have any graph items? 
+ + :return: True if there are vertices. False if no vertices. + """ + + return len(self._vertices) > 0 + + @property + def vertices(self) -> List[DAGVertex]: + """ + Get all active vertices + + :return: List of DAGVertex instance + """ + + return [ + vertex + for vertex in self._vertices + if vertex.active is True + ] + + @property + def all_vertices(self) -> List[DAGVertex]: + """ + Get all vertices + :return: List of DAGVertex instance + """ + + return self._vertices + + def get_vertex(self, key) -> Optional[DAGVertex]: + + """ + Get a single vertex. + + The key can be either a UID, path or name. + + The UID is most reliable since there can only be one per graph. + + The path is second reliable if it is set by the user. + It will find an edge with the path, the vertex that is the edge's tail is returned. + There is no unique constraint for the path. + You can have duplicates. + + The name is third, and not reliable. + The name only exists when the graph is created. + If loaded, the name will be None. + + :param key: A UID, path item, or name of a vertex. + :return: DAGVertex instance, if it exists. + """ + + if key is None: + return None + + # Is the key a UID? If so, return the vertex from the lookup. + if key in self._uid_lookup: + index = self._uid_lookup[key] + return self._vertices[index] + + # Is the key a path? + # We also want to include any deleted edges. + vertices = self.get_vertices_by_path_value(key, inc_deleted=True) + if len(vertices) > 0: + if len(vertices) > 1: + raise DAGPathException("Cannot get vertex using the path. Found multiple vertex that use the path.") + return vertices[0] + + # Is the key a name? This is a last resort. + for vertex in vertices: + if vertex.name == key: + return vertex + + return None + + @property + def get_root(self) -> Optional[DAGVertex]: + """ + Get the root vertex + + If the root vertex does not exist, it will create the vertex with a ref type of PAM_NETWORK. + + :return: + """ + root = self.get_vertex(self.uid) + if root is None: + root = self.add_vertex(uid=self.uid, name=self.name, vertex_type=self.vertex_type) + return root + + def vertex_exists(self, key: str) -> bool: + """ + Check if a vertex identified by the key exists. + :param key: UID, path, or name + :return: + """ + + return self.get_vertex(key) is not None + + def get_vertices_by_path_value(self, path: str, inc_deleted: bool = False) -> List[DAGVertex]: + """ + Find all vertices that have an edge that match the path + :param path: A string path value. This is a path to walk, just the value. + :param inc_deleted: Include deleted edges. + :return: List of DAGVertex + """ + results = [] + if inc_deleted is True: + vertices = self.all_vertices + else: + vertices = self.vertices + + for vertex in vertices: + for edge in vertex.edges: + if edge.path == path: + results.append(vertex) + return results + + def _sync(self, sync_point: int = 0) -> (List[DAGData], int): + + # The web service will send 500 items, if there is more the 'has_more' flag is set to True. + has_more = True + + # Make the web service call to set all the data + all_data = [] + while has_more is True: + # Load a page worth of items + resp = self.conn.sync( + stream_id=self.uid, + sync_point=sync_point, + graph_id=self.graph_id + ) + if resp.syncPoint == 0: + return all_data, 0 + + all_data += resp.data + + # The server will tell us if there is more data to get. + has_more = resp.hasMore + + # The sync_point will indicate where we need to start the sync from. 
Think syncPoint > value + sync_point = resp.syncPoint + + return all_data, sync_point + + def _load(self, sync_point: int = 0): + + """ + Load the DAG + + This will clear the existing graph. + It will make web services calls to get the fresh graph, which will return a list of edges. + With the list of edges, it will create vertices and connect them with the edges. + The content of the edges will remain encrypted. The 'encrypted' flag is set to True. + We need the entire graph structure before decrypting. + + We don't have to worry about keys at this point. We are just trying to get structure + and content in the right place. Nothing is decrypted here. + + :param sync_point: Where to load + """ + + # Clear the existing vertices. + self._vertices = [] # type: List[DAGVertex] + self._uid_lookup = {} # type: dict[str, int] + + self.debug("# SYNC THE GRAPH ##################################################################", level=1) + + # Make the web service call to set all the data + all_data, sync_point = self._sync(sync_point=sync_point) + + self.debug(" PROCESS the non-DATA edges", level=2) + + # Process the non-DATA edges + for data in all_data: + + # Skip all the DATA edge + edge_type = EdgeType.find_enum(data.type) + if edge_type == EdgeType.DATA: + continue + + # The ref the tail. It connects to stored in the vertex. + tail_uid = data.ref.get("value") + + # The parentRef is the head. It's the arrowhead on the edge. For DATA edges, it will be None. + head_uid = None + if data.parentRef is not None: + head_uid = data.parentRef.get("value") + + self.debug(f" * edge {edge_type}, tail {tail_uid} to head {head_uid}", level=3) + + # We want to store this edge in the Vertex with the same value/UID as the ref. + if self.vertex_exists(tail_uid) is False: + self.debug(f" * tail vertex {tail_uid} does not exists. create.", level=3) + self.add_vertex( + uid=tail_uid, + name=data.ref.get("name"), + + # This will be 0/GENERAL right now. We do the lookup just in case things will change in the + # future. + vertex_type=RefType.find_enum(data.ref.get("type")) + ) + + # Get the tail vertex. + tail = self.get_vertex(tail_uid) + + # This most likely is a DELETION edge of a DATA edge. + # Set the head to be the same as the tail. + if head_uid is None: + head_uid = tail_uid + + # If the head vertex doesn't exist, we need to create. + if self.vertex_exists(head_uid) is False: + self.debug(f" * head vertex {head_uid} does not exists. create.", level=3) + self.add_vertex( + uid=head_uid, + name=data.parentRef.get("name"), + vertex_type=RefType.GENERAL + ) + # Get the head vertex, which will exist now. + head = self.get_vertex(head_uid) + self.debug(f" * tail {tail_uid} belongs to {head_uid}, " + f"edge type {edge_type}", level=3) + + if edge_type == EdgeType.DELETION: + tail.disconnect_from(head) + else: + if data.content is not None: + content = str_to_bytes(data.content) + else: + content = None + + # ACL are decrypted, but it is base64 encode. + # We need to deserialize the base64 to get the bytes. + # We can't update an existing edges content after added. + # if edge_type == EdgeType.ACL: + # content = str_to_bytes(content) + + # Connect this vertex to the head vertex. It belongs to that head vertex. 
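+                # (A KEY edge here means the head vertex holds the key that decrypts this
+                # vertex's key; that relationship is what _decrypt_keychain() walks up later.)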
+ tail.belongs_to( + vertex=head, + edge_type=edge_type, + # content is encrypted + content=content, + path=data.path, + modified=False, + from_load=True + ) + + self.debug("", level=2) + self.debug(" PROCESS the DATA edges", level=2) + + # Process the DATA edges + # We don't have to worry about vertex creation since they will all exist. + for data in all_data: + + # Only process the data edges. + edge_type = EdgeType.find_enum(data.type) + if edge_type != EdgeType.DATA: + continue + + # Get the tail vertex. + tail_uid = data.ref.get("value") + # We want to store this edge in the Vertex with the same value/UID as the ref. + if self.vertex_exists(tail_uid) is False: + self.debug(f" * tail vertex {tail_uid} does not exists. create.", level=3) + self.add_vertex( + uid=tail_uid, + name=data.ref.get("name"), + + # This will be 0/GENERAL right now. We do the lookup just in case things will change in the + # future. + vertex_type=RefType.find_enum(data.ref.get("type")) + ) + tail = self.get_vertex(tail_uid) + + self.debug(f" * DATA edge belongs to {tail.uid}", level=3) + tail.add_data( + # content is encrypted + content=data.content, + path=data.path, + modified=False, + from_load=True, + ) + + self.debug("", level=1) + + return sync_point + + def _mark_deletion(self): + + """ + Mark vertices as deleted. + + Check each vertex to see if there is any non-DELETION edge connecting to another vertex. + If there are no edges, then the vertex is flagged at deleted. + + This is done to prevent the edges from being connected to a deleted vertex. + Also, to display deleted vertex in the DOT graph. + :return: + """ + + self.debug(" CHECK dag vertices to see if they are active", level=1) + for vertex in self.all_vertices: + + self.debug(f"check vertex {vertex.uid}", level=3) + found_edge_to_another_vertex = False + for edge in vertex.edges: + # Skip the DELETION and DATA edges. + if edge.edge_type == EdgeType.DELETION or edge.edge_type == EdgeType.DATA: + continue + + # Check if this edge has a matching DELETION edge. + # If it does not, this vertex cannot be deleted. + if edge.is_deleted is False: + found_edge_to_another_vertex = True + break + + # If the vertex belongs to no vertex, and it not the root, then flag it for deletion. + if found_edge_to_another_vertex is False and vertex.uid != self.uid: + self.debug(f" * vertex is deleted", level=3) + vertex.active = False + + self.debug("", level=1) + + def _decrypt_keychain(self): + + """ + Decrypt KEY/ACL edges + + Part one is to decrypt the KEY and ACL edges. + To decrypt the edge, we need to walk up the edges until we can no longer. + If we get the point where we can't walk up any farther, we need to use the record key bytes. + While walking up, if we get to a keychain that has been decrypted, we return that keychain. + As we walk back, we can decrypt any keychain that is still encrypted. + The decrypt keychain is set in the vertex. + """ + + self.debug(" DECRYPT the dag KEY edges", level=1) + + def _get_keychain(v): + self.debug(f" * looking at {v.uid}", level=3) + + # If the vertex has a decrypted key, then return it. 
+ if v.has_decrypted_keys is True: + self.debug(" found a decrypted keychain on vertex", level=3) + return v.keychain + + # Else we need KEY/ACL edge and get the key from the vertex that this vertex belongs to + found_key_edge = False + for e in v.edges: + if e.edge_type == EdgeType.KEY: + + self.debug(f" has edge that is a key, check head vertex {e.head_uid}", level=3) + head = self.get_vertex(e.head_uid) + keychain = _get_keychain(head) + + # No need to check if keychain exists. + # At default, it should contain the record bytes if no KEY/ACL edges existed for a vertex. + + self.debug(f" * decrypt {v.uid} with keys {keychain}", level=3) + was_able_to_decrypt = False + + # Try the keys in the keychain. One should be able to decrypt the content. + for key in keychain: + try: + # The edge will contain a single key. + # Adding a key to + self.debug(f" decrypt with key {key}", level=3) + content = decrypt_aes(e.content, key) + self.debug(f" content {content}", level=3) + v.add_to_keychain(content) + self.debug(f" * vertex {v.uid} keychain is {v.keychain}", level=3) + was_able_to_decrypt = True + found_key_edge = True + break + except (Exception,): + self.debug(f" !! this is not the key", level=3) + + if was_able_to_decrypt is False: + + # Flag that the edge is corrupt, flag that the vertex keychain is corrupt, + # and store vertex UID/tail UID. + # If we fail on corrupt keys, then raise exceptions. + e.corrupt = True + v.corrupt = True + self.corrupt_uids.append(v.uid) + if self.fail_on_corrupt is True: + raise DAGKeyException(f"Could not decrypt vertex {v.uid} keychain for edge path {e.path}") + return [] + + if found_key_edge is True: + return v.keychain + else: + self.debug(" * using record bytes", level=3) + return [self.key] + + for vertex in self.all_vertices: + if vertex.has_key is False: + continue + self.debug(f"vertex {vertex.uid}, {vertex.has_key}, {vertex.has_decrypted_keys}", level=3) + vertex.keychain = _get_keychain(vertex) + self.debug(f" setting keychain to {vertex.keychain}", level=3) + + self.debug("", level=1) + + def _decrypt_data(self): + + """ + Decrypt DATA edges + + At this point, all the vertex should have an encrypted key. + This key is used to decrypt the DATA edge's content. + Walk each vertex and decrypt the DATA edge if there is a DATA edge. + """ + + self.debug(" DECRYPT the dag data", level=1) + for vertex in self.all_vertices: + if vertex.has_data is False: + continue + self.debug(f"vertex {vertex.uid}, {vertex.keychain}", level=3) + + for edge in vertex.edges: + if edge.edge_type != EdgeType.DATA: + continue + + # If the vertex/KEY edge that tail is this vertex is corrupt, we cannot decrypt data. + if vertex.corrupt is True: + self.logger.error(f"the key for the DATA edge is corrupt for vertex {vertex.uid}; " + "cannot decrypt data.") + continue + + content = edge.content + if isinstance(content, bytes) is True: + raise ValueError("The content has already been decrypted.") + + self.debug(f" * enc safe content {content}", level=3) + if isinstance(content, str): + content = str_to_bytes(content) + self.debug(f" * enc {content}, enc key {vertex.keychain}", level=3) + able_to_decrypt = False + + keychain = vertex.keychain + + # Try the keys in the keychain. One should be able to decrypt the content. + for key in keychain: + try: + edge.content = decrypt_aes(content, key) + able_to_decrypt = True + self.debug(f" * content {edge.content}", level=3) + break + except (Exception,): + self.debug(f" !! 
this is not the key", level=3) + + if able_to_decrypt is False: + + # If the DATA edge requires encryption, throw error if we cannot decrypt. + if self.data_requires_encryption is True: + self.corrupt_uids.append(vertex.uid) + raise DAGDataException(f"The data edge {vertex.uid} could not be decrypted.") + + edge.content = content + edge.needs_encryption = False + self.debug(f" * edge is not encrypted or key is incorrect.") + + self.debug("", level=1) + + def _flag_as_not_modified(self): + + """ + Flag all edges a not modified. + + :return: + """ + + for vertex in self.all_vertices: + for edge in vertex.edges: + edge.modified = False + + def load(self, sync_point: int = 0) -> int: + + """ + Load data from the graph. + + The first step is to recreate the structure of the graph. + The second step is mark vertex as deleted. + The third step is to decrypt the KEY/ACL/DATA edges. + Forth is to flag all edges as not modified. + + :return: The sync point of the graph stream + """ + + # During the load, turn off auto save + self.allow_auto_save = False + + self.debug("== LOAD DAG ========================================================================", level=2) + sync_point = self._load(sync_point) + self.debug(f"sync point is {sync_point}") + self._mark_deletion() + if self.decrypt is True: + self._decrypt_keychain() + self._decrypt_data() + else: + self.logger.info("the DAG has not been decrypted, the decrypt flag was get to False") + self._flag_as_not_modified() + self.debug("====================================================================================", level=2) + + # We have loaded the grpah, enable the ability to use auto save. + self.allow_auto_save = True + + self.last_sync_point = sync_point + + return sync_point + + def _make_delta_graph(self, duplicate_data: bool = True): + + self.debug("DELTA GRAPH", level=3) + modified_vertices = [] + for vertex in self.all_vertices: + found_modification = False + for edge in vertex.edges: + if edge.modified is True: + found_modification = True + break + if found_modification is True: + modified_vertices.append(vertex) + if len(modified_vertices) == 0: + self.debug("nothing has been modified") + return + + self.debug(f"has {len(modified_vertices)} vertices", level=3) + + def _flag(vertex: DAGVertex): + + self.debug(f"check vertex {vertex.uid}", level=3) + if vertex.uid == self.uid: + self.debug(f" FOUND ROOT", level=3) + return True + + # Check if we have any of these edges in this order. + found_path = False + for edge_type in [EdgeType.KEY, EdgeType.ACL, EdgeType.LINK]: + seen = {} + for edge in vertex.edges: + self.debug(f" checking {edge.edge_type}, {vertex.uid} to {edge.head_uid}", level=3) + is_deletion = None + if edge.edge_type == edge_type: + self.debug(f" found {edge_type}", level=3) + next_vertex = self.get_vertex(edge.head_uid) + + if is_deletion is None: + # If the most recent edge a DELETION edge? + version, highest_edge = vertex.get_highest_edge_version(next_vertex.uid) + is_deletion = highest_edge.edge_type == EdgeType.DELETION + if is_deletion is True: + self.debug(f" highest deletion edge. 
will not mark any edges as modified", + level=3) + + found_path = _flag(next_vertex) + if found_path is True and seen.get(edge.head_uid) is None: + self.debug(f" setting {vertex.uid}, {edge_type} active", level=3) + if is_deletion is False: + edge.modified = True + seen[edge.head_uid] = True + else: + self.debug(f" edge is not {edge_type}", level=3) + + if found_path is True: + break + + # If we found a path, we may need to duplicate the DATA edge. + if found_path is True and duplicate_data is True: + for edge in vertex.edges: + if edge.edge_type == EdgeType.DATA: + edge.modified = True + break + + return found_path + + self.logger.debug("BEGIN delta graph edge detection") + for modified_vertex in modified_vertices: + _flag(modified_vertex) + self.logger.debug("END delta graph edge detection") + + def save(self, confirm: bool = False, delta_graph: bool = False): + + """ + Save the graph + + We will not save if using the default graph. + + The save process will only save edges that have been flagged as modified, or are newly added. + The process will get the edges from all vertices. + The UID of the vertex is the tail UID of the edge. + For DATA edges, the key (first key in the keychain) will be used for encryption. + + If the web service takes too long or hangs, the batch_count can be used to reduce the amount the web service + needs to handle per request. If set to None or non-postivie value, it will not send in batches. + + :param confirm: Confirm save. + Only need this when deleting all vertices. + :param delta_graph: Make a standalone graph from the modifications. + Use sync points to load this graph. + + :return: + """ + + self.debug("== SAVE GRAPH ========================================================================", level=2) + + if self.is_corrupt is True: + self.logger.error(f"the graph is corrupt, there are problem UIDs: {','.join(self.corrupt_uids)}") + raise DAGCorruptException(f"Cannot save. Graph steam uid {self.uid}, graph id {self.graph_id} " + f"has corrupt vertices: {','.join(self.corrupt_uids)}") + + root_vertex = self.get_vertex(self.uid) + if root_vertex is None: + raise DAGVertexException("Cannot save. Could not find the root vertex.") + + if root_vertex.vertex_type != RefType.PAM_NETWORK and root_vertex.vertex_type != RefType.PAM_USER: + raise DAGVertexException("Cannot save. Root vertex type needs to be PAM_NETWORK or PAM_USER.") + + # Do we need to the 'confirm' parameter set to True? + # This is needed if the entire graph is being deleted. + if self.need_save_confirm is True and confirm is False: + raise DAGConfirmException("Cannot save. Confirmation is required.") + self.need_save_confirm = False + + if delta_graph is True: + self._make_delta_graph() + + data_list = [] + + def _add_data(vertex): + self.debug(f"processing vertex {vertex.uid}, key {vertex.key}, type {vertex.vertex_type}", level=3) + # The vertex UID and edge tail UID + uid = vertex.uid + for edge in vertex.edges: + self.debug(f" * edge {edge.edge_type.value}, head {edge.head_uid}, tail {vertex.uid}", level=3) + + # If this edge is not modified, don't add to the data list to save. + if edge.modified is False: + self.debug(f" not modified, not saving.", level=3) + continue + + content = edge.content + + # If we are decrypting the edge data, then we want to encrypt it when we save. + # Else, save the content as it is. 
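+                # (When self.decrypt is False the graph was loaded without decrypting, so the
+                # edge content is still the encrypted string from the service and is written
+                # back untouched.)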
+ if self.decrypt is True: + if edge.edge_type == EdgeType.DATA: + self.debug(f" edge is data, encrypt data: {edge.needs_encryption}", level=3) + if isinstance(content, dict) is True: + content = json.dumps(content) + if isinstance(content, str) is True: + content = content.encode() + + # If individual edges require encryption or all DATA edge require encryption, then encrypt + if edge.needs_encryption is True or self.data_requires_encryption is True: + self.debug(f" content {edge.content}, enc key {vertex.key}", level=3) + content = encrypt_aes(content, vertex.key) + self.debug(f" enc content {content}", level=3) + + content = bytes_to_str(content) + self.debug(f" enc safe content {content}", level=3) + elif edge.edge_type == EdgeType.KEY: + self.debug(f" edge is key or acl, encrypt key", level=3) + head_vertex = self.get_vertex(edge.head_uid) + key = head_vertex.key + if key is None: + self.debug(f" the edges head vertex {edge.head_uid} did not have a key. " + "using root dag key.", level=3) + key = self.key + self.debug(f" key {vertex.key}, enc key {key}", level=3) + content = bytes_to_str(encrypt_aes(vertex.key, key)) + elif edge.edge_type == EdgeType.ACL: + content = bytes_to_str(edge.content) + else: + self.debug(f" edge is {edge.edge_type}", level=3) + + parent_vertex = self.get_vertex(edge.head_uid) + + data = DAGData( + type=edge.edge_type, + content=content, + # tail point at this vertex, so it uses this vertex's uid. + ref=Ref( + type=vertex.vertex_type, + value=uid, + name=vertex.name if self.is_dev is True else None + ), + # Head, the arrowhead, points at the vertex this vertex belongs to, the parent. + # Apparently, for DATA edges, the parentRef is allowed to be None. + # Doesn't hurt to send it. + parentRef=Ref( + type=parent_vertex.vertex_type, + value=edge.head_uid, + name=parent_vertex.name if self.is_dev is True else None + ), + path=edge.path + ) + + data_list.append(data) + + # Flag that this edge is no longer modified. + edge.modified = False + + # Add the root vertex first + _add_data(self.get_root) + + # Add the rest. + # Only add is the skip_save is False. + for v in self.all_vertices: + if v.skip_save is False: + if v.uid != self.uid: + _add_data(v) + + # Save the keys before the data. + # This is done to make sure the web service can figure out the stream id. + # By saving the keys before data, the structure of the graph is formed. + if len(data_list) > 0: + + if self.debug_level >= 4: + + self.debug("EDGE LIST") + self.debug("##############################################") + for data in data_list: + self.debug(f"{data.ref.value} -> {data.parentRef.value} ({data.type})") + self.debug("##############################################") + + self.debug(f"total list has {len(data_list)} items", level=0) + self.debug(f"batch {self.save_batch_count} edges", level=0) + + batch_num = 0 + while len(data_list) > 0: + + # If using batch add, then take the first batch_count items. 
+ # Remove them from the data list + if self.save_batch_count > 0: + batch_list = data_list[:self.save_batch_count] + data_list = data_list[self.save_batch_count:] + + # Else take everything and clear the data list (else infinite loop) + else: + batch_list = data_list + data_list = [] + + # Little sanity check + if len(batch_list) == 0: + break + + self.debug(f"adding {len(batch_list)} edges, batch {batch_num}", level=0) + payload = DataPayload( + origin=self.origin_ref, + dataList=batch_list, + graphId=self.graph_id + ) + + self.debug("PAYLOAD; batch {batch_num} =======================", level=5) + self.debug(payload.model_dump_json(), level=5) + self.debug("==================================================", level=5) + + self.conn.add_data(payload) + batch_num += 1 + + # It's a POST that returns no data + else: + self.debug("data list was empty, not saving.", level=2) + + self.debug("====================================================================================", level=2) + + def do_auto_save(self): + # If allow_auto_save is False, we will not allow auto saving. + # On newly created graph, this will happen if the root vertex has not been connected. + # The root vertex/disconnect edge head is needed to get a proper stream ID. + if self.allow_auto_save is False: + self.debug("cannot auto_save, allow_auto_save is False.", level=3) + return + if self.auto_save is True: + self.debug("... dag auto saving", level=1) + self.save() + + def add_vertex(self, name: Optional[str] = None, uid: Optional[str] = None, keychain: Optional[List[bytes]] = None, + vertex_type: RefType = RefType.GENERAL) -> DAGVertex: + + """ + Add a vertex to the graph. + + :param name: Name for the vertex. + :param uid: String unique identifier. + It's a 16bit hex value that is base64 encoded. + :param keychain: List if key bytes to use for encryption/description. This is set by the load/save method. + :param vertex_type: A RefType enumeration type. If blank, it will default to GENERAL. + :return: + """ + + if name is None: + name = uid + + vertex = DAGVertex( + name=name, + dag=self, + uid=uid, + keychain=keychain, + vertex_type=vertex_type + ) + if self.vertex_exists(vertex.uid) is True: + raise DAGVertexAlreadyExistsException(f"Vertex {vertex.uid} already exists.") + + # Set the UID to array index lookup. + # This is where the vertex will be in the vertices list. + # Then append the vertex to the vertices list. + self._uid_lookup[vertex.uid] = len(self._vertices) + self._vertices.append(vertex) + + return vertex + + @property + def is_modified(self) -> bool: + for vertex in self.all_vertices: + for edge in vertex.edges: + if edge.modified is True: + return True + return False + + @property + def modified_edges(self): + edges = [] + for vertex in self.all_vertices: + for edge in vertex.edges: + if edge.modified is True: + edges.append(edge) + return edges + + def delete(self): + """ + Delete the entire graph. + + This will delete all the vertex, which will delete all the edges. + This will not automatically save. + The save method will need to be called. + The save will require the 'confirm' parameter to be set to True. 
+ :return: + """ + for vertex in self.vertices: + vertex.delete() + self.need_save_confirm = True + + def _search(self, content: Any, value: QueryValue, ignore_case: bool = False): + + if isinstance(value, dict) is True: + # If the object is not a dictionary, then it's not match + if isinstance(content, dict) is False: + return False + for next_key, next_value in value.items(): + if next_key not in content: + return False + if self._search(content=content[next_key], + value=next_value, + ignore_case=ignore_case) is False: + return False + elif isinstance(value, list) is True: + # If the object is not a dictionary, then it's not match + for next_value in value: + if self._search(content=content, + value=next_value, + ignore_case=ignore_case) is True: + return True + return False + else: + content = str(content) + value = str(value) + if ignore_case is True: + content = content.lower() + value = value.lower() + + return value in content + + return True + + def search_content(self, query, ignore_case: bool = False): + results = [] + for vertex in self.vertices: + if vertex.has_data is False or vertex.active is False: + continue + content = vertex.content + if isinstance(query, bytes) is True: + if query == content: + results.append(vertex) + elif isinstance(query, str) is True: + try: + content = content.decode() + if query in content: + results.append(vertex) + continue + + except (Exception,): + pass + elif isinstance(query, dict) is True: + try: + content = content.decode() + content = json.loads(content) + search_result = self._search(content, value=query, ignore_case=ignore_case) + if search_result is True: + results.append(vertex) + except (Exception,): + pass + else: + raise ValueError("Query is not an accepted type.") + return results + + def walk_down_path(self, path: Union[str, List[str]]) -> Optional[DAGVertex]: + + """ + Walk the vertices using the path and return the vertex starting at root vertex. + + :param path: An array of path string, or string where the path is joined with a "/" (i.e., think URL) + :return: DAGVertex is the path completes, None is failure. + """ + + self.debug("walking path starting at the root vertex", level=2) + vertex = self.get_vertex(self.uid) + return vertex.walk_down_path(path) + + def to_dot(self, graph_format: str = "svg", show_hex_uid: bool = False, + show_version: bool = True, show_only_active: bool = False): + + """ + Generate a graphviz Gigraph in DOT format that is marked up. 
+ + :param graph_format: + :param show_hex_uid: + :param show_version: + :param show_only_active: + :return: + """ + + try: + mod = importlib.import_module("graphviz") + except ImportError: + raise Exception("Cannot to_dot(), graphviz module is not installed.") + + dot = getattr(mod, "Digraph")(comment=f"GraphSync for {self.name}", format=graph_format) + dot.attr(rankdir='BT') + + for v in self._vertices: + if show_only_active is True and v.active is False: + continue + if v.corrupt is False: + fillcolor = "white" + if v.active is False: + fillcolor = "grey" + label = f"uid={v.uid}" + if v.name is not None and v.name != v.uid: + label += f"\\nname={v.name}" + if show_hex_uid is True: + label += f"\\nhex={urlsafe_str_to_bytes(v.uid).hex()}" + else: + fillcolor = "red" + label = f"{v.uid} (CORRUPT)" + + dot.node(v.uid, label, fillcolor=fillcolor, style="filled") + for edge in v.edges: + + if edge.corrupt is False: + color = "grey" + style = "solid" + + # To reduce the number of edges, only show the active edges + if edge.active is True: + color = "black" + style = "bold" + elif show_only_active is True: + continue + + # If the vertex is not active, gray out the DATA edge + if edge.active is False: + color = "grey" + + if edge.edge_type == EdgeType.DELETION: + style = "dotted" + + label = DAG.EDGE_LABEL.get(edge.edge_type) + if label is None: + label = "UNK" + if edge.path is not None and edge.path != "": + label += f"\\npath={edge.path}" + if show_version is True: + label += f"\\ne{edge.version}" + # tail, head (arrow side), label + else: + color = "red" + style = "solid" + label = f"{DAG.EDGE_LABEL.get(edge.edge_type)} (CORRUPT)" + + dot.edge(v.uid, edge.head_uid, label, style=style, fontcolor=color, color=color) + + return dot + + def to_dot_raw(self, graph_format: str = "svg", sync_point: int = 0, rank_dir="BT"): + + """ + Generate a graphviz Gigraph in DOT format that is not (heavily) marked up. 
+ + :param graph_format: + :param sync_point: + :param rank_dir: + :return: + """ + + try: + mod = importlib.import_module("graphviz") + except ImportError: + raise Exception("Cannot to_dot(), graphviz module is not installed.") + + dot = getattr(mod, "Digraph")(comment=f"GraphSync for {self.name}", format=graph_format) + dot.attr(rankdir=rank_dir) + + all_data, sync_point = self._sync(sync_point=sync_point) + + for edge in all_data: + edge_type = edge.type + tail_uid = edge.ref.get("value") + dot.node(tail_uid, tail_uid) + if edge.parentRef is not None: + head_uid = edge.parentRef.get("value") + dot.edge(tail_uid, head_uid, edge_type) + else: + dot.edge(tail_uid, tail_uid, edge_type) + return dot diff --git a/keepercommander/keeper_dag/edge.py b/keepercommander/keeper_dag/edge.py new file mode 100644 index 000000000..4461150cb --- /dev/null +++ b/keepercommander/keeper_dag/edge.py @@ -0,0 +1,243 @@ +from __future__ import annotations +import logging +from .types import EdgeType +from .exceptions import DAGContentException +import json +from typing import Optional, Union, Any, TYPE_CHECKING + +if TYPE_CHECKING: + from .vertex import DAGVertex + Content = Union[str, bytes, dict] + QueryValue = Union[list, dict, str, float, int, bool] + import pydantic + from pydantic import BaseModel + + +class DAGEdge: + def __init__(self, vertex: DAGVertex, edge_type: EdgeType, head_uid: str, version: int = 0, + content: Optional[Any] = None, path: Optional[str] = None, + modified: bool = True, block_content_auto_save: bool = False, from_load: bool = False, + needs_encryption: bool = False): + """ + Create an instance of DAGEdge. + + A primary key of the edge the vertex UID, the head UID, and edge_type. + + :param vertex: The DAGVertex instance that owns these edges. + :param edge_type: The enumeration EdgeType. Indicate the type of the edge. + :param head_uid: The vertex uid that has this edge's vertex. The vertex uid that the edge arrow points at. + :param version: Version of this edge. + :param content: The content of this edge. + :param path: Short tag about this edge. Do + :param modified: + :param block_content_auto_save: + :param from_load: Is this being called from the load() method? + :param needs_encryption: Flag to indicate if the content needs to be encrypted. + :return: An instance of DAGEdge + """ + + # This is the vertex that owns this edge. + self.vertex = vertex + self.edge_type = edge_type + self.head_uid = head_uid + + # Flag to indicate if the edge has been modified. Used to determine if the edge should be part of saved data. + # Set this before setting the content, else setting the content will cause an auto save. + self._modified = None + self.modified = modified + + # Block auto save in the content setter. + # When creating an edge, don't save until the edge is added to the edge list. + self.block_content_auto_save = block_content_auto_save + + # Does this edge's content need encryption? + self.needs_encryption = needs_encryption + + # If the content is being populated from a the load() method, and the edge type is a KEY or DATA, then the + # content will be encrypted (str). + # We want to keep a str, unless KEYs are decrypted. + + # If the edge data need encryption, is _content, currently encrypted. 
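+        # In other words: True means _content currently holds the still-encrypted value
+        # exactly as it came from the service.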
+ self.encrypted = from_load is True and edge_type in [EdgeType.KEY, EdgeType.DATA] + + # If the content could not be decrypted, set + self.corrupt = False + + self._content = None # type: Optional[Any] + self.content = content + self.path = path + + self.version = version + + # If a higher version edge exists, this will be False. + # If True, this is the highest edge. + self.active = True + + def __str__(self) -> str: + return f"" + + def debug(self, msg, level=0): + self.vertex.dag.debug(msg, level=level) + + @property + def modified(self): + return self._modified + + @modified.setter + def modified(self, value): + if value is True: + self.debug(f"vertex {self.vertex.uid}, type {self.vertex.dag.__class__.EDGE_LABEL.get(self.edge_type)}, " + f"head {self.head_uid} has been modified", level=5) + else: + self.debug(f"vertex {self.vertex.uid}, type {self.vertex.dag.__class__.EDGE_LABEL.get(self.edge_type)}, " + f"head {self.head_uid} had modified RESET", level=5) + self._modified = value + + @property + def content(self) -> Optional[Union[str, bytes]]: + """ + Get the content of the edge. + + If the content is a str, then the content is encrypted. + """ + return self._content + + @property + def content_as_dict(self) -> Optional[dict]: + """ + Get the content from the DATA edge as a dictionary. + :return: Content as a dictionary. + """ + content = self._content + if content is not None: + try: + content = json.loads(content) + except Exception as err: + raise DAGContentException(f"Cannot decode JSON. Is the content a dictionary? : {err}") + return content + + @property + def content_as_str(self) -> Optional[str]: + """ + Get the content from the DATA edge as string + :return: + """ + content = self._content + try: + content = content.decode() + except Exception as err: + pass + return content + + def content_as_object(self, meta_class: pydantic._internal._model_construction.ModelMetaclass) -> ( + Optional)[BaseModel]: + """ + Get the content as a pydantic based object. + + :param meta_class: The class to return + :return: + """ + content = self.content_as_str + if content is not None: + content = meta_class.model_validate_json(self.content_as_str) + return content + + @content.setter + def content(self, value: Any): + + """ + Set the content in the edge. + + The content should be stored as bytes. + If the encrypted flag is set, the content will be stored as is. + Content that is a str type is encrypted data (A Base64, AES encrypted bytes, str) + """ + + self.debug(f"vertex {self.vertex.uid}, type {self.vertex.dag.__class__.EDGE_LABEL.get(self.edge_type)}, " + f"head {self.head_uid} setting content", level=2) + + # If the data is encrypted, set it. + # Don't try to make it bytes. + # Also don't set the modified flag to True. + if self.encrypted is True: + self.debug(" content is encrypted.", level=3) + self._content = value + return + + if self._content is not None: + raise DAGContentException("Cannot update existing content. Use add_data() to change the content.") + + if isinstance(value, dict) is True: + value = json.dumps(value) + + # Is this a Pydantic based class? + if hasattr(value, "model_dump_json") is True: + value = value.model_dump_json() + + if isinstance(value, str) is True: + value = value.encode() + + self._content = value + + def delete(self): + """ + Delete the edge. + + Deleting an edge does not remove the existing edge. + It will create another edge with the same tail and head, but will be type DELETION. 
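+
+        A sketch of the intended effect (vertex names are hypothetical):
+
+            edge = vertex_a.get_edge(vertex_b, EdgeType.KEY)
+            edge.delete()
+            # the existing edge is now inactive, and a DELETION edge with the same
+            # tail/head and a higher version has been appended to vertex_a.edges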
+ """ + + # If already inactive, return + if self.active is False: + return + + version, _ = self.vertex.get_highest_edge_version(head_uid=self.head_uid) + + # Flag all other edges as inactive. + for edge in self.vertex.edges: + edge.active = False + + self.vertex.edges.append( + DAGEdge( + vertex=self.vertex, + edge_type=EdgeType.DELETION, + head_uid=self.head_uid, + version=version + 1 + ) + ) + + # Perform the DELETION edges save in one batch. + # Get the current allowed auto save state, and disable auto save. + current_allow_auto_save = self.vertex.dag.allow_auto_save + self.vertex.dag.allow_auto_save = False + + if self.vertex.belongs_to_a_vertex is False: + self.vertex.delete(ignore_vertex=self.vertex) + + self.vertex.dag.allow_auto_save = current_allow_auto_save + self.vertex.dag.do_auto_save() + + @property + def is_deleted(self) -> bool: + """ + Does this edge have a DELETION edge that has the same head? + + This should be used to check in a non-DELETION edge type has a matching DELETION edge. + :return: + """ + + # We shouldn't be checking the DELETION edge if it deleted. + # Throw some info message to make sure the coder knows their code might be something foolish. + if self.edge_type == EdgeType.DELETION: + logging.info(f"The edge is_deleted() just check if the DELETION edge is DELETION " + f"for vertex {self.vertex.uid}, head UID {self.head_uid}. Returned True, but code should " + "not be checking this edge.") + return True + + # Check the other edges for this vertex for an active DELETION-edge type. + for edge in self.vertex.edges: + if edge.edge_type == EdgeType.DELETION and edge.head_uid == self.head_uid and edge.active is True: + return True + + return False diff --git a/keepercommander/keeper_dag/exceptions.py b/keepercommander/keeper_dag/exceptions.py new file mode 100644 index 000000000..017ea017c --- /dev/null +++ b/keepercommander/keeper_dag/exceptions.py @@ -0,0 +1,80 @@ +from __future__ import annotations +from typing import Any, Optional + + +class DAGException(Exception): + + def __init__(self, msg: Any, uid: Optional[str] = None): + if isinstance(msg, str) is False: + msg = str(msg) + + self.msg = msg + self.uid = uid + + super().__init__(self.msg) + + def __str__(self): + return self.msg + + def __repr__(self): + return self.msg + + +class DAGKeyIsEncryptedException(DAGException): + pass + + +class DAGDataEdgeNotFoundException(DAGException): + pass + + +class DAGDeletionException(DAGException): + pass + + +class DAGConfirmException(DAGException): + pass + + +class DAGPathException(DAGException): + pass + + +class DAGVertexAlreadyExistsException(DAGException): + pass + + +class DAGContentException(DAGException): + pass + + +class DAGDefaultGraphException(DAGException): + pass + + +class DAGIllegalEdgeException(DAGException): + pass + + +class DAGKeyException(DAGException): + pass + + +class DAGDataException(DAGException): + pass + + +class DAGVertexException(DAGException): + pass + + +class DAGEdgeException(DAGException): + pass + + +class DAGCorruptException(DAGException): + pass + + +class DAGConnectionException(DAGException): + pass diff --git a/keepercommander/keeper_dag/maintenance.py b/keepercommander/keeper_dag/maintenance.py new file mode 100644 index 000000000..175c3c224 --- /dev/null +++ b/keepercommander/keeper_dag/maintenance.py @@ -0,0 +1,130 @@ +from __future__ import annotations +from .dag import DAG +from .edge import EdgeType +from .exceptions import DAGVertexException, DAGDataException +from .crypto import decrypt_aes, str_to_bytes + + +class 
Maintenance: + + def __init__(self, dag: DAG, sync_point: int = 0, decrypt=False): + self.dag = dag + self.sync_point = sync_point + self.decrypt = decrypt + self._loaded = False + self._load_sync_point = None + + def debug(self, msg: str, level: int = 0): + return self.dag.debug(msg, level) + + @property + def logger(self): + return self.dag.logger + + def _get_keychain(self, v): + self.debug(f"getting keychain for vertex {v.uid}, {v.name}, {v.vertex_type}", level=1) + + keychain = [] + + found_key_edge = False + for e in v.edges: + if e.edge_type == EdgeType.KEY: + head = self.dag.get_vertex(e.head_uid) + keychain += self._get_keychain(head) + + # Each vertex has a "keychain". + # However, this will be one key in an array. + try: + content = decrypt_aes(e.content, keychain[-1]["key"]) + keychain += [ + { + "uid": v.uid, + "name": v.name, + "type": v.vertex_type, + "key": content, + "corrupt": False + } + ] + found_key_edge = True + break + except Exception as err: + self.logger.error(f"could not decrypt key for {v.uid}, {keychain[-1]['key']}: {err}", level=1) + keychain += [ + { + "uid": v.uid, + "name": v.name, + "type": v.vertex_type, + "corrupt": True + } + ] + return keychain + + if found_key_edge is True: + return keychain + else: + return [ + { + "uid": self.dag.uid, + "key": self.dag.key, + "name": self.dag.name, + "type": self.dag.vertex_type, + "corrupt": False + } + ] + + def load(self, sync_point: int = 0): + if self._loaded is False or sync_point != self._load_sync_point: + self.logger.info(f"reloading the graph with decrypt {self.decrypt} and sync point {self.sync_point}") + # Disable the automatic decrypt of the KEY and DATA edges. + self.dag.decrypt = self.decrypt + self.dag.load(sync_point=self.sync_point) + self._loaded = True + self._load_sync_point = sync_point + + def reload(self): + self._loaded = False + self.load() + + def get_keychain(self, uid: str, sync_point: int = 0): + """ + + :param uid: Either the UID, or name of the vertex. + :param sync_point: A starting sync point for loading the graph. + :return: + """ + + # Disable the automatic decrypt of the KEY and DATA edges. 
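+        # Illustrative shape of the returned keychain (all values hypothetical):
+        #   [{"uid": "<root uid>",   "key": b"...", "name": "...", "type": ..., "corrupt": False},
+        #    {"uid": "<vertex uid>", "key": b"...", "name": "...", "type": ..., "corrupt": False}]
+        # i.e. the decrypted keys from the root of the graph down to the requested vertex.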
+ self.dag.decrypt = False + self.load(sync_point=sync_point) + + vertex = self.dag.get_vertex(uid) + if vertex is None: + raise DAGVertexException(f"Vertex {uid} does not exists.", uid=uid) + + key_chain = self._get_keychain(vertex) + + return key_chain + + def get_data(self, uid: str, key: bytes, sync_point: int = 0): + + self.dag.decrypt = False + self.load(sync_point=sync_point) + + vertex = self.dag.get_vertex(uid) + if vertex is None: + raise DAGVertexException(f"Vertex {uid} does not exists.", uid=uid) + + content = vertex.content + if content is None: + raise DAGVertexException(f"Vertex {uid} does not have a DATA edge.", uid=uid) + if isinstance(content, str) is False: + raise DAGDataException(f"Vertex {uid} DATA edge content was a not str.", uid=uid) + + try: + self.debug(f"decrypt {uid} data {content} with {key}") + return decrypt_aes(str_to_bytes(content), key) + except Exception as err: + raise DAGDataException(f"Could not decrypt vertex {uid}'s data edge: {err}", uid=uid) + + def delete_data(self): + pass diff --git a/keepercommander/keeper_dag/types.py b/keepercommander/keeper_dag/types.py new file mode 100644 index 000000000..f84af20f4 --- /dev/null +++ b/keepercommander/keeper_dag/types.py @@ -0,0 +1,127 @@ +from __future__ import annotations +from enum import Enum +from pydantic import BaseModel +from typing import List, Optional, Union + + +class BaseEnum(Enum): + + @classmethod + def find_enum(cls, value: Union[Enum, str, int], default: Optional[Enum] = None): + if value is not None: + for e in cls: + if e == value or e.value == value: + return e + if hasattr(cls, str(value).upper()) is True: + return getattr(cls, value.upper()) + return default + + +class RefType(BaseEnum): + # 0 + GENERAL = "general" + # 1 + USER = "user" + # 2 + DEVICE = "device" + # 3 + REC = "rec" + # 4 + FOLDER = "folder" + # 5 + TEAM = "team" + # 6 + ENTERPRISE = "enterprise" + # 7 + PAM_DIRECTORY = "pam_directory" + # 8 + PAM_MACHINE = "pam_machine" + # 9 + PAM_DATABASE = "pam_database" + # 10 + PAM_USER = "pam_user" + # 11 + PAM_NETWORK = "pam_network" + #12 + PAM_BROWSER = "pam_browser" + + def __str__(self): + return self.value + + +class EdgeType(BaseEnum): + + """ + DAG data type enum + + * DATA - encrypted data + * KEY - encrypted key + * LINK - like a key, but not encrypted + * ACL - unencrypted set of access control flags + * DELETION - removal of the previous edge at the same coordinates + * DENIAL - an element that was shared through graph relationship, can be explicitly denied + * UNDENIAL - negates the effect of denial, bringing back the share + + """ + DATA = "data" + KEY = "key" + LINK = "link" + ACL = "acl" + DELETION = "deletion" + DENIAL = "denial" + UNDENIAL = "undenial" + + # To store discovery, you would need data and key. To store relationships between records after the discovery + # data was converted, you use Link. + + def __str__(self) -> str: + return str(self.value) + + +class SyncQuery(BaseModel): + streamId: Optional[str] = None # base64 of a user's ID who is syncing. 
+ deviceId: Optional[str] = None + syncPoint: Optional[int] = None + graphId: Optional[int] = 0 + + +class SyncDataItem(BaseModel): + ref: dict + parentRef: Optional[dict] = None + content: Optional[str] = None + type: Optional[str] = None + path: Optional[str] = None + deletion: Optional[bool] = False + + +class SyncData(BaseModel): + syncPoint: int + data: List[SyncDataItem] + hasMore: bool + + +class Ref(BaseModel): + type: RefType + value: str + name: Optional[str] = None + + +# Translation for Key +class Key(BaseModel): + id: Ref + value: str + + +class DAGData(BaseModel): + type: EdgeType + ref: Ref + parentRef: Optional[Ref] = None + content: Optional[str] = None + path: Optional[str] = None + + +class DataPayload(BaseModel): + origin: Ref + dataList: List + graphId: Optional[int] = 0 + diff --git a/keepercommander/keeper_dag/utils.py b/keepercommander/keeper_dag/utils.py new file mode 100644 index 000000000..85d811529 --- /dev/null +++ b/keepercommander/keeper_dag/utils.py @@ -0,0 +1,8 @@ +def value_to_boolean(value): + value = str(value) + if value.lower() in ['true', 'yes', 'on', '1']: + return True + elif value.lower() in ['false', 'no', 'off', '0']: + return False + else: + return None diff --git a/keepercommander/keeper_dag/vertex.py b/keepercommander/keeper_dag/vertex.py new file mode 100644 index 000000000..c2bed5084 --- /dev/null +++ b/keepercommander/keeper_dag/vertex.py @@ -0,0 +1,850 @@ +from __future__ import annotations +from .edge import DAGEdge +from .types import EdgeType, RefType +from .crypto import generate_random_bytes, generate_uid_str, urlsafe_str_to_bytes +from .exceptions import DAGDeletionException, DAGIllegalEdgeException, DAGVertexException, DAGKeyException +from typing import Optional, Union, List, Any, TYPE_CHECKING + +if TYPE_CHECKING: + from .dag import DAG + Content = Union[str, bytes, dict] + QueryValue = Union[list, dict, str, float, int, bool] + import pydantic + from pydantic import BaseModel + + +class DAGVertex: + + def __init__(self, dag: DAG, uid: Optional[str] = None, name: Optional[str] = None, + keychain: Optional[bytes] = None, vertex_type: RefType = RefType.GENERAL): + + self.dag = dag + + # If the UID is not set, generate a UID. + if uid is None: + uid = generate_uid_str() + # Else verify that the UID is valid. The UID should be a 16-byte value that is web-safe base64 serialized. + else: + if len(uid) != 22: + raise ValueError(f"The uid {uid} is not a 22 characters in length.") + try: + b = urlsafe_str_to_bytes(uid) + if len(b) != 16: + raise ValueError("not 16 bytes") + except Exception: + raise ValueError("The uid does not appear to be web-safe base64 string contains a 16 bytes value.") + + # If the UID is the root UID, make sure the vertex type is not general. + # The root vertex needs to be either PAM_NETWORK or PAM_USER, if not set to PAM_NETWORK. + if uid == self.dag.uid and (vertex_type != RefType.PAM_NETWORK and vertex_type != RefType.PAM_USER): + vertex_type = RefType.PAM_NETWORK + self.vertex_type = vertex_type + + # If the name is not defined, use the UID. Name is not persistent in the DAG. + # If you load the DAG, the web service will not return the name. + if name is None: + name = uid + + self._uid = uid + self._name = name + + # The keychain is a list of keys that can be used. + # The keychain may contain multiple keys, when loading the default graph (graph_id) + # For normal editing, the keychain will contain only one key. 
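+        # For example (hypothetical values): after loading the default graph the keychain
+        # may look like [b"<key from graph A>", b"<key from graph B>"] when the same vertex
+        # UID exists in more than one graph; a vertex created for editing holds a single key.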
+ self._keychain = [] + if keychain is not None: + if isinstance(keychain, list) is False: + keychain = [keychain] + self._keychain += keychain + + # Is the keychain corrupt? + self.corrupt = False + + # These are edges to which vertex own this vertex. This vertex belongs to. So this would + self.edges = [] + self.has_uid = [] + + # Flag indicating that this vertex is active. + # This means this vertex has an active edge connected to another vertex. + self.active = True + + # By default, we will save this vertex; not skip_save. + # If in the process building the graph, it is decided that a vertex should not be saved; this can be set to + # prevent the vertex from being saved. + self._skip_save = False + + def __str__(self): + ret = f"Vertex {self.uid}\n" + ret += f" python instance id: {id(self)}\n" + ret += f" name: {self.name}\n" + ret += f" keychain: {self.keychain}\n" + ret += f" active: {self.active}\n" + ret += f" edges:\n" + for edge in self.edges: + ret += f" * type {self.dag.__class__.EDGE_LABEL.get(edge.edge_type)}" + ret += f", connect to {edge.head_uid}" + ret += f", path {edge.path}, " + ret += f", active: {edge.active}" + ret += f", modified: {edge.modified}" + ret += f", content: {'yes' if edge.content is not None else 'no'}" + ret += f", content type: {type(edge.content)}" + ret += "\n" + return ret + + def debug(self, msg: str, level: int = 0): + self.dag.debug(msg, level=level) + + @property + def name(self) -> str: + """ + Get the name for vertex + + If the name is not defined, the UID will be returned. + The name is not persistent. + If loading a DAG, the name will not be set. + + :return: + """ + if self._name is not None: + return self._name + return self._uid + + @property + def key(self) -> Optional[Union[str, bytes]]: + """ + Get a single key from the keychain. + + :return: + """ + + keychain = self.keychain + if len(keychain) > 0: + return self.keychain[0] + + return None + + @property + def skip_save(self): + return self._skip_save + + @skip_save.setter + def skip_save(self, value): + self._skip_save = value + + for vertex in self.has_vertices(): + vertex._skip_save = value + + def add_to_keychain(self, key: Union[str, bytes]): + """ + Add a key to the keychain + + :param key: A decrypted key bytes or encrypted key str + :return: + """ + if key not in self._keychain: + self._keychain.append(key) + + @property + def keychain(self) -> Optional[List[Union[str, bytes]]]: + """ + Get the keychain for the vertex. + + The key is stored on the edges, however, the key belongs to the vertex. + KEY and ACL edges from this vertex will have the same encrypted key. + It is simpler to store the key on the DAGVertex instance. + + The keychain in an array of keys. + When using graph_id = 0, different graphs that have the same UID will + have different keys. + When decrypting DATA edges, each key in the keychain will be tried. + + If the keychain has not been set, check if any edges exist that require a key. + If there are, then generate a random key. + The load process will populate the key. + If the vertex does not have a key in the keychain, it is because this is a newly + added vertex. + + If there are no edges that require a key, then return None. + """ + + # If the vertex is root, then the keychain will be the key bytes. + if self.dag.get_root == self: + self._keychain = [self.dag.key] + + # If the keychain is empty, generate a key for a specific edge type. 
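+        # (The generated key is only a placeholder for a brand-new vertex; for existing
+        # vertices the load process populates the real key from the graph.)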
+ elif len(self._keychain) == 0: + for e in self.edges: + if e.edge_type in [EdgeType.KEY, EdgeType.DATA]: + self._keychain.append(generate_random_bytes(self.dag.__class__.UID_KEY_BYTES_SIZE)) + break + + return self._keychain + + @keychain.setter + def keychain(self, value: List[Union[str, bytes]]): + """ + Set the key in the vertex. + + The save method will use this key for any KEY/ACL edges. + A key of str type means it is encrypted. + """ + self._keychain = value + + @property + def has_decrypted_keys(self) -> Optional[bool]: + """ + Does the vertex have a decrypted keys? + + If the vertex contains a KEY, ACL or DATA edge and if the key is bytes, then the key is decrypted. + If it is a str type, then it is encrypted. + """ + if len(self._keychain) > 0: + for e in self.edges: + if e.edge_type in [EdgeType.KEY, EdgeType.DATA]: + all_decrypted = True + for key in self._keychain: + if isinstance(key, bytes) is False: + all_decrypted = False + break + return all_decrypted + return None + + @property + def uid(self): + """ + Get the vertex UID. + + Once set, don't allow it to be changed. + """ + return self._uid + + def get_edge(self, vertex: DAGVertex, edge_type: EdgeType) -> DAGEdge: + high_edge = None + high_version = -1 + for edge in self.edges: + # Get all the edge point at the same vertex. + # Don't include DATA edges. + if edge.head_uid == vertex.uid and edge.edge_type == edge_type: + if edge.version > high_version: + high_version = edge.version + high_edge = edge + return high_edge + + def get_highest_edge_version(self, head_uid: str) -> (int, Optional[DAGEdge]): + """ + Find the highest DAGEdge version of all edge types. + + :param head_uid: + :return: + """ + + high_edge = None + high_version = -1 + for edge in self.edges: + # Get all the edge point at the same vertex. + # Don't include DATA edges. + if edge.head_uid == head_uid: + if edge.version > high_version: + high_edge = edge + high_version = edge.version + return high_version, high_edge + + def edge_count(self, vertex: DAGVertex, edge_type: EdgeType) -> int: + """ + Get the number of edges between two vertices. + + :param vertex: + :param edge_type: + :return: + """ + count = 0 + for edge in self.edges: + if edge.head_uid == vertex.uid and edge.edge_type == edge_type: + count += 1 + return count + + def edge_by_type(self, vertex: DAGVertex, edge_type: EdgeType) -> List[DAGEdge]: + edge_list = [] + for edge in self.edges: + if edge.edge_type == edge_type and edge.head_uid == vertex.uid: + edge_list.append(edge) + return edge_list + + @property + def has_data(self) -> bool: + + """ + Does this vertex contain a DATA edge? + + :return: True if vertex has a DATA edge. + """ + + for item in self.edges: + if item.edge_type == EdgeType.DATA: + return True + return False + + def get_data(self, index: Optional[int] = None) -> Optional[DAGEdge]: + """ + Get data edge + + If the index is None or 0, the latest data edge will be returned. + A positive and negative, non-zero, index will return the same data. + It will be the absolute value of the index from the latest data. + This means the 1 or -1 will return the prior data. + + If there is no data, None is returned. + + :param index: + :return: + """ + + data_list = self.edge_by_type(self, EdgeType.DATA) + data_count = len(data_list) + if data_count == 0: + return None + + # If the index is None, get the latest. + if index is None or index == 0: + index = -1 + # Since -1 is the current, switch index to a negative number and subtract one more. 
+ # For example, 1 means prior, -1 would be the latest, so we need to subtract one to get -2. + elif index > 0: + index *= -1 + index -= 1 + # If already a negative index, just subtract one. + else: + index -= 1 + + try: + data = data_list[index] + except IndexError: + raise ValueError(f"The index is not valid. Currently there are {data_count} data edges") + + return data + + def add_data(self, content: Any, path: Optional[str] = None, modified: bool = True, + from_load: bool = False, needs_encryption: bool = True): + + """ + Add a DATA edge to the vertex. + + :param content: The content to store in the DATA edge. + :param path: Simple string tag to identify the edge. + :param modified: Does this modify the content? + By default, adding a DATA edge will flag that the edge has been modified. + If loading, modified will be set to False. + :param from_load: This call is being performed the load() method. + Do not validate adding data. + :param needs_encryption: Default is True. + Does the content need to be encrypted? + """ + + self.debug(f"connect {self.uid} to DATA edge", level=1) + + # Are we trying to add DATA to a deleted vertex? + + if self.active is False: + # If deleted, there will not be a KEY to decrypt the data. + # Throw an exception if not from the loading method. + if from_load is False: + raise DAGDeletionException("This vertex is not active. Cannot add DATA edge.") + # If from loading, do not add and do not throw an exception. + return + + # Make sure the vertex belongs before auto saving. If it does not belong, it's just an orphan right now. + # This only is checked if using this module is used to create the graph. + if self.belongs_to_a_vertex is False and from_load is False: + raise DAGVertexException(f"Before adding data, connect this vertex {self.uid} to another vertex.") + + # Make sure that we have a KEY. + # Allow a DATA edge to be connected to the root vertex, which will not have a KEY edge. + # Or if we are loading, allow out of sync edges. + + if needs_encryption is True: + found_key_edge = self.dag.get_root == self or from_load is True + if found_key_edge is False: + for edge in self.edges: + if edge.edge_type == EdgeType.KEY: + found_key_edge = True + if found_key_edge is False: + raise DAGKeyException(f"Cannot add DATA edge without a KEY edge for vertex {self.uid}.") + + # Get the prior data, set the version and inactive the prior data. + version = 0 + prior_data = self.get_data() + if prior_data is not None: + version = prior_data.version + 1 + prior_data.active = False + + # The tail UID is the UID of the vertex. Since data loops back to the vertex, the head UID is the same. + self.edges.append( + DAGEdge( + vertex=self, + edge_type=EdgeType.DATA, + head_uid=self.uid, + version=version, + content=content, + path=path, + modified=modified, + from_load=from_load, + needs_encryption=needs_encryption + ) + ) + + # If using a history level, we want to remove edges if we exceed the history level. + # The history level is per edge type. + # It's FIFO, so we will remove the first edge type if we exceed the history level. + if self.dag.history_level > 0: + data_count = self.data_count() + while data_count > self.dag.history_level: + for index in range(0, len(self.edges) - 1): + if self.edges[index].edge_type == EdgeType.DATA: + del self.edges[index] + data_count -= 1 + break + + self.dag.do_auto_save() + + def data_count(self): + return self.edge_count(self, EdgeType.DATA) + + def data_delete(self): + + # Get the DATA edge. + # It will be a reference to itself. 
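+        # data_delete() is the counterpart of add_data(). Illustrative flow
+        # (names are hypothetical; dag.add_vertex is assumed to exist on the DAG class):
+        #   machine = dag.add_vertex(name="machine-1")
+        #   machine.belongs_to_root(EdgeType.KEY)
+        #   machine.add_data({"host": "10.0.0.1"})   # dict content is JSON-encoded, then flagged for encryption
+        #   machine.data_delete()                    # deactivates the DATA edge via a DELETION edge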
+ data_edge = self.get_edge(self, EdgeType.DATA) + if data_edge is None: + self.debug("cannot delete the data, no data edge exists.") + + data_edge.active = False + + self.belongs_to( + vertex=self, + edge_type=EdgeType.DELETION + ) + self.debug(f"deleted data edge for {self.uid}") + + @property + def latest_data_version(self): + version = -1 + for edge in self.edges: + if edge.edge_type == EdgeType.DATA and edge.version > version: + version = edge.version + return version + + @property + def content(self) -> Optional[Union[str, bytes]]: + """ + Get the content of the active DATA edge. + + If the content is a str, then the content is encrypted. + """ + data_edge = self.get_data() + if data_edge is None: + return None + return data_edge.content + + @property + def content_as_dict(self) -> Optional[dict]: + """ + Get the content from the active DATA edge as a dictionary. + :return: Content as a dictionary. + """ + data_edge = self.get_data() + if data_edge is None: + return None + return data_edge.content_as_dict + + @property + def content_as_str(self) -> Optional[str]: + """ + Get the content from the active DATA edge as a str. + :return: Content as a str. + """ + + data_edge = self.get_data() + if data_edge is None: + return None + return data_edge.content_as_str + + def content_as_object(self, meta_class: pydantic._internal._model_construction.ModelMetaclass) -> ( + Optional)[BaseModel]: + """ + Get the content as a pydantic based object. + + :param meta_class: The class to return + :return: + """ + data_edge = self.get_data() + if data_edge is None: + return None + + return data_edge.content_as_object(meta_class) + + @property + def has_key(self) -> bool: + + """ + Does this vertex contain any KEY or ACL edges? + + :return: True if vertex has a KEY or ACL edge. + """ + + for item in self.edges: + if item.edge_type == EdgeType.KEY: + return True + return False + + def belongs_to(self, vertex: DAGVertex, edge_type: EdgeType, content: Optional[Any] = None, + path: Optional[str] = None, modified: bool = True, from_load: bool = False): + + """ + Connect a vertex to another vertex (as the owner). + + This will create an edge between this vertex and the passed in vertex. + The passed in vertex will own this vertex. + + If the edge_type is a KEY or ACL, data will be treated as a key. If a DATA edge already exists, the + edge_type will be changed to a KEY, if not a KEY or ACL edge_type. + + :param vertex: The vertex has this vertex. + :param edge_type: The edge type that connects the two vertices. + :param content: Data to store as the edges content. + :param path: Text tag for the edge. + :param modified: Does adding this edge modify the stored DAG? + :param from_load: Is being connected from load() method? + :return: + """ + + self.debug(f"connect {self.uid} to {vertex.uid} with edge type {edge_type.value}", level=1) + + if vertex is None: + raise ValueError("Vertex is blank.") + if self.uid == self.dag.uid and not (edge_type == EdgeType.DATA or edge_type == EdgeType.DELETION): + if from_load is False: + raise DAGIllegalEdgeException(f"Cannot create edge to self for edge type {edge_type}.") + self.dag.debug(f"vertex {self.uid} , the root vertex, " + f"attempted to create '{edge_type.value}' edge to self, skipping.") + return + + # Cannot make an edge to the same vertex, unless the edge type is a DELETION. + # Normally an edge to self is a DATA type, use add_data for that. + # A DELETION edge to self is allowed. + # Just means the DATA edge is being deleted. 
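+        # For the normal (non-self) case, illustrative use (hypothetical names): make
+        # `machine` the owner of `user` with a KEY edge, so machine's key can decrypt
+        # user's key (see Maintenance._get_keychain):
+        #   user.belongs_to(machine, EdgeType.KEY)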
+ if self.uid == vertex.uid and not (edge_type == EdgeType.DATA or edge_type == EdgeType.DELETION): + if from_load is False: + raise DAGIllegalEdgeException(f"Cannot create edge to self for edge type {edge_type}.") + self.dag.debug(f"vertex {self.uid} attempted to make '{edge_type.value}' to self, skipping.") + return + + # Figure out what version of the edge we are. + + version, _ = self.get_highest_edge_version(head_uid=vertex.uid) + + # If the new edge is not DELETION + if edge_type != EdgeType.DELETION: + + # Find the current active edge for this edge type to make it inactive. + current_edge_by_type = self.get_edge(vertex, edge_type) + if current_edge_by_type is not None: + current_edge_by_type.active = False + + # If we are adding a non-DELETION edge, it will inactivate the DELETION edge. + highest_deletion_edge = self.get_edge(vertex, EdgeType.DELETION) + if highest_deletion_edge is not None: + highest_deletion_edge.active = False + + # Should we activate the vertex again? + if self.active is False: + + # If the vertex is already inactive, and we are trying to delete, return. + if edge_type == EdgeType.DELETION: + return + + self.dag.logger.info(f"vertex {self.uid} was inactive; reactivating vertex.") + self.active = True + + # Create and append a new DAGEdge instance. + # Disable the auto saving after the content is changed since the edge has not been appended yet. + # Once the edge is created, disable blocking auto save for content changes. + edge = DAGEdge( + vertex=self, + edge_type=edge_type, + head_uid=vertex.uid, + version=version + 1, + block_content_auto_save=True, + content=content, + path=path, + modified=modified, + from_load=from_load + ) + edge.block_content_auto_save = False + + self.edges.append(edge) + if self.uid not in vertex.has_uid: + vertex.has_uid.append(self.uid) + + self.dag.do_auto_save() + + def belongs_to_root(self, edge_type: EdgeType, path: Optional[str] = None): + + """ + Connect the vertex to the root vertex. + + :param edge_type: The type of edge to use for the connection. + :param path: Short tag for this edge. + :return: + """ + + self.debug(f"connect {self.uid} to root", level=1) + + if self.uid == self.dag.uid: + raise DAGIllegalEdgeException("Cannot create edge to self.") + + if self.active is False: + raise DAGDeletionException("This vertex is not active. Cannot connect to root.") + + # We are adding the root, we can enable auto save now. + # We can get the correct stream id with an edge to the root vertex. + self.belongs_to(self.dag.get_root, edge_type=edge_type, path=path) + + self.dag.allow_auto_save = True + self.dag.do_auto_save() + + def has_vertices(self, edge_type: Optional[EdgeType] = None, allow_inactive: bool = False, + allow_self_ref: bool = False) -> List[DAGVertex]: + + """ + Get a list of vertices that belong to this vertex. + :return: List of DAGVertex + """ + + vertices = [] + for uid in self.has_uid: + + # This will remove DATA and DATA that have changed to DELETION edges. + # Prevent looping. + if uid == self.uid and allow_self_ref is False: + continue + + vertex = self.dag.get_vertex(uid) + if edge_type is not None: + edge = vertex.get_edge(self, edge_type=edge_type) + if edge is not None: + vertices.append(vertex) + + # If no edge type was specified, do not return DATA and DELETION. + # Also do not include vertices that are inactive by default. 
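+            # (In this branch edge_type is None, so every active child vertex is returned
+            # regardless of edge type; pass allow_inactive=True to also get inactive ones.)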
+ elif edge_type != EdgeType.DATA and edge_type != EdgeType.DELETION: + if vertex.active is True or allow_inactive is True: + vertices.append(vertex) + + return vertices + + def has(self, vertex: DAGVertex, edge_type: Optional[EdgeType] = None) -> bool: + + """ + Does this vertex have the passed in vertex? + + :return: True if request vertex belongs to this vertex. + False if it does not. + """ + + vertices = self.has_vertices(edge_type=edge_type) + return vertex in vertices + + def belongs_to_vertices(self) -> List[DAGVertex]: + """ + Get a list of vertices that this vertex belongs to + :return: + """ + + vertices = [] + for edge in self.edges: + # If the edge is not a DATA or DELETION type, and the edge is the highest version/active + if edge.edge_type != EdgeType.DATA and edge.edge_type != EdgeType.DELETION and edge.active is True: + + # The head will point at the remote vertex. + # If it is active, and not already in the list, add it to the list of vertices this vertex belongs to. + vertex = self.dag.get_vertex(edge.head_uid) + if vertex.active is True and vertex not in vertices: + vertices.append(vertex) + return vertices + + @property + def belongs_to_a_vertex(self) -> bool: + """ + Does this vertex belong to another vertex? + :return: + """ + + # If this is the root vertex, return True. + # Where this is being called should handle operations involving the root vertex. + if self.dag.get_root == self: + return True + + return len(self.belongs_to_vertices()) > 0 + + def disconnect_from(self, vertex: DAGVertex, path: Optional[str] = None): + + """ + Disconnect this vertex from another vertex. + + This will add a DELETION edge between two vertices. + If the vertex no longer belongs to another vertex, the vertex will be deleted. + + :param vertex: The vertex this vertex belongs to + :param path: an Optional path for the DELETION edge. + :return: + """ + + if vertex is None: + raise ValueError("Vertex is blank.") + + # Flag all the edges as inactive. + for edge in self.edges: + if edge.head_uid == vertex.uid and edge.edge_type: + edge.active = False + + # Add the DELETION edge + self.belongs_to( + vertex=vertex, + edge_type=EdgeType.DELETION, + path=path + ) + + # If all the KEY edges are inactive now, the DATA edge needs to be made inactive. + # There is no longer a KEY edge to decrypt the DATA. + has_active_key_edge = False + for edge in self.edges: + if edge.edge_type == EdgeType.KEY and edge.active is True: + has_active_key_edge = True + break + if has_active_key_edge is False: + for edge in self.edges: + if edge.edge_type == EdgeType.DATA: + edge.active = False + + if self.belongs_to_a_vertex is False: + self.debug(f"vertex {self.uid} is now not active", level=1) + self.active = False + + def delete(self, ignore_vertex: Optional[DAGVertex] = None): + + """ + Delete a vertex + + Deleting a vertex will inactivate the vertex. + It will also inactivate any vertices, and their edges, that belong to the vertex. + It will not inactivate a vertex that belongs to multiple vertices. + :return: + """ + + def _delete(vertex, prior_vertex): + + # Do not delete the root vertex + if vertex.uid == self.dag.uid: + self.debug(f" * vertex is root, cannot delete root", level=2) + return + + self.debug(f"> checking vertex {vertex.uid}") + + # Should we ignore a vertex? + # If deleting an edge, we want to ignore the vertex that owns the edge. + # This prevents circular calls. 
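+            # Related usage note (hypothetical names): disconnect_from() removes a single
+            # link, while delete() walks everything this vertex owns:
+            #   user.disconnect_from(machine)   # adds a DELETION edge; user goes inactive if orphaned
+            #   machine.delete()                # deactivates machine and the vertices only it owns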
+ if ignore_vertex is not None and vertex.uid == ignore_vertex.uid: + return + + # Get a list of vertices that belong to this vertex (v) + has_v = vertex.has_vertices() + + if len(has_v) > 1: + self.debug(f" * vertex has vertices that belong to it.", level=2) + for v in has_v: + self.debug(f" checking {v.uid}") + _delete(v, vertex) + else: + self.debug(f" * vertex {vertex.uid} has NO vertices.", level=2) + + for e in list(vertex.edges): + if e.edge_type != EdgeType.DATA and (prior_vertex is None or e.head_uid == prior_vertex.uid): + e.delete() + if vertex.belongs_to_a_vertex is False: + self.debug(f" * inactive vertex {vertex.uid}") + vertex.active = False + + self.debug(f"DELETING vertex {self.uid}", level=3) + + # Perform the DELETION edges save in one batch. + # Get the current allowed auto save state, and disable auto save. + current_allow_auto_save = self.dag.allow_auto_save + self.dag.allow_auto_save = False + + _delete(self, None) + + # Restore the allow auto save and trigger auto save() + self.dag.allow_auto_save = current_allow_auto_save + self.dag.do_auto_save() + + def walk_down_path(self, path: Union[str, List[str]]) -> Optional[DAGVertex]: + + """ + Walk the vertices using the path and return the vertex starting at this vertex. + + :param path: An array of path string, or string where the path is joined with a "/" (i.e., think URL) + :return: DAGVertex is the path completes, None is failure. + """ + + self.debug(f"walking path in vertex {self.uid}", level=2) + + # If the path is str, break it into an array. Get rid of leading / + if isinstance(path, str) is True: + self.debug("path is str, break into array", level=2) + if path.startswith("/") is True: + path = path[1:] + path = path.split("/") + + # Unshift the path + + current_path = path[0] + path = path[1:] + self.debug(f"current path: {current_path}", level=2) + self.debug(f"path left: {path}", level=2) + + # Check the DATA edges. + # If a DATA edge has the current path, return this vertex. + for edge in self.edges: + if edge.edge_type != EdgeType.DATA: + continue + if edge.path == current_path: + return self + + # Check the vertices that belong to this vertex for edges going to this vertex and the path matches. + for vertex in self.has_vertices(): + self.debug(f"vertex {self.uid} has {vertex.uid}", level=2) + for edge in vertex.edges: + # If the edge matches the current path, the head of the edge is this vertex, a route exists. + if edge.path == current_path and edge.head_uid == self.uid: + # If there is no path left, this is our vertex + if len(path) == 0: + return vertex + # If there is still more path, call vertex to walk more of the path. + else: + return vertex.walk_down_path(path) + return None + + def get_paths(self) -> List[str]: + """ + Get paths from this vertex to vertex owned by this vertex. 
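+
+        For example (illustrative; the path value is hypothetical): if a vertex owned
+        by this vertex has an edge back to this vertex with path "users", then
+        get_paths() will include "users" and walk_down_path("users") will return
+        that owned vertex.
+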
+ :return: List of string paths + """ + + paths = [] + for vertex in self.has_vertices(): + for edge in vertex.edges: + if edge.path is None or edge.path == "": + continue + paths.append(edge.path) + + return paths diff --git a/libs/discovery_common-1.0.26-py3-none-any.whl b/libs/discovery_common-1.0.26-py3-none-any.whl deleted file mode 100644 index 2ad348f83..000000000 Binary files a/libs/discovery_common-1.0.26-py3-none-any.whl and /dev/null differ diff --git a/libs/keeper_dag-1.0.20-py3-none-any.whl b/libs/keeper_dag-1.0.20-py3-none-any.whl deleted file mode 100644 index e08abaa0a..000000000 Binary files a/libs/keeper_dag-1.0.20-py3-none-any.whl and /dev/null differ diff --git a/requirements.txt b/requirements.txt index a0d29ef8c..12407f4ae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,16 +15,4 @@ cryptography>=39.0.1 protobuf>=3.19.0 keeper-secrets-manager-core>=16.6.0 aiortc; python_version>='3.8' and python_version<'3.13' - pydantic>=2.6.4 - -# pip uninstall keeper-dag -y -# python3 setup.py wheel --whlsrc ~/src/keeper-dag --libdir $PWD/libs --reqfiles $PWD/requirements.txt -# pip install $(ls libs/keeper_dag-*) -./libs/keeper_dag-1.0.20-py3-none-any.whl - - -# pip uninstall discovery-common -y -# python3 setup.py wheel --whlsrc ~/src/discovery-common --libdir $PWD/libs --reqfiles $PWD/requirements.txt -# pip install $(ls libs/discovery_common-*) -./libs/discovery_common-1.0.26-py3-none-any.whl diff --git a/setup.cfg b/setup.cfg index a7aecbb6a..8f916677b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -42,8 +42,7 @@ install_requires = tabulate websockets aiortc; python_version>='3.8' and python_version<'3.13' - keeper-dag @ https://github.com/Keeper-Security/Commander/raw/refs/heads/master/libs/keeper_dag-1.0.20-py3-none-any.whl - discovery_common @ https://github.com/Keeper-Security/Commander/raw/refs/heads/master/libs/discovery_common-1.0.26-py3-none-any.whl + pydantic>=2.6.4 [options.extras_require] test =