From 5b621f66595f4199f87b2b60caadb3409d48b08d Mon Sep 17 00:00:00 2001 From: itdependsnetworks Date: Sat, 22 Jul 2023 15:54:08 -0400 Subject: [PATCH] Add filters for regex_findall, regex_match, regex_search, regex_split, regex_sub --- .../code_reference/{hostname.md => regex.md} | 4 +- docs/user/include_jinja_list.md | 6 +- docs/user/lib_use_cases_jinja_filters.md | 72 ++++++++ mkdocs.yml | 1 + netutils/hostname.py | 35 ---- netutils/regex.py | 158 ++++++++++++++++++ netutils/utils.py | 6 +- tests/unit/test_hostname.py | 23 --- tests/unit/test_regex.py | 98 +++++++++++ 9 files changed, 341 insertions(+), 62 deletions(-) rename docs/dev/code_reference/{hostname.md => regex.md} (56%) delete mode 100644 netutils/hostname.py create mode 100644 netutils/regex.py delete mode 100644 tests/unit/test_hostname.py create mode 100644 tests/unit/test_regex.py diff --git a/docs/dev/code_reference/hostname.md b/docs/dev/code_reference/regex.md similarity index 56% rename from docs/dev/code_reference/hostname.md rename to docs/dev/code_reference/regex.md index 1242cd0f..9c18b84c 100644 --- a/docs/dev/code_reference/hostname.md +++ b/docs/dev/code_reference/regex.md @@ -1,5 +1,5 @@ -# Hostname +# Regex -::: netutils.hostname +::: netutils.regex options: show_submodules: True \ No newline at end of file diff --git a/docs/user/include_jinja_list.md b/docs/user/include_jinja_list.md index da390f02..4be27d36 100644 --- a/docs/user/include_jinja_list.md +++ b/docs/user/include_jinja_list.md @@ -19,7 +19,6 @@ | paloalto_panos_brace_to_set | netutils.config.conversion.paloalto_panos_brace_to_set | | fqdn_to_ip | netutils.dns.fqdn_to_ip | | is_fqdn_resolvable | netutils.dns.is_fqdn_resolvable | -| hostname_regex | netutils.hostname.hostname_regex | | abbreviated_interface_name | netutils.interface.abbreviated_interface_name | | abbreviated_interface_name_list | netutils.interface.abbreviated_interface_name_list | | canonical_interface_name | netutils.interface.canonical_interface_name | @@ 
-70,6 +69,11 @@ | encrypt_type7 | netutils.password.encrypt_type7 | | get_hash_salt | netutils.password.get_hash_salt | | tcp_ping | netutils.ping.tcp_ping | +| regex_findall | netutils.regex.regex_findall | +| regex_match | netutils.regex.regex_match | +| regex_search | netutils.regex.regex_search | +| regex_split | netutils.regex.regex_split | +| regex_sub | netutils.regex.regex_sub | | longest_prefix_match | netutils.route.longest_prefix_match | | uptime_seconds_to_string | netutils.time.uptime_seconds_to_string | | uptime_string_to_seconds | netutils.time.uptime_string_to_seconds | diff --git a/docs/user/lib_use_cases_jinja_filters.md b/docs/user/lib_use_cases_jinja_filters.md index d433d54d..9ad12cf5 100644 --- a/docs/user/lib_use_cases_jinja_filters.md +++ b/docs/user/lib_use_cases_jinja_filters.md @@ -108,6 +108,78 @@ When you run `jinja2_environment.py` the output will be: The version of 192.168.0.1/24 is IPv4. ``` +## regex Convenience Functions + +When adding the netutils functions to your jinja2 environment, you also gain access to the built-in `re` python library using these Jinja2 filters. + +```python + "regex_findall": "regex.regex_findall", + "regex_match": "regex.regex_match", + "regex_search": "regex.regex_search", + "regex_split": "regex.regex_split", + "regex_sub": "regex.regex_sub", +``` + +These functions will always return a json serializable object and not a complex object like `re.Match` or similar, to better serve the primary use case of functions to be used as Jinja2 filters. After all, they are simply small wrappers around Python `re` functions, which should be preferred when not using Jinja2 or similar templating language. + +Below is code you can drop into your Python shell to help bring to life how these regex functions can be used. 
+ +```python +from jinja2 import Environment, BaseLoader +from netutils.utils import jinja2_convenience_function + +env = Environment(loader=BaseLoader()) +env.filters.update(jinja2_convenience_function()) + +DATA = { + "device": "USSCAMS07", + "comma_seperated_devices": "NYC-RT01,NYC-RT02,SFO-SW01,SFO-RT01" +} + +TEMPLATE_STRING = """ +{% set device_details = '([A-Z]{2})([A-Z]{2})([A-Z]{3})(\d*)' | regex_match(device) %} + +Country: {{ ('^([A-Z]{2})([A-Z]{2})([A-Z]{3})(\d*)' | regex_search(device))[0] }} +STATE: {{ device_details[1] }} +FUNCTION: {{ device_details[2] }} + +ALL DEVICES: +{% for router in ',' | regex_split(comma_seperated_devices) -%} + - {{ router }} +{% endfor %} + +ONLY ROUTERS: +{% for router in ',' | regex_split(comma_seperated_devices) -%} +{% if '-RT' | regex_search(router) -%} + - {{ router }} +{% endif -%} +{% endfor %} +""" + +template = env.from_string(TEMPLATE_STRING, DATA) +result = template.render() +print(result) +``` + +Which would result in the following output. 
+ +```text +Country: US +STATE: SC +FUNCTION: AMS + +ALL DEVICES: +- NYC-RT01 +- NYC-RT02 +- SFO-SW01 +- SFO-RT01 + +ONLY ROUTERS: +- NYC-RT01 +- NYC-RT02 +- SFO-RT01 +``` + ## Netutils to Jinja2 Filters List diff --git a/mkdocs.yml b/mkdocs.yml index b8d0a7d5..981c82da 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -138,6 +138,7 @@ nav: - Password: "dev/code_reference/password.md" - Ping: "dev/code_reference/ping.md" - Protocol Mapper: "dev/code_reference/protocol_mapper.md" + - Regex: "dev/code_reference/regex.md" - Route: "dev/code_reference/route.md" - Time: "dev/code_reference/time.md" - Utils: "dev/code_reference/utils.md" diff --git a/netutils/hostname.py b/netutils/hostname.py deleted file mode 100644 index c99bf137..00000000 --- a/netutils/hostname.py +++ /dev/null @@ -1,35 +0,0 @@ -import re - - -def hostname_regex(hostname, match_regex_string): - """Given a hostname string and regex string, return the regex match object or `None` if no match found - - This is useful in two primary use cases: - - 1. Truthy conditional check that a hostname matches a given regex - 2. 
Returning regex capture groups from the hostname string - - Args: - hostname: String representation of the device hostname - match_regex_string: Regex string to match against - - Returns: - Regex match object or None if no match found - - Extamples: - >>> from netutils.hostname import hostname_regex - >>> print("South Carolina" if hostname_regex("USSCAMS07", ".+SC.+\d\d") else "Not South Carolina") - South Carolina - >>> - >>> match = hostname_regex("USSCAMS07", "([A-Z]{2})([A-Z]{2})([A-Z]{3})(\d*)") - >>> match[1] - 'US' - >>> match[2] - 'SC' - >>> match[3] - 'AMS' - >>> match[4] - '07' - - """ - return re.match(match_regex_string, hostname) diff --git a/netutils/regex.py b/netutils/regex.py new file mode 100644 index 00000000..fbbd83c4 --- /dev/null +++ b/netutils/regex.py @@ -0,0 +1,158 @@ +"""Utilities to expose regex functions, primarily for Jinja filters.""" + +import re +import typing as t + + +def _match_object(match: t.Optional[t.Match[str]]) -> t.Union[t.List[str], str, None]: + """Helper method to better 'serialize' a re.Match object.""" + if not match: + return None + if match.groups(): + results = [] + for group in match.groups(): + results.append(group) + return results + return str(match.group()) + + +def regex_findall(pattern: str, string: str) -> t.List[str]: + r"""Given a regex pattern and string, return all non-overlapping matches of pattern in string, as a list of strings. + + The main purpose of this function is to provide a Jinja2 filter as this is simply a wrapper around `re.findall`. + + Args: + pattern: Regex string to match against. + string: String to check against. + + Returns: + List of matches; when there is no match the list will be empty. 
+ + Examples: + >>> from netutils.regex import regex_findall + >>> match = regex_findall("\w\w\w-RT\d\d", "NYC-RT01,NYC-RT02,SFO-SW01,SFO-RT01") + >>> len(match) + 3 + >>> match[0] + 'NYC-RT01' + + """ + return re.findall(pattern, string) + + +def regex_match(pattern: str, string: str) -> t.Union[t.List[str], str, None]: + r"""Given a regex pattern and string, return `None` if there is no match at the start of the string, the list from `re.Match.groups()` if the pattern uses capture groups, or the full match from `re.Match.group()` otherwise. + + This is useful in the following use cases: + + 1. Truthy conditional check that a string matches a given regex. + 2. Returning regex capture groups from the string. + 3. Matching for the start of a string, see `regex_search` when you do not want only start of string matching. + + The main purpose of this function is to provide a Jinja2 filter as this is simply a wrapper around `re.match`. + + Args: + pattern: Regex string to match against. + string: String to check against. + + Returns: + List of capture groups, the matched string, or None if no match is found. + + Examples: + >>> from netutils.regex import regex_match + >>> print("South Carolina" if regex_match(".+SC.+\d\d", "USSCAMS07") else "Not South Carolina") + South Carolina + >>> + >>> match = regex_match("([A-Z]{2})([A-Z]{2})([A-Z]{3})(\d*)", "USSCAMS07") + >>> match[0] + 'US' + >>> match[1] + 'SC' + >>> match[2] + 'AMS' + >>> match[3] + '07' + + """ + return _match_object(re.match(pattern, string)) + + +def regex_search(pattern: str, string: str) -> t.Union[t.List[str], str, None]: + r"""Given a regex pattern and string, return `None` if there is no match, the list from `re.Match.groups()` if the pattern uses capture groups, or the full match from `re.Match.group()` otherwise. + + The main purpose of this function is to provide a Jinja2 filter as this is simply a wrapper around `re.search`. + + Args: + pattern: Regex string to match against. + string: String to check against. + + Returns: + List of capture groups, the matched string, or None if no match is found. 
+ + Examples: + >>> from netutils.regex import regex_search + >>> print("South Carolina" if regex_search(".+SC.+\d\d", "USSCAMS07") else "Not South Carolina") + South Carolina + >>> + >>> match = regex_search("^([A-Z]{2})([A-Z]{2})([A-Z]{3})(\d*)", "USSCAMS07") + >>> match[0] + 'US' + >>> match[1] + 'SC' + >>> match[2] + 'AMS' + >>> match[3] + '07' + + """ + return _match_object(re.search(pattern, string)) + + +def regex_split(pattern: str, string: str, maxsplit: int = 0) -> t.List[str]: + """Given a regex pattern and string, return the string split on the pattern as a list, or a single element list of the original value if there is no match. + + The main purpose of this function is to provide a Jinja2 filter as this is simply a wrapper around `re.split`. + + Args: + pattern: Regex string to match against. + string: String to check against. + maxsplit: The maximum number of splits; 0 means no limit. + + Returns: + List of strings produced by the split, or a single element list of the original value if no match. + + Examples: + >>> from netutils.regex import regex_split + >>> match = regex_split(",", "NYC-RT01,NYC-RT02,SFO-SW01,SFO-RT01") + >>> match[0] + 'NYC-RT01' + >>> match[3] + 'SFO-RT01' + """ + return re.split(pattern, string, maxsplit) + + +def regex_sub(pattern: str, repl: str, string: str, count: int = 0) -> str: + """Given a regex pattern, replacement, and string, replace the pattern within the string and return the result. + + The main purpose of this function is to provide a Jinja2 filter as this is simply a wrapper around `re.sub`. + + Args: + pattern: Regex string to match against. + repl: Replacement for the characters that were matched by the pattern. + string: String to check against. + count: The maximum number of replacements; 0 means replace all. 
+ + Returns: + The string with matches of the pattern replaced by `repl`, or the original string if there is no match. + + Examples: + >>> from netutils.regex import regex_sub + >>> match = regex_sub(",", " ", "NYC-RT01,NYC-RT02,SFO-SW01,SFO-RT01") + >>> match + 'NYC-RT01 NYC-RT02 SFO-SW01 SFO-RT01' + >>> match = regex_sub("(ROUTER|RTR)", "RT", "NYC-ROUTER01,NYC-ROUTER02,NYC-RTR03") + >>> match + 'NYC-RT01,NYC-RT02,NYC-RT03' + """ + return re.sub(pattern, repl, string, count) diff --git a/netutils/utils.py b/netutils/utils.py index 34f31545..9580c278 100644 --- a/netutils/utils.py +++ b/netutils/utils.py @@ -20,7 +20,6 @@ "section_config": "config.compliance.section_config", "fqdn_to_ip": "dns.fqdn_to_ip", "is_fqdn_resolvable": "dns.is_fqdn_resolvable", - "hostname_regex": "hostname.hostname_regex", "interface_range_expansion": "interface.interface_range_expansion", "interface_range_compress": "interface.interface_range_compress", "split_interface": "interface.split_interface", @@ -69,6 +68,11 @@ "encrypt_juniper_type9": "password.encrypt_juniper_type9", "get_hash_salt": "password.get_hash_salt", "tcp_ping": "ping.tcp_ping", + "regex_findall": "regex.regex_findall", + "regex_match": "regex.regex_match", + "regex_search": "regex.regex_search", + "regex_split": "regex.regex_split", + "regex_sub": "regex.regex_sub", "longest_prefix_match": "route.longest_prefix_match", "vlanlist_to_config": "vlan.vlanlist_to_config", "vlanconfig_to_list": "vlan.vlanconfig_to_list", diff --git a/tests/unit/test_hostname.py b/tests/unit/test_hostname.py deleted file mode 100644 index 3d340816..00000000 --- a/tests/unit/test_hostname.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Test for the Hostname based functions.""" - -from netutils import hostname - - -def test_truthy_happy_path_case(): - match = hostname.hostname_regex("USSCAMS07", ".+SC.+\d\d") - assert bool(match) is True - - -def test_truthy_sad_path_case(): - match = hostname.hostname_regex("USSCAMS07", "foobar") - assert bool(match) is False - - -def 
test_capture_group_happy_path_case(): - match = hostname.hostname_regex("USSCAMS07", "([A-Z]{2})([A-Z]{2})([A-Z]{3})(\d*)") - assert match[1] == "US" - - -def test_capture_group_sad_path_case(): - match = hostname.hostname_regex("USSCAMS07", "(foobar)") - assert match is None diff --git a/tests/unit/test_regex.py b/tests/unit/test_regex.py new file mode 100644 index 00000000..800ba8f4 --- /dev/null +++ b/tests/unit/test_regex.py @@ -0,0 +1,98 @@ +"""Test for the regex based functions.""" + +from netutils import regex + + +def test_regex_findall_bool_true(): + match = regex.regex_findall(r"CAMS\d\d", "USSCAMS07") + assert bool(match) is True + + +def test_regex_findall_bool_false(): + match = regex.regex_findall("foobar", "USSCAMS07") + assert bool(match) is False + + +def test_regex_findall_list(): + match = regex.regex_findall(r"\w\w\w-RT\d\d", "NYC-RT01,NYC-RT02,SFO-SW01,SFO-RT01") + assert match[0] == "NYC-RT01" + assert match[1] == "NYC-RT02" + + +def test_regex_findall_is_none(): + match = regex.regex_findall("(foobar)", "USSCAMS07") + assert match == [] + + +def test_regex_match_bool_true(): + match = regex.regex_match(r".+SC.+\d\d", "USSCAMS07") + assert bool(match) is True + + +def test_regex_match_bool_false(): + match = regex.regex_match("foobar", "USSCAMS07") + assert bool(match) is False + + +def test_regex_match_list(): + match = regex.regex_match(r"([A-Z]{2})([A-Z]{2})([A-Z]{3})(\d*)", "USSCAMS07") + assert match[0] == "US" + assert match[1] == "SC" + match = regex.regex_match(r"[A-Z]{2}[A-Z]{2}[A-Z]{3}\d*", "USSCAMS07") + assert match == "USSCAMS07" + + +def test_regex_match_is_none(): + match = regex.regex_match("(foobar)", "USSCAMS07") + assert match is None + + +def test_regex_search_bool_true(): + match = regex.regex_search(r"CAMS\d\d", "USSCAMS07") + assert bool(match) is True + + +def test_regex_search_bool_false(): + match = regex.regex_search("foobar", "USSCAMS07") + assert bool(match) is False + + +def test_regex_search_list(): + match = 
regex.regex_search(r"([A-Z]{2})([A-Z]{2})([A-Z]{3})(\d*)", "USSCAMS07") + assert match[0] == "US" + assert match[1] == "SC" + match = regex.regex_search(r"[A-Z]{2}[A-Z]{2}[A-Z]{3}\d*", "USSCAMS07") + assert match == "USSCAMS07" + + +def test_regex_search_is_none(): + match = regex.regex_search("(foobar)", "USSCAMS07") + assert match is None + + +def test_regex_split_list(): + match = regex.regex_split(",", "NYC-RT01,NYC-RT02,SFO-SW01,SFO-RT01") + assert match[0] == "NYC-RT01" + assert match[1] == "NYC-RT02" + match = regex.regex_split("(.)", "NYC-RT01,NYC-RT02,SFO-SW01,SFO-RT01") + assert match[1] == "N" + match = regex.regex_split(",", "NYC-RT01,NYC-RT02,SFO-SW01,SFO-RT01", 1) + assert match[0] == "NYC-RT01" + assert match[1] == "NYC-RT02,SFO-SW01,SFO-RT01" + + +def test_regex_split_no_match(): + match = regex.regex_split(r"(\.)", "NYC-RT01,NYC-RT02,SFO-SW01,SFO-RT01") + assert match == ["NYC-RT01,NYC-RT02,SFO-SW01,SFO-RT01"] + + +def test_regex_sub_list(): + match = regex.regex_sub(",", " ", "NYC-RT01,NYC-RT02,SFO-SW01,SFO-RT01") + assert match == "NYC-RT01 NYC-RT02 SFO-SW01 SFO-RT01" + match = regex.regex_sub("(ROUTER|RTR)", "RT", "NYC-ROUTER01,NYC-ROUTER02,NYC-RTR03") + assert match == "NYC-RT01,NYC-RT02,NYC-RT03" + + +def test_regex_sub_no_match(): + match = regex.regex_sub("ABBA", "CADABBA", "NYC-RT01,NYC-RT02,SFO-SW01,SFO-RT01") + assert match == "NYC-RT01,NYC-RT02,SFO-SW01,SFO-RT01"