-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: Another approach to parse tag string with spaces to avoid long running time on regex matching. #7625
base: develop
Are you sure you want to change the base?
fix: Another approach to parse tag string with spaces to avoid long running time on regex matching. #7625
Changes from 3 commits
c1b29e4
93209d4
6366d33
85feda9
c21c3b0
6840c4d
a30d6fa
55e7b85
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -17,6 +17,31 @@ | |||||
LOG = logging.getLogger(__name__) | ||||||
|
||||||
|
||||||
def _generate_quoted_match_regex(match_pattern): | ||||||
""" | ||||||
Creates a regex on a quoted string based on a match pattern (also a regex) that is to be | ||||||
run on a string (which may contain escaped quotes) that is separated by delimiters. | ||||||
|
||||||
Parameters | ||||||
---------- | ||||||
match_pattern: (str) regex pattern to match | ||||||
delim: (str) delimiter that is respected when identifying matching groups with generated regex. | ||||||
|
||||||
Returns | ||||||
------- | ||||||
str: regex expression | ||||||
|
||||||
Examples | ||||||
------- | ||||||
match_pattern: [A-Za-z0-9\\"_:\\.\\/\\+-\\@=] | ||||||
input: createdBy=\"Test user\" ProjectName='Test App' | ||||||
output: ['"Test user"', "'Test App'"] | ||||||
|
||||||
""" | ||||||
|
||||||
return f"""(\\"(?:\\\\{match_pattern}|[^\\"\\\\]+)*\\"|""" + f"""\'(?:\\\\{match_pattern}|[^\'\\\\]+)*\')""" | ||||||
|
||||||
|
||||||
def _generate_match_regex(match_pattern, delim): | ||||||
""" | ||||||
Creates a regex string based on a match pattern (also a regex) that is to be | ||||||
|
@@ -194,6 +219,7 @@ def __init__(self, multiple_values_per_key=False): | |||||
TAG_REGEX = '[A-Za-z0-9\\"_:\\.\\/\\+-\\@=]' | ||||||
|
||||||
_pattern = r"{tag}={tag}".format(tag=_generate_match_regex(match_pattern=TAG_REGEX, delim=" ")) | ||||||
_quoted_pattern = _generate_quoted_match_regex(match_pattern=TAG_REGEX) | ||||||
|
||||||
name = "string,list" | ||||||
|
||||||
|
@@ -222,13 +248,36 @@ def convert(self, value, param, ctx): | |||||
for k in tags: | ||||||
self._add_value(result, _unquote_wrapped_quotes(k), _unquote_wrapped_quotes(tags[k])) | ||||||
else: | ||||||
groups = re.findall(self._pattern, val) | ||||||
|
||||||
if not groups: | ||||||
fail = True | ||||||
for group in groups: | ||||||
key, v = group | ||||||
self._add_value(result, _unquote_wrapped_quotes(key), _unquote_wrapped_quotes(v)) | ||||||
# Instead of parsing a full {tag}={tag} pattern, we will try to look for quoted string with spaces | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we refactor this new logic out to a separate method to unit test? There's a lot of moving parts here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is tricky because the block below
take longer time to return |
||||||
# first, remove all the spaces and start over again. | ||||||
|
||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there missing code here, or maybe duplicate comment? This comment and the one under say "first" |
||||||
# First, we need to unquote a full string | ||||||
modified_val = _unquote_wrapped_quotes(val) | ||||||
|
||||||
# Next, looking for a quote strings that contain spaces and proceed to replace them | ||||||
quoted_strings_with_spaces = re.findall(self._quoted_pattern, modified_val) | ||||||
quoted_strings_without_spaces = [self._replace_spaces(s) for s in quoted_strings_with_spaces] | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: no single character variable names |
||||||
for s, replacement in zip(quoted_strings_with_spaces, quoted_strings_without_spaces): | ||||||
modified_val = modified_val.replace(s, replacement[0]) | ||||||
|
||||||
# Finally, restart the parsing with key=value separated by (multiple) spaces. | ||||||
parsed, tags = self._multiple_space_separated_key_value_parser(modified_val) | ||||||
if parsed: | ||||||
for k in tags: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit:
Suggested change
|
||||||
v = tags[k] | ||||||
m = [i for i in quoted_strings_without_spaces if i[0] == v] | ||||||
if len(m) > 0: | ||||||
v = self._restore_spaces(*m[0]) | ||||||
self._add_value(result, _unquote_wrapped_quotes(k), _unquote_wrapped_quotes(v)) | ||||||
else: | ||||||
# Otherwise, fall back to the original mechanism. | ||||||
groups = re.findall(self._pattern, val) | ||||||
|
||||||
if not groups: | ||||||
fail = True | ||||||
for group in groups: | ||||||
key, v = group | ||||||
self._add_value(result, _unquote_wrapped_quotes(key), _unquote_wrapped_quotes(v)) | ||||||
|
||||||
if fail: | ||||||
return self.fail( | ||||||
|
@@ -286,6 +335,48 @@ def _space_separated_key_value_parser(tag_value): | |||||
tags_dict = {**tags_dict, **parsed_tag} | ||||||
return True, tags_dict | ||||||
|
||||||
@staticmethod | ||||||
def _multiple_space_separated_key_value_parser(tag_value): | ||||||
""" | ||||||
Method to parse space separated `Key1=Value1 Key2=Value2` type tags without using regex. | ||||||
Parameters | ||||||
---------- | ||||||
tag_value | ||||||
""" | ||||||
tags_dict = {} | ||||||
for value in tag_value.split(): | ||||||
parsed, parsed_tag = CfnTags._standard_key_value_parser(value) | ||||||
if not parsed: | ||||||
return False, None | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How come we're also returning a boolean here? Can we just return |
||||||
tags_dict = {**tags_dict, **parsed_tag} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit (if
Suggested change
|
||||||
return True, tags_dict | ||||||
|
||||||
@staticmethod | ||||||
def _replace_spaces(text, replacement="_"): | ||||||
""" | ||||||
Replace spaces in a text with a replacement together with its original locations. | ||||||
Input: "test 1" | ||||||
Output: "test_1" [4] | ||||||
""" | ||||||
space_positions = [i for i, char in enumerate(text) if char == " "] | ||||||
modified = text.replace(" ", replacement) | ||||||
|
||||||
return modified, space_positions | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thoughts on using dataclasses for return objects, instead of returning tuples and performing array indexes to get the modified strings/replacement index array? I imagine that changing things to dataclasses will make it easier for future readers to understand what is happening quickly. |
||||||
|
||||||
@staticmethod | ||||||
def _restore_spaces(modified_text, space_positions, replacement="_"): | ||||||
""" | ||||||
Restore spaces in a text from a original space locations. | ||||||
Input: "test_1" [4] | ||||||
Output: "test 1" | ||||||
""" | ||||||
text_list = list(modified_text) | ||||||
|
||||||
for pos in space_positions: | ||||||
text_list[pos] = " " | ||||||
|
||||||
return "".join(text_list) | ||||||
|
||||||
|
||||||
class SigningProfilesOptionType(click.ParamType): | ||||||
""" | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we reuse or refactor the old
_generate_match_regex
? They share similar return values.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
They have different regex, hence creating a new function.