aws · dkphm · Oct 29, 2024 · Oct 29, 2024 · Oct 29, 2024 · Nov 12, 2024
@@ -17,6 +17,31 @@
 LOG = logging.getLogger(__name__)
 
 
+def _generate_quoted_match_regex(match_pattern):
+    """
+    Creates a regex that matches a single- or double-quoted substring of a string. Inside the quotes,
+    a backslash is accepted only when it is followed by the match pattern (also a regex).
+
+    Parameters
+    ----------
+    match_pattern: (str) regex pattern that is allowed to follow a backslash inside the quoted string
+
+    Returns
+    -------
+    str: regex expression
+
+    Examples
+    --------
+    match_pattern: [A-Za-z0-9\\"_:\\.\\/\\+-\\@=]
+    input: createdBy=\"Test user\" ProjectName='Test App'
+    output: ['"Test user"', "'Test App'"]
+
+    """
+
+    return f"""(\\"(?:\\\\{match_pattern}|[^\\"\\\\]+)*\\"|""" + f"""\'(?:\\\\{match_pattern}|[^\'\\\\]+)*\')"""
+
+
 def _generate_match_regex(match_pattern, delim):
     """
     Creates a regex string based on a match pattern (also a regex) that is to be
@@ -194,6 +219,7 @@ def __init__(self, multiple_values_per_key=False):
     TAG_REGEX = '[A-Za-z0-9\\"_:\\.\\/\\+-\\@=]'
 
     _pattern = r"{tag}={tag}".format(tag=_generate_match_regex(match_pattern=TAG_REGEX, delim=" "))
+    _quoted_pattern = _generate_quoted_match_regex(match_pattern=TAG_REGEX)
 
     name = "string,list"
 
@@ -222,13 +248,36 @@ def convert(self, value, param, ctx):
                 for k in tags:
                     self._add_value(result, _unquote_wrapped_quotes(k), _unquote_wrapped_quotes(tags[k]))
             else:
-                groups = re.findall(self._pattern, val)
-
-                if not groups:
-                    fail = True
-                for group in groups:
-                    key, v = group
-                    self._add_value(result, _unquote_wrapped_quotes(key), _unquote_wrapped_quotes(v))
+                # Instead of parsing a full {tag}={tag} pattern, we first look for quoted strings, replace the
+                # spaces inside them with a placeholder, and then parse the value again.
+
+                # First, we need to unquote the full string
+                modified_val = _unquote_wrapped_quotes(val)
+
+                # Next, look for quoted strings (which may contain spaces) and replace the spaces in them
+                quoted_strings_with_spaces = re.findall(self._quoted_pattern, modified_val)
+                quoted_strings_without_spaces = [self._replace_spaces(s) for s in quoted_strings_with_spaces]
+                for s, replacement in zip(quoted_strings_with_spaces, quoted_strings_without_spaces):
+                    modified_val = modified_val.replace(s, replacement[0])
+
+                # Finally, restart the parsing with key=value separated by (multiple) spaces.
+                parsed, tags = self._multiple_space_separated_key_value_parser(modified_val)
+                if parsed:
+                    for k in tags:
-                    for k in tags:
+                    for key, value in tags.items():
-                    for k in tags:
+                    for key, value in tags.items():
+                        v = tags[k]
+                        m = [i for i in quoted_strings_without_spaces if i[0] == v]
+                        if len(m) > 0:
+                            v = self._restore_spaces(*m[0])
+                        self._add_value(result, _unquote_wrapped_quotes(k), _unquote_wrapped_quotes(v))
+                else:
+                    # Otherwise, fall back to the original mechanism.
+                    groups = re.findall(self._pattern, val)
+
+                    if not groups:
+                        fail = True
+                    for group in groups:
+                        key, v = group
+                        self._add_value(result, _unquote_wrapped_quotes(key), _unquote_wrapped_quotes(v))
 
             if fail:
                 return self.fail(
@@ -286,6 +335,48 @@ def _space_separated_key_value_parser(tag_value):
             tags_dict = {**tags_dict, **parsed_tag}
         return True, tags_dict
 
+    @staticmethod
+    def _multiple_space_separated_key_value_parser(tag_value):
+        """
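+        Method to parse `Key1=Value1 Key2=Value2` type tags, separated by one or more whitespace
+        characters, without using regex.
+
+        Parameters
+        ----------
+        tag_value: (str) whitespace-separated key=value pairs
+
+        Returns
+        -------
+        tuple: (True, dict of parsed tags), or (False, None) if any pair cannot be parsed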
+        """
+        tags_dict = {}
+        for value in tag_value.split():
+            parsed, parsed_tag = CfnTags._standard_key_value_parser(value)
+            if not parsed:
+                return False, None
+            tags_dict = {**tags_dict, **parsed_tag}
-            tags_dict = {**tags_dict, **parsed_tag}
+            tags_dict.update(parsed_tag)
-            tags_dict = {**tags_dict, **parsed_tag}
+            tags_dict.update(parsed_tag)
+        return True, tags_dict
+
+    @staticmethod
+    def _replace_spaces(text, replacement="_"):
+        """
+        Replace spaces in a text with a replacement and return it together with the original space locations.
+        Input: "test 1"
+        Output: "test_1" [4]
+        """
+        space_positions = [i for i, char in enumerate(text) if char == " "]
+        modified = text.replace(" ", replacement)
+
+        return modified, space_positions
+
+    @staticmethod
+    def _restore_spaces(modified_text, space_positions, replacement="_"):
+        """
+        Restore spaces in a text at the original space locations.
+        Input: "test_1" [4]
+        Output: "test 1"
+        """
+        text_list = list(modified_text)
+
+        for pos in space_positions:
+            text_list[pos] = " "
+
+        return "".join(text_list)
+
 
 class SigningProfilesOptionType(click.ParamType):
     """

@@ -238,6 +238,14 @@ def test_must_fail_on_invalid_format(self, input):
                 ["stage=int", "company:application=awesome-service", "company:department=engineering"],
                 {"stage": "int", "company:application": "awesome-service", "company:department": "engineering"},
             ),
+            # input as string with multiple key-values including spaces
+            (('tag1="son of anton" tag2="company abc"',), {"tag1": "son of anton", "tag2": "company abc"}),
+            (('tag1="son of anton"   tag2="company abc"',), {"tag1": "son of anton", "tag2": "company abc"}),
+            (('\'tag1="son of anton" tag2="company abc"\'',), {"tag1": "son of anton", "tag2": "company abc"}),
+            (
+                ('tag1="son of anton" tag2="company abc" tag:3="dummy tag"',),
+                {"tag1": "son of anton", "tag2": "company abc", "tag:3": "dummy tag"},
+            ),
         ]
     )
     def test_successful_parsing(self, input, expected):