17
17
LOG = logging .getLogger (__name__ )
18
18
19
19
20
- def _generate_match_regex (match_pattern , delim ):
20
+ def _generate_match_regex (match_pattern , delim = None ):
21
21
"""
22
22
Creates a regex string based on a match pattern (also a regex) that is to be
23
23
run on a string (which may contain escaped quotes) that is separated by delimiters.
@@ -32,13 +32,13 @@ def _generate_match_regex(match_pattern, delim):
32
32
str: regex expression
33
33
34
34
"""
35
+ result = f"""(\\ "(?:\\ \\ { match_pattern } |[^\\ "\\ \\ ]+)*\\ "|""" + f"""\' (?:\\ \\ { match_pattern } |[^\' \\ \\ ]+)*\' """
35
36
36
- # Non capturing groups reduces duplicates in groups, but does not reduce matches.
37
- return (
38
- f"""(\\ "(?:\\ \\ { match_pattern } |[^\\ "\\ \\ ]+)*\\ "|"""
39
- + f"""\' (?:\\ \\ { match_pattern } |[^\' \\ \\ ]+)*\' |"""
40
- + f"""(?:\\ \\ { match_pattern } |[^{ delim } \\ "\\ \\ ]+)+)"""
41
- )
37
+ if delim is not None :
38
+ # Non capturing groups reduces duplicates in groups, but does not reduce matches.
39
+ return result + f"""|(?:\\ \\ { match_pattern } |[^{ delim } \\ "\\ \\ ]+)+)"""
40
+ else :
41
+ return result + ")"
42
42
43
43
44
44
def _unquote_wrapped_quotes (value ):
@@ -194,6 +194,7 @@ def __init__(self, multiple_values_per_key=False):
194
194
TAG_REGEX = '[A-Za-z0-9\\ "_:\\ .\\ /\\ +-\\ @=]'
195
195
196
196
_pattern = r"{tag}={tag}" .format (tag = _generate_match_regex (match_pattern = TAG_REGEX , delim = " " ))
197
+ _quoted_pattern = _generate_match_regex (match_pattern = TAG_REGEX )
197
198
198
199
name = "string,list"
199
200
@@ -222,13 +223,7 @@ def convert(self, value, param, ctx):
222
223
for k in tags :
223
224
self ._add_value (result , _unquote_wrapped_quotes (k ), _unquote_wrapped_quotes (tags [k ]))
224
225
else :
225
- groups = re .findall (self ._pattern , val )
226
-
227
- if not groups :
228
- fail = True
229
- for group in groups :
230
- key , v = group
231
- self ._add_value (result , _unquote_wrapped_quotes (key ), _unquote_wrapped_quotes (v ))
226
+ fail = not self ._parse_key_value_pair (result , val )
232
227
233
228
if fail :
234
229
return self .fail (
@@ -239,6 +234,66 @@ def convert(self, value, param, ctx):
239
234
240
235
return result
241
236
237
def _parse_key_value_pair(self, result: dict, key_value_string: str):
    """
    Parse a string such as "'key1'='value1' 'key2'='value2'" into ``result``.

    The work is split into a cheap first attempt and a regex fallback:

    Stage 1: placeholder parsing
        1. Strip the quotes wrapping the whole string.
        2. Find every quoted fragment matched by ``self._quoted_pattern``.
        3. Swap the spaces inside each fragment for a placeholder so the
           whitespace-splitting parser does not break the fragment apart.
        4. Parse the rewritten string with
           ``_multiple_space_separated_key_value_parser``.
        5. For every parsed value that equals a rewritten fragment, put the
           original spaces back before storing it.

    Stage 2: fallback parsing
        If Stage 1 returns ``None``, run ``self._pattern`` ({tag}={tag})
        against the original, untouched input string.

    NOTE(review): only parsed *values* are compared against the rewritten
    fragments; keys are stored as parsed. Confirm this is intended for
    quoted keys that contain spaces.

    Parameters
    ----------
    result: dict that receives the parsed pairs via ``self._add_value``
    key_value_string: string to parse

    Returns
    -------
    boolean - True when at least one parsing stage succeeded, False when the
    fallback regex found nothing
    """
    # Remove quotes wrapping the entire input before looking for fragments.
    working_text = _unquote_wrapped_quotes(key_value_string)

    # One TextWithSpaces wrapper per quoted fragment found in the input.
    fragments = [TextWithSpaces(found) for found in re.findall(self._quoted_pattern, working_text)]
    for fragment in fragments:
        working_text = working_text.replace(fragment.text, fragment.replace_spaces())

    tags = self._multiple_space_separated_key_value_parser(working_text)

    if tags is None:
        # The simple parser rejected the input: fall back to the full regex,
        # applied to the caller's original string.
        groups = re.findall(self._pattern, key_value_string)
        for key, v in groups:
            self._add_value(result, _unquote_wrapped_quotes(key), _unquote_wrapped_quotes(v))
        return bool(groups)

    for key, value in tags.items():
        # First fragment whose placeholder form equals this value, if any.
        origin = next((fragment for fragment in fragments if fragment.modified_text == value), None)
        restored_value = value if origin is None else origin.restore_spaces()
        self._add_value(result, _unquote_wrapped_quotes(key), _unquote_wrapped_quotes(restored_value))

    return True
296
+
242
297
def _add_value (self , result : dict , key : str , new_value : str ):
243
298
"""
244
299
Add a given value to a given key in the result map.
@@ -286,6 +341,22 @@ def _space_separated_key_value_parser(tag_value):
286
341
tags_dict = {** tags_dict , ** parsed_tag }
287
342
return True , tags_dict
288
343
344
@staticmethod
def _multiple_space_separated_key_value_parser(tag_value):
    """
    Parse whitespace separated `Key1=Value1 Key2=Value2` type tags.

    The input is split on whitespace and every piece is handed to
    ``CfnTags._standard_key_value_parser``; the parsed pieces are merged
    into a single dict, later pieces overriding earlier ones on equal keys.

    Parameters
    ----------
    tag_value: string holding the whitespace separated tags

    Returns
    -------
    dict of all parsed tags, or None as soon as one piece fails to parse
    """
    merged_tags = {}
    for piece in tag_value.split():
        succeeded, piece_tags = CfnTags._standard_key_value_parser(piece)
        if succeeded:
            merged_tags.update(piece_tags)
            continue
        # A single unparsable piece invalidates the whole input.
        return None
    return merged_tags
359
+
289
360
290
361
class SigningProfilesOptionType (click .ParamType ):
291
362
"""
@@ -560,3 +631,34 @@ def convert(
560
631
)
561
632
562
633
return {resource_id : [excluded_path ]}
634
+
635
+
636
class TextWithSpaces:
    """
    Holds a piece of text and can temporarily swap its spaces for a
    placeholder, then put the spaces back at their original locations.
    """

    def __init__(self, text) -> None:
        # Original, untouched text.
        self.text = text
        # Text after the latest replace_spaces() call (equals text until then).
        self.modified_text = text
        # Indexes of the spaces in the original text.
        self.space_positions = []  # type: List[int]
        # Width of the placeholder used by the latest replace_spaces() call;
        # restore_spaces() needs it to locate each placeholder.
        self._replacement_length = 1

    def replace_spaces(self, replacement="_"):
        """
        Replace spaces in a text with a replacement together with its original locations.
        Input: "test 1"
        Output: "test_1" [4]

        Parameters
        ----------
        replacement: placeholder substituted for every space; may be any length

        Returns
        -------
        str - the text with every space replaced
        """
        self.space_positions = [i for i, char in enumerate(self.text) if char == " "]
        self.modified_text = self.text.replace(" ", replacement)
        self._replacement_length = len(replacement)

        return self.modified_text

    def restore_spaces(self):
        """
        Restore spaces in a text from the original space locations.
        Input: "test_1" [4]
        Output: "test 1"

        Works for placeholders of any width: each recorded position is shifted
        by the extra characters the preceding placeholders added or removed.

        Returns
        -------
        str - the modified text with a single space back at every recorded position
        """
        width = self._replacement_length
        pieces = []
        cursor = 0

        for count, pos in enumerate(self.space_positions):
            # Every earlier placeholder moved this one by (width - 1) characters.
            start = pos + count * (width - 1)
            pieces.append(self.modified_text[cursor:start])
            pieces.append(" ")
            cursor = start + width

        pieces.append(self.modified_text[cursor:])
        return "".join(pieces)
0 commit comments