diff --git a/gapic/utils/lines.py b/gapic/utils/lines.py index 358759b55..fb24e1935 100644 --- a/gapic/utils/lines.py +++ b/gapic/utils/lines.py @@ -17,6 +17,9 @@ from typing import Iterable, Optional +NUMBERED_LIST_REGEX = r"^\d+\. " + + def sort_lines(text: str, dedupe: bool = True) -> str: """Sort the individual lines of a block of text. @@ -40,6 +43,49 @@ def sort_lines(text: str, dedupe: bool = True) -> str: return f'{leading}{answer}{trailing}' +def get_subsequent_line_indentation_level(list_item: str) -> int: + """ + Given a list item return the indentation level for subsequent lines. + For example, if it is a numbered list, the indentation level should be 4 + as shown below. + + Here subsequent lines should be indented by 2 + + - The quick brown fox jumps over the lazy dog. The quick brown fox jumps + over the lazy dog + + Here subsequent lines should be indented by 2 + + + The quick brown fox jumps over the lazy dog. The quick brown fox jumps + over the lazy dog + + Here subsequent lines should be indented by 4 to cater for double digits + + 1. The quick brown fox jumps over the lazy dog. The quick brown fox jumps + over the lazy dog + + 22. The quick brown fox jumps over the lazy dog. The quick brown fox jumps + over the lazy dog + """ + if len(list_item) >= 2 and list_item[0:2] in ['- ', '+ ']: + indentation_level = 2 + elif len(list_item) >= 4 and re.match(NUMBERED_LIST_REGEX, list_item): + indentation_level = 4 + else: + # Don't use any indentation level if the list item marker is not known + indentation_level = 0 + return indentation_level + + +def is_list_item(list_item: str) -> bool: + """ + Given a string return a boolean indicating whether a list is identified. 
+ """ + if len(list_item) < 3: + return False + return list_item.startswith('- ') or list_item.startswith('+ ') or bool(re.match(NUMBERED_LIST_REGEX, list_item)) + + def wrap(text: str, width: int, *, offset: Optional[int] = None, indent: int = 0) -> str: """Wrap the given string to the given width. @@ -93,11 +139,12 @@ def wrap(text: str, width: int, *, offset: Optional[int] = None, indent: int = 0 break_on_hyphens=False, ) # Strip the first \n from the text so it is not misidentified as an - # intentionally short line below, except when the text contains `:` - # as the new line is required for lists. + # intentionally short line below, except when the text contains a list, + # as the new line is required for lists. Look for a list item marker in + # the remaining text which indicates that a list is present. if '\n' in text: - initial_text = text.split('\n')[0] - if ":" not in initial_text: + remaining_text = "".join(text.split('\n')[1:]) + if not is_list_item(remaining_text.strip()): text = text.replace('\n', ' ', 1) # Save the new `first` line. 
@@ -121,9 +168,9 @@ def wrap(text: str, width: int, *, offset: Optional[int] = None, indent: int = 0 tokens = [] token = '' for line in text.split('\n'): - # Ensure that lines that start with a hyphen are always on a new line + # Ensure that lines that start with a list item marker are always on a new line # Ensure that blank lines are preserved - if (line.strip().startswith('-') or not len(line)) and token: + if (is_list_item(line.strip()) or not len(line)) and token: tokens.append(token) token = '' token += line + '\n' @@ -145,7 +192,7 @@ def wrap(text: str, width: int, *, offset: Optional[int] = None, indent: int = 0 initial_indent=' ' * indent, # ensure that subsequent lines for lists are indented 2 spaces subsequent_indent=' ' * indent + \ - (' ' if token.strip().startswith('-') else ''), + ' ' * get_subsequent_line_indentation_level(token.strip()), text=token, width=width, break_on_hyphens=False, diff --git a/tests/integration/goldens/eventarc/google/cloud/eventarc_v1/types/channel.py b/tests/integration/goldens/eventarc/google/cloud/eventarc_v1/types/channel.py index 10b33b682..e864f6554 100755 --- a/tests/integration/goldens/eventarc/google/cloud/eventarc_v1/types/channel.py +++ b/tests/integration/goldens/eventarc/google/cloud/eventarc_v1/types/channel.py @@ -102,9 +102,9 @@ class State(proto.Enum): possible cases this state can happen: 1. The SaaS provider disconnected from this - Channel. 2. The Channel activation token has - expired but the SaaS provider wasn't - connected. + Channel. + 2. The Channel activation token has expired but + the SaaS provider wasn't connected. To re-establish a Connection with a provider, the subscriber should create a new Channel and diff --git a/tests/unit/utils/test_lines.py b/tests/unit/utils/test_lines.py index 7a0638b71..9642b0f01 100644 --- a/tests/unit/utils/test_lines.py +++ b/tests/unit/utils/test_lines.py @@ -246,3 +246,38 @@ def test_list_with_multiple_paragraphs(): erat. In nec est nisl. 
Quisque ut orci efficitur, vestibulum ante non, vestibulum erat. Donec mollis ultricies nisl.""" assert lines.wrap(input, width=60) == expected + + +def test_list_with_numbered_list(): + input = """Config for video classification human labeling task. +Currently two types of video classification are supported: +1. Assign labels on the entire video. Assign labels on the entire video. +22. Split the video into multiple video clips based on camera shot, and +assign labels on each video clip.""" + expected = """Config for video classification human labeling task. +Currently two types of video classification are supported: + +1. Assign labels on the entire video. Assign labels on the + entire video. +22. Split the video into multiple video clips based on + camera shot, and assign labels on each video clip.""" + assert lines.wrap(input, width=60) == expected + + +def test_list_with_plus_list_item_marker(): + input = """User-assigned name of the trigger. Must be unique within the project. +Trigger names must meet the following requirements: ++ They must contain only alphanumeric characters and dashes. ++ They can be 1-64 characters long. ++ They must begin and end with an alphanumeric character.""" + expected = """User-assigned name of the trigger. Must +be unique within the project. Trigger +names must meet the following +requirements: + ++ They must contain only alphanumeric + characters and dashes. ++ They can be 1-64 characters long. ++ They must begin and end with an + alphanumeric character.""" + assert lines.wrap(input, width=40) == expected