Skip to content

Commit

Permalink
adjusted code to work with markdown blocks, added tests, used formatt…
Browse files Browse the repository at this point in the history
…er on code (#17)
  • Loading branch information
daniu54 committed Oct 21, 2023
1 parent d3aedc7 commit 2edcaf9
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 47 deletions.
33 changes: 21 additions & 12 deletions src/codeblock_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,47 +7,56 @@ class CodeblockParser:
def parse_text(self, text: str, path: Path) -> list:
    """Collect the fenced code blocks in *text* that qualify as Anki blocks.

    The text is scanned line by line. A fence line seen while no block is
    open is treated as an opening fence: its type and headers are parsed
    and the block is started only if ``has_needed_headers`` accepts them.
    A fence line seen while a block is open closes that block.

    Args:
        text: Full text to scan.
        path: Passed through unchanged to ``ParsedCodeBlock.begin``.

    Returns:
        The completed ``ParsedCodeBlock`` objects, in order of appearance.
        A block that is still open when the text ends is not returned.
    """
    # Split once; the same list is sliced whenever a block is closed.
    lines = text.splitlines()
    parsed_codeblocks = []
    current_codeblock: ParsedCodeBlock = None

    for line_number, line in enumerate(lines):
        if not self.has_codeblock_separator(line):
            continue

        if current_codeblock is None or current_codeblock.is_completed():
            block_type = self.parse_codeblock_type(line)
            headers = self.parse_codeblock_headers(line)

            # Fences that do not qualify are ignored entirely.
            if not self.has_needed_headers(block_type, headers):
                continue

            current_codeblock = ParsedCodeBlock()
            # line_number + 1 is the index of the line after the opening fence.
            current_codeblock.begin(line_number + 1, path, block_type, headers)
        else:
            # Everything between the opening fence and this closing fence.
            content = anki_newline_separator.join(
                lines[current_codeblock.start_pos:line_number]
            )

            current_codeblock.complete(content=content, end_pos=line_number)

            parsed_codeblocks.append(current_codeblock)
            current_codeblock = None

    return parsed_codeblocks


def has_needed_headers(self, type: str, headers: list[str]) -> bool:
    """Decide whether a fence with this type and headers should be parsed.

    A block qualifies when its type is exactly ``"anki"``, or when its type
    is exactly ``"markdown"`` and ``"anki"`` is one of its headers. Both
    comparisons are case-sensitive.
    """
    is_anki_block = type == "anki"
    is_tagged_markdown = type == "markdown" and "anki" in headers
    return is_anki_block or is_tagged_markdown

def has_codeblock_separator(self, line: str):
    """Return True when *line*, ignoring leading whitespace, begins with a fence."""
    unindented = line.lstrip()
    return unindented[:3] == '```'

def parse_codeblock_type(self, header_line: str):
    """Return the first word after the opening fence, or None if there is none.

    The three fence characters are dropped after removing leading
    whitespace, and the remainder is split on runs of whitespace. Spaces or
    tabs between the fence and the type therefore do not produce an empty
    type.
    """
    tokens = header_line.lstrip()[3:].split()
    return tokens[0] if tokens else None

def parse_codeblock_headers(self, header_line: str) -> list[str]:
    """Return every word after the block type on the opening fence line.

    The line is tokenised exactly as in ``parse_codeblock_type`` (split on
    runs of whitespace), so repeated or trailing spaces never produce
    empty-string headers. Returns an empty list when only a type, or
    nothing, follows the fence.
    """
    tokens = header_line.lstrip()[3:].split()
    return tokens[1:]
111 changes: 76 additions & 35 deletions src/test_codeblock_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,103 +5,144 @@

def test_parsing_of_one_codeblock():
    """A single anki fence yields one block whose content is its two lines."""
    parser = CodeblockParser()

    # Exactly one opening fence: a second one would close the block
    # immediately and leave it with empty content.
    content = '''
```anki
some code
some more code
```
'''

    codeblocks = parser.parse_text(content, None)

    assert len(codeblocks) == 1

    codeblock: ParsedCodeBlock = codeblocks[0]

    assert codeblock.type == "anki"

    codeblock_content = split_lines_using_anki_separator(codeblock.content)

    assert len(codeblock_content) == 2
    assert codeblock_content[0].strip() == "some code"
    assert codeblock_content[1].strip() == "some more code"

def test_parsing_of_one_markdown_codeblock():
    """A markdown fence carrying the `anki` header is parsed as one block."""
    content = '''
```markdown anki
some code
some more code
```
'''

    parsed = CodeblockParser().parse_text(content, None)
    assert len(parsed) == 1

    block: ParsedCodeBlock = parsed[0]
    assert block.type == "markdown"

    # The stored content is joined with the anki separator; split it back.
    lines = split_lines_using_anki_separator(block.content)
    assert len(lines) == 2
    assert lines[0].strip() == "some code"
    assert lines[1].strip() == "some more code"


def test_ignoring_of_one_markdown_codeblock_without_anki_annotation():
    """A markdown fence without the `anki` header must not be collected."""
    # The opening fence deliberately lacks the `anki` header.
    untagged = '''
```markdown
some code
some more code
```
'''

    found = CodeblockParser().parse_text(untagged, None)

    assert len(found) == 0


def test_parsing_of_headers_one_codeblock():
    """Headers after the block type are exposed in order on the parsed block.

    Two opening fences are checked independently, each in its own text with
    a single code block: a plain anki fence, and a markdown fence tagged
    with `anki` (where `anki` itself is the first header).
    """
    parser = CodeblockParser()

    cases = [
        (
            "```anki header1:value header2",
            "anki",
            ["header1:value", "header2"],
        ),
        (
            "```markdown anki header1:value header2",
            "markdown",
            ["anki", "header1:value", "header2"],
        ),
    ]

    for fence, expected_type, expected_headers in cases:
        content = "\n".join(["", fence, "some code", "some more code", "```", ""])

        codeblocks = parser.parse_text(content, None)

        assert len(codeblocks) == 1

        codeblock: ParsedCodeBlock = codeblocks[0]

        assert codeblock.type == expected_type
        assert list(codeblock.headers) == expected_headers

def test_parsing_of_multiple_codeblocks():
parser = CodeblockParser()

content = '''
```anki
```anki
some code
some more code
```
some irrelevant text
some irrelevant text
some irrelevant text
```anki
```markdown anki
some code2
some more code2
```
some irrelevant text
some irrelevant text
```anki
```anki
some code3
some more code3
some more more code3
```
'''

codeblocks = parser.parse_text(content, None)

assert len(codeblocks) == 3

codeblock1: ParsedCodeBlock = codeblocks[0]
codeblock2: ParsedCodeBlock = codeblocks[1]
codeblock3: ParsedCodeBlock = codeblocks[2]

assert codeblock1.type == "anki"
assert codeblock2.type == "anki"
assert codeblock2.type == "markdown"
assert codeblock3.type == "anki"

codeblock1_content = split_lines_using_anki_separator(codeblock1.content)
codeblock2_content = split_lines_using_anki_separator(codeblock2.content)
codeblock3_content = split_lines_using_anki_separator(codeblock3.content)

assert len(codeblock1_content) == 2
assert codeblock1_content[0].strip() == "some code"
assert codeblock1_content[1].strip() == "some more code"

assert len(codeblock2_content) == 2
assert codeblock2_content[0].strip() == "some code2"
assert codeblock2_content[1].strip() == "some more code2"


assert len(codeblock3_content) == 3
assert codeblock3_content[0].strip() == "some code3"
Expand Down

0 comments on commit 2edcaf9

Please sign in to comment.