forked from teleological/camxes-py
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6 from rlpowell/master
Added an original-camxes-style parser
- Loading branch information
Showing
7 changed files
with
86,430 additions
and
86,332 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
- merge any pull requests at | ||
https://github.com/teleological/camxes-py/pulls to this repo | ||
- add a test case for the minimal transformer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
|
||
# pylint: disable=I0011, C0111, C0326, no-self-use, unused-argument, invalid-name | ||
|
||
import re | ||
|
||
from parsimonious.nodes import NodeVisitor | ||
|
||
def is_selmaho_expression(name):
    """Test whether *name* has the shape of a selma'o-style rule name.

    A matching name is one capital letter from a fixed set, optionally
    followed by one of ``AEIOUY``, which in turn may be followed by
    ``I``/``U`` or by ``h`` plus one of ``AEIOU`` (e.g. ``A``, ``BAI``,
    ``KOhA``).

    Returns the ``re`` match object when *name* matches, otherwise ``None``.
    """
    selmaho_pattern = r"^[ABCDFGIJKLMNPRSTUVXYZ]([AEIOUY]([IU]|h[AEIOU])?)?$"
    matcher = re.compile(selmaho_pattern)
    return matcher.match(name)
|
||
class Transformer(object):
    """Transformer that converts a parse tree by walking it with ``Visitor``."""

    def transform(self, parsed):
        """Visit *parsed* with a fresh ``Visitor`` and return its result."""
        tree_walker = Visitor()
        return tree_walker.visit(parsed)

    def default_serializer(self):
        """Return a one-argument callable that yields ``x.as_json()``."""
        def serialize(x):
            return x.as_json()

        return serialize
|
||
class Visitor(NodeVisitor):
    """Node visitor that reduces a parse tree to nested ``[name, value]`` lists.

    Nodes that visit to ``None`` (spaces, EOF, empty matches) are dropped by
    their parents. Word-level rules and rules whose name satisfies
    ``is_selmaho_expression`` become ``[label, text]`` leaves. Any other node
    either collapses into its only surviving child or becomes
    ``[expr_name, [children...]]``.
    """

    def visit_space_char(self, node, visited_children):
        """Drop space characters from the output."""
        return None

    def visit_EOF(self, node, visited_children):
        """Drop the end-of-input marker from the output."""
        return None

    def visit_CMEVLA(self, node, visited_children):
        """Emit the node as a ``CMEVLA`` leaf carrying its matched text."""
        return ['CMEVLA', node.text]

    def visit_zoi_word(self, node, visited_children):
        """Emit the node as a ``zoi_word`` leaf carrying its matched text."""
        return ['zoi_word', node.text]

    def visit_gismu_2(self, node, visited_children):
        """Emit the node as a leaf labelled ``gismu`` (not ``gismu_2``)."""
        return ['gismu', node.text]

    def visit_lujvo(self, node, visited_children):
        """Emit the node as a ``lujvo`` leaf carrying its matched text."""
        return ['lujvo', node.text]

    def visit_fuhivla(self, node, visited_children):
        """Emit the node as a leaf labelled ``fu'ivla``."""
        return ["fu'ivla", node.text]

    def generic_visit(self, node, visited_children):
        """Handle every node that has no dedicated ``visit_*`` method.

        Returns ``None`` for nodes that contribute nothing, a
        ``[name, text]`` leaf, the single surviving child (relabelled in
        place if it was anonymous), or ``[name, [children...]]``.
        """
        rule_name = node.expr_name

        # cmavo: a rule named like a selma'o is emitted directly as a leaf.
        if rule_name and is_selmaho_expression(rule_name):
            return [rule_name, node.text]

        # Anonymous nodes that matched nothing but whitespace are discarded.
        if rule_name == "" and node.text.strip() == "":
            return None

        # Keep only the children that produced something.
        kept = [child for child in visited_children if child]

        if not kept:
            # Childless node: a leaf if it matched any text, otherwise nothing.
            return [rule_name, node.text] if node.text else None

        if len(kept) == 1:
            only_child = kept[0]
            # An anonymous only child takes this node's name; the list is
            # updated in place and handed straight up to the parent.
            if len(only_child) > 1 and only_child[0] == "":
                only_child[0] = rule_name
            return only_child

        # Several children: an anonymous child cannot inherit this node's
        # name, so its wrapper layer is removed and only its payload is kept
        # (this happens with ZOI clauses, for example).
        flattened = [
            child[1] if len(child) > 1 and child[0] == "" else child
            for child in kept
        ]
        return [rule_name, flattened]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.