Skip to content

Commit

Permalink
Merge pull request #6 from rlpowell/master
Browse files Browse the repository at this point in the history
Added an original-camxes-style parser
  • Loading branch information
rlpowell authored Oct 17, 2021
2 parents 2c2d68c + 1a841ea commit 730dffc
Show file tree
Hide file tree
Showing 7 changed files with 86,430 additions and 86,332 deletions.
11 changes: 11 additions & 0 deletions README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,17 @@ Partial parsing:
>>> node.end < len("klama ku ku")
True

Original-camxes-style parsing:

>>> import camxes_py
>>> from camxes_py.transformers import minimal
>>> minimal_transformer = minimal.Transformer()
>>> text = camxes_py.match("mi la cmen broda fu'ivla li 1 la'o gy english words gy", None, None, minimal_transformer)
>>> text
['sentence', [['terms', [['KOhA', 'mi'], ['sumti_6', [['LA', 'la'], ['CMEVLA', 'cmen']]]]], ['bridi_tail_3', [['selbri_3', [['gismu', 'broda'], ['lujvo', "fu'ivla"]]], ['nonabs_terms', [['li_clause', [['LI', 'li'], ['PA', '1']]], ['ZOI_pre', [['ZOI', "la'o"], ['BY', 'gy'], [['zoi_word', 'english '], ['zoi_word', 'words ']], ['BY', 'gy']]]]]]]]]
>>>


TESTING
=======

Expand Down
3 changes: 3 additions & 0 deletions TODO
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
- merge any pull requests at
https://github.com/teleological/camxes-py/pulls to this repo
- add a test case for the minimal transformer
7 changes: 5 additions & 2 deletions camxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@

import camxes_py.parsimonious_ext # expression_nodes

__version__ = "v0.9.0"
__version__ = "v0.10.0"

PARSERS = [ 'camxes-ilmen' ]
TRANSFORMERS = [ 'camxes-json', 'camxes-morphology', 'vlatai', 'node-coverage', 'debug', 'raw' ]
TRANSFORMERS = [ 'camxes-json', 'camxes-morphology', 'minimal', 'vlatai', 'node-coverage', 'debug', 'raw' ]
SERIALIZERS = [ 'json', 'json-pretty', 'json-compact', 'xml' ]

IMPLEMENTATION_RECURSION_LIMIT = {
Expand Down Expand Up @@ -102,6 +102,9 @@ def build_transformer(transformer_option, parser):
elif transformer_option == 'vlatai':
from camxes_py.transformers import vlatai
return vlatai.Transformer()
elif transformer_option == 'minimal':
from camxes_py.transformers import minimal
return minimal.Transformer()
elif transformer_option == 'node-coverage':
from camxes_py.transformers import node_coverage
return node_coverage.Transformer(parser)
Expand Down
80 changes: 80 additions & 0 deletions camxes_py/transformers/minimal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@

# pylint: disable=I0011, C0111, C0326, no-self-use, unused-argument, invalid-name

import re

from parsimonious.nodes import NodeVisitor

def is_selmaho_expression(name):
return re.match(r"^[ABCDFGIJKLMNPRSTUVXYZ]([AEIOUY]([IU]|h[AEIOU])?)?$", name)

class Transformer(object):
    """Transformer producing the nested-list "minimal" parse representation."""

    def transform(self, parsed):
        """Walk *parsed* with a fresh ``Visitor`` and return what it builds."""
        return Visitor().visit(parsed)

    def default_serializer(self):
        """Return a one-argument callable that serializes a transform result.

        The callable uses the result's own ``as_json()`` method when it has
        one. ``Visitor`` only ever produces plain lists (or ``None``), which
        have no such method, so those are rendered with ``json.dumps``
        instead of failing with ``AttributeError``.
        """
        # Local import keeps this block self-contained.
        import json

        def serialize(result):
            as_json = getattr(result, "as_json", None)
            if callable(as_json):
                return as_json()
            return json.dumps(result)

        return serialize

class Visitor(NodeVisitor):
    """Node visitor that reduces a parse tree to nested ``[name, value]`` lists.

    Leaves are ``[label, text]`` pairs; interior nodes are
    ``[rule_name, [children...]]``. Nodes that contribute nothing
    (whitespace, end of input, empty matches) are reduced to ``None`` and
    dropped by their parent.
    """

    def visit_space_char(self, node, visited_children):
        """Discard whitespace characters."""
        return None

    def visit_EOF(self, node, visited_children):
        """Discard the end-of-input marker."""
        return None

    def visit_CMEVLA(self, node, visited_children):
        """Emit the matched text as a ``CMEVLA`` leaf."""
        return ['CMEVLA', node.text]

    def visit_zoi_word(self, node, visited_children):
        """Emit the matched text as a ``zoi_word`` leaf."""
        return ['zoi_word', node.text]

    def visit_gismu_2(self, node, visited_children):
        """Emit the matched text as a ``gismu`` leaf."""
        return ['gismu', node.text]

    def visit_lujvo(self, node, visited_children):
        """Emit the matched text as a ``lujvo`` leaf."""
        return ['lujvo', node.text]

    def visit_fuhivla(self, node, visited_children):
        """Emit the matched text as a ``fu'ivla`` leaf."""
        return ["fu'ivla", node.text]

    def generic_visit(self, node, visited_children):
        """Handle every node that has no dedicated ``visit_*`` method.

        Returns a ``[name, text]`` leaf, a ``[name, children]`` subtree, the
        single surviving child (possibly relabelled), or ``None`` when the
        node carries nothing worth keeping.
        """
        rule_name = node.expr_name

        # Rules named like a selma'o are the cmavo: emit them as leaves.
        if rule_name and is_selmaho_expression(rule_name):
            return [rule_name, node.text]

        # Anonymous nodes that matched only whitespace (or nothing) vanish.
        if rule_name == "" and not node.text.strip():
            return None

        # Keep only the children that produced something.
        kept = [entry for entry in visited_children if entry]

        if not kept:
            # Childless node: it becomes a leaf if it matched any text.
            if not node.text:
                return None
            return [rule_name, node.text]

        if len(kept) == 1:
            # Collapse single-child chains. If the child was produced by an
            # anonymous expression, let it take over this node's name.
            sole = kept[0]
            if len(sole) > 1 and sole[0] == "":
                sole[0] = rule_name
            return sole

        # Several children: an anonymous child cannot inherit our name, so
        # drop its wrapper layer and keep only its payload (this happens
        # with ZOI clauses, for example).
        flattened = [
            entry[1] if len(entry) > 1 and entry[0] == "" else entry
            for entry in kept
        ]
        return [rule_name, flattened]
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

setuptools.setup(
name=PACKAGE_NAME,
version="0.9.0",
version="0.10.0",
author="Robin Lee Powell",
author_email="rlpowell@digitalkingdom.org",
description="A pure Python implementation of the lojban 'camxes' PEG parser.",
Expand Down
1 change: 1 addition & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def process_spec(input_spec, parser, json_transformer, morph_transformer):

out = morph = None
try:
print("text: " + text)
parsed = parser.parse(text)
out = transform_to_serial(parsed, json_transformer)
morph = transform_to_serial(parsed, morph_transformer)
Expand Down
Loading

0 comments on commit 730dffc

Please sign in to comment.