diff --git a/README.md b/README.md index 8099eb9..0256b28 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Codebase for [A\* CCG Parsing with a Supertag and Dependency Factored Model](https://arxiv.org/abs/1704.06936) #### Requirements -* Python (Either 2 or 3) +* Python 2 * [Chainer](http://chainer.org/) (newer versions) * [Cython](http://cython.org/) * A C++ compiler supporting [C++11 standard](https://en.wikipedia.org/wiki/C%2B%2B11) diff --git a/src/py/ccgbank.py b/src/py/ccgbank.py index d44a338..681755b 100644 --- a/src/py/ccgbank.py +++ b/src/py/ccgbank.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import print_function +import codecs import re import os import py.cat @@ -25,7 +26,7 @@ def walk_autodir(path, subset="train"): class AutoReader(object): def __init__(self, filename): - self.lines = open(filename).readlines() + self.lines = codecs.open(filename, encoding='UTF-8').readlines() def readall(self, suppress_error=False): # Inputs: @@ -52,7 +53,7 @@ def readall(self, suppress_error=False): class AutoLineReader(object): def __init__(self, line): - self.line = line.encode("utf-8") + self.line = line self.index = 0 self.word_id = -1 @@ -80,7 +81,7 @@ def next_node(self): elif self.line[self.index+2] == "T": return self.parse_tree else: - raise RuntimeError() + raise RuntimeError("AUTO parse error: expected string starting with '