move data directory into package

direct-phonology · Jan 16, 2020 · 340f41f · 340f41f
1 parent 38f4020
commit 340f41f
Show file tree

Hide file tree

Showing 8 changed files with 9 additions and 5 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
diff --git a/README.md b/README.md
@@ -60,7 +60,7 @@ $ dphon --version
 
 ## methodology
 
-matching sequences are determined by a dictionary file that represents a particular reconstruction of old chinese phonology (you can see some examples in the `data/` folder). these data structures map an input character to an arbitrary sound token ("dummy") that can be matched against other such tokens.
+matching sequences are determined by a dictionary file that represents a particular reconstruction of old chinese phonology (you can see some examples in the `dphon/data/` folder). these data structures map an input character to an arbitrary sound token ("dummy") that can be matched against other such tokens.
 
 the core process of DIRECT is to accept plaintext input, tokenize it according to a particular phonological reconstruction, and search for matches amongst the tokenized text. these matches thus represent resonance: sequences that could have rhymed when they were originally read aloud, despite dissimilarity in their written forms.
 

diff --git a/data/bs_dict.json → dphon/data/bs_dict.json b/data/bs_dict.json → dphon/data/bs_dict.json
diff --git a/data/dummy_dict.json → dphon/data/dummy_dict.json b/data/dummy_dict.json → dphon/data/dummy_dict.json
diff --git a/data/dummy_initgroup_dict.json → dphon/data/dummy_initgroup_dict.json b/data/dummy_initgroup_dict.json → dphon/data/dummy_initgroup_dict.json
diff --git a/data/schuessler_dict.json → dphon/data/schuessler_dict.json b/data/schuessler_dict.json → dphon/data/schuessler_dict.json
diff --git a/dphon/lib.py b/dphon/lib.py
@@ -1,12 +1,16 @@
 import json
 from collections import defaultdict
-from typing import List, Dict, Tuple
 from os.path import basename, splitext
+from typing import Dict, List, Tuple
+
+import pkg_resources
 
 '''Non-alphabetic symbols used in place of a character.'''
 CHAR_MARKERS = ['□']
 
-with open('data/dummy_dict.json', encoding='utf-8') as file:
+'''Dummy dictionary (data/dummy_dict.json) mapping input characters to arbitrary sound tokens.'''
+schuessler_path = pkg_resources.resource_filename(__package__, 'data/dummy_dict.json')
+with open(schuessler_path, encoding='utf-8') as file:
     DUMMY_DICT = json.loads(file.read())
 
 def phonetic_tokens(string: str) -> str:

diff --git a/setup.py b/setup.py
@@ -39,7 +39,8 @@ def run(self):
     long_description=long_description,
     long_description_content_type='text/markdown',
     url='https://github.com/direct-phonology/direct',
-    include_package_data=True,  # include extra data files, e.g. dictionaries
+    include_package_data=True,
+    package_data={'dphon': ['data/*.json']},
     author='John O\'Leary, Nick Budak, Gian Rominger',
     author_email='jo10@princeton.edu, nbudak@princeton.edu, gianr@princeton.edu',
     license='MIT',