diff --git a/README.rst b/README.rst deleted file mode 100644 index 6111b5d..0000000 --- a/README.rst +++ /dev/null @@ -1,188 +0,0 @@ - -Homoglyphs -========== - - -.. image:: logo.png - :target: logo.png - :alt: Homoglyphs logo - - -.. image:: https://travis-ci.org/orsinium/homoglyphs.svg?branch=master - :target: https://travis-ci.org/orsinium/homoglyphs - :alt: Build Status - -.. image:: https://img.shields.io/pypi/v/homoglyphs.svg - :target: https://pypi.python.org/pypi/homoglyphs - :alt: PyPI version - -.. image:: https://img.shields.io/pypi/status/homoglyphs.svg - :target: https://pypi.python.org/pypi/homoglyphs - :alt: Status - -.. image:: https://img.shields.io/github/languages/code-size/orsinium/homoglyphs.svg - :target: https://github.com/orsinium/homoglyphs - :alt: Code size - -.. image:: https://img.shields.io/pypi/l/homoglyphs.svg - :target: LICENSE - :alt: License - - -Homoglyphs -- python library for getting `homoglyphs `_ and converting to ASCII. - -Features --------- - -It's smarter version of `confusable_homoglyphs `_\ : - - -* Autodect or manual choosing category (\ `aliases from ISO 15924 `_\ ). -* Auto or manual load only needed alphabets in memory. -* Converting to ASCII. -* More configurable. -* More stable. - -Installation ------------- - -.. code-block:: bash - - sudo pip install homoglyphs - -Usage ------ - -Best way to explain something is show how it works. So, let's have a look on the real usage. - -Importing: - -.. code-block:: python - - import homoglyphs as hg - -Languages -^^^^^^^^^ - -.. code-block:: python - - #detect - hg.Languages.detect('w') - # {'pl', 'da', 'nl', 'fi', 'cz', 'sr', 'pt', 'it', 'en', 'es', 'sk', 'de', 'fr', 'ro'} - hg.Languages.detect('т') - # {'mk', 'ru', 'be', 'bg', 'sr'} - hg.Languages.detect('.') - # set() - - # get alphabet for languages - hg.Languages.get_alphabet(['ru']) - # {'в', 'Ё', 'К', 'Т', ..., 'Р', 'З', 'Э'} - - # get all languages - hg.Languages.get_all() - # {'nl', 'lt', ..., 'de', 'mk'} - -Categories -^^^^^^^^^^ - -Categories -- (\ `aliases from ISO 15924 `_\ ). - -.. code-block:: python - - #detect - hg.Categories.detect('w') - # 'LATIN' - hg.Categories.detect('т') - # 'CYRILLIC' - hg.Categories.detect('.') - # 'COMMON' - - # get alphabet for categories - hg.Categories.get_alphabet(['CYRILLIC']) - # {'ӗ', 'Ԍ', 'Ґ', 'Я', ..., 'Э', 'ԕ', 'ӻ'} - - # get all categories - hg.Categories.get_all() - # {'RUNIC', 'DESERET', ..., 'SOGDIAN', 'TAI_LE'} - -Homoglyphs -^^^^^^^^^^ - -Get homoglyphs: - -.. code-block:: python - - # get homoglyphs (latin alphabet initialized by default) - hg.Homoglyphs().get_combinations('q') - # ['q', '𝐪', '𝑞', '𝒒', '𝓆', '𝓺', '𝔮', '𝕢', '𝖖', '𝗊', '𝗾', '𝘲', '𝙦', '𝚚'] - -Alphabet loading: - -.. code-block:: python - - # load alphabet on init by categories - homoglyphs = hg.Homoglyphs(categories=('LATIN', 'COMMON', 'CYRILLIC')) # alphabet loaded here - homoglyphs.get_combinations('гы') - # ['rы', 'гы', 'ꭇы', 'ꭈы', '𝐫ы', '𝑟ы', '𝒓ы', '𝓇ы', '𝓻ы', '𝔯ы', '𝕣ы', '𝖗ы', '𝗋ы', '𝗿ы', '𝘳ы', '𝙧ы', '𝚛ы'] - - # load alphabet on init by languages - homoglyphs = hg.Homoglyphs(languages={'ru', 'en'}) # alphabet will be loaded here - homoglyphs.get_combinations('гы') - # ['rы', 'гы'] - - # manual set alphabet on init # eng rus - homoglyphs = hg.Homoglyphs(alphabet='abc абс') - homoglyphs.get_combinations('с') - # ['c', 'с'] - - # load alphabet on demand - homoglyphs = hg.Homoglyphs(languages={'en'}, strategy=hg.STRATEGY_LOAD) - # ^ alphabet will be loaded here for "en" language - homoglyphs.get_combinations('гы') - # ^ alphabet will be loaded here for "ru" language - # ['rы', 'гы'] - -You can combine ``categories``\ , ``languages``\ , ``alphabet`` and any strategies as you want. The strategies specify how to handle any characters not already loaded: - - -* ``STRATEGY_LOAD``\ : load category for this character -* ``STRATEGY_IGNORE``\ : add character to result -* ``STRATEGY_REMOVE``\ : remove character from result - -Converting glyphs to ASCII chars -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: python - - homoglyphs = hg.Homoglyphs(languages={'en'}, strategy=hg.STRATEGY_LOAD) - - # convert - homoglyphs.to_ascii('тест') - # ['tect'] - homoglyphs.to_ascii('ХР123.') # this is cyrillic "х" and "р" - # ['XP123.', 'XPI23.', 'XPl23.'] - - # string with chars which can't be converted by default will be ignored - homoglyphs.to_ascii('лол') - # [] - - # you can set strategy for removing not converted non-ASCII chars from result - homoglyphs = hg.Homoglyphs( - languages={'en'}, - strategy=hg.STRATEGY_LOAD, - ascii_strategy=hg.STRATEGY_REMOVE, - ) - homoglyphs.to_ascii('лол') - # ['o'] - - # also you can set up range of allowed char codes for ascii (0-128 by default): - homoglyphs = hg.Homoglyphs( - languages={'en'}, - strategy=hg.STRATEGY_LOAD, - ascii_strategy=hg.STRATEGY_REMOVE, - ascii_range=range(ord('a'), ord('z')), - ) - homoglyphs.to_ascii('ХР123.') - # ['l'] - homoglyphs.to_ascii('хр123.') - # ['xpl'] diff --git a/homoglyphs/__init__.py b/homoglyphs/__init__.py index dc77d3c..d3d695c 100644 --- a/homoglyphs/__init__.py +++ b/homoglyphs/__init__.py @@ -9,7 +9,7 @@ # main package info __title__ = 'Homoglyphs' -__version__ = '1.3.5' +__version__ = '2.0.0' __author__ = 'Gram Orsinium' __license__ = 'MIT' diff --git a/setup.py b/setup.py index 3baa942..00ccb6d 100644 --- a/setup.py +++ b/setup.py @@ -21,10 +21,10 @@ setup( long_description=readme, name='homoglyphs', - version='1.3.5', + version='2.0.0', description='Homoglyphs', - python_requires='>=2.7', - project_urls={'homepage': 'https://github.com/life4/homoglyphs'}, + python_requires='>=3.5', + project_urls={"homepage": "https://github.com/life4/homoglyphs"}, author='orsinium', author_email='master_fess@mail.ru', license='MIT', @@ -41,6 +41,7 @@ 'Topic :: Software Development :: Libraries :: Python Modules' ], packages=['homoglyphs'], - package_data={'homoglyphs': ['*.json']}, + package_dir={"": "."}, + package_data={"homoglyphs": ["*.json"]}, install_requires=[], )