From 58ded3e7528e9e55a6387073f587b353c3992693 Mon Sep 17 00:00:00 2001 From: jeremymarch Date: Thu, 17 Jan 2019 01:10:04 -0500 Subject: [PATCH] prefer precomposed tonos to acute --- README.md | 4 +++- src/py/hopliteaccent.py | 10 +++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a67803a..f8029f4 100644 --- a/README.md +++ b/README.md @@ -46,5 +46,7 @@ From the options menu you can select the unicode mode for diacritics. * Precomposed with PUA (Private Use Area) mode is the same, but will also use the precomposed characters from the non-standard Private Use Area. These characters are not standard unicode, but are supported by some fonts such as New Athena Unicode and IFAOGrec Unicode. * Combining-only mode will use combining diacritics to type decomposed characters. Few fonts handle combining diacritics well at this point; New Athena Unicode is currently the best. +There is a detailed discussion of these differences [here](https://apagreekkeys.org/technicalDetails.html). + ## Why a LibreOffice extension? Why not offer this functionality system-wide? -The Windows, Mac, and Linux operating systems do not provide the keyboard with the information necessary to toggle on/off diacritics. The Hoplite Keyboard started on iOS and Android where this information *is* provided to the keyboard. So for Windows, Mac, and Linux the only way to implement this is inside applications. +The Linux, Mac, and Windows operating systems do not provide the keyboard with the information necessary to toggle on/off diacritics. The Hoplite Keyboard started on iOS and Android where this information *is* provided to the keyboard. So for Linux, Mac, and Windows the only way to implement this is inside applications. 
diff --git a/src/py/hopliteaccent.py b/src/py/hopliteaccent.py index 5f6047a..7705714 100644 --- a/src/py/hopliteaccent.py +++ b/src/py/hopliteaccent.py @@ -140,7 +140,8 @@ # HYPHEN 0x2010 # COMMA 0x002C -#http://www.unicode.org/charts/normalization/ + +# this list determines the order of combining diacritics: combiningAccents = [ COMBINING_MACRON, COMBINING_BREVE, COMBINING_DIAERESIS, COMBINING_ROUGH_BREATHING, COMBINING_SMOOTH_BREATHING, COMBINING_ACUTE, COMBINING_GRAVE, COMBINING_CIRCUMFLEX, COMBINING_IOTA_SUBSCRIPT ] letters = [ [ '\u03B1', '\u1F00', '\u1F01', '\u1F71', '\u1F04', '\u1F05', '\u1F70', '\u1F02', '\u1F03', '\u1FB6', '\u1F06', '\u1F07', '\u1FB3', '\u1F80', '\u1F81', '\u1FB4', '\u1F84', '\u1F85', '\u1FB2', '\u1F82', '\u1F83', '\u1FB7', '\u1F86', '\u1F87', '\u0000', '\u0000', '\u0000', '\u0000', '\u1FB1', '\uEB04', '\uEB07', '\uEAF3', '\uEB05', '\uEB09', '\uEAF4', '\uEB00', '\uEAF0', '\u03AC' ], @@ -168,7 +169,7 @@ def getPrecomposedLetter(letterIndex, diacriticBits): elif diacriticBits == (_ROUGH): accentIndex = DASIA elif diacriticBits == (_ACUTE): - accentIndex = OXIA + accentIndex = TONOS #OXIA, tonos is preferred: https://apagreekkeys.org/technicalDetails.html#problems elif diacriticBits == (_SMOOTH | _ACUTE): accentIndex = PSILI_AND_OXIA elif diacriticBits == (_ROUGH | _ACUTE): @@ -399,6 +400,8 @@ def isLegalDiacriticForLetter(letterCode, accentToAdd): return True +#a hash table could save us from looping through all this +#we don't want to analyze via canonical decomposition because PUA characters have no canonical decomposition def analyzePrecomposedLetter(letter): for vidx in range(0, NUM_VOWEL_CODES): for aidx in range(0, NUM_ACCENT_CODES): @@ -406,6 +409,7 @@ def analyzePrecomposedLetter(letter): return (vidx, aidx) return (None, None) + def precomposedIndexToBitMask(diacriticIndex, diacriticBits): #don't initialize to false here because diacriticMask could have combining accents already set to true #make sure this is in order of enum so compiler can 
optimize switch @@ -483,7 +487,7 @@ def precomposedIndexToBitMask(diacriticIndex, diacriticBits): elif diacriticIndex == MACRON_AND_GRAVE: diacriticBits |= (_MACRON | _GRAVE) #endif - elif diacriticIndex == TONOS: + elif diacriticIndex == TONOS: #we conflate tonos and acute diacriticBits |= _ACUTE return diacriticBits