From 58ded3e7528e9e55a6387073f587b353c3992693 Mon Sep 17 00:00:00 2001
From: jeremymarch <jmarch@gradcenter.cuny.edu>
Date: Thu, 17 Jan 2019 01:10:04 -0500
Subject: [PATCH] prefer precomposed tonos to acute

---
 README.md               |  4 +++-
 src/py/hopliteaccent.py | 10 +++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index a67803a..f8029f4 100644
--- a/README.md
+++ b/README.md
@@ -46,5 +46,7 @@ From the options menu you can select the unicode mode for diacritics.
 * Precomposed with PUA (Private Use Area) mode is the same, but will also use the precomposed characters from the non-standard Private Use Area.  These characters are not standard unicode, but are supported by some fonts such as New Athena Unicode and IFAOGrec Unicode.  
 * Combining-only mode will use combining diacritics to type decomposed characters.  Few fonts handle combining diacritics well at this point; New Athena Unicode is currently the best.  
 
+For a detailed discussion of these differences, see the [GreekKeys technical details page](https://apagreekkeys.org/technicalDetails.html).
+
 ## Why a LibreOffice extension?  Why not offer this functionality system-wide?
-The Windows, Mac, and Linux operating systems do not provide the keyboard with the information necessary to toggle on/off diacritics.  The Hoplite Keyboard started on iOS and Android where this information *is* provided to the keyboard.  So for Windows, Mac, and Linux the only way to implement this is inside applications.
+The Linux, Mac, and Windows operating systems do not provide the keyboard with the information necessary to toggle on/off diacritics.  The Hoplite Keyboard started on iOS and Android where this information *is* provided to the keyboard.  So for Linux, Mac, and Windows the only way to implement this is inside applications.
diff --git a/src/py/hopliteaccent.py b/src/py/hopliteaccent.py
index 5f6047a..7705714 100644
--- a/src/py/hopliteaccent.py
+++ b/src/py/hopliteaccent.py
@@ -140,7 +140,8 @@
 # HYPHEN                          0x2010
 # COMMA                           0x002C
 
-#http://www.unicode.org/charts/normalization/
+
+# this list determines the order of combining diacritics:
 combiningAccents = [ COMBINING_MACRON, COMBINING_BREVE, COMBINING_DIAERESIS, COMBINING_ROUGH_BREATHING, COMBINING_SMOOTH_BREATHING, COMBINING_ACUTE, COMBINING_GRAVE, COMBINING_CIRCUMFLEX, COMBINING_IOTA_SUBSCRIPT ]
 
 letters = [ [ '\u03B1', '\u1F00', '\u1F01', '\u1F71', '\u1F04', '\u1F05', '\u1F70', '\u1F02', '\u1F03', '\u1FB6', '\u1F06', '\u1F07', '\u1FB3', '\u1F80', '\u1F81', '\u1FB4', '\u1F84', '\u1F85', '\u1FB2', '\u1F82', '\u1F83', '\u1FB7', '\u1F86', '\u1F87', '\u0000', '\u0000', '\u0000', '\u0000', '\u1FB1', '\uEB04', '\uEB07', '\uEAF3', '\uEB05', '\uEB09', '\uEAF4', '\uEB00', '\uEAF0', '\u03AC' ], 
@@ -168,7 +169,7 @@ def getPrecomposedLetter(letterIndex, diacriticBits):
     elif diacriticBits == (_ROUGH):
         accentIndex = DASIA
     elif diacriticBits == (_ACUTE):
-        accentIndex = OXIA
+        accentIndex = TONOS #was OXIA; the precomposed tonos form is preferred for a lone acute: https://apagreekkeys.org/technicalDetails.html#problems
     elif diacriticBits == (_SMOOTH | _ACUTE):
         accentIndex = PSILI_AND_OXIA
     elif diacriticBits == (_ROUGH | _ACUTE):
@@ -399,6 +400,8 @@ def isLegalDiacriticForLetter(letterCode, accentToAdd):
     return True
 
 
+#a hash table keyed by character could save us from looping through every vowel/accent combination
+#we don't analyze via canonical decomposition because PUA characters have no canonical decomposition
 def analyzePrecomposedLetter(letter):
     for vidx in range(0, NUM_VOWEL_CODES):
         for aidx in range(0, NUM_ACCENT_CODES):
@@ -406,6 +409,7 @@ def analyzePrecomposedLetter(letter):
                 return (vidx, aidx)
     return (None, None)
 
+
 def precomposedIndexToBitMask(diacriticIndex, diacriticBits):
     #don't initialize to false here because diacriticMask could have combining accents already set to true
     #make sure this is in order of enum so compiler can optimize switch
@@ -483,7 +487,7 @@ def precomposedIndexToBitMask(diacriticIndex, diacriticBits):
     elif diacriticIndex == MACRON_AND_GRAVE:
         diacriticBits |= (_MACRON | _GRAVE)
 #endif
-    elif diacriticIndex == TONOS:
+    elif diacriticIndex == TONOS: #we conflate tonos and acute
         diacriticBits |= _ACUTE
     return diacriticBits