From 9c9eada328587d7b46ed3c72cdf78ed3bad45200 Mon Sep 17 00:00:00 2001 From: Cindy Qi Li Date: Wed, 27 Sep 2023 12:05:43 -0400 Subject: [PATCH] fix: apply accomodating bliss to SVO messages in bmw.json --- data/bmw.json | 2185 +++++++++------------ docs/ConvertBMWToJSON.md | 2 +- utils/README.md | 12 +- utils/fill_in_null_bliss_id_with_spacy.py | 174 +- 4 files changed, 1133 insertions(+), 1240 deletions(-) diff --git a/data/bmw.json b/data/bmw.json index 0b3902c..42fa6b5 100644 --- a/data/bmw.json +++ b/data/bmw.json @@ -760,9 +760,7 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", 16494 ] @@ -773,9 +771,7 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", 17713 ] @@ -904,9 +900,7 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", 18212 ] @@ -925,11 +919,9 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", - 18298 + 18465 ] }, "to ride": { @@ -1199,7 +1191,7 @@ "HE+" ], "bci-av-id": [ - 27057, + 24261, "//", 14687 ] @@ -1210,7 +1202,7 @@ "I" ], "bci-av-id": [ - 27057, + 24261, "//", 14916 ] @@ -1221,7 +1213,7 @@ "IT+" ], "bci-av-id": [ - 27057, + 24261, "//", 14960 ] @@ -1252,11 +1244,11 @@ "HE+" ], "bci-av-id": [ - 27057, + 24261, "//", - 14687, + 15733, "//", - 15733 + 14687 ] }, "won't I": { @@ -1266,11 +1258,11 @@ "I+" ], "bci-av-id": [ - 27057, + 24261, "//", - 14916, + 15733, "//", - 15733 + 14916 ] }, "won't it": { @@ -1280,11 +1272,11 @@ "IT+" ], "bci-av-id": [ - 27057, + 24261, "//", - 14960, + 15733, "//", - 15733 + 14960 ] }, "won't she": { @@ -1294,11 +1286,11 @@ "SHE+" ], "bci-av-id": [ - 27057, + 24261, "//", - 16494, + 15733, "//", - 15733 + 16494 ] }, "won't they": { @@ -1308,11 +1300,11 @@ "THEY+" ], "bci-av-id": [ - 27057, + 24261, "//", - 17713, + 15733, "//", - 15733 + 17713 ] }, "won't we": { @@ -1322,11 +1314,11 @@ "WE+" ], "bci-av-id": [ - 27057, + 24261, "//", - 18212, + 15733, "//", - 15733 + 18212 ] }, "won't you": { @@ -1336,11 +1328,11 @@ "YOU+" ], "bci-av-id": [ - 
27057, + 24261, "//", - 18298, + 15733, "//", - 15733 + 18465 ] }, "future": { @@ -1364,7 +1356,7 @@ "SHE+" ], "bci-av-id": [ - 27057, + 24261, "//", 16494 ] @@ -1375,7 +1367,7 @@ "THEY+" ], "bci-av-id": [ - 27057, + 24261, "//", 17713 ] @@ -1386,7 +1378,7 @@ "WE+" ], "bci-av-id": [ - 27057, + 24261, "//", 18212 ] @@ -1397,9 +1389,9 @@ "YOU+" ], "bci-av-id": [ - 27057, + 24261, "//", - 18298 + 18465 ] }, "holiday": { @@ -1708,7 +1700,7 @@ "o'clock": { "encoding": [ "LIMITIME", - "OUTWORL", + "OUTWORLD", "ABS TIME" ], "bci-av-id": 14904 @@ -2002,11 +1994,11 @@ "BUT" ], "bci-av-id": [ - 27105, - "//", 14916, "//", - 13114 + 25520, + ";", + 8995 ] }, "I can": { @@ -2026,11 +2018,11 @@ "FINISH" ], "bci-av-id": [ - 27105, - "//", 14916, "//", - 13860 + 12335, + ";", + 9004 ] }, "I should": { @@ -2039,9 +2031,9 @@ "FORGIVE" ], "bci-av-id": [ - 27105, + 14916, "//", - 14916 + 24264 ] }, "I will": { @@ -2050,9 +2042,9 @@ "FUTURE" ], "bci-av-id": [ - 27057, + 14916, "//", - 14916 + 24261 ] }, "I had": { @@ -2061,9 +2053,11 @@ "GO" ], "bci-av-id": [ - 27105, + 14916, "//", - 14916 + 24912, + ";", + 9004 ] }, "I have": { @@ -2072,9 +2066,11 @@ "HAVE" ], "bci-av-id": [ - 27105, + 14916, "//", - 14916 + 24912, + ";", + 24807 ] }, "I": { @@ -2090,9 +2086,9 @@ "IF" ], "bci-av-id": [ - 27105, + 14916, "//", - 14916 + 24264 ] }, "I may": { @@ -2115,9 +2111,9 @@ "bci-av-id": [ 14916, "//", - 15733, + 12639, "//", - 12639 + 15733 ] }, "I couldn't": { @@ -2127,13 +2123,13 @@ "BUT" ], "bci-av-id": [ - 27105, - "//", 14916, "//", - 15733, + 25520, + ";", + 8995, "//", - 13114 + 15733 ] }, "I can't": { @@ -2145,9 +2141,9 @@ "bci-av-id": [ 14916, "//", - 15733, + 13114, "//", - 13114 + 15733 ] }, "I didn't": { @@ -2157,13 +2153,13 @@ "FINISH" ], "bci-av-id": [ - 27105, - "//", 14916, "//", - 15733, + 12335, + ";", + 9004, "//", - 13860 + 15733 ] }, "I shouldn't": { @@ -2173,10 +2169,10 @@ "FORGIVE" ], "bci-av-id": [ - 27105, - "//", 14916, "//", + 24264, + "//", 15733 ] }, @@ -2187,10 
+2183,10 @@ "FUTURE" ], "bci-av-id": [ - 27057, - "//", 14916, "//", + 24261, + "//", 15733 ] }, @@ -2201,10 +2197,12 @@ "GO" ], "bci-av-id": [ - 27105, - "//", 14916, "//", + 24912, + ";", + 9004, + "//", 15733 ] }, @@ -2215,10 +2213,12 @@ "HAVE" ], "bci-av-id": [ - 27105, - "//", 14916, "//", + 24912, + ";", + 24807, + "//", 15733 ] }, @@ -2229,10 +2229,10 @@ "IF" ], "bci-av-id": [ - 27105, - "//", 14916, "//", + 24264, + "//", 15733 ] }, @@ -2243,13 +2243,13 @@ "PAST" ], "bci-av-id": [ - 27105, - "//", 14916, "//", - 15733, + 24443, + ";", + 9004, "//", - 12639 + 15733 ] }, "I don't": { @@ -2261,9 +2261,9 @@ "bci-av-id": [ 14916, "//", - 15733, + 13860, "//", - 13860 + 15733 ] }, "I don't want": { @@ -2275,10 +2275,10 @@ "bci-av-id": [ 14916, "//", - 15733, - "//", 13860, "//", + 15733, + "//", 18035 ] }, @@ -2295,9 +2295,9 @@ "OUTWORLD" ], "bci-av-id": [ - 27105, + 14916, "//", - 14916 + 16226 ] }, "I was": { @@ -2306,11 +2306,11 @@ "PAST" ], "bci-av-id": [ - 27105, - "//", 14916, "//", - 12639 + 24443, + ";", + 9004 ] }, "my": { @@ -2337,9 +2337,11 @@ "TOOL" ], "bci-av-id": [ - 27105, + 14916, "//", - 14916 + 25072, + ";", + 9004 ] }, "I want": { @@ -2366,9 +2368,7 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", 14687 ] @@ -2765,9 +2765,9 @@ "OUTWORLD" ], "bci-av-id": [ - 27105, + 14960, "//", - 14960 + 16226 ] }, "it was": { @@ -2778,7 +2778,9 @@ "bci-av-id": [ 14960, "//", - 12639 + 24443, + ";", + 9004 ] }, "its": { @@ -2805,9 +2807,11 @@ "TOOL" ], "bci-av-id": [ - 27105, + 14960, "//", - 14960 + 25072, + ";", + 9004 ] }, "it wants": { @@ -3120,9 +3124,7 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", 14916 ] @@ -3133,9 +3135,7 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", 14960 ] @@ -3147,13 +3147,11 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", - 14687, + 15733, "//", - 15733 + 14687 ] }, "wouldn't I": { @@ -3163,13 +3161,11 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 
24264, "//", - 14916, + 15733, "//", - 15733 + 14916 ] }, "wouldn't it": { @@ -3179,13 +3175,11 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", - 14960, + 15733, "//", - 15733 + 14960 ] }, "wouldn't she": { @@ -3195,13 +3189,11 @@ "SHE+" ], "bci-av-id": [ - 8485, + 24264, "//", - 27105, - "//", - 16494, + 15733, "//", - 15733 + 16494 ] }, "wouldn't they": { @@ -3211,13 +3203,11 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", - 17713, + 15733, "//", - 15733 + 17713 ] }, "wouldn't we": { @@ -3227,13 +3217,11 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", - 18212, + 15733, "//", - 15733 + 18212 ] }, "wouldn't you": { @@ -3243,13 +3231,11 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", - 18298, + 15733, "//", - 15733 + 18465 ] }, "eighteenth": { @@ -3280,9 +3266,7 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", 16494 ] @@ -3293,9 +3277,7 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", 17713 ] @@ -3318,9 +3300,7 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", 18212 ] @@ -3331,11 +3311,9 @@ "YOU+" ], "bci-av-id": [ - 8485, + 24264, "//", - 27105, - "//", - 18298 + 18465 ] }, "well": { @@ -3362,11 +3340,11 @@ "BUT" ], "bci-av-id": [ - 27105, - "//", 14960, "//", - 13114 + 25520, + ";", + 8995 ] }, "it can": { @@ -3386,11 +3364,11 @@ "FINISH" ], "bci-av-id": [ - 27105, - "//", 14960, "//", - 13860 + 12335, + ";", + 9004 ] }, "it should": { @@ -3399,9 +3377,9 @@ "FORGIVE" ], "bci-av-id": [ - 27105, + 14960, "//", - 14960 + 24264 ] }, "it will": { @@ -3410,9 +3388,9 @@ "FUTURE" ], "bci-av-id": [ - 27057, + 14960, "//", - 14960 + 24261 ] }, "it had": { @@ -3421,9 +3399,11 @@ "GO" ], "bci-av-id": [ - 27105, + 14960, "//", - 14960 + 24912, + ";", + 9004 ] }, "it has": { @@ -3431,7 +3411,13 @@ "IT+", "HAVE" ], - "bci-av-id": 14960 + "bci-av-id": [ + 14960, + "//", + 24912, + ";", + 24807 + ] }, "it would": { "encoding": [ @@ -3439,9 +3425,9 
@@ "IF" ], "bci-av-id": [ - 27105, + 14960, "//", - 14960 + 24264 ] }, "it may": { @@ -3464,11 +3450,9 @@ "bci-av-id": [ 14960, "//", - 15474, - "/", - 14947, + 12639, "//", - 12639 + 15733 ] }, "it couldn't": { @@ -3478,13 +3462,13 @@ "BUT" ], "bci-av-id": [ - 27105, - "//", 14960, "//", - 15733, + 25520, + ";", + 8995, "//", - 13114 + 15733 ] }, "it can't": { @@ -3496,9 +3480,9 @@ "bci-av-id": [ 14960, "//", - 15733, + 13114, "//", - 13114 + 15733 ] }, "it didn't": { @@ -3508,13 +3492,13 @@ "FINISH" ], "bci-av-id": [ - 27105, - "//", 14960, "//", - 15733, + 12335, + ";", + 9004, "//", - 13860 + 15733 ] }, "it shouldn't": { @@ -3524,10 +3508,10 @@ "FORGIVE" ], "bci-av-id": [ - 27105, - "//", 14960, "//", + 24264, + "//", 15733 ] }, @@ -3538,10 +3522,10 @@ "FUTURE" ], "bci-av-id": [ - 27057, - "//", 14960, "//", + 24261, + "//", 15733 ] }, @@ -3552,24 +3536,28 @@ "GO" ], "bci-av-id": [ - 27105, - "//", 14960, "//", + 24912, + ";", + 9004, + "//", 15733 ] }, - "it hasnt": { + "it hasn't": { "encoding": [ "IT+", "NOT", "HAVE" ], "bci-av-id": [ - 27105, - "//", 14960, "//", + 24912, + ";", + 24807, + "//", 15733 ] }, @@ -3580,10 +3568,10 @@ "IF" ], "bci-av-id": [ - 27105, - "//", 14960, "//", + 24264, + "//", 15733 ] }, @@ -3596,11 +3584,11 @@ "bci-av-id": [ 14960, "//", - 15474, - "/", - 14947, + 24443, + ";", + 9004, "//", - 12639 + 15733 ] }, "it doesn't": { @@ -3612,9 +3600,9 @@ "bci-av-id": [ 14960, "//", - 15733, + 13860, "//", - 13860 + 15733 ] }, "it doesn't want": { @@ -3626,10 +3614,10 @@ "bci-av-id": [ 14960, "//", - 15733, - "//", 13860, "//", + 15733, + "//", 18035 ] }, @@ -4510,9 +4498,9 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 9004, "//", 18212 ] @@ -4523,11 +4511,11 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 9004, "//", - 18298 + 18465 ] }, "to belong": { @@ -4576,9 +4564,9 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 24807, "//", 14687 ] @@ -4616,13 +4604,13 @@ "HE+" ], 
"bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 24807, "//", - 14687, + 15733, "//", - 15733 + 14687 ] }, "haven't I": { @@ -4632,13 +4620,13 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 24807, "//", - 14916, + 15733, "//", - 15733 + 14916 ] }, "hasn't it": { @@ -4648,13 +4636,13 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 24807, "//", - 14960, + 15733, "//", - 15733 + 14960 ] }, "hasn't she": { @@ -4664,13 +4652,13 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 24807, "//", - 16494, + 15733, "//", - 15733 + 16494 ] }, "haven't they": { @@ -4680,13 +4668,13 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 24807, "//", - 17713, + 15733, "//", - 15733 + 17713 ] }, "haven't we": { @@ -4696,13 +4684,13 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 24807, "//", - 18212, + 15733, "//", - 15733 + 18212 ] }, "haven't you": { @@ -4712,13 +4700,13 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 24807, "//", - 18298, + 15733, "//", - 15733 + 18465 ] }, "twenty-sixth": { @@ -4739,7 +4727,7 @@ "to own": { "encoding": [ "HAVE", - "OUTWORL", + "OUTWORLD", "TO+VERB" ], "bci-av-id": 15950 @@ -4747,7 +4735,7 @@ "own": { "encoding": [ "HAVE", - "OUTWORL", + "OUTWORLD", "VERB" ], "bci-av-id": 15950 @@ -4755,7 +4743,7 @@ "owned": { "encoding": [ "HAVE", - "OUTWORL", + "OUTWORLD", "VERB+ED" ], "bci-av-id": 15950 @@ -4763,14 +4751,14 @@ "owning": { "encoding": [ "HAVE", - "OUTWORL" + "OUTWORLD" ], "bci-av-id": 15950 }, "owns": { "encoding": [ "HAVE", - "OUTWORL", + "OUTWORLD", "VERB+S" ], "bci-av-id": 15950 @@ -4967,11 +4955,11 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 24807, "//", - 18298 + 18465 ] }, "he is": { @@ -4993,7 +4981,9 @@ "bci-av-id": [ 14687, "//", - 13114 + 25520, + ";", + 8995 ] }, "he can": { @@ -5015,7 +5005,9 @@ "bci-av-id": [ 14687, "//", - 13860 + 12335, + ";", + 9004 ] }, "he should": { 
@@ -5026,7 +5018,7 @@ "bci-av-id": [ 14687, "//", - 24261 + 24264 ] }, "he will": { @@ -5048,7 +5040,9 @@ "bci-av-id": [ 14687, "//", - 14685 + 24912, + ";", + 9004 ] }, "he has": { @@ -5059,7 +5053,9 @@ "bci-av-id": [ 14687, "//", - 14685 + 24912, + ";", + 24807 ] }, "he": { @@ -5077,7 +5073,7 @@ "bci-av-id": [ 14687, "//", - 24261 + 24264 ] }, "he may": { @@ -5100,11 +5096,9 @@ "bci-av-id": [ 14687, "//", - 15474, - "/", - 14947, + 12639, "//", - 12639 + 15733 ] }, "he couldn't": { @@ -5116,11 +5110,11 @@ "bci-av-id": [ 14687, "//", - 15474, - "/", - 14947, + 25520, + ";", + 8995, "//", - 13114 + 15733 ] }, "he can't": { @@ -5132,11 +5126,9 @@ "bci-av-id": [ 14687, "//", - 15474, - "/", - 14947, + 13114, "//", - 13114 + 15733 ] }, "he didn't": { @@ -5148,11 +5140,11 @@ "bci-av-id": [ 14687, "//", - 15474, - "/", - 14947, + 12335, + ";", + 9004, "//", - 13860 + 15733 ] }, "he shouldn't": { @@ -5164,11 +5156,9 @@ "bci-av-id": [ 14687, "//", - 15474, - "/", - 14947, + 24264, "//", - 24261 + 15733 ] }, "he won't": { @@ -5180,11 +5170,9 @@ "bci-av-id": [ 14687, "//", - 15474, - "/", - 14947, + 24261, "//", - 24261 + 15733 ] }, "he hadn't": { @@ -5195,11 +5183,11 @@ "bci-av-id": [ 14687, "//", - 15474, - "/", - 14947, + 24912, + ";", + 9004, "//", - 12639 + 15733 ] }, "he hasn't": { @@ -5211,11 +5199,11 @@ "bci-av-id": [ 14687, "//", - 15474, - "/", - 14947, + 24912, + ";", + 24807, "//", - 12639 + 15733 ] }, "he wouldn't": { @@ -5227,11 +5215,9 @@ "bci-av-id": [ 14687, "//", - 15474, - "/", - 14947, + 24264, "//", - 24261 + 15733 ] }, "he wasn't": { @@ -5243,11 +5229,11 @@ "bci-av-id": [ 14687, "//", - 15474, - "/", - 14947, + 24443, + ";", + 9004, "//", - 12639 + 15733 ] }, "he doesn't": { @@ -5259,11 +5245,9 @@ "bci-av-id": [ 14687, "//", - 15474, - "/", - 14947, + 13860, "//", - 12639 + 15733 ] }, "he doesn't want": { @@ -5275,11 +5259,9 @@ "bci-av-id": [ 14687, "//", - 15474, - "/", - 14947, + 13860, "//", - 12639, + 15733, "//", 18035 ] @@ -5299,7 +5281,7 @@ 
"bci-av-id": [ 14687, "//", - 12639 + 16226 ] }, "he was": { @@ -5310,7 +5292,9 @@ "bci-av-id": [ 14687, "//", - 12639 + 24443, + ";", + 9004 ] }, "his": { @@ -5328,7 +5312,7 @@ "bci-av-id": [ 14687, "//", - 12860 + 13860 ] }, "he used to": { @@ -5337,9 +5321,11 @@ "TOOL" ], "bci-av-id": [ - 27105, + 14687, "//", - 14687 + 25072, + ";", + 9004 ] }, "eighteen": { @@ -5439,7 +5425,7 @@ "nineteen": { "encoding": [ "NUMBER", - "OUTWORL" + "OUTWORLD" ], "bci-av-id": [ 8497, @@ -5841,9 +5827,9 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 9004, "//", 14916 ] @@ -5915,13 +5901,13 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 9004, "//", - 14687, + 15733, "//", - 15733 + 14687 ] }, "hadn't I": { @@ -5931,13 +5917,13 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 9004, "//", - 14916, + 15733, "//", - 15733 + 14916 ] }, "hadn't it": { @@ -5947,13 +5933,13 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 9004, "//", - 14960, + 15733, "//", - 15733 + 14960 ] }, "hadn't she": { @@ -5963,13 +5949,13 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 9004, "//", - 16494, + 15733, "//", - 15733 + 16494 ] }, "hadn't they": { @@ -5979,13 +5965,13 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 9004, "//", - 17713, + 15733, "//", - 15733 + 17713 ] }, "hadn't we": { @@ -5995,13 +5981,13 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 9004, "//", - 18212, + 15733, "//", - 15733 + 18212 ] }, "hadn't you": { @@ -6011,13 +5997,13 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 9004, "//", - 18298, + 15733, "//", - 15733 + 18465 ] }, "twenty-fifth": { @@ -6048,9 +6034,9 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24912, + ";", + 9004, "//", 16494 ] @@ -6685,11 +6671,9 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 14687, + 16226, "//", - 16226 + 14687 ] }, "may I": { @@ -6698,11 +6682,9 @@ "I+" ], 
"bci-av-id": [ - 8485, + 16226, "//", - 14916, - "//", - 16226 + 14916 ] }, "I guess so": { @@ -6724,11 +6706,9 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 14960, + 16226, "//", - 16226 + 14960 ] }, "eightieth": { @@ -6760,11 +6740,9 @@ "SHE+" ], "bci-av-id": [ - 8485, + 16226, "//", - 16494, - "//", - 16226 + 16494 ] }, "may they": { @@ -6773,11 +6751,9 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 17713, + 16226, "//", - 16226 + 17713 ] }, "uncertain": { @@ -6794,11 +6770,9 @@ "WE+" ], "bci-av-id": [ - 8485, + 16226, "//", - 18212, - "//", - 16226 + 18212 ] }, "may you": { @@ -6807,11 +6781,9 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 18298, + 16226, "//", - 16226 + 18465 ] }, ".(PERIOD)": { @@ -7547,11 +7519,9 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 14687, + 12639, "//", - 12639 + 14687 ] }, "am I": { @@ -7560,11 +7530,9 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 14916, + 12639, "//", - 12639 + 14916 ] }, "is it": { @@ -7573,11 +7541,9 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 14960, + 12639, "//", - 12639 + 14960 ] }, "life": { @@ -7639,13 +7605,11 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 14687, + 12639, "//", 15733, "//", - 12639 + 14687 ] }, "aren't I": { @@ -7655,13 +7619,11 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 14916, + 12639, "//", 15733, "//", - 12639 + 14916 ] }, "isn't it": { @@ -7671,13 +7633,11 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 14960, + 12639, "//", 15733, "//", - 12639 + 14960 ] }, "isn't she": { @@ -7687,13 +7647,11 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 16494, + 12639, "//", 15733, "//", - 12639 + 16494 ] }, "aren't they": { @@ -7703,13 +7661,11 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 17713, + 12639, "//", 15733, "//", - 12639 + 17713 ] }, "aren't we": { @@ -7719,13 +7675,11 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 18212, + 12639, "//", 15733, "//", - 12639 + 18212 ] }, "aren't you": { @@ -7735,13 +7689,11 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 18298, + 12639, "//", 15733, "//", - 12639 + 
18465 ] }, "twenty-first": { @@ -8497,11 +8449,9 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 14687, + 13860, "//", - 13860 + 14687 ] }, "do I": { @@ -8510,11 +8460,9 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 14916, + 13860, "//", - 13860 + 14916 ] }, "does it": { @@ -8523,11 +8471,9 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 14960, + 13860, "//", - 13860 + 14960 ] }, "to play": { @@ -8962,7 +8908,11 @@ "PREVRB+S", "TOOL" ], - "bci-av-id": 27105 + "bci-av-id": [ + 25072, + ";", + 9004 + ] }, "want": { "encoding": [ @@ -8982,7 +8932,11 @@ "CAN", "VERB+ED" ], - "bci-av-id": 13114 + "bci-av-id": [ + 25520, + ";", + 8995 + ] }, "can": { "encoding": [ @@ -9003,7 +8957,11 @@ "SEXUAL", "VERB+ED" ], - "bci-av-id": 13860 + "bci-av-id": [ + 12335, + ";", + 9004 + ] }, "must": { "encoding": [ @@ -9033,7 +8991,7 @@ "IF", "IF" ], - "bci-av-id": 24261 + "bci-av-id": 24264 }, "shall": { "encoding": [ @@ -9054,7 +9012,7 @@ "PREVERB", "MAYBE" ], - "bci-av-id": 15435 + "bci-av-id": 16226 }, "isn't": { "encoding": [ @@ -9063,11 +9021,9 @@ "AM/BE" ], "bci-av-id": [ - 15474, - "/", - 14947, + 12639, "//", - 12639 + 15733 ] }, "couldn't": { @@ -9077,11 +9033,9 @@ "BUT" ], "bci-av-id": [ - 15474, - "/", - 14947, + 25520, "//", - 13114 + 15733 ] }, "can't": { @@ -9091,11 +9045,9 @@ "CAN" ], "bci-av-id": [ - 15474, - "/", - 14947, + 13114, "//", - 13114 + 15733 ] }, "didn't": { @@ -9105,11 +9057,11 @@ "FINISH" ], "bci-av-id": [ - 15474, - "/", - 14947, + 12335, + ";", + 9004, "//", - 13860 + 15733 ] }, "shouldn't": { @@ -9119,11 +9071,11 @@ "FORGIVE" ], "bci-av-id": [ - 15474, - "/", - 14947, + 17705, + ";", + 9004, "//", - 24261 + 15733 ] }, "won't": { @@ -9133,9 +9085,7 @@ "FUTURE" ], "bci-av-id": [ - 15474, - "/", - 14947, + 24261, "//", 24261 ] @@ -9231,7 +9181,7 @@ "PREVERB", "OUTWORLD" ], - "bci-av-id": null + "bci-av-id": 16226 }, "was": { "encoding": [ @@ -10100,7 +10050,7 @@ "proud": { "encoding": [ "POEM", - "OUTWORL", + "OUTWORLD", "ADJ." 
], "bci-av-id": 16422 @@ -10108,7 +10058,7 @@ "prouder": { "encoding": [ "POEM", - "OUTWORL", + "OUTWORLD", "ADJ.+ER" ], "bci-av-id": [ @@ -10120,7 +10070,7 @@ "proudest": { "encoding": [ "POEM", - "OUTWORL", + "OUTWORLD", "ADJ.+EST" ], "bci-av-id": [ @@ -10850,15 +10800,13 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 16494, + 25520, + ";", + 8995, "//", 15733, "//", - 13114 + 16494 ] }, "couldn't they": { @@ -10868,15 +10816,13 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 17713, + 25520, + ";", + 8995, "//", 15733, "//", - 13114 + 17713 ] }, "couldn't we": { @@ -10886,15 +10832,13 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 18212, + 25520, + ";", + 8995, "//", 15733, "//", - 13114 + 18212 ] }, "couldn't you": { @@ -10904,15 +10848,13 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 18298, + 25520, + ";", + 8995, "//", 15733, "//", - 13114 + 18465 ] }, "sixtieth": { @@ -10951,13 +10893,11 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 16494, + 25520, + ";", + 8995, "//", - 13114 + 16494 ] }, "could they": { @@ -10966,13 +10906,11 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 17713, + 25520, + ";", + 8995, "//", - 13114 + 17713 ] }, "could we": { @@ -10981,13 +10919,11 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 18212, + 25520, + ";", + 8995, "//", - 13114 + 18212 ] }, "could you": { @@ -10996,13 +10932,11 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 18298, + 25520, + ";", + 8995, "//", - 13114 + 18465 ] }, "able": { @@ -11025,11 +10959,9 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 14687, + 13114, "//", - 13114 + 14687 ] }, "can I": { @@ -11038,11 +10970,9 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 14916, + 13114, "//", - 13114 + 14916 ] }, "can it": { @@ -11051,11 +10981,9 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 14960, + 13114, "//", - 13114 + 14960 ] }, "in front of": { @@ -11117,13 +11045,11 @@ "HE+" ], "bci-av-id": [ - 
8485, - "//", - 14687, + 13114, "//", 15733, "//", - 13114 + 14687 ] }, "can't I": { @@ -11133,13 +11059,11 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 14916, + 13114, "//", 15733, "//", - 13114 + 14916 ] }, "can't it": { @@ -11149,13 +11073,11 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 14960, + 13114, "//", 15733, "//", - 13114 + 14960 ] }, "can't she": { @@ -11165,13 +11087,11 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 16494, + 13114, "//", 15733, "//", - 13114 + 16494 ] }, "can't they": { @@ -11181,13 +11101,11 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 17713, + 13114, "//", 15733, "//", - 13114 + 17713 ] }, "can't we": { @@ -11197,29 +11115,25 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 18212, + 13114, "//", 15733, "//", - 13114 + 18212 ] }, - "can\u2019t you": { + "can't you": { "encoding": [ "CAN", "NOT", "YOU+" ], "bci-av-id": [ - 8485, - "//", - 18298, + 13114, "//", 15733, "//", - 13114 + 18465 ] }, "fortieth": { @@ -11250,11 +11164,9 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 16494, + 13114, "//", - 13114 + 16494 ] }, "can they": { @@ -11263,11 +11175,9 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 17713, + 13114, "//", - 13114 + 17713 ] }, "to be able": { @@ -11618,7 +11528,7 @@ "AM/BE" ], "bci-av-id": [ - 18298, + 18465, "//", 12639 ] @@ -11629,11 +11539,11 @@ "BUT" ], "bci-av-id": [ - 27105, - "//", - 18298, + 18465, "//", - 13114 + 25520, + ";", + 8995 ] }, "you can": { @@ -11642,7 +11552,7 @@ "CAN" ], "bci-av-id": [ - 18298, + 18465, "//", 13114 ] @@ -11653,11 +11563,11 @@ "FINISH" ], "bci-av-id": [ - 27105, - "//", - 18298, + 18465, "//", - 13860 + 12335, + ";", + 9004 ] }, "she had": { @@ -11666,9 +11576,11 @@ "GO" ], "bci-av-id": [ - 27105, + 16494, "//", - 16494 + 24912, + ";", + 9004 ] }, "she has": { @@ -11677,9 +11589,11 @@ "HAVE" ], "bci-av-id": [ - 27105, + 16494, "//", - 16494 + 24912, + ";", + 24807 ] }, "she would": { @@ -11688,9 +11602,9 @@ "IF" ], "bci-av-id": [ - 27105, + 16494, "//", - 16494 + 24264 ] }, "she may": { @@ 
-11711,13 +11625,11 @@ "AM/BE" ], "bci-av-id": [ - 14688, + 16494, "//", - 15474, - "/", - 14947, + 12639, "//", - 12639 + 15733 ] }, "she couldn't": { @@ -11727,13 +11639,13 @@ "BUT" ], "bci-av-id": [ - 14688, + 16494, "//", - 15474, - "/", - 14947, + 25520, + ";", + 8995, "//", - 13114 + 15733 ] }, "she can't": { @@ -11743,13 +11655,11 @@ "CAN" ], "bci-av-id": [ - 14688, + 16494, "//", - 15474, - "/", - 14947, + 13114, "//", - 13114 + 15733 ] }, "she didn't": { @@ -11759,13 +11669,13 @@ "FINISH" ], "bci-av-id": [ - 27105, - "//", 16494, "//", - 15733, + 12335, + ";", + 9004, "//", - 13860 + 15733 ] }, "she shouldn't": { @@ -11775,10 +11685,10 @@ "FORGIVE" ], "bci-av-id": [ - 27105, - "//", 16494, "//", + 24264, + "//", 15733 ] }, @@ -11789,10 +11699,10 @@ "FUTURE" ], "bci-av-id": [ - 27057, - "//", 16494, "//", + 24261, + "//", 15733 ] }, @@ -11803,10 +11713,12 @@ "GO" ], "bci-av-id": [ - 27105, - "//", 16494, "//", + 24912, + ";", + 9004, + "//", 15733 ] }, @@ -11817,10 +11729,12 @@ "HAVE" ], "bci-av-id": [ - 27105, - "//", 16494, "//", + 24912, + ";", + 24807, + "//", 15733 ] }, @@ -11831,10 +11745,10 @@ "IF" ], "bci-av-id": [ - 27105, - "//", 16494, "//", + 24264, + "//", 15733 ] }, @@ -11845,13 +11759,13 @@ "PAST" ], "bci-av-id": [ - 27105, - "//", 16494, "//", - 15733, + 24443, + ";", + 9004, "//", - 12639 + 15733 ] }, "she doesn't": { @@ -11863,9 +11777,9 @@ "bci-av-id": [ 16494, "//", - 15733, + 13860, "//", - 13860 + 15733 ] }, "she doesn't want": { @@ -11877,10 +11791,10 @@ "bci-av-id": [ 16494, "//", - 15733, - "//", 13860, "//", + 15733, + "//", 18035 ] }, @@ -11897,9 +11811,9 @@ "OUTWORLD" ], "bci-av-id": [ - 27105, + 16494, "//", - 16494 + 16226 ] }, "she was": { @@ -11908,11 +11822,11 @@ "PAST" ], "bci-av-id": [ - 27105, - "//", 16494, "//", - 12639 + 24443, + ";", + 9004 ] }, "she does": { @@ -11939,9 +11853,11 @@ "TOOL" ], "bci-av-id": [ - 27105, + 16494, "//", - 16494 + 25072, + ";", + 9004 ] }, "she wants": { @@ -12130,9 +12046,7 @@ "IT+" ], 
"bci-av-id": [ - 8485, - "//", - 27105, + 16226, "//", 14960 ] @@ -12261,9 +12175,7 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 16226, "//", 17713 ] @@ -12371,9 +12283,7 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 16226, "//", 18212 ] @@ -12384,11 +12294,9 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 16226, "//", - 18298 + 18465 ] }, "some more": { @@ -12860,9 +12768,7 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 16226, "//", 14687 ] @@ -12873,9 +12779,7 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 16226, "//", 14916 ] @@ -12939,13 +12843,11 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 14687, + 13860, "//", 15733, "//", - 13860 + 14687 ] }, "don't I": { @@ -12955,13 +12857,11 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 14916, + 13860, "//", 15733, "//", - 13860 + 14916 ] }, "doesn't it": { @@ -12971,13 +12871,11 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 14960, + 13860, "//", 15733, "//", - 13860 + 14960 ] }, "doesn't she": { @@ -12987,13 +12885,11 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 16494, + 13860, "//", 15733, "//", - 13860 + 16494 ] }, "don't they": { @@ -13003,13 +12899,11 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 17713, + 13860, "//", 15733, "//", - 13860 + 17713 ] }, "don't we": { @@ -13019,13 +12913,11 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 18212, + 13860, "//", 15733, "//", - 13860 + 18212 ] }, "don't you": { @@ -13035,13 +12927,11 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 18298, + 13860, "//", 15733, "//", - 13860 + 18465 ] }, "history": { @@ -13146,11 +13036,9 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 16494, + 13860, "//", - 13860 + 16494 ] }, "do they": { @@ -13193,11 +13081,9 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 18212, + 13860, "//", - 13860 + 18212 ] }, "however": { @@ -13239,11 +13125,9 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 18298, + 13860, "//", - 13860 + 18465 ] }, "she is": { @@ -13263,11 +13147,11 @@ "BUT" ], "bci-av-id": [ - 27105, - "//", 16494, "//", 
- 13114 + 25520, + ";", + 8995 ] }, "she can": { @@ -13287,11 +13171,11 @@ "FINISH" ], "bci-av-id": [ - 27105, - "//", 16494, "//", - 13860 + 12335, + ";", + 9004 ] }, "she should": { @@ -13300,9 +13184,9 @@ "FORGIVE" ], "bci-av-id": [ - 27105, + 16494, "//", - 16494 + 24264 ] }, "she will": { @@ -13311,9 +13195,9 @@ "FUTURE" ], "bci-av-id": [ - 27057, + 16494, "//", - 16494 + 24261 ] }, "breaking": { @@ -13746,11 +13630,9 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 16494, + 12639, "//", - 12639 + 16494 ] }, "are they": { @@ -13759,11 +13641,9 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 17713, + 12639, "//", - 12639 + 17713 ] }, "to be": { @@ -13840,11 +13720,9 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 18212, + 12639, "//", - 12639 + 18212 ] }, "are you": { @@ -13853,11 +13731,9 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 18298, + 12639, "//", - 12639 + 18465 ] }, "birthday": { @@ -13956,13 +13832,11 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14687, + 25520, + ";", + 8995, "//", - 13114 + 14687 ] }, "could I": { @@ -13971,13 +13845,11 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14916, + 25520, + ";", + 8995, "//", - 13114 + 14916 ] }, "could it": { @@ -13986,23 +13858,13 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14960, + 25520, + ";", + 8995, "//", - 13114 + 14960 ] }, - "Version Name": { - "encoding": [ - "BUT", - "MAYBE", - "WANT" - ], - "bci-av-id": null - }, "couldn't he": { "encoding": [ "BUT", @@ -14010,15 +13872,13 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14687, + 25520, + ";", + 8995, "//", 15733, "//", - 13114 + 14687 ] }, "couldn't I": { @@ -14028,15 +13888,13 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14916, + 25520, + ";", + 8995, "//", 15733, "//", - 13114 + 14916 ] }, "couldn't it": { @@ -14046,15 +13904,13 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14960, + 25520, + ";", + 8995, "//", 15733, "//", - 13114 + 14960 ] }, 
"mornings": { @@ -14075,11 +13931,9 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 18212, + 13114, "//", - 13114 + 18212 ] }, "fragile": { @@ -14096,11 +13950,9 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 18298, + 13114, "//", - 13114 + 18465 ] }, "so (adv.)": { @@ -14769,11 +14621,11 @@ "BUT" ], "bci-av-id": [ - 27105, - "//", 17713, "//", - 13114 + 25520, + ";", + 8995 ] }, "they can": { @@ -14793,11 +14645,11 @@ "FINISH" ], "bci-av-id": [ - 27105, - "//", 17713, "//", - 13860 + 12335, + ";", + 9004 ] }, "they should": { @@ -14806,9 +14658,9 @@ "FORGIVE" ], "bci-av-id": [ - 27105, + 17713, "//", - 17713 + 24264 ] }, "they will": { @@ -14817,9 +14669,9 @@ "FUTURE" ], "bci-av-id": [ - 27057, + 17713, "//", - 17713 + 24261 ] }, "they had": { @@ -14828,9 +14680,11 @@ "GO" ], "bci-av-id": [ - 27105, + 17713, "//", - 17713 + 24912, + ";", + 9004 ] }, "they have": { @@ -14839,9 +14693,11 @@ "HAVE" ], "bci-av-id": [ - 27105, + 17713, "//", - 17713 + 24912, + ";", + 24807 ] }, "they would": { @@ -14850,9 +14706,9 @@ "IF" ], "bci-av-id": [ - 27105, + 17713, "//", - 17713 + 24264 ] }, "they may": { @@ -14875,9 +14731,9 @@ "bci-av-id": [ 17713, "//", - 15733, + 12639, "//", - 12639 + 15733 ] }, "they couldn't": { @@ -14887,13 +14743,13 @@ "BUT" ], "bci-av-id": [ - 27105, - "//", 17713, "//", - 15733, + 25520, + ";", + 8995, "//", - 13114 + 15733 ] }, "they can't": { @@ -14905,9 +14761,9 @@ "bci-av-id": [ 17713, "//", - 15733, + 13114, "//", - 13114 + 15733 ] }, "they didn't": { @@ -14917,13 +14773,13 @@ "FINISH" ], "bci-av-id": [ - 27105, - "//", 17713, "//", - 15733, + 12335, + ";", + 9004, "//", - 13860 + 15733 ] }, "they shouldn't": { @@ -14933,10 +14789,10 @@ "FORGIVE" ], "bci-av-id": [ - 27105, - "//", 17713, "//", + 24264, + "//", 15733 ] }, @@ -14947,10 +14803,10 @@ "FUTURE" ], "bci-av-id": [ - 27057, - "//", 17713, "//", + 24261, + "//", 15733 ] }, @@ -14961,10 +14817,12 @@ "GO" ], "bci-av-id": [ - 27105, - "//", 17713, "//", + 24912, + ";", + 9004, + "//", 15733 ] 
}, @@ -14975,10 +14833,12 @@ "HAVE" ], "bci-av-id": [ - 27105, - "//", 17713, "//", + 24912, + ";", + 24807, + "//", 15733 ] }, @@ -14989,10 +14849,10 @@ "IF" ], "bci-av-id": [ - 27105, - "//", 17713, "//", + 24264, + "//", 15733 ] }, @@ -15003,13 +14863,13 @@ "PAST" ], "bci-av-id": [ - 27105, - "//", 17713, "//", - 15733, + 24443, + ";", + 9004, "//", - 12639 + 15733 ] }, "they don't": { @@ -15021,9 +14881,9 @@ "bci-av-id": [ 17713, "//", - 15733, + 13860, "//", - 13860 + 15733 ] }, "they don't want": { @@ -15035,10 +14895,10 @@ "bci-av-id": [ 17713, "//", - 15733, - "//", 13860, "//", + 15733, + "//", 18035 ] }, @@ -15055,9 +14915,9 @@ "OUTWORLD" ], "bci-av-id": [ - 27105, + 17713, "//", - 17713 + 16226 ] }, "they were": { @@ -15066,11 +14926,11 @@ "PAST" ], "bci-av-id": [ - 27105, - "//", 17713, "//", - 12639 + 24443, + ";", + 9004 ] }, "their": { @@ -15104,9 +14964,11 @@ "TOOL" ], "bci-av-id": [ - 27105, + 17713, "//", - 17713 + 25072, + ";", + 9004 ] }, "they want": { @@ -15553,13 +15415,11 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14687, + 24443, + ";", + 9004, "//", - 12639 + 14687 ] }, "was I": { @@ -15568,13 +15428,11 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14916, + 24443, + ";", + 9004, "//", - 12639 + 14916 ] }, "was it": { @@ -15583,13 +15441,11 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14960, + 24443, + ";", + 9004, "//", - 12639 + 14960 ] }, "yesterdays": { @@ -15609,16 +15465,14 @@ "NOT", "HE+" ], - "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14687, + "bci-av-id": [ + 24443, + ";", + 9004, "//", 15733, "//", - 12639 + 14687 ] }, "wasn't I": { @@ -15628,15 +15482,13 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14916, + 24443, + ";", + 9004, "//", 15733, "//", - 12639 + 14916 ] }, "wasn't it": { @@ -15646,15 +15498,13 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14960, + 24443, + ";", + 9004, "//", 15733, "//", - 12639 + 14960 ] }, "wasn't she": { @@ 
-15664,15 +15514,13 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 16494, + 24443, + ";", + 9004, "//", 15733, "//", - 12639 + 16494 ] }, "weren't they": { @@ -15682,15 +15530,13 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 17713, + 24443, + ";", + 9004, "//", 15733, "//", - 12639 + 17713 ] }, "weren't we": { @@ -15700,15 +15546,13 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 18212, + 24443, + ";", + 9004, "//", 15733, "//", - 12639 + 18212 ] }, "weren't you": { @@ -15718,15 +15562,13 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 18298, + 24443, + ";", + 9004, "//", 15733, "//", - 12639 + 18465 ] }, "past": { @@ -15776,13 +15618,11 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 16494, + 24443, + ";", + 9004, "//", - 12639 + 16494 ] }, "were they": { @@ -15791,13 +15631,11 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 17713, + 24443, + ";", + 9004, "//", - 12639 + 17713 ] }, "PTs": { @@ -15828,13 +15666,11 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 18212, + 24443, + ";", + 9004, "//", - 12639 + 18212 ] }, "were you": { @@ -15843,13 +15679,11 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 18298, + 24443, + ";", + 9004, "//", - 12639 + 18465 ] }, "poetry": { @@ -16357,10 +16191,10 @@ "FORGIVE" ], "bci-av-id": [ - 27105, - "//", 18212, "//", + 24264, + "//", 15733 ] }, @@ -16371,10 +16205,10 @@ "FUTURE" ], "bci-av-id": [ - 27057, - "//", 18212, "//", + 24261, + "//", 15733 ] }, @@ -16385,10 +16219,12 @@ "GO" ], "bci-av-id": [ - 27105, - "//", 18212, "//", + 24912, + ";", + 9004, + "//", 15733 ] }, @@ -16399,10 +16235,12 @@ "HAVE" ], "bci-av-id": [ - 27105, - "//", 18212, "//", + 24912, + ";", + 24807, + "//", 15733 ] }, @@ -16413,10 +16251,10 @@ "IF" ], "bci-av-id": [ - 27105, - "//", 18212, "//", + 24264, + "//", 15733 ] }, @@ -16427,13 +16265,13 @@ "PAST" ], "bci-av-id": [ - 27105, - "//", 18212, "//", - 15733, + 24443, + ";", + 
9004, "//", - 12639 + 15733 ] }, "we don't": { @@ -16445,9 +16283,9 @@ "bci-av-id": [ 18212, "//", - 15733, + 13860, "//", - 13860 + 15733 ] }, "we don't want": { @@ -16459,10 +16297,10 @@ "bci-av-id": [ 18212, "//", - 15733, - "//", 13860, "//", + 15733, + "//", 18035 ] }, @@ -16479,9 +16317,9 @@ "OUTWORLD" ], "bci-av-id": [ - 27105, + 18212, "//", - 18212 + 16226 ] }, "we were": { @@ -16490,11 +16328,11 @@ "PAST" ], "bci-av-id": [ - 27105, - "//", 18212, "//", - 12639 + 24443, + ";", + 9004 ] }, "our": { @@ -16521,9 +16359,11 @@ "TOOL" ], "bci-av-id": [ - 27105, + 18212, "//", - 18212 + 25072, + ";", + 9004 ] }, "we want": { @@ -17956,11 +17796,11 @@ "BUT" ], "bci-av-id": [ - 27105, - "//", 18212, "//", - 13114 + 25520, + ";", + 8995 ] }, "we can": { @@ -17980,11 +17820,11 @@ "FINISH" ], "bci-av-id": [ - 27105, - "//", 18212, "//", - 13860 + 12335, + ";", + 9004 ] }, "we should": { @@ -17993,9 +17833,9 @@ "FORGIVE" ], "bci-av-id": [ - 27105, + 18212, "//", - 18212 + 24264 ] }, "we will": { @@ -18004,9 +17844,9 @@ "FUTURE" ], "bci-av-id": [ - 27057, + 18212, "//", - 18212 + 24261 ] }, "we had": { @@ -18015,9 +17855,11 @@ "GO" ], "bci-av-id": [ - 27105, + 18212, "//", - 18212 + 24912, + ";", + 9004 ] }, "we have": { @@ -18026,9 +17868,11 @@ "HAVE" ], "bci-av-id": [ - 27105, + 18212, "//", - 18212 + 24912, + ";", + 24807 ] }, "we would": { @@ -18037,9 +17881,9 @@ "IF" ], "bci-av-id": [ - 27105, + 18212, "//", - 18212 + 24264 ] }, "we may": { @@ -18062,9 +17906,9 @@ "bci-av-id": [ 18212, "//", - 15733, + 12639, "//", - 12639 + 15733 ] }, "we couldn't": { @@ -18074,13 +17918,13 @@ "BUT" ], "bci-av-id": [ - 27105, - "//", 18212, "//", - 15733, + 25520, + ";", + 8995, "//", - 13114 + 15733 ] }, "we can't": { @@ -18092,9 +17936,9 @@ "bci-av-id": [ 18212, "//", - 15733, + 13114, "//", - 13114 + 15733 ] }, "we didn't": { @@ -18104,13 +17948,13 @@ "FINISH" ], "bci-av-id": [ - 27105, - "//", 18212, "//", - 15733, + 12335, + ";", + 9004, "//", - 13860 + 15733 ] }, "thoughts": 
{ @@ -18676,7 +18520,7 @@ "crazy": { "encoding": [ "THINK", - "OUTWORL", + "OUTWORLD", "ADJ." ], "bci-av-id": [ @@ -18692,7 +18536,7 @@ "crazier": { "encoding": [ "THINK", - "OUTWORL", + "OUTWORLD", "ADJ.+ER" ], "bci-av-id": [ @@ -18710,7 +18554,7 @@ "craziest": { "encoding": [ "THINK", - "OUTWORL", + "OUTWORLD", "ADJ.+EST" ], "bci-av-id": [ @@ -18915,12 +18759,10 @@ "SHE+" ], "bci-av-id": [ - 8485, + 13860, "//", 16494, "//", - 13860, - "//", 18035 ] }, @@ -18986,12 +18828,10 @@ "THEY+" ], "bci-av-id": [ - 8485, + 13860, "//", 17713, "//", - 13860, - "//", 18035 ] }, @@ -19054,12 +18894,10 @@ "WE+" ], "bci-av-id": [ - 8485, + 13860, "//", 18212, "//", - 13860, - "//", 18035 ] }, @@ -19145,12 +18983,10 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 18298, - "//", 13860, "//", + 18465, + "//", 18035 ] }, @@ -19236,9 +19072,9 @@ "FORGIVE" ], "bci-av-id": [ - 27105, + 18465, "//", - 18298 + 24264 ] }, "you will": { @@ -19247,9 +19083,9 @@ "FUTURE" ], "bci-av-id": [ - 27057, + 18465, "//", - 18298 + 24261 ] }, "you had": { @@ -19258,9 +19094,11 @@ "GO" ], "bci-av-id": [ - 27105, + 18465, "//", - 18298 + 24912, + ";", + 9004 ] }, "you have": { @@ -19269,9 +19107,11 @@ "HAVE" ], "bci-av-id": [ - 27105, + 18465, "//", - 18298 + 24912, + ";", + 24807 ] }, "you would": { @@ -19280,9 +19120,9 @@ "IF" ], "bci-av-id": [ - 27105, + 18465, "//", - 18298 + 24264 ] }, "you may": { @@ -19291,7 +19131,7 @@ "MAYBE" ], "bci-av-id": [ - 18298, + 18465, "//", 16226 ] @@ -19303,11 +19143,11 @@ "AM/BE" ], "bci-av-id": [ - 18298, + 18465, "//", - 15733, + 12639, "//", - 12639 + 15733 ] }, "you couldn't": { @@ -19317,13 +19157,13 @@ "BUT" ], "bci-av-id": [ - 27105, - "//", - 18298, + 18465, "//", - 15733, + 25520, + ";", + 8995, "//", - 13114 + 15733 ] }, "you can't": { @@ -19333,11 +19173,11 @@ "CAN" ], "bci-av-id": [ - 18298, + 18465, "//", - 15733, + 13114, "//", - 13114 + 15733 ] }, "you didn't": { @@ -19347,13 +19187,13 @@ "FINISH" ], "bci-av-id": [ - 27105, - "//", - 18298, + 18465, 
"//", - 15733, + 12335, + ";", + 9004, "//", - 13860 + 15733 ] }, "you shouldn't": { @@ -19363,9 +19203,9 @@ "FORGIVE" ], "bci-av-id": [ - 27105, + 18465, "//", - 18298, + 24264, "//", 15733 ] @@ -19377,9 +19217,9 @@ "FUTURE" ], "bci-av-id": [ - 27057, + 18465, "//", - 18298, + 24261, "//", 15733 ] @@ -19391,9 +19231,11 @@ "GO" ], "bci-av-id": [ - 27105, + 18465, "//", - 18298, + 24912, + ";", + 9004, "//", 15733 ] @@ -19405,9 +19247,11 @@ "HAVE" ], "bci-av-id": [ - 27105, + 18465, "//", - 18298, + 24912, + ";", + 24807, "//", 15733 ] @@ -19419,9 +19263,9 @@ "IF" ], "bci-av-id": [ - 27105, + 18465, "//", - 18298, + 24264, "//", 15733 ] @@ -19433,13 +19277,13 @@ "PAST" ], "bci-av-id": [ - 27105, - "//", - 18298, + 18465, "//", - 15733, + 24443, + ";", + 9004, "//", - 12639 + 15733 ] }, "you don't": { @@ -19449,11 +19293,11 @@ "SEXUAL" ], "bci-av-id": [ - 18298, + 18465, "//", - 15733, + 13860, "//", - 13860 + 15733 ] }, "you don't want": { @@ -19463,12 +19307,12 @@ "WANT" ], "bci-av-id": [ - 18298, - "//", - 15733, + 18465, "//", 13860, "//", + 15733, + "//", 18035 ] }, @@ -19485,9 +19329,9 @@ "OUTWORLD" ], "bci-av-id": [ - 27105, + 18465, "//", - 18298 + 16226 ] }, "you were": { @@ -19496,11 +19340,11 @@ "PAST" ], "bci-av-id": [ - 27105, - "//", - 18298, + 18465, "//", - 12639 + 24443, + ";", + 9004 ] }, "your": { @@ -19516,7 +19360,7 @@ "SEXUAL" ], "bci-av-id": [ - 18298, + 18465, "//", 13860 ] @@ -19527,9 +19371,11 @@ "TOOL" ], "bci-av-id": [ - 27105, + 18465, "//", - 18465 + 25072, + ";", + 9004 ] }, "you want": { @@ -19538,7 +19384,7 @@ "WANT" ], "bci-av-id": [ - 18298, + 18465, "//", 18035 ] @@ -21496,12 +21342,10 @@ "HE+" ], "bci-av-id": [ - 8485, + 13860, "//", 14687, "//", - 13860, - "//", 18035 ] }, @@ -21537,12 +21381,10 @@ "I+" ], "bci-av-id": [ - 8485, + 13860, "//", 14916, "//", - 13860, - "//", 18035 ] }, @@ -21567,12 +21409,10 @@ "IT+" ], "bci-av-id": [ - 8485, + 13860, "//", 14960, "//", - 13860, - "//", 18035 ] }, @@ -21633,13 +21473,11 @@ "HE+" ], 
"bci-av-id": [ - 8485, - "//", - 14687, + 13860, "//", 15733, "//", - 13860, + 14687, "//", 18035 ] @@ -21651,13 +21489,11 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 14916, + 13860, "//", 15733, "//", - 13860, + 14916, "//", 18035 ] @@ -21669,13 +21505,11 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 14960, + 13860, "//", 15733, "//", - 13860, + 14960, "//", 18035 ] @@ -21687,13 +21521,11 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 16494, + 13860, "//", 15733, "//", - 13860, + 16494, "//", 18035 ] @@ -21705,13 +21537,11 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 17713, + 13860, "//", 15733, "//", - 13860, + 17713, "//", 18035 ] @@ -21723,13 +21553,11 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 18212, + 13860, "//", 15733, "//", - 13860, + 18212, "//", 18035 ] @@ -21741,13 +21569,11 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 18298, + 13860, "//", 15733, "//", - 13860, + 18465, "//", 18035 ] @@ -24072,7 +23898,7 @@ "blue": { "encoding": [ "EYE", - "OUTWORL", + "OUTWORLD", "ADJ." ], "bci-av-id": 12864 @@ -24479,9 +24305,7 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", 14687 ] @@ -24492,9 +24316,7 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", 14916 ] @@ -24512,9 +24334,7 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", 14960 ] @@ -24606,13 +24426,11 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", - 14687, + 15733, "//", - 15733 + 14687 ] }, "shouldn't I": { @@ -24622,13 +24440,11 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", - 14916, + 15733, "//", - 15733 + 14916 ] }, "shouldn't it": { @@ -24638,13 +24454,11 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", - 14960, + 15733, "//", - 15733 + 14960 ] }, "shouldn't she": { @@ -24654,13 +24468,11 @@ "SHE+" ], "bci-av-id": [ - 8485, + 24264, "//", - 27105, - "//", - 16494, + 15733, "//", - 15733 + 16494 ] }, "shouldn't they": { @@ -24670,13 +24482,11 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 
27105, + 24264, "//", - 17713, + 15733, "//", - 15733 + 17713 ] }, "shouldn't we": { @@ -24686,13 +24496,11 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", - 18212, + 15733, "//", - 15733 + 18212 ] }, "shouldn't you": { @@ -24702,13 +24510,11 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 27105, + 24264, "//", - 18298, + 15733, "//", - 15733 + 18465 ] }, "twenty-fourth": { @@ -25752,13 +25558,11 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14687, + 12335, + ";", + 9004, "//", - 13860 + 14687 ] }, "everyone": { @@ -25774,13 +25578,11 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14916, + 12335, + ";", + 9004, "//", - 13860 + 14916 ] }, "did it": { @@ -25789,13 +25591,11 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14960, + 12335, + ";", + 9004, "//", - 13860 + 14960 ] }, "job": { @@ -25832,15 +25632,13 @@ "HE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14687, + 12335, + ";", + 9004, "//", 15733, "//", - 13860 + 14687 ] }, "didn't I": { @@ -25850,15 +25648,13 @@ "I+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14916, + 12335, + ";", + 9004, "//", 15733, "//", - 13860 + 14916 ] }, "didn't it": { @@ -25868,15 +25664,13 @@ "IT+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 14960, + 12335, + ";", + 9004, "//", 15733, "//", - 13860 + 14960 ] }, "didn't she": { @@ -25886,15 +25680,13 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 16494, + 12335, + ";", + 9004, "//", 15733, "//", - 13860 + 16494 ] }, "didn't they": { @@ -25904,15 +25696,13 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 17713, + 12335, + ";", + 9004, "//", 15733, "//", - 13860 + 17713 ] }, "didn't we": { @@ -25922,15 +25712,13 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 18212, + 12335, + ";", + 9004, "//", 15733, "//", - 13860 + 18212 ] }, "didn't you": { @@ -25940,15 +25728,13 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 18298, + 12335, + ";", + 
9004, "//", 15733, "//", - 13860 + 18465 ] }, "everywhere": { @@ -26004,13 +25790,11 @@ "SHE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 16494, + 12335, + ";", + 9004, "//", - 13860 + 16494 ] }, "did they": { @@ -26019,13 +25803,11 @@ "THEY+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 17713, + 12335, + ";", + 9004, "//", - 13860 + 17713 ] }, "to finish": { @@ -26152,13 +25934,11 @@ "WE+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 18212, + 12335, + ";", + 9004, "//", - 13860 + 18212 ] }, "answer (noun)": { @@ -26246,13 +26026,11 @@ "YOU+" ], "bci-av-id": [ - 8485, - "//", - 27105, - "//", - 18298, + 12335, + ";", + 9004, "//", - 13860 + 18465 ] }, "weekend": { @@ -26397,14 +26175,6 @@ 9011 ] }, - "": { - "encoding": [ - "LANG.", - "LANG.", - "PUT" - ], - "bci-av-id": null - }, "sentence": { "encoding": [ "LANG.", @@ -26776,7 +26546,6 @@ "OPEN": 15921, "PRESENT": 14437, "THINK": 17718, - "OUTWORL": 15411, "HELP": 14704, "REPEAT": 16487, "PUT": 16440, @@ -26914,4 +26683,4 @@ "WEIGHT": 18221, "POSS.": 24925 } -} \ No newline at end of file +} diff --git a/docs/ConvertBMWToJSON.md b/docs/ConvertBMWToJSON.md index e3bbde3..59c0b58 100644 --- a/docs/ConvertBMWToJSON.md +++ b/docs/ConvertBMWToJSON.md @@ -24,7 +24,7 @@ tracked in the file 4. Mannually modify the JSON file to revert changes tracked in [data/intermediate_BMW_conversion_data/special_handling.txt](../data/intermediate_BMW_conversion_data/special_handling.txt). 5. Run a script to loop through the JSON file, find messages with null BCI-AV-IDs, parse and transform -every message to conceptual Bliss, then compose BCI-AV-ID based on the transformed message. +every message to accommodating Bliss, then compose BCI-AV-ID based on the transformed message. 6. Manually fill in the rest messages that have null BCI-AV-IDs. 
## Steps diff --git a/utils/README.md b/utils/README.md index 48c599c..fb912ca 100644 --- a/utils/README.md +++ b/utils/README.md @@ -199,7 +199,7 @@ in the way that the indicator 8499 is on top of the the charactor 12355; ## Fill in null BCI-AV-ID values for messages using SpaCy (utils/fill_in_null_bliss_id_with_spacy.py) This script reads bmw.json, find all messages that have null BCI-AV-ID values, use Spacy to parse and transform -these messages to conceptual Bliss, then find their BCI-AV-IDs. This script handles messages in these formats: +these messages to accommodating Bliss, then find their BCI-AV-IDs. This script handles messages in these formats: 1. Verb in different form. For example: "begin", "to begin", "beginning", "began", "begun", "begins" all share the same Bliss symbol of its infinitive form "begin". @@ -207,7 +207,8 @@ its infinitive form "begin". 2. Plural nouns. For example: "books" -> [book, ";", 9011]. -3. Subject + Pronoun. +3. Subject + Pronoun. The script supports two transformations: +3.1. Transform to Conceptual Bliss For example: "I am" -> [I, be] "I were" -> [past_tense, I, be] "I will" -> [future_tense, I] @@ -215,6 +216,13 @@ For example: "I am" -> [I, be] "isn't he" -> [question_mark, he, not, be] "should he" -> [question_mark, past_tense, he] "shouldn't he" -> [question_mark, past_tense, he, not] + +3.2. Transform to Accommodating Bliss in English +For example: "I am" -> [I, am] +"I were" -> [I, were] +"he isn't" -> [he, is, not] +"isn't he" -> [is, not, he] + When the BCI-AV-ID for a word in the tranformed sentence cannot be found, an error will be reported. Note: The code for each case above should be uncommented and ran one by one.
The result from each run should be diff --git a/utils/fill_in_null_bliss_id_with_spacy.py b/utils/fill_in_null_bliss_id_with_spacy.py index 175939d..efca18c 100644 --- a/utils/fill_in_null_bliss_id_with_spacy.py +++ b/utils/fill_in_null_bliss_id_with_spacy.py @@ -1,6 +1,6 @@ ''' This script reads bmw.json, find all messages that have null BCI-AV-ID values, use Spacy to parse and transform -these messages to conceptual Bliss, then find their BCI-AV-IDs. This script handles messages in these formats: +these messages to accommodating Bliss, then find their BCI-AV-IDs. This script handles messages in these formats: 1. Verb in different form. For example: "begin", "to begin", "beginning", "began", "begun", "begins" all share the same Bliss symbol of its infinitive form "begin". @@ -8,7 +8,8 @@ 2. Plural nouns. For example: "books" -> [book, ";", 9011]. -3. Subject + Pronoun. +3. Subject + Pronoun. The script supports two transformations: +3.1. Transform to Conceptual Bliss For example: "I am" -> [I, be] "I were" -> [past_tense, I, be] "I will" -> [future_tense, I] @@ -16,6 +17,13 @@ "isn't he" -> [question_mark, he, not, be] "should he" -> [question_mark, past_tense, he] "shouldn't he" -> [question_mark, past_tense, he, not] + +3.2. Transform to Accommodating Bliss in English +For example: "I am" -> [I, am] +"I were" -> [I, were] +"he isn't" -> [he, is, not] +"isn't he" -> [is, not, he] + When the BCI-AV-ID for a word in the tranformed sentence cannot be found, an error will be reported. Note: The code for each case above should be uncommented and ran one by one.
The result from each run should be @@ -114,7 +122,49 @@ def find_bliss_id_in_general(text, bliss_explanation_json): return None -def get_sequence_for_msg_with_subject(text): +def find_bliss_id_in_list(text): + map = { + "is": 12639, + "are": 12639, + "am": 12639, + "be": 12639, + "was": [24443, ";", 9004], + "were": [24443, ";", 9004], + "been": [24443, ";", 9004], + "has": [24912, ";", 24807], + "have": [24912, ";", 24807], + "had": [24912, ";", 9004], + "do": 13860, + "does": 13860, + "did": [12335, ";", 9004], + "done": [12335, ";", 9004], + "can": 13114, + "could": [25520, ";", 8995], + "not": 15733, + "it": 14960, + "they": 17713, + "you": 18465, + "we": 18212, + "i": 14916, + "he": 14687, + "she": 16494, + "believe": 12661, + "may": 16226, + "might": 16226, + "shall": 24261, + "will": 24261, + "should": 24264, + "would": 24264, + "want": 18035, + "wants": 18035, + } + if text.lower() in map.keys(): + return map[text.lower()] + else: + return None + + +def get_conceptual_bliss_sequence_for_msg_with_subject(text): doc = nlp(text) sequence = [] is_past_tense = False @@ -174,54 +224,120 @@ def get_sequence_for_msg_with_subject(text): return sequence if len(sequence) > 1 and has_subject else None +def get_accommodating_bliss_sequence_for_msg_with_subject(text): + doc = nlp(text) + has_subject = False + sequence = [] + + for token in doc: + text = token.text.strip() + if token.dep_ == "subj" or token.dep_ == "nsubj": + has_subject = True + if (text == "n't"): + sequence.append("not") + elif (text == "ca"): + sequence.append("can") + elif (text == "'m"): + sequence.append("am") + elif (text == "wo"): + sequence.append("will") + else: + sequence.append(text) + + return sequence if len(sequence) > 1 and has_subject else None + + source_json_file = sys.argv[1] bliss_explanation_json_location = sys.argv[2] output_json_location = sys.argv[3] +words_missing_id = set() + with open(source_json_file, 'r') as file: data = json.load(file) # Load the spaCy English language 
model nlp = spacy.load("en_core_web_sm") - # load bliss translation json file - with open(bliss_explanation_json_location, 'r') as file: - bliss_explanation_json = json.load(file) + # # load bliss translation json file + # with open(bliss_explanation_json_location, 'r') as file: + # bliss_explanation_json = json.load(file) for message, value in data["encodings"].items(): - if value["bci-av-id"] is None: - # 1. handle single words - if message.startswith("to ") or len(message.split()) == 1: - # 1.1. Handle verb in various forms - # For example, "begin", "to begin", "beginning", "began", "begun", "begins" - # should all use the Bliss symbol for "begin" - infinitive_form_for_verb = find_infinitive_form_for_verb(message) - if infinitive_form_for_verb is not None: - value["bci-av-id"] = find_bliss_id_for_verb(infinitive_form_for_verb, bliss_explanation_json) - - # 1.2. Handle noun in plural form - # For example, the BCI-AV-ID for "books" should be [{id_for_book}, ";", 9011] - infinitive_form_for_noun = find_infinitive_form_for_plural_noun(message) - if infinitive_form_for_noun is not None: - bliss_id_for_noun = find_bliss_id_in_general(infinitive_form_for_noun, bliss_explanation_json) - if bliss_id_for_noun is not None: - value["bci-av-id"] = [bliss_id_for_noun, ";", 9011] - - # 2. Handle multiple words messages with a subject such as "I should", "I shouldn't", "should I", "shouldn't I" - bliss_sequence = get_sequence_for_msg_with_subject(message) + # if value["bci-av-id"] is None: + # # 1. handle single words + # if message.startswith("to ") or len(message.split()) == 1: + # # 1.1. Handle verb in various forms + # # For example, "begin", "to begin", "beginning", "began", "begun", "begins" + # # should all use the Bliss symbol for "begin" + # infinitive_form_for_verb = find_infinitive_form_for_verb(message) + # if infinitive_form_for_verb is not None: + # value["bci-av-id"] = find_bliss_id_for_verb(infinitive_form_for_verb, bliss_explanation_json) + + # # 1.2. 
Handle noun in plural form + # # For example, the BCI-AV-ID for "books" should be [{id_for_book}, ";", 9011] + # infinitive_form_for_noun = find_infinitive_form_for_plural_noun(message) + # if infinitive_form_for_noun is not None: + # bliss_id_for_noun = find_bliss_id_in_general(infinitive_form_for_noun, bliss_explanation_json) + # if bliss_id_for_noun is not None: + # value["bci-av-id"] = [bliss_id_for_noun, ";", 9011] + + # # 2. Transform multiple words messages with a subject such as "I should", "I shouldn't", "should I", "shouldn't I" + # # to conceptual Bliss grammar + # bliss_sequence = get_conceptual_bliss_sequence_for_msg_with_subject(message) + # if bliss_sequence is not None: + # encoding = [] + # position = 0 + # for text in bliss_sequence: + # bliss_id = find_bliss_id_in_general(text, bliss_explanation_json) + # if bliss_id is not None: + # encoding.append(bliss_id) + # if (position < len(bliss_sequence) - 1): + # encoding.append("//") + # else: + # print("Error: ", message, ": cannot find bliss id for \"", text, "\"") + # position = position + 1 + # value["bci-av-id"] = encoding + + # 3. 
Transform multiple words messages with a subject such as "I should", "I shouldn't", "should I", "shouldn't I" + # to accommodating Bliss grammar + to_skip = [ + "it is good to be here today", + "people plural", + "I think so", + "I guess so", + "I used to", + "it used to", + "he used to", + "she used to", + "they used to", + "we used to", + "you used to", + ] + if message not in to_skip: + bliss_sequence = get_accommodating_bliss_sequence_for_msg_with_subject(message) if bliss_sequence is not None: encoding = [] position = 0 for text in bliss_sequence: - bliss_id = find_bliss_id_in_general(text, bliss_explanation_json) + bliss_id = find_bliss_id_in_list(text) if bliss_id is not None: - encoding.append(bliss_id) + if type(bliss_id) is list: + encoding.extend(bliss_id) + else: + encoding.append(bliss_id) if (position < len(bliss_sequence) - 1): encoding.append("//") else: - print("Error: ", message, ": cannot find bliss id for \"", text, "\"") + print(f"=== {message}, {bliss_sequence}") + print(f"Error: {message}: cannot find bliss id for \"{text}\"") + words_missing_id.add(text) position = position + 1 + print(f"{bliss_sequence}: {encoding}") value["bci-av-id"] = encoding +if len(words_missing_id) > 0: + print(f"Words with missing IDs: {words_missing_id}") + # Write the JSON into a file with open(output_json_location, "w") as json_file: json_file.write(json.dumps(data, indent=4))