From 69c067621564dd90b2050a0b5515dabc72ec7811 Mon Sep 17 00:00:00 2001 From: berryzplus Date: Tue, 26 Sep 2023 00:01:43 +0900 Subject: [PATCH] =?UTF-8?q?IVS=E3=81=AE=E7=95=B0=E4=BD=93=E5=AD=97?= =?UTF-8?q?=E3=82=BB=E3=83=AC=E3=82=AF=E3=82=BF=E3=81=AB=E5=AF=BE=E5=BF=9C?= =?UTF-8?q?=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sakura_core/charset/CCodeBase.cpp | 13 +++++- sakura_core/charset/CUtf8.cpp | 3 ++ sakura_core/charset/codechecker.h | 37 +++++++++++++++++ sakura_core/mem/CNativeW.cpp | 6 +++ sakura_core/parse/CWordParse.cpp | 18 +++++---- sakura_core/view/CEditView_Command_New.cpp | 2 +- sakura_core/view/CTextDrawer.cpp | 2 +- sakura_core/view/CTextMetrics.cpp | 6 +++ tests/unittests/test-ccodebase.cpp | 23 +++++++++++ tests/unittests/test-cnative.cpp | 20 +++++++++ tests/unittests/test-codechecker.cpp | 47 ++++++++++++++++++++++ tests/unittests/test-ctextmetrics.cpp | 11 +++++ tests/unittests/test-cwordparse.cpp | 6 +++ tests/unittests/tests1.vcxproj | 1 + tests/unittests/tests1.vcxproj.filters | 3 ++ 15 files changed, 187 insertions(+), 11 deletions(-) create mode 100644 tests/unittests/test-codechecker.cpp diff --git a/sakura_core/charset/CCodeBase.cpp b/sakura_core/charset/CCodeBase.cpp index fe5dd91086..62877bbe52 100644 --- a/sakura_core/charset/CCodeBase.cpp +++ b/sakura_core/charset/CCodeBase.cpp @@ -54,8 +54,17 @@ std::wstring CCodeBase::CodeToHex(const CNativeW& cSrc, const CommonSetting_Stat // 表示用16進表示 UNICODE → Hex 変換 2008/6/9 Uchi EConvertResult CCodeBase::UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCHAR* pDst, const CommonSetting_Statusbar* psStatusbar) { - if (IsUTF16High(cSrc[0]) && iSLen >= 2 && IsUTF16Low(cSrc[1])) { - // サロゲートペア + // IVS + if (iSLen >= 3 && IsVariationSelector(cSrc + 1)) { + if (psStatusbar->m_bDispSPCodepoint) { + auto_sprintf(pDst, L"%04X, U+%05X", cSrc[0], ConvertToUtf32(cSrc + 1)); + } + else { + auto_sprintf(pDst, L"%04X, %04X%04X", cSrc[0], cSrc[1], cSrc[2]); + } + } + // サロゲートペア + else if (iSLen >= 2 && IsSurrogatePair(cSrc)) { if (psStatusbar->m_bDispSPCodepoint) { auto_sprintf( pDst, L"U+%05X", 0x10000 + ((cSrc[0] & 0x3FF)<<10) + (cSrc[1] & 0x3FF)); } diff --git a/sakura_core/charset/CUtf8.cpp b/sakura_core/charset/CUtf8.cpp index d1c85fa144..73016d2fe6 100644 --- a/sakura_core/charset/CUtf8.cpp +++ b/sakura_core/charset/CUtf8.cpp @@ -227,6 +227,9 @@ EConvertResult CUtf8::_UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCHAR* if (IsUTF16High(cSrc[0]) && iSLen >= 2 && IsUTF16Low(cSrc[1])) { cBuff._GetMemory()->SetRawDataHoldBuffer(cSrc, 4); } + else if (iSLen >= 3 && IsVariationSelector(cSrc + 1)) { + cBuff._GetMemory()->SetRawDataHoldBuffer(cSrc, sizeof(wchar_t) * 3); + } else { cBuff._GetMemory()->SetRawDataHoldBuffer(cSrc, 2); if( IsBinaryOnSurrogate(cSrc[0]) ){ diff --git a/sakura_core/charset/codechecker.h b/sakura_core/charset/codechecker.h index 6d4b48a90b..738a24e7cf 100644 --- a/sakura_core/charset/codechecker.h +++ b/sakura_core/charset/codechecker.h @@ -359,6 +359,43 @@ inline bool IsUTF16Low( wchar_t c ){ return IsUtf16SurrogLow(c); } +/*! + * 文字列がサロゲートペアで始まっているか判定する + */ +inline bool IsSurrogatePair(std::wstring_view text) { + return 2 <= text.length() && IsUTF16High(text[0]) && IsUTF16Low(text[1]); +} + +/*! + * UTF16文字列の先頭1文字をUTF32コードポイントに変換する + * + * @return 文字列の先頭1文字のコードポイント + */ +inline +_Success_(return != 0) +char32_t ConvertToUtf32(std::wstring_view text) { + if (text.empty()) { + return 0; + } + if (IsSurrogatePair(text)) { + return 0x10000 + ((text[0] & 0x3ff) << 10) + (text[1] & 0x3ff); + } + if (const auto ch = text[0]; + !(ch & 0xfc00)) + { + return ch; + } + return 0; +} + +/*! + * 文字列がIVSの異体字セレクタで始まっているか判定する + */ +inline bool IsVariationSelector(std::wstring_view text) { + const auto cp = ConvertToUtf32(text); + return 0xe0100 <= cp && cp <= 0xe01ef; +} + //! 上位バイトと下位バイトを交換 (主に UTF-16 LE/BE 向け) inline unsigned short _SwapHLByte( const unsigned short wc ){ unsigned short wc1 = static_cast( (static_cast(wc) << 8) & 0x0000ffff ); diff --git a/sakura_core/mem/CNativeW.cpp b/sakura_core/mem/CNativeW.cpp index 9641999138..dbd8e8a98d 100644 --- a/sakura_core/mem/CNativeW.cpp +++ b/sakura_core/mem/CNativeW.cpp @@ -396,6 +396,12 @@ CLogicInt CNativeW::GetSizeOfChar( const wchar_t* pData, int nDataLen, int nIdx } } + // IVSの異体字セレクタチェック + if (IsVariationSelector(pData + nIdx + 1)) { + // 正字 + 異体字セレクタで3個分 + return CLogicInt(3); + } + return CLogicInt(1); } diff --git a/sakura_core/parse/CWordParse.cpp b/sakura_core/parse/CWordParse.cpp index 9afecab33e..b1f46c98d7 100644 --- a/sakura_core/parse/CWordParse.cpp +++ b/sakura_core/parse/CWordParse.cpp @@ -140,11 +140,10 @@ ECharKind CWordParse::WhatKindOfChar( { using namespace WCODE; - int nCharChars = CNativeW::GetSizeOfChar( pData, pDataLen, nIdx ); - if( nCharChars == 0 ){ - return CK_NULL; // NULL - } - else if( nCharChars == 1 ){ + ECharKind ret = CK_NULL; + if(const auto nCharChars = CNativeW::GetSizeOfChar(pData, pDataLen, nIdx); + nCharChars == 1) + { wchar_t c=pData[nIdx]; //今までの半角 @@ -186,9 +185,14 @@ ECharKind CWordParse::WhatKindOfChar( } return CK_ETC; // 半角のその他 } - else{ - return CK_NULL; // NULL + // IVS(正字 + 異体字セレクタ) + else if (nCharChars == 3 && + IsVariationSelector(pData + nIdx + 1)) + { + ret = CK_ZEN_ETC; // 全角のその他(漢字など) } + + return ret; } //! 二つの文字を結合したものの種類を調べる diff --git a/sakura_core/view/CEditView_Command_New.cpp b/sakura_core/view/CEditView_Command_New.cpp index 128783ae5b..64c30b3cc7 100644 --- a/sakura_core/view/CEditView_Command_New.cpp +++ b/sakura_core/view/CEditView_Command_New.cpp @@ -613,7 +613,7 @@ void CEditView::DeleteData( nNxtPos = GetCaret().GetCaretLayoutPos().GetX() + CLayoutInt(pcLayout->GetLayoutEol().GetLen()>0?1+m_pcEditDoc->m_cLayoutMgr.GetCharSpacing():0); } else{ - nNxtIdx = CLogicInt(CNativeW::GetCharNext( pLine, nLineLen, &pLine[nCurIdx] ) - pLine); + nNxtIdx = nCurIdx + CNativeW::GetSizeOfChar( pLine, nLineLen, nCurIdx); // 指定された行のデータ内の位置に対応する桁の位置を調べる nNxtPos = LineIndexToColumn( pcLayout, nNxtIdx ); } diff --git a/sakura_core/view/CTextDrawer.cpp b/sakura_core/view/CTextDrawer.cpp index 33e254f0e3..6d15dac86a 100644 --- a/sakura_core/view/CTextDrawer.cpp +++ b/sakura_core/view/CTextDrawer.cpp @@ -132,7 +132,7 @@ void CTextDrawer::DispText( HDC hdc, DispPos* pDispPos, int marginy, const wchar nWorkWidth += pDrawDxArray[nDrawLength++]; } // サロゲートペア対策 2008/7/5 Uchi Update 7/8 Uchi - if (nDrawLength < nDrawDataMaxLength && pDrawDxArray[nDrawLength] == 0) { + while (nDrawLength < nDrawDataMaxLength && pDrawDxArray[nDrawLength] == 0) { nDrawLength++; } diff --git a/sakura_core/view/CTextMetrics.cpp b/sakura_core/view/CTextMetrics.cpp index 9ff53f1205..182c849031 100644 --- a/sakura_core/view/CTextMetrics.cpp +++ b/sakura_core/view/CTextMetrics.cpp @@ -108,6 +108,12 @@ const int* CTextMetrics::GenerateDxArray( } vResultArray.push_back(cache.CalcPxWidthByFont(pText[i]) + spacing); nIndent += vResultArray.back(); + + if (IsVariationSelector(pText + i + 1)) { + vResultArray.push_back(0); + vResultArray.push_back(0); + i += 2; + } } return vResultArray.data(); } diff --git a/tests/unittests/test-ccodebase.cpp b/tests/unittests/test-ccodebase.cpp index 0e62b155c2..1a8a272998 100644 --- a/tests/unittests/test-ccodebase.cpp +++ b/tests/unittests/test-ccodebase.cpp @@ -813,6 +813,9 @@ TEST(CCodeBase, Utf8ToHex) // カラー絵文字「男性のシンボル」(サロゲートペア) EXPECT_STREQ(L"F09F9AB9", pCodeBase->CodeToHex(L"\U0001F6B9", sStatusbar).c_str()); + + // IVS(Ideographic Variation Sequence) 「葛󠄀」(葛󠄀城市の葛󠄀、下がヒ) + EXPECT_STREQ(L"E8919BF3A08480", pCodeBase->CodeToHex(L"葛󠄀", sStatusbar).c_str()); } /*! @@ -837,3 +840,23 @@ TEST(CCodeBase, Latin1ToHex) // カラー絵文字「男性のシンボル」(サロゲートペア) EXPECT_STREQ(L"D83DDEB9", pCodeBase->CodeToHex(L"\U0001F6B9", sStatusbar).c_str()); } + +TEST(CCodeBase, UnicodeToHex) +{ + const auto eCodeType = CODE_UNICODE; + auto pCodeBase = CCodeFactory::CreateCodeBase(eCodeType); + + // 特定コードのマルチバイトを表示する設定 + CommonSetting_Statusbar sStatusbar; + sStatusbar.m_bDispUniInSjis = false; + sStatusbar.m_bDispUniInJis = false; + sStatusbar.m_bDispUniInEuc = false; + sStatusbar.m_bDispUtf8Codepoint = false; + sStatusbar.m_bDispSPCodepoint = false; + + sStatusbar.m_bDispSPCodepoint = true; + EXPECT_STREQ(L"845B, U+E0100", pCodeBase->CodeToHex(L"葛󠄀", sStatusbar).c_str()); + + sStatusbar.m_bDispSPCodepoint = false; + EXPECT_STREQ(L"845B, DB40DD00", pCodeBase->CodeToHex(L"葛󠄀", sStatusbar).c_str()); +} diff --git a/tests/unittests/test-cnative.cpp b/tests/unittests/test-cnative.cpp index 46d6ed3db7..56dfcd87b8 100644 --- a/tests/unittests/test-cnative.cpp +++ b/tests/unittests/test-cnative.cpp @@ -792,6 +792,26 @@ TEST(CNativeW, GetSizeOfChar) EXPECT_EQ(CNativeW::GetSizeOfChar(CStringRef(L"\xd83c\xdf38", 2), 1), 1); } +TEST(CNativeW, GetSizeOfChar_Empty) +{ + const auto& s = L""; + EXPECT_EQ(0, CNativeW::GetSizeOfChar(s, _countof(s) - 1, 0)); +} + +TEST(CNativeW, GetSizeOfChar_SurrogatePair) +{ + // 絵文字 男性のシンボル + const auto& s = L"\U0001f6b9"; + EXPECT_EQ(2, CNativeW::GetSizeOfChar(s, _countof(s) - 1, 0)); +} + +TEST(CNativeW, GetSizeOfChar_IVS) +{ + // 葛󠄀城市(先頭の文字が異体字) + const auto& s = L"葛󠄀城市"; + EXPECT_EQ(3, CNativeW::GetSizeOfChar(s, _countof(s) - 1, 0)); +} + /*! * @brief GetKetaOfCharの仕様 * @remark 指定した文字の桁数を返す。 diff --git a/tests/unittests/test-codechecker.cpp b/tests/unittests/test-codechecker.cpp new file mode 100644 index 0000000000..f7d84b1b01 --- /dev/null +++ b/tests/unittests/test-codechecker.cpp @@ -0,0 +1,47 @@ +/* + Copyright (C) 2023, Sakura Editor Organization + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; + you must not claim that you wrote the original software. + If you use this software in a product, an acknowledgment + in the product documentation would be appreciated but is + not required. + + 2. Altered source versions must be plainly marked as such, + and must not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. +*/ + +#include +#include "charset/codechecker.h" + +TEST(ConvertToUtf32, SurrogatePair) +{ + // 絵文字 男性のシンボル + const auto& s = L"\U0001f6b9"; + EXPECT_EQ(0x1f6b9, ConvertToUtf32(s)); +} + +TEST(ConvertToUtf32, VariationSelector) +{ + // 異体字セレクタ VS17 + const auto& s = L"\U000e0100"; + EXPECT_EQ(0xe0100, ConvertToUtf32(s)); +} + +TEST(ConvertToUtf32, BinaryOnSurrogate) +{ + // 独自仕様 変換できない文字は1byteずつ下位サロゲートに詰める + const auto& s = L"\xdcff"; + EXPECT_EQ(0, ConvertToUtf32(s)); +} diff --git a/tests/unittests/test-ctextmetrics.cpp b/tests/unittests/test-ctextmetrics.cpp index 915fc1526e..4dfeee8970 100644 --- a/tests/unittests/test-ctextmetrics.cpp +++ b/tests/unittests/test-ctextmetrics.cpp @@ -220,6 +220,17 @@ TEST(CTextMetrics, GenerateDxArray7) EXPECT_EQ(v[3], 99); } +TEST(CTextMetrics, GenerateDxArray8) +{ + // IVSのVariantSelectorが続く文字列は先頭1文字 + 幅0×2で生成する + std::vector v; + FakeCache1 cache; + CTextMetrics::GenerateDxArray(&v, L"葛󠄀", 2, 0, 0, 0, 10, cache); + EXPECT_TRUE(v[0]); + EXPECT_FALSE(v[1]); + EXPECT_FALSE(v[2]); +} + TEST(CTextMetrics, CalcTextWidth) { int dx[] = {1, 2, 3}; diff --git a/tests/unittests/test-cwordparse.cpp b/tests/unittests/test-cwordparse.cpp index 79d2b2fc99..0837e51c7d 100644 --- a/tests/unittests/test-cwordparse.cpp +++ b/tests/unittests/test-cwordparse.cpp @@ -140,6 +140,12 @@ TEST(WhatKindOfChar, SurrogatePairs) EXPECT_EQ(CK_ZEN_ETC, CWordParse::WhatKindOfChar(L"\xd842\xdfb7", 2, 0)); } +TEST(WhatKindOfChar, IVS) +{ +// EXPECT_EQ(CK_ETC, CWordParse::WhatKindOfChar(L"葛󠄀", 3, 0)); + EXPECT_EQ(CK_ZEN_ETC, CWordParse::WhatKindOfChar(L"葛󠄀", 3, 0)); +} + TEST(WhatKindOfTwoChars, ReturnsSameKindIfTwoKindsAreIdentical) { EXPECT_EQ(CK_HIRA, CWordParse::WhatKindOfTwoChars(CK_HIRA, CK_HIRA)); diff --git a/tests/unittests/tests1.vcxproj b/tests/unittests/tests1.vcxproj index 4cb4a8f04e..5737ef7d38 100644 --- a/tests/unittests/tests1.vcxproj +++ b/tests/unittests/tests1.vcxproj @@ -110,6 +110,7 @@ + diff --git a/tests/unittests/tests1.vcxproj.filters b/tests/unittests/tests1.vcxproj.filters index 01cb428f20..56bd6b4c33 100644 --- a/tests/unittests/tests1.vcxproj.filters +++ b/tests/unittests/tests1.vcxproj.filters @@ -172,6 +172,9 @@ Test Files + + Test Files +