diff --git a/.gitignore b/.gitignore index 428bf4ac82..7cdfa87bab 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *.iobj *.ipdb *.pyc +*.vcxproj.user **/packages **/TestResults .sonarqube diff --git a/sakura/sakura.vcxproj b/sakura/sakura.vcxproj index 2cf3437530..d1e874906f 100644 --- a/sakura/sakura.vcxproj +++ b/sakura/sakura.vcxproj @@ -77,7 +77,7 @@ Windows - comctl32.lib;Imm32.lib;mpr.lib;imagehlp.lib;Shlwapi.lib;Dwmapi.lib;%(AdditionalDependencies) + comctl32.lib;icu.lib;Imm32.lib;mpr.lib;imagehlp.lib;Shlwapi.lib;Dwmapi.lib;%(AdditionalDependencies) true diff --git a/sakura_core/StdAfx.h b/sakura_core/StdAfx.h index 701718d56c..f407a74d50 100644 --- a/sakura_core/StdAfx.h +++ b/sakura_core/StdAfx.h @@ -92,6 +92,8 @@ #include #include +#include +#define NTDDI_VERSION NTDDI_WIN10_RS3 #include #include #include diff --git a/sakura_core/charset/CCodeBase.cpp b/sakura_core/charset/CCodeBase.cpp index 62877bbe52..8ddeab4b13 100644 --- a/sakura_core/charset/CCodeBase.cpp +++ b/sakura_core/charset/CCodeBase.cpp @@ -25,10 +25,10 @@ #include "StdAfx.h" #include "CCodeBase.h" +#include "CEol.h" #include "charset/CCodeFactory.h" -#include "convert/convert_util2.h" #include "charset/codechecker.h" -#include "CEol.h" +#include "convert/convert_util2.h" #include "env/CommonSetting.h" /*! @@ -54,8 +54,12 @@ std::wstring CCodeBase::CodeToHex(const CNativeW& cSrc, const CommonSetting_Stat // 表示用16進表示 UNICODE → Hex 変換 2008/6/9 Uchi EConvertResult CCodeBase::UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCHAR* pDst, const CommonSetting_Statusbar* psStatusbar) { + std::wstring_view trailingChars; + // IVS if (iSLen >= 3 && IsVariationSelector(cSrc + 1)) { + trailingChars = std::wstring_view(cSrc + 3, iSLen - 3); + if (psStatusbar->m_bDispSPCodepoint) { auto_sprintf(pDst, L"%04X, U+%05X", cSrc[0], ConvertToUtf32(cSrc + 1)); } @@ -65,6 +69,8 @@ EConvertResult CCodeBase::UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCH } // サロゲートペア else if (iSLen >= 2 && IsSurrogatePair(cSrc)) { + trailingChars = std::wstring_view(cSrc + 2, iSLen - 2); + if (psStatusbar->m_bDispSPCodepoint) { auto_sprintf( pDst, L"U+%05X", 0x10000 + ((cSrc[0] & 0x3FF)<<10) + (cSrc[1] & 0x3FF)); } @@ -73,9 +79,16 @@ EConvertResult CCodeBase::UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCH } } else { + trailingChars = std::wstring_view(cSrc + 1, iSLen - 1); + auto_sprintf( pDst, L"U+%04X", cSrc[0] ); } + if (CountNonSpacingMarkCharactersByUTF16CodeUnits(trailingChars)) { + // 結合文字がある場合は「...」を表示する + wcscat(pDst, L"..."); + } + return RESULT_COMPLETE; } diff --git a/sakura_core/charset/codechecker.cpp b/sakura_core/charset/codechecker.cpp index 3219592deb..5b0d2513fd 100644 --- a/sakura_core/charset/codechecker.cpp +++ b/sakura_core/charset/codechecker.cpp @@ -34,11 +34,43 @@ #include "StdAfx.h" #include "charset/codechecker.h" -#include "mem/CMemory.h" -#include "convert/convert_util2.h" -#include "charset/codeutil.h" -#include "charset/charcode.h" + #include +#include +#include + +#include + +#include "charset/charcode.h" +#include "charset/codeutil.h" +#include "convert/convert_util2.h" +#include "mem/CMemory.h" + +std::size_t CountNonSpacingMarkCharactersByUTF16CodeUnits(std::wstring_view text) { + std::size_t i = 0; + while (i < text.size()) { + char32_t ch; + bool is_surrogate_pair; + + if (char16_t high = text[i]; + !IsUTF16High(high)) { + ch = high; + is_surrogate_pair = false; + } else if (i + 1 < text.size() && IsUTF16Low(text[i + 1])) { + char16_t low = text[i + 1]; + ch = DecodeSurrogatePair(high, low); + is_surrogate_pair = true; + } else { + // 下位が欠落している不正なサロゲートペアに遭遇したので計算を打ち切る。 + break; + } + + if (u_charType(ch) != U_NON_SPACING_MARK) + break; + i += (is_surrogate_pair ? 2 : 1); + } + return i; +} /* =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=* diff --git a/sakura_core/charset/codechecker.h b/sakura_core/charset/codechecker.h index 738a24e7cf..6f9a2a9404 100644 --- a/sakura_core/charset/codechecker.h +++ b/sakura_core/charset/codechecker.h @@ -36,6 +36,9 @@ #define SAKURA_CODECHECKER_62A18A31_2ECD_47B6_AEE1_38EDDAD3FF2B_H_ #pragma once +#include +#include + #include "_main/global.h" #include "convert/convert_util2.h" #include "basis/primitive.h" @@ -366,6 +369,11 @@ inline bool IsSurrogatePair(std::wstring_view text) { return 2 <= text.length() && IsUTF16High(text[0]) && IsUTF16Low(text[1]); } +//! UTF-16 のサロゲートペアを Unicode コードポイントに変換する +inline char32_t DecodeSurrogatePair(char16_t high, char16_t low) { + return 0x10000 + ((high & 0x3ff) << 10) + (low & 0x3ff); +} + /*! * UTF16文字列の先頭1文字をUTF32コードポイントに変換する * @@ -378,7 +386,7 @@ char32_t ConvertToUtf32(std::wstring_view text) { return 0; } if (IsSurrogatePair(text)) { - return 0x10000 + ((text[0] & 0x3ff) << 10) + (text[1] & 0x3ff); + return DecodeSurrogatePair(text[0], text[1]); } if (const auto ch = text[0]; !(ch & 0xfc00)) @@ -396,6 +404,9 @@ inline bool IsVariationSelector(std::wstring_view text) { return 0xe0100 <= cp && cp <= 0xe01ef; } +//! 連続する幅なし結合文字の長さを UTF-16 コードユニット単位で数える。 +std::size_t CountNonSpacingMarkCharactersByUTF16CodeUnits(std::wstring_view text); + //! 上位バイトと下位バイトを交換 (主に UTF-16 LE/BE 向け) inline unsigned short _SwapHLByte( const unsigned short wc ){ unsigned short wc1 = static_cast( (static_cast(wc) << 8) & 0x0000ffff ); diff --git a/sakura_core/mem/CNativeW.cpp b/sakura_core/mem/CNativeW.cpp index dbd8e8a98d..df2d376355 100644 --- a/sakura_core/mem/CNativeW.cpp +++ b/sakura_core/mem/CNativeW.cpp @@ -23,13 +23,15 @@ distribution. */ #include "StdAfx.h" -#include -#include "charset/codechecker.h" #include "mem/CNativeW.h" +#include #include +#include + #include "CEol.h" +#include "charset/codechecker.h" /*! コンストラクタ @@ -389,20 +391,21 @@ CLogicInt CNativeW::GetSizeOfChar( const wchar_t* pData, int nDataLen, int nIdx return CLogicInt(0); // サロゲートチェック 2008/7/5 Uchi - if (IsUTF16High(pData[nIdx])) { - if (nIdx + 1 < nDataLen && IsUTF16Low(pData[nIdx + 1])) { - // サロゲートペア 2個分 - return CLogicInt(2); - } - } - - // IVSの異体字セレクタチェック - if (IsVariationSelector(pData + nIdx + 1)) { - // 正字 + 異体字セレクタで3個分 - return CLogicInt(3); + int units; + if (!IsUTF16High(pData[nIdx])) { + units = 1; + } else if (nIdx + 1 < nDataLen && IsUTF16Low(pData[nIdx + 1])) { + // サロゲートペア 2個分 + units = 2; + } else { + // 不正なサロゲートペア + return CLogicInt(1); } - return CLogicInt(1); + // 後続の幅なし結合文字の数を足す + const auto trailing_text = std::wstring_view(pData + nIdx + units, nDataLen - nIdx - units); + const auto count = CountNonSpacingMarkCharactersByUTF16CodeUnits(trailing_text); + return CLogicInt(static_cast(units + count)); } //! 指定した位置の文字が半角何個分かを返す diff --git a/sakura_core/parse/CWordParse.cpp b/sakura_core/parse/CWordParse.cpp index b1f46c98d7..1d9fd8ffd8 100644 --- a/sakura_core/parse/CWordParse.cpp +++ b/sakura_core/parse/CWordParse.cpp @@ -24,6 +24,10 @@ */ #include "StdAfx.h" #include "CWordParse.h" + +#include + +#include "basis/SakuraBasis.h" #include "charset/charcode.h" #include "charset/codechecker.h" #include "mem/CNativeW.h" diff --git a/sakura_core/view/CTextMetrics.cpp b/sakura_core/view/CTextMetrics.cpp index 182c849031..f0b4e4fbeb 100644 --- a/sakura_core/view/CTextMetrics.cpp +++ b/sakura_core/view/CTextMetrics.cpp @@ -24,9 +24,14 @@ distribution. */ #include "StdAfx.h" +#include "CTextMetrics.h" + #include +#include #include -#include "CTextMetrics.h" + +#include + #include "charset/codechecker.h" #include "mem/CNativeW.h" @@ -86,16 +91,18 @@ const int* CTextMetrics::GenerateDxArray( std::vector& vResultArray = *pvResultArray; vResultArray.clear(); - for (int i = 0; i < nLength; ++i) { + int i = 0; + while (i < nLength) { if (pText[i] == WCODE::TAB) { // TAB対応 2013/5/7 Uchi if (i > 0 && pText[i - 1] == WCODE::TAB) { vResultArray.push_back(nTabSpace); nIndent += nTabSpace; - continue; + } else { + vResultArray.push_back((nTabSpace + nHankakuDx - 1) - ((nIndent + nHankakuDx - 1) % nTabSpace)); + nIndent += vResultArray.back(); } - vResultArray.push_back((nTabSpace + nHankakuDx - 1) - ((nIndent + nHankakuDx - 1) % nTabSpace)); - nIndent += vResultArray.back(); + ++i; continue; } @@ -103,17 +110,17 @@ const int* CTextMetrics::GenerateDxArray( if(IsUTF16High(pText[i]) && i + 1 < nLength && IsUTF16Low(pText[i + 1])) { vResultArray.push_back(cache.CalcPxWidthByFont2(pText + i) + spacing); vResultArray.push_back(0); - i++; - continue; - } - vResultArray.push_back(cache.CalcPxWidthByFont(pText[i]) + spacing); - nIndent += vResultArray.back(); - - if (IsVariationSelector(pText + i + 1)) { - vResultArray.push_back(0); - vResultArray.push_back(0); i += 2; + } else { + vResultArray.push_back(cache.CalcPxWidthByFont(pText[i]) + spacing); + nIndent += vResultArray.back(); + ++i; } + + const auto trailing_text = std::wstring_view(pText + i, nLength - i); + const auto count = CountNonSpacingMarkCharactersByUTF16CodeUnits(trailing_text); + std::fill_n(std::back_inserter(vResultArray), count, 0); + i += count; } return vResultArray.data(); } diff --git a/tests/unittests/test-ccodebase.cpp b/tests/unittests/test-ccodebase.cpp index 1a8a272998..b7f3581d26 100644 --- a/tests/unittests/test-ccodebase.cpp +++ b/tests/unittests/test-ccodebase.cpp @@ -841,7 +841,7 @@ TEST(CCodeBase, Latin1ToHex) EXPECT_STREQ(L"D83DDEB9", pCodeBase->CodeToHex(L"\U0001F6B9", sStatusbar).c_str()); } -TEST(CCodeBase, UnicodeToHex) +TEST(CCodeBase, UnicodeToHex1) { const auto eCodeType = CODE_UNICODE; auto pCodeBase = CCodeFactory::CreateCodeBase(eCodeType); @@ -860,3 +860,20 @@ TEST(CCodeBase, UnicodeToHex) sStatusbar.m_bDispSPCodepoint = false; EXPECT_STREQ(L"845B, DB40DD00", pCodeBase->CodeToHex(L"葛󠄀", sStatusbar).c_str()); } + +TEST(CCodeBase, UnicodeToHex2) +{ + const auto eCodeType = CODE_UNICODE; + auto pCodeBase = CCodeFactory::CreateCodeBase(eCodeType); + + // 特定コードのマルチバイトを表示する設定 + CommonSetting_Statusbar sStatusbar; + sStatusbar.m_bDispUniInSjis = false; + sStatusbar.m_bDispUniInJis = false; + sStatusbar.m_bDispUniInEuc = false; + sStatusbar.m_bDispUtf8Codepoint = false; + sStatusbar.m_bDispSPCodepoint = false; + sStatusbar.m_bDispSPCodepoint = false; + + EXPECT_STREQ(L"U+30AB...", pCodeBase->CodeToHex(L"ガ", sStatusbar).c_str()); +} diff --git a/tests/unittests/test-ctextmetrics.cpp b/tests/unittests/test-ctextmetrics.cpp index 4dfeee8970..fb03fd855e 100644 --- a/tests/unittests/test-ctextmetrics.cpp +++ b/tests/unittests/test-ctextmetrics.cpp @@ -156,6 +156,7 @@ TEST(CTextMetrics, GenerateDxArray1) FakeCache1 cache; const int* p = CTextMetrics::GenerateDxArray(&v, L"ab", 2, 0, 0, 0, 0, cache); EXPECT_EQ(p, v.data()); + EXPECT_EQ(v.size(), 2); EXPECT_EQ(v[0], 1); EXPECT_EQ(v[1], 2); } @@ -166,6 +167,7 @@ TEST(CTextMetrics, GenerateDxArray2) std::vector v; FakeCache1 cache; CTextMetrics::GenerateDxArray(&v, L"ab", 2, 0, 0, 0, 10, cache); + EXPECT_EQ(v.size(), 2); EXPECT_EQ(v[0], 11); EXPECT_EQ(v[1], 12); } @@ -176,6 +178,7 @@ TEST(CTextMetrics, GenerateDxArray3) std::vector v; FakeCache1 cache; CTextMetrics::GenerateDxArray(&v, L"\xd83c\xdf38", 2, 0, 0, 0, 0, cache); + EXPECT_EQ(v.size(), 2); EXPECT_EQ(v[0], 10000); } @@ -185,6 +188,7 @@ TEST(CTextMetrics, GenerateDxArray4) std::vector v; FakeCache1 cache; CTextMetrics::GenerateDxArray(&v, L"\xd83c\xdf38", 2, 0, 0, 0, 10, cache); + EXPECT_EQ(v.size(), 2); EXPECT_EQ(v[0], 10020); } @@ -194,6 +198,7 @@ TEST(CTextMetrics, GenerateDxArray5) std::vector v; FakeCache1 cache; CTextMetrics::GenerateDxArray(&v, L"\xd83c,", 2, 0, 0, 0, 0, cache); + EXPECT_EQ(v.size(), 2); EXPECT_EQ(v[0], 1); EXPECT_EQ(v[1], 2); } @@ -204,6 +209,7 @@ TEST(CTextMetrics, GenerateDxArray6) std::vector v; FakeCache1 cache; CTextMetrics::GenerateDxArray(&v, L"\xd83c,", 2, 0, 0, 0, 10, cache); + EXPECT_EQ(v.size(), 2); EXPECT_EQ(v[0], 21); EXPECT_EQ(v[1], 12); } @@ -214,6 +220,7 @@ TEST(CTextMetrics, GenerateDxArray7) std::vector v; FakeCache1 cache; CTextMetrics::GenerateDxArray(&v, L"\t\t \t", 4, 10, 100, 1000, 0, cache); + EXPECT_EQ(v.size(), 4); EXPECT_EQ(v[0], 100); EXPECT_EQ(v[1], 100); EXPECT_EQ(v[2], 1); @@ -225,10 +232,36 @@ TEST(CTextMetrics, GenerateDxArray8) // IVSのVariantSelectorが続く文字列は先頭1文字 + 幅0×2で生成する std::vector v; FakeCache1 cache; - CTextMetrics::GenerateDxArray(&v, L"葛󠄀", 2, 0, 0, 0, 10, cache); - EXPECT_TRUE(v[0]); - EXPECT_FALSE(v[1]); - EXPECT_FALSE(v[2]); + CTextMetrics::GenerateDxArray(&v, L"葛󠄀", 3, 0, 0, 0, 10, cache); + EXPECT_EQ(v.size(), 3); + EXPECT_NE(v[0], 0); + EXPECT_EQ(v[1], 0); + EXPECT_EQ(v[2], 0); +} + +TEST(CTextMetrics, GenerateDxArray9) +{ + // 結合文字のテスト。先頭1文字 + 後続1文字の例。 + std::vector v; + FakeCache1 cache; + CTextMetrics::GenerateDxArray(&v, L"ガ", 2, 0, 0, 100, 10, cache); + EXPECT_EQ(v.size(), 2); + EXPECT_NE(v[0], 0); + EXPECT_EQ(v[1], 0); +} + +TEST(CTextMetrics, GenerateDxArray10) +{ + // 結合文字のテスト。先頭1文字 + 後続4文字。 + std::vector v; + FakeCache1 cache; + CTextMetrics::GenerateDxArray(&v, L"\x0061\x0337\x0305\x034d\x032a", 5, 0, 0, 100, 10, cache); + EXPECT_EQ(v.size(), 5); + EXPECT_NE(v[0], 0); + EXPECT_EQ(v[1], 0); + EXPECT_EQ(v[2], 0); + EXPECT_EQ(v[3], 0); + EXPECT_EQ(v[4], 0); } TEST(CTextMetrics, CalcTextWidth) diff --git a/tests/unittests/tests1.vcxproj b/tests/unittests/tests1.vcxproj index 5737ef7d38..b0cfbcae61 100644 --- a/tests/unittests/tests1.vcxproj +++ b/tests/unittests/tests1.vcxproj @@ -54,7 +54,7 @@ Console - comctl32.lib;Imm32.lib;mpr.lib;imagehlp.lib;Shlwapi.lib;Dwmapi.lib;%(AdditionalDependencies) + comctl32.lib;icu.lib;Imm32.lib;mpr.lib;imagehlp.lib;Shlwapi.lib;Dwmapi.lib;%(AdditionalDependencies) true