diff --git a/.gitignore b/.gitignore
index 428bf4ac82..7cdfa87bab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
*.iobj
*.ipdb
*.pyc
+*.vcxproj.user
**/packages
**/TestResults
.sonarqube
diff --git a/sakura/sakura.vcxproj b/sakura/sakura.vcxproj
index 2cf3437530..d1e874906f 100644
--- a/sakura/sakura.vcxproj
+++ b/sakura/sakura.vcxproj
@@ -77,7 +77,7 @@
Windows
- comctl32.lib;Imm32.lib;mpr.lib;imagehlp.lib;Shlwapi.lib;Dwmapi.lib;%(AdditionalDependencies)
+ comctl32.lib;icu.lib;Imm32.lib;mpr.lib;imagehlp.lib;Shlwapi.lib;Dwmapi.lib;%(AdditionalDependencies)
true
diff --git a/sakura_core/StdAfx.h b/sakura_core/StdAfx.h
index 701718d56c..f407a74d50 100644
--- a/sakura_core/StdAfx.h
+++ b/sakura_core/StdAfx.h
@@ -92,6 +92,8 @@
#include
#include
+#include
+#define NTDDI_VERSION NTDDI_WIN10_RS3
#include
#include
#include
diff --git a/sakura_core/charset/CCodeBase.cpp b/sakura_core/charset/CCodeBase.cpp
index 62877bbe52..8ddeab4b13 100644
--- a/sakura_core/charset/CCodeBase.cpp
+++ b/sakura_core/charset/CCodeBase.cpp
@@ -25,10 +25,10 @@
#include "StdAfx.h"
#include "CCodeBase.h"
+#include "CEol.h"
#include "charset/CCodeFactory.h"
-#include "convert/convert_util2.h"
#include "charset/codechecker.h"
-#include "CEol.h"
+#include "convert/convert_util2.h"
#include "env/CommonSetting.h"
/*!
@@ -54,8 +54,12 @@ std::wstring CCodeBase::CodeToHex(const CNativeW& cSrc, const CommonSetting_Stat
// 表示用16進表示 UNICODE → Hex 変換 2008/6/9 Uchi
EConvertResult CCodeBase::UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCHAR* pDst, const CommonSetting_Statusbar* psStatusbar)
{
+ std::wstring_view trailingChars;
+
// IVS
if (iSLen >= 3 && IsVariationSelector(cSrc + 1)) {
+ trailingChars = std::wstring_view(cSrc + 3, iSLen - 3);
+
if (psStatusbar->m_bDispSPCodepoint) {
auto_sprintf(pDst, L"%04X, U+%05X", cSrc[0], ConvertToUtf32(cSrc + 1));
}
@@ -65,6 +69,8 @@ EConvertResult CCodeBase::UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCH
}
// サロゲートペア
else if (iSLen >= 2 && IsSurrogatePair(cSrc)) {
+ trailingChars = std::wstring_view(cSrc + 2, iSLen - 2);
+
if (psStatusbar->m_bDispSPCodepoint) {
auto_sprintf( pDst, L"U+%05X", 0x10000 + ((cSrc[0] & 0x3FF)<<10) + (cSrc[1] & 0x3FF));
}
@@ -73,9 +79,16 @@ EConvertResult CCodeBase::UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCH
}
}
else {
+ trailingChars = std::wstring_view(cSrc + 1, iSLen - 1);
+
auto_sprintf( pDst, L"U+%04X", cSrc[0] );
}
+ if (CountNonSpacingMarkCharactersByUTF16CodeUnits(trailingChars)) {
+ // 結合文字がある場合は「...」を表示する
+ wcscat(pDst, L"...");
+ }
+
return RESULT_COMPLETE;
}
diff --git a/sakura_core/charset/codechecker.cpp b/sakura_core/charset/codechecker.cpp
index 3219592deb..5b0d2513fd 100644
--- a/sakura_core/charset/codechecker.cpp
+++ b/sakura_core/charset/codechecker.cpp
@@ -34,11 +34,43 @@
#include "StdAfx.h"
#include "charset/codechecker.h"
-#include "mem/CMemory.h"
-#include "convert/convert_util2.h"
-#include "charset/codeutil.h"
-#include "charset/charcode.h"
+
#include
+#include
+#include
+
+#include
+
+#include "charset/charcode.h"
+#include "charset/codeutil.h"
+#include "convert/convert_util2.h"
+#include "mem/CMemory.h"
+
+std::size_t CountNonSpacingMarkCharactersByUTF16CodeUnits(std::wstring_view text) { // returns how many UTF-16 code units at the start of `text` form consecutive characters that u_charType() reports as U_NON_SPACING_MARK
+ std::size_t i = 0; // code units accepted so far; this is also the return value
+ while (i < text.size()) {
+ char32_t ch; // code point decoded at position i
+ bool is_surrogate_pair; // true when `ch` was decoded from two code units
+
+ if (char16_t high = text[i];
+ !IsUTF16High(high)) {
+ ch = high; // not a high surrogate: the single code unit is used as the code point
+ is_surrogate_pair = false;
+ } else if (i + 1 < text.size() && IsUTF16Low(text[i + 1])) {
+ char16_t low = text[i + 1];
+ ch = DecodeSurrogatePair(high, low);
+ is_surrogate_pair = true;
+ } else {
+ // Hit a malformed surrogate pair whose low half is missing, so stop counting here.
+ break;
+ }
+
+ if (u_charType(ch) != U_NON_SPACING_MARK) // the run ends at the first character that is not a non-spacing mark
+ break;
+ i += (is_surrogate_pair ? 2 : 1); // advance by the number of code units just consumed
+ }
+ return i;
+}
/* =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*
diff --git a/sakura_core/charset/codechecker.h b/sakura_core/charset/codechecker.h
index 738a24e7cf..6f9a2a9404 100644
--- a/sakura_core/charset/codechecker.h
+++ b/sakura_core/charset/codechecker.h
@@ -36,6 +36,9 @@
#define SAKURA_CODECHECKER_62A18A31_2ECD_47B6_AEE1_38EDDAD3FF2B_H_
#pragma once
+#include
+#include
+
#include "_main/global.h"
#include "convert/convert_util2.h"
#include "basis/primitive.h"
@@ -366,6 +369,11 @@ inline bool IsSurrogatePair(std::wstring_view text) {
return 2 <= text.length() && IsUTF16High(text[0]) && IsUTF16Low(text[1]);
}
+//! Converts a UTF-16 surrogate pair into a Unicode code point. The arguments are not validated here.
+inline char32_t DecodeSurrogatePair(char16_t high, char16_t low) {
+ return 0x10000 + ((high & 0x3ff) << 10) + (low & 0x3ff); // low 10 bits of `high` become bits 10-19, low 10 bits of `low` become bits 0-9, then 0x10000 is added
+}
+
/*!
* UTF16文字列の先頭1文字をUTF32コードポイントに変換する
*
@@ -378,7 +386,7 @@ char32_t ConvertToUtf32(std::wstring_view text) {
return 0;
}
if (IsSurrogatePair(text)) {
- return 0x10000 + ((text[0] & 0x3ff) << 10) + (text[1] & 0x3ff);
+ return DecodeSurrogatePair(text[0], text[1]);
}
if (const auto ch = text[0];
!(ch & 0xfc00))
@@ -396,6 +404,9 @@ inline bool IsVariationSelector(std::wstring_view text) {
return 0xe0100 <= cp && cp <= 0xe01ef;
}
+//! Counts, in UTF-16 code units, the length of a run of consecutive zero-width combining characters.
+std::size_t CountNonSpacingMarkCharactersByUTF16CodeUnits(std::wstring_view text);
+
//! 上位バイトと下位バイトを交換 (主に UTF-16 LE/BE 向け)
inline unsigned short _SwapHLByte( const unsigned short wc ){
unsigned short wc1 = static_cast( (static_cast(wc) << 8) & 0x0000ffff );
diff --git a/sakura_core/mem/CNativeW.cpp b/sakura_core/mem/CNativeW.cpp
index dbd8e8a98d..df2d376355 100644
--- a/sakura_core/mem/CNativeW.cpp
+++ b/sakura_core/mem/CNativeW.cpp
@@ -23,13 +23,15 @@
distribution.
*/
#include "StdAfx.h"
-#include
-#include "charset/codechecker.h"
#include "mem/CNativeW.h"
+#include
#include
+#include
+
#include "CEol.h"
+#include "charset/codechecker.h"
/*!
コンストラクタ
@@ -389,20 +391,21 @@ CLogicInt CNativeW::GetSizeOfChar( const wchar_t* pData, int nDataLen, int nIdx
return CLogicInt(0);
// サロゲートチェック 2008/7/5 Uchi
- if (IsUTF16High(pData[nIdx])) {
- if (nIdx + 1 < nDataLen && IsUTF16Low(pData[nIdx + 1])) {
- // サロゲートペア 2個分
- return CLogicInt(2);
- }
- }
-
- // IVSの異体字セレクタチェック
- if (IsVariationSelector(pData + nIdx + 1)) {
- // 正字 + 異体字セレクタで3個分
- return CLogicInt(3);
+ int units;
+ if (!IsUTF16High(pData[nIdx])) {
+ units = 1;
+ } else if (nIdx + 1 < nDataLen && IsUTF16Low(pData[nIdx + 1])) {
+ // サロゲートペア 2個分
+ units = 2;
+ } else {
+ // 不正なサロゲートペア
+ return CLogicInt(1);
}
- return CLogicInt(1);
+ // 後続の幅なし結合文字の数を足す
+ const auto trailing_text = std::wstring_view(pData + nIdx + units, nDataLen - nIdx - units);
+ const auto count = CountNonSpacingMarkCharactersByUTF16CodeUnits(trailing_text);
+ return CLogicInt(static_cast(units + count));
}
//! 指定した位置の文字が半角何個分かを返す
diff --git a/sakura_core/parse/CWordParse.cpp b/sakura_core/parse/CWordParse.cpp
index b1f46c98d7..1d9fd8ffd8 100644
--- a/sakura_core/parse/CWordParse.cpp
+++ b/sakura_core/parse/CWordParse.cpp
@@ -24,6 +24,10 @@
*/
#include "StdAfx.h"
#include "CWordParse.h"
+
+#include
+
+#include "basis/SakuraBasis.h"
#include "charset/charcode.h"
#include "charset/codechecker.h"
#include "mem/CNativeW.h"
diff --git a/sakura_core/view/CTextMetrics.cpp b/sakura_core/view/CTextMetrics.cpp
index 182c849031..f0b4e4fbeb 100644
--- a/sakura_core/view/CTextMetrics.cpp
+++ b/sakura_core/view/CTextMetrics.cpp
@@ -24,9 +24,14 @@
distribution.
*/
#include "StdAfx.h"
+#include "CTextMetrics.h"
+
#include
+#include
#include
-#include "CTextMetrics.h"
+
+#include
+
#include "charset/codechecker.h"
#include "mem/CNativeW.h"
@@ -86,16 +91,18 @@ const int* CTextMetrics::GenerateDxArray(
std::vector& vResultArray = *pvResultArray;
vResultArray.clear();
- for (int i = 0; i < nLength; ++i) {
+ int i = 0;
+ while (i < nLength) {
if (pText[i] == WCODE::TAB) {
// TAB対応 2013/5/7 Uchi
if (i > 0 && pText[i - 1] == WCODE::TAB) {
vResultArray.push_back(nTabSpace);
nIndent += nTabSpace;
- continue;
+ } else {
+ vResultArray.push_back((nTabSpace + nHankakuDx - 1) - ((nIndent + nHankakuDx - 1) % nTabSpace));
+ nIndent += vResultArray.back();
}
- vResultArray.push_back((nTabSpace + nHankakuDx - 1) - ((nIndent + nHankakuDx - 1) % nTabSpace));
- nIndent += vResultArray.back();
+ ++i;
continue;
}
@@ -103,17 +110,17 @@ const int* CTextMetrics::GenerateDxArray(
if(IsUTF16High(pText[i]) && i + 1 < nLength && IsUTF16Low(pText[i + 1])) {
vResultArray.push_back(cache.CalcPxWidthByFont2(pText + i) + spacing);
vResultArray.push_back(0);
- i++;
- continue;
- }
- vResultArray.push_back(cache.CalcPxWidthByFont(pText[i]) + spacing);
- nIndent += vResultArray.back();
-
- if (IsVariationSelector(pText + i + 1)) {
- vResultArray.push_back(0);
- vResultArray.push_back(0);
i += 2;
+ } else {
+ vResultArray.push_back(cache.CalcPxWidthByFont(pText[i]) + spacing);
+ nIndent += vResultArray.back();
+ ++i;
}
+
+ const auto trailing_text = std::wstring_view(pText + i, nLength - i);
+ const auto count = CountNonSpacingMarkCharactersByUTF16CodeUnits(trailing_text);
+ std::fill_n(std::back_inserter(vResultArray), count, 0);
+ i += count;
}
return vResultArray.data();
}
diff --git a/tests/unittests/test-ccodebase.cpp b/tests/unittests/test-ccodebase.cpp
index 1a8a272998..b7f3581d26 100644
--- a/tests/unittests/test-ccodebase.cpp
+++ b/tests/unittests/test-ccodebase.cpp
@@ -841,7 +841,7 @@ TEST(CCodeBase, Latin1ToHex)
EXPECT_STREQ(L"D83DDEB9", pCodeBase->CodeToHex(L"\U0001F6B9", sStatusbar).c_str());
}
-TEST(CCodeBase, UnicodeToHex)
+TEST(CCodeBase, UnicodeToHex1)
{
const auto eCodeType = CODE_UNICODE;
auto pCodeBase = CCodeFactory::CreateCodeBase(eCodeType);
@@ -860,3 +860,20 @@ TEST(CCodeBase, UnicodeToHex)
sStatusbar.m_bDispSPCodepoint = false;
EXPECT_STREQ(L"845B, DB40DD00", pCodeBase->CodeToHex(L"葛󠄀", sStatusbar).c_str());
}
+
+TEST(CCodeBase, UnicodeToHex2)
+{
+ const auto eCodeType = CODE_UNICODE;
+ auto pCodeBase = CCodeFactory::CreateCodeBase(eCodeType);
+
+ // Status-bar display settings: all five flags below are disabled.
+ CommonSetting_Statusbar sStatusbar;
+ sStatusbar.m_bDispUniInSjis = false;
+ sStatusbar.m_bDispUniInJis = false;
+ sStatusbar.m_bDispUniInEuc = false;
+ sStatusbar.m_bDispUtf8Codepoint = false;
+ sStatusbar.m_bDispSPCodepoint = false;
+
+ // Input is U+30AB followed by the combining mark U+3099, written with escapes so that NFC normalization of this source file cannot turn it into U+30AC. A trailing combining mark is expected to be reported as "...".
+ EXPECT_STREQ(L"U+30AB...", pCodeBase->CodeToHex(L"\u30AB\u3099", sStatusbar).c_str());
+}
diff --git a/tests/unittests/test-ctextmetrics.cpp b/tests/unittests/test-ctextmetrics.cpp
index 4dfeee8970..fb03fd855e 100644
--- a/tests/unittests/test-ctextmetrics.cpp
+++ b/tests/unittests/test-ctextmetrics.cpp
@@ -156,6 +156,7 @@ TEST(CTextMetrics, GenerateDxArray1)
FakeCache1 cache;
const int* p = CTextMetrics::GenerateDxArray(&v, L"ab", 2, 0, 0, 0, 0, cache);
EXPECT_EQ(p, v.data());
+ EXPECT_EQ(v.size(), 2);
EXPECT_EQ(v[0], 1);
EXPECT_EQ(v[1], 2);
}
@@ -166,6 +167,7 @@ TEST(CTextMetrics, GenerateDxArray2)
std::vector v;
FakeCache1 cache;
CTextMetrics::GenerateDxArray(&v, L"ab", 2, 0, 0, 0, 10, cache);
+ EXPECT_EQ(v.size(), 2);
EXPECT_EQ(v[0], 11);
EXPECT_EQ(v[1], 12);
}
@@ -176,6 +178,7 @@ TEST(CTextMetrics, GenerateDxArray3)
std::vector v;
FakeCache1 cache;
CTextMetrics::GenerateDxArray(&v, L"\xd83c\xdf38", 2, 0, 0, 0, 0, cache);
+ EXPECT_EQ(v.size(), 2);
EXPECT_EQ(v[0], 10000);
}
@@ -185,6 +188,7 @@ TEST(CTextMetrics, GenerateDxArray4)
std::vector v;
FakeCache1 cache;
CTextMetrics::GenerateDxArray(&v, L"\xd83c\xdf38", 2, 0, 0, 0, 10, cache);
+ EXPECT_EQ(v.size(), 2);
EXPECT_EQ(v[0], 10020);
}
@@ -194,6 +198,7 @@ TEST(CTextMetrics, GenerateDxArray5)
std::vector v;
FakeCache1 cache;
CTextMetrics::GenerateDxArray(&v, L"\xd83c,", 2, 0, 0, 0, 0, cache);
+ EXPECT_EQ(v.size(), 2);
EXPECT_EQ(v[0], 1);
EXPECT_EQ(v[1], 2);
}
@@ -204,6 +209,7 @@ TEST(CTextMetrics, GenerateDxArray6)
std::vector v;
FakeCache1 cache;
CTextMetrics::GenerateDxArray(&v, L"\xd83c,", 2, 0, 0, 0, 10, cache);
+ EXPECT_EQ(v.size(), 2);
EXPECT_EQ(v[0], 21);
EXPECT_EQ(v[1], 12);
}
@@ -214,6 +220,7 @@ TEST(CTextMetrics, GenerateDxArray7)
std::vector v;
FakeCache1 cache;
CTextMetrics::GenerateDxArray(&v, L"\t\t \t", 4, 10, 100, 1000, 0, cache);
+ EXPECT_EQ(v.size(), 4);
EXPECT_EQ(v[0], 100);
EXPECT_EQ(v[1], 100);
EXPECT_EQ(v[2], 1);
@@ -225,10 +232,36 @@ TEST(CTextMetrics, GenerateDxArray8)
// IVSのVariantSelectorが続く文字列は先頭1文字 + 幅0×2で生成する
std::vector v;
FakeCache1 cache;
- CTextMetrics::GenerateDxArray(&v, L"葛󠄀", 2, 0, 0, 0, 10, cache);
- EXPECT_TRUE(v[0]);
- EXPECT_FALSE(v[1]);
- EXPECT_FALSE(v[2]);
+ CTextMetrics::GenerateDxArray(&v, L"葛󠄀", 3, 0, 0, 0, 10, cache);
+ EXPECT_EQ(v.size(), 3);
+ EXPECT_NE(v[0], 0);
+ EXPECT_EQ(v[1], 0);
+ EXPECT_EQ(v[2], 0);
+}
+
+TEST(CTextMetrics, GenerateDxArray9)
+{
+ // Combining-character test: one base character (U+30AB) plus one combining mark (U+3099), escaped so NFC normalization of this file cannot merge them.
+ std::vector<int> v;
+ FakeCache1 cache;
+ CTextMetrics::GenerateDxArray(&v, L"\u30AB\u3099", 2, 0, 0, 100, 10, cache);
+ ASSERT_EQ(v.size(), 2u); // fatal on mismatch so the indexing below can never read out of bounds
+ EXPECT_NE(v[0], 0);
+ EXPECT_EQ(v[1], 0);
+}
+
+TEST(CTextMetrics, GenerateDxArray10)
+{
+ // Combining-character test: one base character followed by four combining marks.
+ std::vector<int> v;
+ FakeCache1 cache;
+ CTextMetrics::GenerateDxArray(&v, L"\x0061\x0337\x0305\x034d\x032a", 5, 0, 0, 100, 10, cache);
+ ASSERT_EQ(v.size(), 5u); // fatal on mismatch so the indexing below can never read out of bounds
+ EXPECT_NE(v[0], 0);
+ EXPECT_EQ(v[1], 0);
+ EXPECT_EQ(v[2], 0);
+ EXPECT_EQ(v[3], 0);
+ EXPECT_EQ(v[4], 0);
+}
TEST(CTextMetrics, CalcTextWidth)
diff --git a/tests/unittests/tests1.vcxproj b/tests/unittests/tests1.vcxproj
index 5737ef7d38..b0cfbcae61 100644
--- a/tests/unittests/tests1.vcxproj
+++ b/tests/unittests/tests1.vcxproj
@@ -54,7 +54,7 @@
Console
- comctl32.lib;Imm32.lib;mpr.lib;imagehlp.lib;Shlwapi.lib;Dwmapi.lib;%(AdditionalDependencies)
+ comctl32.lib;icu.lib;Imm32.lib;mpr.lib;imagehlp.lib;Shlwapi.lib;Dwmapi.lib;%(AdditionalDependencies)
true