Skip to content

Commit

Permalink
Merge pull request #1937 from berryzplus/feature/add_ivs_support
Browse files Browse the repository at this point in the history
IVSの異体字セレクタに対応する
  • Loading branch information
kengoide authored Feb 11, 2024
2 parents d083650 + 69c0676 commit 7622a8e
Show file tree
Hide file tree
Showing 15 changed files with 187 additions and 11 deletions.
13 changes: 11 additions & 2 deletions sakura_core/charset/CCodeBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,17 @@ std::wstring CCodeBase::CodeToHex(const CNativeW& cSrc, const CommonSetting_Stat
// 表示用16進表示 UNICODE → Hex 変換 2008/6/9 Uchi
EConvertResult CCodeBase::UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCHAR* pDst, const CommonSetting_Statusbar* psStatusbar)
{
if (IsUTF16High(cSrc[0]) && iSLen >= 2 && IsUTF16Low(cSrc[1])) {
// サロゲートペア
// IVS
if (iSLen >= 3 && IsVariationSelector(cSrc + 1)) {
if (psStatusbar->m_bDispSPCodepoint) {
auto_sprintf(pDst, L"%04X, U+%05X", cSrc[0], ConvertToUtf32(cSrc + 1));
}
else {
auto_sprintf(pDst, L"%04X, %04X%04X", cSrc[0], cSrc[1], cSrc[2]);
}
}
// サロゲートペア
else if (iSLen >= 2 && IsSurrogatePair(cSrc)) {
if (psStatusbar->m_bDispSPCodepoint) {
auto_sprintf( pDst, L"U+%05X", 0x10000 + ((cSrc[0] & 0x3FF)<<10) + (cSrc[1] & 0x3FF));
}
Expand Down
3 changes: 3 additions & 0 deletions sakura_core/charset/CUtf8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,9 @@ EConvertResult CUtf8::_UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCHAR*
if (IsUTF16High(cSrc[0]) && iSLen >= 2 && IsUTF16Low(cSrc[1])) {
cBuff._GetMemory()->SetRawDataHoldBuffer(cSrc, 4);
}
else if (iSLen >= 3 && IsVariationSelector(cSrc + 1)) {
cBuff._GetMemory()->SetRawDataHoldBuffer(cSrc, sizeof(wchar_t) * 3);
}
else {
cBuff._GetMemory()->SetRawDataHoldBuffer(cSrc, 2);
if( IsBinaryOnSurrogate(cSrc[0]) ){
Expand Down
37 changes: 37 additions & 0 deletions sakura_core/charset/codechecker.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,43 @@ inline bool IsUTF16Low( wchar_t c ){
return IsUtf16SurrogLow(c);
}

/*!
 * Tells whether the text begins with a UTF-16 surrogate pair.
 *
 * @param text text to inspect; only the first two code units are examined
 * @return true when the text has at least two code units, the first accepted
 *         by IsUTF16High and the second accepted by IsUTF16Low
 */
inline bool IsSurrogatePair(std::wstring_view text) {
	// A pair needs two code units, so shorter text can never match.
	if (text.length() < 2) {
		return false;
	}
	const bool bLeadIsHigh = IsUTF16High(text[0]);
	return bLeadIsHigh && IsUTF16Low(text[1]);
}

/*!
 * Converts the first character of a UTF-16 string to its UTF-32 code point.
 *
 * @param text UTF-16 text; only the first one or two code units are examined
 * @return the code point of the first character, or 0 when the text is empty
 *         or begins with a surrogate code unit that is not part of a valid
 *         pair (a leading NUL character also yields 0)
 */
inline
_Success_(return != 0)
char32_t ConvertToUtf32(std::wstring_view text) {
	if (text.empty()) {
		return 0;
	}
	if (IsSurrogatePair(text)) {
		// Each surrogate carries 10 payload bits; pairs encode U+10000 and above.
		return 0x10000 + ((text[0] & 0x3ff) << 10) + (text[1] & 0x3ff);
	}
	// Every code unit outside the surrogate block U+D800..U+DFFF is a complete
	// BMP character by itself. Masking with 0xf800 matches exactly that block,
	// whereas the former test "!(ch & 0xfc00)" rejected everything >= U+0400.
	if (const auto ch = text[0];
		(ch & 0xf800) != 0xd800)
	{
		return ch;
	}
	// Unpaired surrogate: there is no code point to report.
	return 0;
}

/*!
 * Tells whether the text begins with an IVS variation selector.
 *
 * @param text text to inspect; its first character is decoded with ConvertToUtf32
 * @return true when that code point lies in the range U+E0100..U+E01EF
 */
inline bool IsVariationSelector(std::wstring_view text) {
	const auto codePoint = ConvertToUtf32(text);
	if (codePoint < 0xe0100) {
		return false;
	}
	return codePoint <= 0xe01ef;
}

//! 上位バイトと下位バイトを交換 (主に UTF-16 LE/BE 向け)
inline unsigned short _SwapHLByte( const unsigned short wc ){
unsigned short wc1 = static_cast<unsigned short>( (static_cast<unsigned int>(wc) << 8) & 0x0000ffff );
Expand Down
6 changes: 6 additions & 0 deletions sakura_core/mem/CNativeW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,12 @@ CLogicInt CNativeW::GetSizeOfChar( const wchar_t* pData, int nDataLen, int nIdx
}
}

// IVSの異体字セレクタチェック
if (IsVariationSelector(pData + nIdx + 1)) {
// 正字 + 異体字セレクタで3個分
return CLogicInt(3);
}

return CLogicInt(1);
}

Expand Down
18 changes: 11 additions & 7 deletions sakura_core/parse/CWordParse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,10 @@ ECharKind CWordParse::WhatKindOfChar(
{
using namespace WCODE;

int nCharChars = CNativeW::GetSizeOfChar( pData, pDataLen, nIdx );
if( nCharChars == 0 ){
return CK_NULL; // NULL
}
else if( nCharChars == 1 ){
ECharKind ret = CK_NULL;
if(const auto nCharChars = CNativeW::GetSizeOfChar(pData, pDataLen, nIdx);
nCharChars == 1)
{
wchar_t c=pData[nIdx];

//今までの半角
Expand Down Expand Up @@ -186,9 +185,14 @@ ECharKind CWordParse::WhatKindOfChar(
}
return CK_ETC; // 半角のその他
}
else{
return CK_NULL; // NULL
// IVS(正字 + 異体字セレクタ)
else if (nCharChars == 3 &&
IsVariationSelector(pData + nIdx + 1))
{
ret = CK_ZEN_ETC; // 全角のその他(漢字など)
}

return ret;
}

//! 二つの文字を結合したものの種類を調べる
Expand Down
2 changes: 1 addition & 1 deletion sakura_core/view/CEditView_Command_New.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ void CEditView::DeleteData(
nNxtPos = GetCaret().GetCaretLayoutPos().GetX() + CLayoutInt(pcLayout->GetLayoutEol().GetLen()>0?1+m_pcEditDoc->m_cLayoutMgr.GetCharSpacing():0);
}
else{
nNxtIdx = CLogicInt(CNativeW::GetCharNext( pLine, nLineLen, &pLine[nCurIdx] ) - pLine);
nNxtIdx = nCurIdx + CNativeW::GetSizeOfChar( pLine, nLineLen, nCurIdx);
// 指定された行のデータ内の位置に対応する桁の位置を調べる
nNxtPos = LineIndexToColumn( pcLayout, nNxtIdx );
}
Expand Down
2 changes: 1 addition & 1 deletion sakura_core/view/CTextDrawer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ void CTextDrawer::DispText( HDC hdc, DispPos* pDispPos, int marginy, const wchar
nWorkWidth += pDrawDxArray[nDrawLength++];
}
// サロゲートペア対策 2008/7/5 Uchi Update 7/8 Uchi
if (nDrawLength < nDrawDataMaxLength && pDrawDxArray[nDrawLength] == 0) {
while (nDrawLength < nDrawDataMaxLength && pDrawDxArray[nDrawLength] == 0) {
nDrawLength++;
}

Expand Down
6 changes: 6 additions & 0 deletions sakura_core/view/CTextMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ const int* CTextMetrics::GenerateDxArray(
}
vResultArray.push_back(cache.CalcPxWidthByFont(pText[i]) + spacing);
nIndent += vResultArray.back();

if (IsVariationSelector(pText + i + 1)) {
vResultArray.push_back(0);
vResultArray.push_back(0);
i += 2;
}
}
return vResultArray.data();
}
Expand Down
23 changes: 23 additions & 0 deletions tests/unittests/test-ccodebase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,9 @@ TEST(CCodeBase, Utf8ToHex)

// カラー絵文字「男性のシンボル」(サロゲートペア)
EXPECT_STREQ(L"F09F9AB9", pCodeBase->CodeToHex(L"\U0001F6B9", sStatusbar).c_str());

// IVS(Ideographic Variation Sequence) 「葛󠄀」(葛󠄀城市の葛󠄀、下がヒ)
EXPECT_STREQ(L"E8919BF3A08480", pCodeBase->CodeToHex(L"葛󠄀", sStatusbar).c_str());
}

/*!
Expand All @@ -837,3 +840,23 @@ TEST(CCodeBase, Latin1ToHex)
// カラー絵文字「男性のシンボル」(サロゲートペア)
EXPECT_STREQ(L"D83DDEB9", pCodeBase->CodeToHex(L"\U0001F6B9", sStatusbar).c_str());
}

/*!
 * Checks the hex display of an IVS (base character + variation selector)
 * produced by the CODE_UNICODE code base.
 */
TEST(CCodeBase, UnicodeToHex)
{
	const auto eCodeType = CODE_UNICODE;
	auto pCodeBase = CCodeFactory::CreateCodeBase(eCodeType);

	// Status bar display settings: start with every option switched off.
	CommonSetting_Statusbar sStatusbar;
	sStatusbar.m_bDispUniInSjis = false;
	sStatusbar.m_bDispUniInJis = false;
	sStatusbar.m_bDispUniInEuc = false;
	sStatusbar.m_bDispUtf8Codepoint = false;
	sStatusbar.m_bDispSPCodepoint = false;

	// The input is U+845B followed by the variation selector U+E0100. The
	// selector is invisible, so it is spelled as an escape to keep it from
	// being lost when the file is edited or re-encoded.

	// Code point display on: the selector is shown as "U+E0100".
	sStatusbar.m_bDispSPCodepoint = true;
	EXPECT_STREQ(L"845B, U+E0100", pCodeBase->CodeToHex(L"葛\U000e0100", sStatusbar).c_str());

	// Code point display off: the selector is shown as its two UTF-16 code units.
	sStatusbar.m_bDispSPCodepoint = false;
	EXPECT_STREQ(L"845B, DB40DD00", pCodeBase->CodeToHex(L"葛\U000e0100", sStatusbar).c_str());
}
20 changes: 20 additions & 0 deletions tests/unittests/test-cnative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,26 @@ TEST(CNativeW, GetSizeOfChar)
EXPECT_EQ(CNativeW::GetSizeOfChar(CStringRef(L"\xd83c\xdf38", 2), 1), 1);
}

/*!
 * GetSizeOfChar reports 0 for an empty string.
 */
TEST(CNativeW, GetSizeOfChar_Empty)
{
	// Only the terminating NUL is stored, so the length passed in is 0.
	const wchar_t szEmpty[] = L"";
	const auto nSize = CNativeW::GetSizeOfChar(szEmpty, _countof(szEmpty) - 1, 0);
	EXPECT_EQ(0, nSize);
}

/*!
 * GetSizeOfChar reports 2 for a character stored as a surrogate pair.
 */
TEST(CNativeW, GetSizeOfChar_SurrogatePair)
{
	// Emoji "men's symbol" (U+1F6B9)
	const wchar_t szEmoji[] = L"\U0001f6b9";
	const auto nSize = CNativeW::GetSizeOfChar(szEmoji, _countof(szEmoji) - 1, 0);
	EXPECT_EQ(2, nSize);
}

/*!
 * GetSizeOfChar reports 3 for a base character followed by an IVS
 * variation selector.
 */
TEST(CNativeW, GetSizeOfChar_IVS)
{
	// Three-character place name whose first character is a variant form:
	// U+845B followed by the variation selector U+E0100. The selector is
	// invisible, so it is spelled as an escape to keep it from being lost
	// when the file is edited or re-encoded.
	const auto& s = L"葛\U000e0100城市";
	EXPECT_EQ(3, CNativeW::GetSizeOfChar(s, _countof(s) - 1, 0));
}

/*!
* @brief GetKetaOfCharの仕様
* @remark 指定した文字の桁数を返す。
Expand Down
47 changes: 47 additions & 0 deletions tests/unittests/test-codechecker.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
Copyright (C) 2023, Sakura Editor Organization
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented;
you must not claim that you wrote the original software.
If you use this software in a product, an acknowledgment
in the product documentation would be appreciated but is
not required.
2. Altered source versions must be plainly marked as such,
and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.
*/

#include <gtest/gtest.h>
#include "charset/codechecker.h"

/*!
 * ConvertToUtf32 returns U+1F6B9 for the corresponding wide literal.
 */
TEST(ConvertToUtf32, SurrogatePair)
{
	// Emoji "men's symbol"
	const wchar_t szEmoji[] = L"\U0001f6b9";
	const auto codePoint = ConvertToUtf32(szEmoji);
	EXPECT_EQ(0x1f6b9, codePoint);
}

/*!
 * ConvertToUtf32 returns U+E0100 for the corresponding wide literal.
 */
TEST(ConvertToUtf32, VariationSelector)
{
	// Variation selector VS17
	const wchar_t szSelector[] = L"\U000e0100";
	const auto codePoint = ConvertToUtf32(szSelector);
	EXPECT_EQ(0xe0100, codePoint);
}

/*!
 * ConvertToUtf32 returns 0 for the single code unit 0xDCFF.
 */
TEST(ConvertToUtf32, BinaryOnSurrogate)
{
	// Project-specific convention: bytes that cannot be converted are packed
	// one by one into low-surrogate code units.
	const wchar_t szBinary[] = L"\xdcff";
	const auto codePoint = ConvertToUtf32(szBinary);
	EXPECT_EQ(0, codePoint);
}
}
11 changes: 11 additions & 0 deletions tests/unittests/test-ctextmetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,17 @@ TEST(CTextMetrics, GenerateDxArray7)
EXPECT_EQ(v[3], 99);
}

/*!
 * A character followed by an IVS variation selector produces the width of
 * the leading character plus two zero-width entries.
 */
TEST(CTextMetrics, GenerateDxArray8)
{
	std::vector<int> v;
	FakeCache1 cache;
	// The invisible variation selector is spelled as an escape so that it
	// cannot be lost when the file is edited or re-encoded.
	// NOTE(review): the literal is 3 UTF-16 code units but 2 is passed as the
	// third argument; confirm whether this should be 3.
	CTextMetrics::GenerateDxArray(&v, L"葛\U000e0100", 2, 0, 0, 0, 10, cache);
	// Stop here if fewer than three entries were generated; indexing the
	// vector below would otherwise read out of bounds.
	ASSERT_GE(v.size(), 3u);
	EXPECT_NE(0, v[0]);
	EXPECT_EQ(0, v[1]);
	EXPECT_EQ(0, v[2]);
}

TEST(CTextMetrics, CalcTextWidth)
{
int dx[] = {1, 2, 3};
Expand Down
6 changes: 6 additions & 0 deletions tests/unittests/test-cwordparse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ TEST(WhatKindOfChar, SurrogatePairs)
EXPECT_EQ(CK_ZEN_ETC, CWordParse::WhatKindOfChar(L"\xd842\xdfb7", 2, 0));
}

/*!
 * A base character followed by an IVS variation selector is classified as
 * CK_ZEN_ETC.
 */
TEST(WhatKindOfChar, IVS)
{
	// The invisible variation selector is spelled as an escape so that it
	// cannot be lost when the file is edited or re-encoded.
	EXPECT_EQ(CK_ZEN_ETC, CWordParse::WhatKindOfChar(L"葛\U000e0100", 3, 0));
}

TEST(WhatKindOfTwoChars, ReturnsSameKindIfTwoKindsAreIdentical)
{
EXPECT_EQ(CK_HIRA, CWordParse::WhatKindOfTwoChars(CK_HIRA, CK_HIRA));
Expand Down
1 change: 1 addition & 0 deletions tests/unittests/tests1.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
<ClCompile Include="code-main.cpp" />
<ClCompile Include="test-cdocline.cpp" />
<ClCompile Include="test-cdoclinemgr.cpp" />
<ClCompile Include="test-codechecker.cpp" />
<ClCompile Include="test-cppa.cpp" />
<ClCompile Include="test-csearchagent.cpp" />
<ClCompile Include="test-crunningtimer.cpp" />
Expand Down
3 changes: 3 additions & 0 deletions tests/unittests/tests1.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,9 @@
<ClCompile Include="test-extmodules.cpp">
<Filter>Test Files</Filter>
</ClCompile>
<ClCompile Include="test-codechecker.cpp">
<Filter>Test Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="StartEditorProcessForTest.h">
Expand Down

0 comments on commit 7622a8e

Please sign in to comment.