Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Unicode の結合文字を1文字として描画できるようにする #1942

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
*.iobj
*.ipdb
*.pyc
*.vcxproj.user
**/packages
**/TestResults
.sonarqube
Expand Down
2 changes: 1 addition & 1 deletion sakura/sakura.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@
</Manifest>
<Link>
<SubSystem>Windows</SubSystem>
<AdditionalDependencies>comctl32.lib;Imm32.lib;mpr.lib;imagehlp.lib;Shlwapi.lib;Dwmapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>comctl32.lib;icu.lib;Imm32.lib;mpr.lib;imagehlp.lib;Shlwapi.lib;Dwmapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
<PostBuildEvent>
Expand Down
2 changes: 2 additions & 0 deletions sakura_core/StdAfx.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@
#include <utility>
#include <vector>

#include <sdkddkver.h>
#define NTDDI_VERSION NTDDI_WIN10_RS3
#include <Windows.h>
#include <windowsx.h>
#include <CommCtrl.h>
Expand Down
17 changes: 15 additions & 2 deletions sakura_core/charset/CCodeBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
#include "StdAfx.h"
#include "CCodeBase.h"

#include "CEol.h"
#include "charset/CCodeFactory.h"
#include "convert/convert_util2.h"
#include "charset/codechecker.h"
#include "CEol.h"
#include "convert/convert_util2.h"
#include "env/CommonSetting.h"

/*!
Expand All @@ -54,8 +54,12 @@ std::wstring CCodeBase::CodeToHex(const CNativeW& cSrc, const CommonSetting_Stat
// 表示用16進表示 UNICODE → Hex 変換 2008/6/9 Uchi
EConvertResult CCodeBase::UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCHAR* pDst, const CommonSetting_Statusbar* psStatusbar)
{
std::wstring_view trailingChars;

// IVS
if (iSLen >= 3 && IsVariationSelector(cSrc + 1)) {
trailingChars = std::wstring_view(cSrc + 3, iSLen - 3);

if (psStatusbar->m_bDispSPCodepoint) {
auto_sprintf(pDst, L"%04X, U+%05X", cSrc[0], ConvertToUtf32(cSrc + 1));
}
Expand All @@ -65,6 +69,8 @@ EConvertResult CCodeBase::UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCH
}
// サロゲートペア
else if (iSLen >= 2 && IsSurrogatePair(cSrc)) {
trailingChars = std::wstring_view(cSrc + 2, iSLen - 2);

if (psStatusbar->m_bDispSPCodepoint) {
auto_sprintf( pDst, L"U+%05X", 0x10000 + ((cSrc[0] & 0x3FF)<<10) + (cSrc[1] & 0x3FF));
}
Expand All @@ -73,9 +79,16 @@ EConvertResult CCodeBase::UnicodeToHex(const wchar_t* cSrc, const int iSLen, WCH
}
}
else {
trailingChars = std::wstring_view(cSrc + 1, iSLen - 1);

auto_sprintf( pDst, L"U+%04X", cSrc[0] );
}

if (CountNonSpacingMarkCharactersByUTF16CodeUnits(trailingChars)) {
// 結合文字がある場合は「...」を表示する
wcscat(pDst, L"...");
}

return RESULT_COMPLETE;
}

Expand Down
40 changes: 36 additions & 4 deletions sakura_core/charset/codechecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,43 @@

#include "StdAfx.h"
#include "charset/codechecker.h"
#include "mem/CMemory.h"
#include "convert/convert_util2.h"
#include "charset/codeutil.h"
#include "charset/charcode.h"

#include <algorithm>
#include <cstddef>
#include <string_view>

#include <icu.h>

#include "charset/charcode.h"
#include "charset/codeutil.h"
#include "convert/convert_util2.h"
#include "mem/CMemory.h"

/*!
 * Measures the run of non-spacing marks at the start of a UTF-16 string.
 *
 * Code points are decoded one at a time from the front of @a text. Scanning
 * stops at the first code point whose ICU general category is not
 * U_NON_SPACING_MARK, or at a high surrogate that is not followed by a low
 * surrogate.
 *
 * @param text UTF-16 text to examine.
 * @return Number of UTF-16 code units covered by the leading run of
 *         non-spacing marks (a surrogate pair contributes 2).
 */
std::size_t CountNonSpacingMarkCharactersByUTF16CodeUnits(std::wstring_view text) {
	const std::size_t length = text.size();
	std::size_t consumed = 0;

	while (consumed < length) {
		const char16_t lead = text[consumed];
		char32_t codePoint = lead;
		std::size_t width = 1;

		if (IsUTF16High(lead)) {
			// A high surrogate must be completed by a low surrogate;
			// otherwise the pair is malformed and counting ends here.
			if (consumed + 1 >= length || !IsUTF16Low(text[consumed + 1])) {
				return consumed;
			}
			const char16_t trail = text[consumed + 1];
			codePoint = DecodeSurrogatePair(lead, trail);
			width = 2;
		}

		// Anything other than a non-spacing mark terminates the run.
		if (u_charType(codePoint) != U_NON_SPACING_MARK) {
			return consumed;
		}
		consumed += width;
	}
	return consumed;
}

/* =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*

Expand Down
13 changes: 12 additions & 1 deletion sakura_core/charset/codechecker.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
#define SAKURA_CODECHECKER_62A18A31_2ECD_47B6_AEE1_38EDDAD3FF2B_H_
#pragma once

#include <cstddef>
#include <string_view>

#include "_main/global.h"
#include "convert/convert_util2.h"
#include "basis/primitive.h"
Expand Down Expand Up @@ -366,6 +369,11 @@ inline bool IsSurrogatePair(std::wstring_view text) {
return 2 <= text.length() && IsUTF16High(text[0]) && IsUTF16Low(text[1]);
}

//! Combines a UTF-16 surrogate pair into the Unicode code point it encodes.
//! Only the low 10 bits of each argument are used.
inline char32_t DecodeSurrogatePair(char16_t high, char16_t low) {
	// The high surrogate carries the upper 10 bits, the low surrogate the lower 10.
	const char32_t upperBits = static_cast<char32_t>(high & 0x3ff) << 10;
	const char32_t lowerBits = static_cast<char32_t>(low & 0x3ff);
	// Supplementary planes start at U+10000.
	return 0x10000 + upperBits + lowerBits;
}

/*!
* UTF16文字列の先頭1文字をUTF32コードポイントに変換する
*
Expand All @@ -378,7 +386,7 @@ char32_t ConvertToUtf32(std::wstring_view text) {
return 0;
}
if (IsSurrogatePair(text)) {
return 0x10000 + ((text[0] & 0x3ff) << 10) + (text[1] & 0x3ff);
return DecodeSurrogatePair(text[0], text[1]);
}
if (const auto ch = text[0];
!(ch & 0xfc00))
Expand All @@ -396,6 +404,9 @@ inline bool IsVariationSelector(std::wstring_view text) {
return 0xe0100 <= cp && cp <= 0xe01ef;
}

//! 連続する幅なし結合文字の長さを UTF-16 コードユニット単位で数える。
std::size_t CountNonSpacingMarkCharactersByUTF16CodeUnits(std::wstring_view text);

//! 上位バイトと下位バイトを交換 (主に UTF-16 LE/BE 向け)
inline unsigned short _SwapHLByte( const unsigned short wc ){
unsigned short wc1 = static_cast<unsigned short>( (static_cast<unsigned int>(wc) << 8) & 0x0000ffff );
Expand Down
31 changes: 17 additions & 14 deletions sakura_core/mem/CNativeW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@
distribution.
*/
#include "StdAfx.h"
#include <stdexcept>
#include "charset/codechecker.h"
#include "mem/CNativeW.h"

#include <stdexcept>
#include <string_view>

#include <icu.h>

#include "CEol.h"
#include "charset/codechecker.h"

/*!
コンストラクタ
Expand Down Expand Up @@ -389,20 +391,21 @@ CLogicInt CNativeW::GetSizeOfChar( const wchar_t* pData, int nDataLen, int nIdx
return CLogicInt(0);

// サロゲートチェック 2008/7/5 Uchi
if (IsUTF16High(pData[nIdx])) {
if (nIdx + 1 < nDataLen && IsUTF16Low(pData[nIdx + 1])) {
// サロゲートペア 2個分
return CLogicInt(2);
}
}

// IVSの異体字セレクタチェック
if (IsVariationSelector(pData + nIdx + 1)) {
// 正字 + 異体字セレクタで3個分
return CLogicInt(3);
int units;
if (!IsUTF16High(pData[nIdx])) {
units = 1;
} else if (nIdx + 1 < nDataLen && IsUTF16Low(pData[nIdx + 1])) {
// サロゲートペア 2個分
units = 2;
} else {
// 不正なサロゲートペア
return CLogicInt(1);
}

return CLogicInt(1);
// 後続の幅なし結合文字の数を足す
const auto trailing_text = std::wstring_view(pData + nIdx + units, nDataLen - nIdx - units);
const auto count = CountNonSpacingMarkCharactersByUTF16CodeUnits(trailing_text);
return CLogicInt(static_cast<int>(units + count));
}

//! 指定した位置の文字が半角何個分かを返す
Expand Down
4 changes: 4 additions & 0 deletions sakura_core/parse/CWordParse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
*/
#include "StdAfx.h"
#include "CWordParse.h"

#include <Windows.h>

#include "basis/SakuraBasis.h"
#include "charset/charcode.h"
#include "charset/codechecker.h"
#include "mem/CNativeW.h"
Expand Down
35 changes: 21 additions & 14 deletions sakura_core/view/CTextMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,14 @@
distribution.
*/
#include "StdAfx.h"
#include "CTextMetrics.h"

#include <algorithm>
#include <iterator>
#include <vector>
#include "CTextMetrics.h"

#include <icu.h>

#include "charset/codechecker.h"
#include "mem/CNativeW.h"

Expand Down Expand Up @@ -86,34 +91,36 @@ const int* CTextMetrics::GenerateDxArray(
std::vector<int>& vResultArray = *pvResultArray;
vResultArray.clear();

for (int i = 0; i < nLength; ++i) {
int i = 0;
while (i < nLength) {
if (pText[i] == WCODE::TAB) {
// TAB対応 2013/5/7 Uchi
if (i > 0 && pText[i - 1] == WCODE::TAB) {
vResultArray.push_back(nTabSpace);
nIndent += nTabSpace;
continue;
} else {
vResultArray.push_back((nTabSpace + nHankakuDx - 1) - ((nIndent + nHankakuDx - 1) % nTabSpace));
nIndent += vResultArray.back();
}
vResultArray.push_back((nTabSpace + nHankakuDx - 1) - ((nIndent + nHankakuDx - 1) % nTabSpace));
nIndent += vResultArray.back();
++i;
continue;
}

const int spacing = CNativeW::GetKetaOfChar(pText, nLength, i, cache) * nCharSpacing;
if(IsUTF16High(pText[i]) && i + 1 < nLength && IsUTF16Low(pText[i + 1])) {
vResultArray.push_back(cache.CalcPxWidthByFont2(pText + i) + spacing);
vResultArray.push_back(0);
i++;
continue;
}
vResultArray.push_back(cache.CalcPxWidthByFont(pText[i]) + spacing);
nIndent += vResultArray.back();

if (IsVariationSelector(pText + i + 1)) {
vResultArray.push_back(0);
vResultArray.push_back(0);
i += 2;
} else {
vResultArray.push_back(cache.CalcPxWidthByFont(pText[i]) + spacing);
nIndent += vResultArray.back();
++i;
}

const auto trailing_text = std::wstring_view(pText + i, nLength - i);
const auto count = CountNonSpacingMarkCharactersByUTF16CodeUnits(trailing_text);
std::fill_n(std::back_inserter(vResultArray), count, 0);
i += count;
}
return vResultArray.data();
}
Expand Down
19 changes: 18 additions & 1 deletion tests/unittests/test-ccodebase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -841,7 +841,7 @@ TEST(CCodeBase, Latin1ToHex)
EXPECT_STREQ(L"D83DDEB9", pCodeBase->CodeToHex(L"\U0001F6B9", sStatusbar).c_str());
}

TEST(CCodeBase, UnicodeToHex)
TEST(CCodeBase, UnicodeToHex1)
{
const auto eCodeType = CODE_UNICODE;
auto pCodeBase = CCodeFactory::CreateCodeBase(eCodeType);
Expand All @@ -860,3 +860,20 @@ TEST(CCodeBase, UnicodeToHex)
sStatusbar.m_bDispSPCodepoint = false;
EXPECT_STREQ(L"845B, DB40DD00", pCodeBase->CodeToHex(L"葛󠄀", sStatusbar).c_str());
}

/*!
 * Checks that CodeToHex appends "..." when the base character is followed by
 * a combining (non-spacing) mark.
 */
TEST(CCodeBase, UnicodeToHex2)
{
	const auto eCodeType = CODE_UNICODE;
	auto pCodeBase = CCodeFactory::CreateCodeBase(eCodeType);

	// Status bar settings: turn every optional code display off.
	CommonSetting_Statusbar sStatusbar;
	sStatusbar.m_bDispUniInSjis = false;
	sStatusbar.m_bDispUniInJis = false;
	sStatusbar.m_bDispUniInEuc = false;
	sStatusbar.m_bDispUtf8Codepoint = false;
	sStatusbar.m_bDispSPCodepoint = false;

	// U+30AB (KATAKANA LETTER KA) + U+3099 (COMBINING KATAKANA-HIRAGANA VOICED
	// SOUND MARK). Written with escapes so that an editor or tool applying NFC
	// normalisation cannot silently compose the pair into U+30AC.
	EXPECT_STREQ(L"U+30AB...", pCodeBase->CodeToHex(L"\u30AB\u3099", sStatusbar).c_str());
}
Loading
Loading