Skip to content

Commit

Permalink
Merge pull request #25 from tdakkota/fix/keep-graphic-unicode
Browse files Browse the repository at this point in the history
fix: do not escape some unicode characters
  • Loading branch information
tdakkota authored Aug 16, 2022
2 parents 130bc38 + 61e8612 commit a5ca84f
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 4 deletions.
15 changes: 15 additions & 0 deletions encode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,8 @@ var marshalTests = []struct {
},

// Encode unicode as utf-8 rather than in escaped form.
//
// See https://github.com/go-yaml/yaml/issues/737.
{
map[string]string{"a": "你好"},
"a: 你好\n",
Expand All @@ -491,6 +493,19 @@ var marshalTests = []struct {
"你好",
"你好\n",
},
{
map[string]string{"a": "🛑"},
"a: 🛑\n",
},
// Notice that the result is not escaped.
{
map[string]string{"a": "\U0001f3f3\ufe0f\u200d\U0001f308"},
"a: " + "\U0001f3f3\ufe0f\u200d\U0001f308" + "\n",
},
{"\U0001f3f3\ufe0f\u200d\U0001f308", "\U0001f3f3\ufe0f\u200d\U0001f308\n"},
{"\U0001f439", "\U0001f439\n"},
{"\U0001f1fa\U0001f1f8", "\U0001f1fa\U0001f1f8\n"},
{"\U0001f474\U0001f3ff", "\U0001f474\U0001f3ff\n"},

// Support encoding.TextMarshaler.
{
Expand Down
15 changes: 11 additions & 4 deletions yamlprivateh.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@

package yaml

import (
"unicode"
"unicode/utf8"
)

const (
// The size of the input raw buffer.
input_raw_buffer_size = 512
Expand Down Expand Up @@ -83,15 +88,17 @@ func is_ascii(b []byte, i int) bool {

// Check if the character at the start of the buffer can be printed unescaped.
//
// b is the buffer and i is the offset of the first byte of the character to
// inspect. The bytes starting at i are decoded as one UTF-8 sequence; an empty
// remainder or an invalid/truncated sequence is reported as not printable.
func is_printable(b []byte, i int) bool {
	r, size := utf8.DecodeRune(b[i:])
	// DecodeRune reports failure as (RuneError, 0) for empty input or
	// (RuneError, 1) for an invalid encoding. A well-formed U+FFFD also decodes
	// to RuneError, but with size 3, and must not be treated as an error.
	if r == utf8.RuneError && size <= 1 {
		return false
	}

	// Reading b[i+1] below is safe: a lead byte that is not followed by a
	// continuation byte has already been rejected as invalid above.
	return b[i] == 0x0A || // . == #x0A
		(b[i] >= 0x20 && b[i] <= 0x7E) || // #x20 <= . <= #x7E
		(b[i] == 0xC2 && b[i+1] >= 0xA0) || // #xA0 <= . <= #xD7FF
		(b[i] > 0xC2 && b[i] < 0xED) ||
		(b[i] == 0xED && b[i+1] < 0xA0) ||
		// Anything else (e.g. 4-byte sequences such as emoji) is printable
		// only if Unicode classifies it as a graphic character.
		unicode.IsGraphic(r)
}

// Check if the character at the specified position is NUL.
Expand Down

0 comments on commit a5ca84f

Please sign in to comment.