// is_printable reports whether the character that starts at b[i] can be
// printed unescaped.
//
// The bytes at b[i:] are decoded as a single UTF-8 sequence. An empty
// remainder or a malformed sequence is never printable. A decoded character
// is printable when it is one of
//
//	#x0A | [#x20-#x7E] | [#xA0-#xD7FF] | [#xE000-#xFFFD] except #xFEFF
//
// or, for anything outside those ranges (notably the supplementary planes,
// where emoji live), when unicode.IsGraphic accepts it.
//
// Like any slice expression, b[i:] panics if i > len(b).
func is_printable(b []byte, i int) bool {
	r, size := utf8.DecodeRune(b[i:])
	// DecodeRune yields RuneError both for bad input (size 0 or 1) and for a
	// correctly encoded U+FFFD (size 3). Only the former must be rejected;
	// U+FFFD itself lies inside the printable range below.
	if r == utf8.RuneError && size <= 1 {
		return false
	}

	switch {
	case r == 0x0A: // . == #x0A
		return true
	case 0x20 <= r && r <= 0x7E: // #x20 <= . <= #x7E
		return true
	case 0xA0 <= r && r <= 0xD7FF: // #xA0 <= . <= #xD7FF
		return true
	case 0xE000 <= r && r <= 0xFFFD: // #xE000 <= . <= #xFFFD
		return r != 0xFEFF // && . != #xFEFF (byte order mark)
	}

	// Remaining candidates are control characters, #xFFFE/#xFFFF and the
	// supplementary planes; let the Unicode tables decide.
	return unicode.IsGraphic(r)
}