-
-
Notifications
You must be signed in to change notification settings - Fork 310
/
encoding.go
140 lines (126 loc) · 5.07 KB
/
encoding.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
// Copyright 2022 The TCell Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use file except in compliance with the License.
// You may obtain a copy of the license at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tcell
import (
"strings"
"sync"
"golang.org/x/text/encoding"
gencoding "github.com/gdamore/encoding"
)
var encodings map[string]encoding.Encoding
var encodingLk sync.Mutex
var encodingFallback EncodingFallback = EncodingFallbackFail
// RegisterEncoding may be called by the application to register an encoding.
// The presence of additional encodings will facilitate application usage with
// terminal environments where the I/O subsystem does not support Unicode.
//
// Windows systems use Unicode natively, and do not need any of the encoding
// subsystem when using Windows Console screens.
//
// Please see the Go documentation for golang.org/x/text/encoding -- most of
// the common ones exist already as stock variables. For example, ISO8859-15
// can be registered using the following code:
//
// import "golang.org/x/text/encoding/charmap"
//
// ...
// RegisterEncoding("ISO8859-15", charmap.ISO8859_15)
//
// Aliases can be registered as well, for example "8859-15" could be an alias
// for "ISO8859-15".
//
// For POSIX systems, this package will check the environment variables
// LC_ALL, LC_CTYPE, and LANG (in that order) to determine the character set.
// These are expected to have the following pattern:
//
// $language[.$codeset[@$variant]
//
// We extract only the $codeset part, which will usually be something like
// UTF-8 or ISO8859-15 or KOI8-R. Note that if the locale is either "POSIX"
// or "C", then we assume US-ASCII (the POSIX 'portable character set'
// and assume all other characters are somehow invalid.)
//
// Modern POSIX systems and terminal emulators may use UTF-8, and for those
// systems, this API is also unnecessary. For example, Darwin (MacOS X) and
// modern Linux running modern xterm generally will out of the box without
// any of this. Use of UTF-8 is recommended when possible, as it saves
// quite a lot processing overhead.
//
// Note that some encodings are quite large (for example GB18030 which is a
// superset of Unicode) and so the application size can be expected to
// increase quite a bit as each encoding is added.
// The East Asian encodings have been seen to add 100-200K per encoding to the
// size of the resulting binary.
func RegisterEncoding(charset string, enc encoding.Encoding) {
encodingLk.Lock()
charset = strings.ToLower(charset)
encodings[charset] = enc
encodingLk.Unlock()
}
// EncodingFallback describes how the system behaves when the locale
// requires a character set that we do not support. The system always
// supports UTF-8 and US-ASCII. On Windows consoles, UTF-16LE is also
// supported automatically. Other character sets must be added using the
// RegisterEncoding API. (A large group of nearly all of them can be
// added using the RegisterAll function in the encoding sub package.)
type EncodingFallback int
const (
// EncodingFallbackFail behavior causes GetEncoding to fail
// when it cannot find an encoding.
EncodingFallbackFail = iota
// EncodingFallbackASCII behavior causes GetEncoding to fall back
// to a 7-bit ASCII encoding, if no other encoding can be found.
EncodingFallbackASCII
// EncodingFallbackUTF8 behavior causes GetEncoding to assume
// UTF8 can pass unmodified upon failure. Note that this behavior
// is not recommended, unless you are sure your terminal can cope
// with real UTF8 sequences.
EncodingFallbackUTF8
)
// SetEncodingFallback changes the behavior of GetEncoding when a suitable
// encoding is not found. The default is EncodingFallbackFail, which
// causes GetEncoding to simply return nil.
func SetEncodingFallback(fb EncodingFallback) {
encodingLk.Lock()
encodingFallback = fb
encodingLk.Unlock()
}
// GetEncoding is used by Screen implementors who want to locate an encoding
// for the given character set name. Note that this will return nil for
// either the Unicode (UTF-8) or ASCII encodings, since we don't use
// encodings for them but instead have our own native methods.
func GetEncoding(charset string) encoding.Encoding {
charset = strings.ToLower(charset)
encodingLk.Lock()
defer encodingLk.Unlock()
if enc, ok := encodings[charset]; ok {
return enc
}
switch encodingFallback {
case EncodingFallbackASCII:
return gencoding.ASCII
case EncodingFallbackUTF8:
return encoding.Nop
}
return nil
}
func init() {
// We always support UTF-8 and ASCII.
encodings = make(map[string]encoding.Encoding)
encodings["utf-8"] = gencoding.UTF8
encodings["utf8"] = gencoding.UTF8
encodings["us-ascii"] = gencoding.ASCII
encodings["ascii"] = gencoding.ASCII
encodings["iso646"] = gencoding.ASCII
}