% xchar.tex
% !TeX root = forth.tex
% !TeX spellcheck = en_US
% !TeX program = pdflatex
\chapter{The optional Extended-Character word set} % 18
\wordlist{xchar}
\section{Introduction} % 18.1
\label{xchar:intro}
This word set deals with variable width character encodings. It also
works with fixed width encodings.
Since the standard specifies ASCII encoding for characters, only
ASCII-compatible encodings may be used. Because ASCII compatibility
has so many benefits, most encodings actually are ASCII compatible.
The characters beyond the ASCII encoding are called ``extended
characters'' (xchars).
All words dealing with strings shall handle xchars when the xchar word
set is present. This includes dictionary definitions. White space
parsing does not have to treat code points greater than \$20 as white
space.
\section{Additional terms and notation} % 18.2
\subsection{Definition of Terms} % 18.2.1
\begin{description}
\item[code point:] A member of an extended character set.
\end{description}
\subsection{Parsed-text notation}
Append table \ref{xchar:syntax} to table \ref{table:syntax}.
\begin{table}[ht]
\begin{center}
\caption{Parsed text abbreviations}
\label{xchar:syntax}
\begin{tabular}{ll}
\hline\hline
\emph{Abbreviation} & \emph{Description} \\ \hline
\param{<xchar>} & the delimiting extended character \\
\hline\hline
\end{tabular}
\end{center}
\end{table}
See: \xref{notation:parsed}.
\section{Additional usage requirements} % 18.3
\subsection{Data types} % 18.3.1
Append table \ref{xchar:types} to table \ref{table:datatypes}.
\begin{table}[ht]
\begin{center}
\caption{Data Types}
\label{xchar:types}
\begin{tabular}{llr}
\hline\hline
\emph{Symbol} & \emph{Data type} & \emph{Size on stack} \\ \hline
\emph{pchar} & primitive character & 1 cell \\
\emph{xchar} & extended character & 1 cell \\
\emph{xc-addr} & xchar-aligned address & 1 cell \\
% \emph{xc-addr u} & character-aligned string & 2 cells \\
\hline\hline
\end{tabular}
\end{center}
\end{table}
See: \xref{usage:data}.
\subsubsection{Extended Characters} % 18.3.1.1
An extended character (xchar) is the code point of a character within an
extended character set; on the stack it is a subset of \emph{u}. Extended
characters are stored in memory encoded as one or more primitive characters
(pchars).
%\begin{description}
%\item[xc-addr] is the address of an xchar in memory.
% Alignment requirements are the same as \emph{c-addr}.
%\item[xc-addr u] is a string of xchars in memory, starting at
% \emph{xc-addr}, \emph{u} pchars long.
%\end{description}
\subsection{Environmental queries} % 18.3.2.
Append table \ref{xchar:env} to table \ref{table:env}.
\begin{table}[ht]
\begin{center}
\caption{Environmental Query Strings}
\label{xchar:env}
\begin{tabular}{p{9em}rcp{0.42\textwidth}}
\hline\hline
\multicolumn{2}{l}{String \hfill Value data type} & Constant? & Meaning \\
\hline
\texttt{XCHAR-ENCODING} & \emph{c-addr u} & no &
Returns a printable ASCII string that represents the encoding,
using the preferred MIME name (if any) or the name in the
IANA character-set register\footnotemark[1] (RFC-1700) such
as ``\texttt{ISO-LATIN-1}'' or ``\texttt{UTF-8}'',
with the exception of ``\texttt{ASCII}'', where the alias
``\texttt{ASCII}'' is preferred. \\
\texttt{MAX-XCHAR} & \emph{u} & no &
Maximal value for \emph{xchar} \\
\texttt{XCHAR-MAXMEM} & \emph{u} & no &
Maximal memory consumed by an \emph{xchar} in address units \\
\hline\hline \\[-1ex]
\multicolumn{4}{l}{\footnotemark[1]
\url{http://www.iana.org/assignments/character-sets}} \\
\end{tabular}
\end{center}
\end{table}
See: \xref[3.2.6 Environmental queries]{usage:env}.
\subsection{Common encodings} % 18.3.3
Input and files are often encoded iso--latin--1 or utf--8. The encoding
depends on settings of the computer system such as the LANG environment
variable on Unix. You can use the system consistently only when you do
not change the encoding, or only use the ASCII subset.
The typical practice in environments requiring more than one encoding
is that the base system is ASCII only, and the character set is then
extended to specify the required encoding.
%\subsection{Exception Handling} % 18.3.5
%
%Append table \ref{xchar:throw} to table \ref{table:throw}:
%
%\begin{table}[ht]
% \begin{center}
% \caption{\word[exception]{THROW} code assignments}
% \label{xchar:throw}
% \begin{tabular}{ll}
% \hline\hline
% Code & Reserved for \\\hline
% -77 & Malformed xchar
% \\ \hline\hline
% \end{tabular}
% \end{center}
%\end{table}
\subsection{The Forth text interpreter} % 18.3.4
In section \xref{usage:numbers}{}, \arg{cnum} should be redefined to be:
\begin{center}
\begin{tabular}{ll}
\arg{cnum} & the number is the value of \arg{xchar}
\end{tabular}
\end{center}
\subsection{Input and Output} % 18.3.5
\label{xchar:io}
IO words such as
\word{KEY},
\word{EMIT},
\word{TYPE},
\word[file]{READ-FILE},
\word[file]{READ-LINE},
\word[file]{WRITE-FILE}, and
\word[file]{WRITE-LINE}
operate on \emph{pchars}. Therefore, it is possible that these words
read or write incomplete \emph{xchars}, which are completed in the next
consecutive operation(s). The IO system shall combine these \emph{pchars}
into a complete \emph{xchar} on output, or split an \emph{xchar} into
\emph{pchars} on input, and shall not throw a ``malformed \emph{xchar}''
exception when the combination of these \emph{pchars} forms a valid
\emph{xchar}. \word[xchar]{-TRAILING-GARBAGE} can be used to process
an incomplete \emph{xchar} at the end of such an IO operation.
\word{ACCEPT} as input editor may be aware of \emph{xchars} to
provide comfort like backspace or cursor movement.
\section{Additional documentation requirements} % 18.4
\subsection{System documentation} % 18.4.1
\subsubsection{Implementation-defined options} % 18.4.1.1
\label{xchar:impopt}
Since Unicode input and display pose a number of challenges, such as input
method editors for different languages, left-to-right and right-to-left
writing, and fonts that contain only a subset of Unicode glyphs,
systems should document their capabilities. File IO and in-memory
string handling should work transparently with \emph{xchars}.
\subsubsection{Ambiguous conditions} % 18.4.1.2
\begin{itemize}
\item the data in memory does not encode a valid xchar
(\wref{xchar:X-SIZE}{});
\item the \emph{xchar} value is outside the range of allowed code points of
the current character set used;
\item words improperly used outside
\wref{core:num-start}{<\num} and \wref{core:num-end}{\num>}
(\wref{xchar:XHOLD}{}).
\end{itemize}
\subsubsection{Other system documentation} % 18.4.1.3
\begin{itemize}
\item no additional requirements.
\end{itemize}
\subsection{Program documentation} % 18.4.2
\begin{itemize}
\item no additional requirements.
\end{itemize}
\section{Compliance and labeling} % 18.5
\subsection{Forth-\snapshot{} systems} % 18.5.1
The phrase ``Providing the Extended-Character word set'' shall be
appended to the label of any Standard System that provides all of
the Extended-Character word set.
The phrase ``Providing \emph{name(s)} from the Extended-Character
Extensions word set'' shall be appended to the label of any Standard
System that provides portions of the Extended-Character Extensions
word set.
The phrase ``Providing the Extended-Character Extensions word set''
shall be appended to the label of any Standard System that provides
all of the Extended-Character and Extended-Character Extensions
word sets.
\subsection{Forth-\snapshot{} programs} % 18.5.2
The phrase ``Requiring the Extended-Character word set'' shall be
appended to the label of Standard Programs that require the system
to provide the Extended-Character word set.
The phrase ``Requiring \emph{name(s)} from the Extended-Character
Extensions word set'' shall be appended to the label of Standard Programs
that require the system to provide portions of the Extended-Character
Extensions word set.
The phrase ``Requiring the Extended-Character Extensions word set''
shall be appended to the label of Standard Programs that require the
system to provide all of the Extended-Character and
Extended-Character Extensions word sets.
\section{Glossary} % 18.6
\subsection{Extended-Character words} % 18.6.1
\begin{worddef}{2486}[50]{X-SIZE}[]%[X:xchar]
\item \stack{xc-addr u_1}{u_2}
\param{u_2} is the number of pchars used to encode the first xchar
stored in the string \param{xc-addr u_1}. To calculate the size of
the xchar, only the bytes inside the buffer may be accessed. An
ambiguous condition exists if the xchar is incomplete or malformed.
\begin{implement} % I.18.6.1.2486.5 X-SIZE
\word{:} \word{X-SIZE} \word{p} xc-addr u1 -{}- u2 ) \\
\tab \word{0=} ~~~~~~~~\word{IF} \word{DROP} 0 \word{EXIT} \word{THEN} \\
\tab \word{bs} \textdf{length of UTF-8 char starting at u8-addr (accesses only u8-addr)} \\
\tab \word{C@} \\
\tab \word{DUP} \$80 \word{Uless} \word{IF} \word{DROP} 1 \word{EXIT} \word{THEN} \\
\tab \word{DUP} \$c0 \word{Uless} \word{IF} -77 \word[exception]{THROW} ~~\word{THEN} \\
\tab \word{DUP} \$e0 \word{Uless} \word{IF} \word{DROP} 2 \word{EXIT} \word{THEN} \\
\tab \word{DUP} \$f0 \word{Uless} \word{IF} \word{DROP} 3 \word{EXIT} \word{THEN} \\
\tab \word{DUP} \$f8 \word{Uless} \word{IF} \word{DROP} 4 \word{EXIT} \word{THEN} \\
\tab \word{DUP} \$fc \word{Uless} \word{IF} \word{DROP} 5 \word{EXIT} \word{THEN} \\
\tab \word{DUP} \$fe \word{Uless} \word{IF} \word{DROP} 6 \word{EXIT} \word{THEN} \\
\tab -77 \word[exception]{THROW} \word{;}
\end{implement}
\end{worddef}
\begin{worddef}{2487}[10]{XC!+}[x-c-store-plus]%[X:xchar]
\item \stack{xchar xc-addr_1}{xc-addr_2}
Stores the \param{xchar} at \param{xc-addr_1}. \param{xc-addr_2}
points to the first memory location after the stored \param{xchar}.
\begin{implement} % I.18.6.1.2487.10 XC!+
\word{:} \word{XC!+} \word{p} xchar xc-addr -{}- xc-addr' ) \\
\tab \word{OVER} \$80 \word{Uless} \word{IF} \word{TUCK} \word{C!} \word{CHAR+} \word{EXIT} \word{THEN} \word{bs} \textdf{special case ASCII} \\
\tab \word{toR} 0 \word{SWAP} \$3F \\
\tab \word{BEGIN} \word{2DUP} \word{Umore} \word{WHILE} \\
\tab[2] \word{2/} \word{toR} \word{DUP} \$3F \word{AND} \$80 \word{OR} \word{SWAP} 6 \word{RSHIFT} \word{Rfrom} \\
\tab \word{REPEAT} \$7F \word{XOR} \word{2*} \word{OR} \word{Rfrom} \\
\tab \word{BEGIN} \word{OVER} \$80 \word{Uless} \word{0=} \word{WHILE} \word{TUCK} \word{C!} \word{CHAR+} \word{REPEAT} \word{NIP} \\
\word{;}
\end{implement}
\end{worddef}
\vspace*{-4ex}
\begin{worddef}[XC!+q]{2487}[15]{XC!+?}[x-c-store-plus-query]%[X:xchar]
\item \stack{xchar xc-addr_1 u_1}{xc-addr_2 u_2 flag}
Stores the \param{xchar} into the string buffer specified by
\param{xc-addr_1 u_1}. \param{xc-addr_2 u_2} is the remaining string
buffer. If the \param{xchar} did fit into the buffer, \param{flag}
is true, otherwise \param{flag} is false, and \param{xc-addr_2 u_2}
equal \param{xc-addr_1 u_1}. \word{XC!+q} is safe against buffer overflows.
\begin{implement} % I.18.6.1.2487.15 XC!+?
\word{:} \word{XC!+q} \word{p} xchar xc-addr u -{}- xc-addr' u' flag ) \\
\tab \word{toR} \word{OVER} \word{XC-SIZE} \word{R@} \word{OVER} \word{Uless} \word{IF} \word{p} xchar xc-addr1 len r: u1 ) \\
\tab[2] \word{bs} \textdf{not enough space} \\
\tab[2] \word{DROP} \word{NIP} \word{Rfrom} \word{FALSE} \\
\tab \word{ELSE} \\
\tab[2] \word{toR} \word{XC!+} \word{Rfrom} \word{Rfrom} \word{SWAP} \word{-} \word{TRUE} \\
\tab \word{THEN} \word{;}
\end{implement}
\begin{testing} % T.18.6.1.2487.15 XC!+?
\test{\$ffff \word{PAD} 4 \word{XC!+q}}{\word{PAD} 3 \word{+} 1 <TRUE>}
\end{testing}
\end{worddef}
\vspace*{-2ex}
\begin{worddef}{2487}[20]{XC,}[x-c-comma]%[X:xchar]
\item \stack{xchar}{}
Append the encoding of \param{xchar} to the dictionary.
\see \wref{core:C,}{}.
\begin{implement} % I.18.6.1.2487.20 XC,
\word{:} \word{XC,} \word{p} xchar -{}- ) \word{HERE} \word{XC!+} DP \word{!} \word{;}
\end{implement}
\end{worddef}
\vspace*{-2ex}
\begin{worddef}{2487}[25]{XC-SIZE}[x-c-size]%[X:xchar]
\item \stack{xchar}{u}
\param{u} is the number of pchars used to encode \param{xchar} in memory.
\begin{implement} % I.18.6.1.2487.25 XC-SIZE
\word{:} \word{XC-SIZE} \word{p} xchar -{}- n ) \\
\tab \word{DUP} \$80 \word{Uless} \word{IF} \word{DROP} 1 \word{EXIT} \word{THEN} \word{bs} \textdf{special case ASCII} \\
\tab \$800 2 \word{toR} \\
\tab \word{BEGIN} \word{2DUP} U>= \word{WHILE} 5 \word{LSHIFT} \word{Rfrom} \word{1+} \word{toR} \word{DUP} \word{0=} \word{UNTIL} \word{THEN} \\
\tab \word{2DROP} \word{Rfrom} \\
\word{;}
\end{implement}
\begin{testing} % T.18.6.1.2487.25 XC-SIZE
This test assumes UTF-8 encoding is being used.
\ttfamily
\word{HEX} \\
\test{ 0 \word{XC-SIZE}}{1} \\
\test{ 7f \word{XC-SIZE}}{1} \\
\test{ 80 \word{XC-SIZE}}{2} \\
\test{ 7ff \word{XC-SIZE}}{2} \\
\test{ 800 \word{XC-SIZE}}{3} \\
\test{ ffff \word{XC-SIZE}}{3} \\
\test{ 10000 \word{XC-SIZE}}{4} \\
\test{1fffff \word{XC-SIZE}}{4}
\end{testing}
\end{worddef}
\vspace*{-2ex}
\begin{worddef}{2487}[35]{XC@+}[x-c-fetch-plus]%[X:xchar]
\item \stack{xc-addr_1}{xc-addr_2 xchar}
Fetches the \param{xchar} at \param{xc-addr_1}. \param{xc-addr_2}
points to the first memory location after the retrieved \param{xchar}.
\begin{implement} % I.18.6.1.2487.35 XC@+
\word{:} \word{XC@+} \word{p} xc-addr -{}- xc-addr' u ) \\
\tab \word{COUNT} \word{DUP} \$80 \word{Uless} \word{IF} \word{EXIT} \word{THEN} \word{bs} \textdf{special case ASCII} \\
\tab \$7F \word{AND} \$40 \word{toR} \\
\tab \word{BEGIN} \word{DUP} \word{R@} \word{AND} \word{WHILE} \word{R@} \word{XOR} \\
\tab[2] 6 \word{LSHIFT} \word{Rfrom} 5 \word{LSHIFT} \word{toR} \word{toR} \word{COUNT} \\
\tab[2] \$3F \word{AND} \word{Rfrom} \word{OR} \\
\tab \word{REPEAT} \word{Rfrom} \word{DROP} \\
\word{;}
\end{implement}
% \begin{testing} % T.18.6.1.2487.35 XC@+
% \ttfamily
% \word{HEX} \\
% \word{:} test-string \word{Sq}
% \begin{CJK}{UTF8}{cyberbit}
% 恭喜发财
% \end{CJK}!"
% \word{;} \\
% \word{:} TXC@ \word{XC@+} \word{SWAP} \word{;}
%
% \test{test-string \word{DROP} TXC@ TXC@ TXC@ TXC@ TXC@ \word{DROP} \\}{606D 559C 53D1 8D22 21}
% \end{testing}
\end{worddef}
\vspace*{-2ex}
\begin{worddef}{2487}[40]{XCHAR+}[x-char-plus]%[X:xchar]
\item \stack{xc-addr_1}{xc-addr_2}
Adds the size of the xchar stored at \param{xc-addr_1} to this address,
giving \param{xc-addr_2}.
\see \wref{core:CHAR+}{CHAR+}.
\begin{implement} % I.18.6.1.2487.40 XCHAR+
\word{:} \word{XCHAR+} \word{p} xc-addr -{}- xc-addr' ) \word{XC@+} \word{DROP} \word{;}
\end{implement}
\end{worddef}
\vspace*{-2ex}
\begin{worddef}{2488}[10]{XEMIT}[x-emit]%[X:xchar]
\item \stack{xchar}{}
Prints an \param{xchar} on the terminal.
\see \wref{core:EMIT}.
\begin{implement} % I.18.6.1.2488.10 XEMIT
\word{:} \word{XEMIT} \word{p} xchar -{}- ) \\
\tab \word{DUP} \$80 \word{Uless} \word{IF} \word{EMIT} \word{EXIT} \word{THEN} \word{bs} \textdf{special case ASCII} \\
\tab 0 \word{SWAP} \$3F \\
\tab \word{BEGIN} \word{2DUP} \word{Umore} \word{WHILE} \\
\tab[2] \word{2/} \word{toR} \word{DUP} \$3F \word{AND} \$80 \word{OR} \word{SWAP} 6 \word{RSHIFT} \word{Rfrom} \\
\tab \word{REPEAT} \$7F \word{XOR} \word{2*} \word{OR} \\
\tab \word{BEGIN} \word{DUP} \$80 \word{Uless} \word{0=} \word{WHILE} \word{EMIT} \word{REPEAT} \word{DROP}\\
\word{;}
\end{implement}
\end{worddef}
\vspace*{-2ex}
\enlargethispage{6ex}
\begin{worddef}{2488}[30]{XKEY}[x-key]%[X:xchar]
\item \stack{}{xchar}
Reads an \param{xchar} from the terminal. This will discard all input
events up to the completion of the \param{xchar}.
\see \wref{core:KEY}{}.
\begin{implement} % I.18.6.1.2488.30 XKEY
\word{:} \word{XKEY} \word{p} -{}- xchar ) \\
\tab \word{KEY} \word{DUP} \$80 \word{Uless} \word{IF} \word{EXIT} \word{THEN} \word{bs} \textdf{special case ASCII} \\
\tab \$7F \word{AND} \$40 \word{toR} \\
\tab \word{BEGIN} \word{DUP} \word{R@} \word{AND} \word{WHILE} \word{R@} \word{XOR} \\
\tab[2] 6 \word{LSHIFT} \word{Rfrom} 5 \word{LSHIFT} \word{toR} \word{toR} \word{KEY} \\
\tab[2] \$3F \word{AND} \word{Rfrom} \word{OR} \\
\tab \word{REPEAT} \word{Rfrom} \word{DROP} \word{;}
\end{implement}
\end{worddef}
\vspace*{-6ex}
\begin{worddef}[XKEYq]{2488}[35]{XKEY?}[x-key-query]%[X:xchar]
\item \stack{}{flag}
\param{Flag} is true when it's possible to do \word{XKEY} without
blocking. Subsequent \word[facility]{KEYq}, \word{KEY},
\word[facility]{EKEYq}, and \word[facility]{EKEY} may be affected
by \word{XKEYq}.
\see \wref{facility:KEYq}{}.
\end{worddef}
\subsection{Extended-Character extension words} % 18.6.2
\extended
\begin{worddef}{0145}{+X/STRING}[plus-x-string]%[X:xchar]
\item \stack{xc-addr_1 u_1}{xc-addr_2 u_2}
Step forward by one xchar in the buffer defined by \param{xc-addr_1 u_1}.
\param{xc-addr_2 u_2} is the remaining buffer after stepping over the
first xchar in the buffer.
\begin{implement} % I.18.6.2.0145 +X/STRING
\word{:} \word{+X/STRING} \word{p} xc-addr1 u1 -{}- xc-addr2 u2 ) \\
\tab \word{OVER} \word{DUP} \word{XCHAR+} \word{SWAP} \word{-} \word[string]{/STRING} \word{;}
\end{implement}
\end{worddef}
\vspace*{-2ex}
\begin{worddef}{0175}{-TRAILING-GARBAGE}[minus-trailing-garbage]%[X:xchar]
\item \stack{xc-addr u_1}{xc-addr u_2}
Examine the last xchar in the string \param{xc-addr u_1} --- if the
encoding is correct and it represents a full xchar, \param{u_2} equals
\param{u_1}, otherwise, \param{u_2} represents the string without the
last (garbled) xchar. \word{-TRAILING-GARBAGE} does not change this
garbled xchar.
\begin{implement} % I.18.6.2.0175 -TRAILING-GARBAGE
\word{:} \word{-TRAILING-GARBAGE} \word{p} xc-addr u1 -{}- xc-addr u2 ) \\
\tab \word{2DUP} \word{+} \word{DUP} \word{XCHAR-} \word{p} addr u1 end1 end2 ) \\
\tab \word{2DUP} \word{DUP} \word{OVER} \word{OVER} \word{-} \word{X-SIZE} \word{+} \word{=} \word{IF} \word{bs} \textdf{last xchar ok} \\
\tab[2] \word{2DROP} \\
\tab \word{ELSE} \\
\tab[2] \word{NIP} \word{NIP} \word{OVER} \word{-} \\
\tab \word{THEN} \word{;}
\end{implement}
\end{worddef}
\vspace*{-2ex}
\begin{worddef}{0895}{CHAR}[]%[X:xchar]
\item \stack{"<spaces>name"}{xchar}
Skip leading space delimiters. Parse \param{name} delimited by a space.
Put the value of its first \param{xchar} onto the stack.
\see \wref{core:CHAR}.
\begin{rationale} % A.18.6.2.0895 CHAR
The behavior of the extended version of \word{CHAR} is fully backward
compatible with \wref{core:CHAR}{}.
\end{rationale}
\begin{implement} % I.18.6.2.0895 CHAR
\word{:} \word{CHAR} \word{p} "name" -{}- xchar )
\word{BL} \word{WORD} \word{COUNT} \word{DROP} \word{XC@+} \word{NIP} \word{;}
\end{implement}
\end{worddef}
\vspace*{-2ex}
\begin{worddef}[EKEYtoXCHAR]{1306}[60]{EKEY>XCHAR}[e-key-to-x-char]%[X:xchar]
\item \stack{x}{xchar true | x false}
If the keyboard event \param{x} corresponds to an xchar, return the
\param{xchar} and \param{true}. Otherwise, return \param{x} and
\param{false}.
\see \wref{facility:EKEY}{},
\wref{facility:EKEYtoCHAR}{},
\wref{facility:EKEYtoFKEY}{}.
\end{worddef}
\vspace*{-2ex}
\enlargethispage{6ex}
\begin{worddef}{2008}{PARSE}[]%[X:xchar]
\item \stack{xchar "ccc<xchar>"}{c-addr u}
Parse \param{ccc} in the input stream delimited by \param{xchar}.
\param{c-addr} is the address (within the input buffer) and \param{u}
is the length of the parsed string. If the parse area was empty, the
resulting string has a zero length.
\see \xref{usage:parsing}{},
\wref{core:PARSE}{},
\rref{core:PARSE}{}.
\end{worddef}
\vspace*{-6ex}
\begin{worddef}{2486}[70]{X-WIDTH}[]%[X:xchar]
\item \stack{xc-addr u}{n}
\param{n} is the number of monospace ASCII characters that take the same
space to display as the xchar string \param{xc-addr u}; assuming a
monospaced display font, i.e., xchar width is always an integer multiple
of the width of an ASCII character.
\begin{implement} % I.18.6.2.2486.70 X-WIDTH
\word{:} \word{X-WIDTH} \word{p} xc-addr u -{}- n ) \\
\tab 0 \word{ROT} \word{ROT} \word{OVER} \word{+} \word{SWAP} \word{qDO} \\
\tab[2] \word{I} \word{XC@+} \word{SWAP} \word{toR} \word{XC-WIDTH} \word{+} \\
\tab \word{Rfrom} \word{I} \word{-} \word{+LOOP} \word{;}
\end{implement}
\end{worddef}
\vspace*{-2ex}
\begin{worddef}{2487}[30]{XC-WIDTH}[x-c-width]%[X:xchar]
\item \stack{xchar}{n}
\param{n} is the number of monospace ASCII characters that take the same
space to display as the \param{xchar}; i.e., \param{xchar} width is always
an integer multiple of the width of an ASCII char.
\begin{implement} % I.18.6.2.2487.30 XC-WIDTH
\word{:} wc, \word{p} n low high -{}- ) \word{1+} \word{,} \word{,} \word{,} \word{;}
\word{CREATE} wc-table \word{bs} \textdf{derived from wcwidth source code, for UCS32} \\
0 ~0300 ~0357 wc,\tab[2]
0 ~035D ~036F wc,\tab[2]
0 ~0483 ~0486 wc,\\
0 ~0488 ~0489 wc,\tab[2]
0 ~0591 ~05A1 wc,\tab[2]
0 ~05A3 ~05B9 wc,\\
0 ~05BB ~05BD wc,\tab[2]
0 ~05BF ~05BF wc,\tab[2]
0 ~05C1 ~05C2 wc,\\
0 ~05C4 ~05C4 wc,\tab[2]
0 ~0600 ~0603 wc,\tab[2]
0 ~0610 ~0615 wc,\\
0 ~064B ~0658 wc,\tab[2]
0 ~0670 ~0670 wc,\tab[2]
0 ~06D6 ~06E4 wc,\\
0 ~06E7 ~06E8 wc,\tab[2]
0 ~06EA ~06ED wc,\tab[2]
0 ~070F ~070F wc,\\
0 ~0711 ~0711 wc,\tab[2]
0 ~0730 ~074A wc,\tab[2]
0 ~07A6 ~07B0 wc,\\
0 ~0901 ~0902 wc,\tab[2]
0 ~093C ~093C wc,\tab[2]
0 ~0941 ~0948 wc,\\
0 ~094D ~094D wc,\tab[2]
0 ~0951 ~0954 wc,\tab[2]
0 ~0962 ~0963 wc,\\
0 ~0981 ~0981 wc,\tab[2]
0 ~09BC ~09BC wc,\tab[2]
0 ~09C1 ~09C4 wc,\\
0 ~09CD ~09CD wc,\tab[2]
0 ~09E2 ~09E3 wc,\tab[2]
0 ~0A01 ~0A02 wc,\\
0 ~0A3C ~0A3C wc,\tab[2]
0 ~0A41 ~0A42 wc,\tab[2]
0 ~0A47 ~0A48 wc,\\
0 ~0A4B ~0A4D wc,\tab[2]
0 ~0A70 ~0A71 wc,\tab[2]
0 ~0A81 ~0A82 wc,\\
0 ~0ABC ~0ABC wc,\tab[2]
0 ~0AC1 ~0AC5 wc,\tab[2]
0 ~0AC7 ~0AC8 wc,\\
0 ~0ACD ~0ACD wc,\tab[2]
0 ~0AE2 ~0AE3 wc,\tab[2]
0 ~0B01 ~0B01 wc,\\
0 ~0B3C ~0B3C wc,\tab[2]
0 ~0B3F ~0B3F wc,\tab[2]
0 ~0B41 ~0B43 wc,\\
0 ~0B4D ~0B4D wc,\tab[2]
0 ~0B56 ~0B56 wc,\tab[2]
0 ~0B82 ~0B82 wc,\\
0 ~0BC0 ~0BC0 wc,\tab[2]
0 ~0BCD ~0BCD wc,\tab[2]
0 ~0C3E ~0C40 wc,\\
0 ~0C46 ~0C48 wc,\tab[2]
0 ~0C4A ~0C4D wc,\tab[2]
0 ~0C55 ~0C56 wc,\\
0 ~0CBC ~0CBC wc,\tab[2]
0 ~0CBF ~0CBF wc,\tab[2]
0 ~0CC6 ~0CC6 wc,\\
0 ~0CCC ~0CCD wc,\tab[2]
0 ~0D41 ~0D43 wc,\tab[2]
0 ~0D4D ~0D4D wc,\\
0 ~0DCA ~0DCA wc,\tab[2]
0 ~0DD2 ~0DD4 wc,\tab[2]
0 ~0DD6 ~0DD6 wc,\\
0 ~0E31 ~0E31 wc,\tab[2]
0 ~0E34 ~0E3A wc,\tab[2]
0 ~0E47 ~0E4E wc,\\
0 ~0EB1 ~0EB1 wc,\tab[2]
0 ~0EB4 ~0EB9 wc,\tab[2]
0 ~0EBB ~0EBC wc,\\
0 ~0EC8 ~0ECD wc,\tab[2]
0 ~0F18 ~0F19 wc,\tab[2]
0 ~0F35 ~0F35 wc,\\
0 ~0F37 ~0F37 wc,\tab[2]
0 ~0F39 ~0F39 wc,\tab[2]
0 ~0F71 ~0F7E wc,\\
0 ~0F80 ~0F84 wc,\tab[2]
0 ~0F86 ~0F87 wc,\tab[2]
0 ~0F90 ~0F97 wc,\\
0 ~0F99 ~0FBC wc,\tab[2]
0 ~0FC6 ~0FC6 wc,\tab[2]
0 ~102D ~1030 wc,\\
0 ~1032 ~1032 wc,\tab[2]
0 ~1036 ~1037 wc,\tab[2]
0 ~1039 ~1039 wc,\\
0 ~1058 ~1059 wc,\tab[2]
1 ~0000 ~1100 wc,\tab[2]
2 ~1100 ~115f wc,\\
0 ~1160 ~11FF wc,\tab[2]
0 ~1712 ~1714 wc,\tab[2]
0 ~1732 ~1734 wc,\\
0 ~1752 ~1753 wc,\tab[2]
0 ~1772 ~1773 wc,\tab[2]
0 ~17B4 ~17B5 wc,\\
0 ~17B7 ~17BD wc,\tab[2]
0 ~17C6 ~17C6 wc,\tab[2]
0 ~17C9 ~17D3 wc,\\
0 ~17DD ~17DD wc,\tab[2]
0 ~180B ~180D wc,\tab[2]
0 ~18A9 ~18A9 wc,\\
0 ~1920 ~1922 wc,\tab[2]
0 ~1927 ~1928 wc,\tab[2]
0 ~1932 ~1932 wc,\\
0 ~1939 ~193B wc,\tab[2]
0 ~200B ~200F wc,\tab[2]
0 ~202A ~202E wc,\\
0 ~2060 ~2063 wc,\tab[2]
0 ~206A ~206F wc,\tab[2]
0 ~20D0 ~20EA wc,\\
2 ~2329 ~232A wc,\tab[2]
0 ~302A ~302F wc,\tab[2]
2 ~2E80 ~303E wc,\\
0 ~3099 ~309A wc,\tab[2]
2 ~3040 ~A4CF wc,\tab[2]
2 ~AC00 ~D7A3 wc,\\
2 ~F900 ~FAFF wc,\tab[2]
0 ~FB1E ~FB1E wc,\tab[2]
0 ~FE00 ~FE0F wc,\\
0 ~FE20 ~FE23 wc,\tab[2]
2 ~FE30 ~FE6F wc,\tab[2]
0 ~FEFF ~FEFF wc,\\
2 ~FF00 ~FF60 wc,\tab[2]
2 ~FFE0 ~FFE6 wc,\tab[2]
0 ~FFF9 ~FFFB wc,\\
0 1D167 1D169 wc,\tab[2]
0 1D173 1D182 wc,\tab[2]
0 1D185 1D18B wc,\\
0 1D1AA 1D1AD wc,\tab[2]
2 20000 2FFFD wc,\tab[2]
2 30000 3FFFD wc,\\
0 E0001 E0001 wc,\tab[2]
0 E0020 E007F wc,\tab[2]
0 E0100 E01EF wc,\\
\word{HERE} wc-table \word{-} \word{CONSTANT} \#wc-table
\word{bs} \textdf{inefficient table walk:}
\word{:} \word{XC-WIDTH} \word{p} xchar -{}- n ) \\
\tab wc-table \#wc-table \word{OVER} \word{+} \word{SWAP} \word{qDO} \\
\tab[2] \word{DUP} \word{I} \word{2@} \word{WITHIN} \word{IF}
\word{DROP} \word{I} 2 \word{CELLS} \word{+} \word{@} \word{UNLOOP} \word{EXIT} \word{THEN} \\
\tab 3 \word{CELLS} \word{+LOOP} \word{DROP} 1 \word{;}
\end{implement}
\begin{testing} % T.18.6.2.2487.30 XC-WIDTH
\test{\$606D \word{XC-WIDTH}}{2} \\
\test{ \$41 \word{XC-WIDTH}}{1} \\
\test{\$2060 \word{XC-WIDTH}}{0}
\end{testing}
\end{worddef}
\vspace*{-2ex}
\begin{worddef}{2487}[45]{XCHAR-}[x-char-minus]%[X:xchar]
\item \stack{xc-addr_1}{xc-addr_2}
Goes backward from \param{xc-addr_1} until it finds an xchar so that the
size of this xchar added to \param{xc-addr_2} gives \param{xc-addr_1}.
There is an ambiguous condition when the encoding doesn't permit reliable
backward stepping through the text.
\begin{implement} % I.18.6.2.2487.45 XCHAR-
\word{:} \word{XCHAR-} \word{p} xc-addr -{}- xc-addr' ) \\
\tab \word{BEGIN} 1 \word{CHARS} \word{-} \word{DUP} \word{C@} \$C0 \word{AND} \$80 \word{ne} \word{UNTIL} \word{;}
\end{implement}
\end{worddef}
\vspace*{-2ex}
\begin{worddef}{2488}[20]{XHOLD}[x-hold]%[X:xchar]
\item \stack{xchar}{}
Adds \param{xchar} to the picture numeric output string. An ambiguous
condition exists if \word{XHOLD} executes outside of a \word{num-start}
\word{num-end} delimited number conversion.
\see \wref{core:HOLD}{}.
\begin{implement} % I.18.6.2.2488.20 XHOLD
\word{CREATE} xholdbuf 8 \word{ALLOT}
\word{:} \word{XHOLD} \word{p} xchar -{}- )
xholdbuf \word{TUCK} \word{XC!+} \word{OVER} \word{-} \word{HOLDS} \word{;}
\end{implement}
\end{worddef}
\vspace*{-2ex}
\begin{worddef}[XSTRING-]{2495}{X\bs{}STRING-}[x-string-minus]%[X:xchar]
\item \stack{xc-addr u_1}{xc-addr u_2}
Search for the penultimate xchar in the string \param{xc-addr u_1}.
The string \param{xc-addr u_2} contains all xchars of
\param{xc-addr u_1}, but the last. Unlike \word{XCHAR-},
\word{XSTRING-} can be implemented in encodings where xchar
boundaries can only be reliably detected when scanning in the forward
direction.
\begin{implement} % I.18.6.2.2495 X\STRING-
\word{:} \word{XSTRING-} \word{p} xc-addr u -{}- xc-addr u' ) \\
\tab \word{OVER} \word{+} \word{XCHAR-} \word{OVER} \word{-} \word{;}
\end{implement}
\end{worddef}
\vspace*{-3ex}
\enlargethispage{6ex}
\begin{worddef}{2520}{[CHAR]}[bracket-char]%[X:xchar]
\interpret
Interpretation semantics for this word are undefined.
\compile \stack{"<spaces>name"}{}
Skip leading space delimiters. Parse \param{name} delimited by a space.
Append the run-time semantics given below to the current definition.
\runtime \stack{}{xchar}
Place \param{xchar}, the value of the first xchar of \param{name}, on
the stack.
\see \wref{core:[CHAR]}.
\begin{implement} % I.18.6.2.2520 [CHAR]
\word{:} \word{[CHAR]} \word{p} "name" -{}- rt:xchar ) \\
\tab \word{CHAR} \word{POSTPONE} \word{LITERAL} \word{;} \word{IMMEDIATE}
\end{implement}
\end{worddef}
\endinput