From 74f5f3e7f4b8cd28a1addbc6e51e0f1407b0d131 Mon Sep 17 00:00:00 2001 From: Gabriel Omar Cotelli Date: Wed, 19 Jun 2024 18:14:35 -0300 Subject: [PATCH] Move LanguageTag and LanguageRange from Hyperspace project Add String>>#escaped and String>>#unescaped to handle character control and Unicode escape sequences --- source/BaselineOfBuoy/BaselineOfBuoy.class.st | 17 ++ .../String.extension.st | 43 ++++ .../Buoy-Localization-Extensions/package.st | 1 + .../LanguageRangeTest.class.st | 94 +++++++++ .../LanguageTagTest.class.st | 125 ++++++++++++ .../StringEscapingRuleTest.class.st | 23 +++ .../StringLocalizationExtensionsTest.class.st | 176 ++++++++++++++++ source/Buoy-Localization-Tests/package.st | 1 + .../ControlCharactersEscapingRule.class.st | 81 ++++++++ .../Buoy-Localization/LanguageRange.class.st | 110 ++++++++++ source/Buoy-Localization/LanguageTag.class.st | 189 ++++++++++++++++++ .../ReverseSolidusEscapingRule.class.st | 47 +++++ .../StringEscapingRule.class.st | 89 +++++++++ .../UnicodeCharacterEscapingRule.class.st | 102 ++++++++++ source/Buoy-Localization/package.st | 1 + 15 files changed, 1099 insertions(+) create mode 100644 source/Buoy-Localization-Extensions/String.extension.st create mode 100644 source/Buoy-Localization-Extensions/package.st create mode 100644 source/Buoy-Localization-Tests/LanguageRangeTest.class.st create mode 100644 source/Buoy-Localization-Tests/LanguageTagTest.class.st create mode 100644 source/Buoy-Localization-Tests/StringEscapingRuleTest.class.st create mode 100644 source/Buoy-Localization-Tests/StringLocalizationExtensionsTest.class.st create mode 100644 source/Buoy-Localization-Tests/package.st create mode 100644 source/Buoy-Localization/ControlCharactersEscapingRule.class.st create mode 100644 source/Buoy-Localization/LanguageRange.class.st create mode 100644 source/Buoy-Localization/LanguageTag.class.st create mode 100644 source/Buoy-Localization/ReverseSolidusEscapingRule.class.st create mode 100644 
source/Buoy-Localization/StringEscapingRule.class.st create mode 100644 source/Buoy-Localization/UnicodeCharacterEscapingRule.class.st create mode 100644 source/Buoy-Localization/package.st diff --git a/source/BaselineOfBuoy/BaselineOfBuoy.class.st b/source/BaselineOfBuoy/BaselineOfBuoy.class.st index b3ee84e..16830f8 100644 --- a/source/BaselineOfBuoy/BaselineOfBuoy.class.st +++ b/source/BaselineOfBuoy/BaselineOfBuoy.class.st @@ -22,6 +22,7 @@ BaselineOfBuoy >> baseline: spec [ baselineComparison: spec; baselineDynamicBinding: spec; baselineExceptionHandling: spec; + baselineLocalization: spec; baselineMath: spec; baselineMetaprogramming: spec; baselineSUnit: spec; @@ -175,6 +176,22 @@ BaselineOfBuoy >> baselineGS64Development: spec [ group: 'GS64-Development' with: 'Buoy-Chronology-GS64-Extensions' ] +{ #category : 'baselines' } +BaselineOfBuoy >> baselineLocalization: spec [ + + spec + package: 'Buoy-Localization' + with: [ + spec requires: + #( 'Buoy-Assertions' 'Buoy-Dynamic-Binding' 'Buoy-Metaprogramming-Extensions' ) ]; + group: 'Deployment' with: 'Buoy-Localization'; + package: 'Buoy-Localization-Extensions' with: [ spec requires: 'Buoy-Localization' ]; + group: 'Deployment' with: 'Buoy-Localization-Extensions'; + package: 'Buoy-Localization-Tests' + with: [ spec requires: #( 'Buoy-Localization-Extensions' 'Dependent-SUnit-Extensions' ) ]; + group: 'Tests' with: 'Buoy-Localization-Tests' +] + { #category : 'baselines' } BaselineOfBuoy >> baselineMath: spec [ diff --git a/source/Buoy-Localization-Extensions/String.extension.st b/source/Buoy-Localization-Extensions/String.extension.st new file mode 100644 index 0000000..59daa50 --- /dev/null +++ b/source/Buoy-Localization-Extensions/String.extension.st @@ -0,0 +1,43 @@ +Extension { #name : 'String' } + +{ #category : '*Buoy-Localization-Extensions' } +String >> asLanguageRange [ + + ^ LanguageRange fromString: self +] + +{ #category : '*Buoy-Localization-Extensions' } +String >> asLanguageTag [ + + ^ 
LanguageTag fromString: self +] + +{ #category : '*Buoy-Localization-Extensions' } +String >> escaped [ + + ^ self species new: self size streamContents: [ :result | + | stream | + stream := self readStream. + [ stream atEnd ] whileFalse: [ StringEscapingRule escape: stream next on: result ] + ] +] + +{ #category : '*Buoy-Localization-Extensions' } +String >> unescaped [ + + ^ self species new: self size streamContents: [ :result | + | stream | + stream := self readStream. + [ stream atEnd ] whileFalse: [ + | currentChar | + currentChar := stream next. + currentChar == $\ + ifTrue: [ + stream atEnd + ifTrue: [ AssertionFailed signal: 'Missing escape sequence' ] + ifFalse: [ StringEscapingRule unescape: stream next from: stream on: result ] + ] + ifFalse: [ result nextPut: currentChar ] + ] + ] +] diff --git a/source/Buoy-Localization-Extensions/package.st b/source/Buoy-Localization-Extensions/package.st new file mode 100644 index 0000000..217b1a5 --- /dev/null +++ b/source/Buoy-Localization-Extensions/package.st @@ -0,0 +1 @@ +Package { #name : 'Buoy-Localization-Extensions' } diff --git a/source/Buoy-Localization-Tests/LanguageRangeTest.class.st b/source/Buoy-Localization-Tests/LanguageRangeTest.class.st new file mode 100644 index 0000000..c1c3f87 --- /dev/null +++ b/source/Buoy-Localization-Tests/LanguageRangeTest.class.st @@ -0,0 +1,94 @@ +" +A LanguageRangeTest is a test class for testing the behavior of LanguageRange +" +Class { + #name : 'LanguageRangeTest', + #superclass : 'TestCase', + #category : 'Buoy-Localization-Tests', + #package : 'Buoy-Localization-Tests' +} + +{ #category : 'tests' } +LanguageRangeTest >> testAny [ + + self + assert: LanguageRange any printString equals: '*'; + assert: LanguageRange any equals: LanguageRange any; + assert: LanguageRange any hash equals: LanguageRange any hash; + assert: LanguageRange any subtags equals: #('*') +] + +{ #category : 'tests' } +LanguageRangeTest >> testAnyMatches [ + + | range | + + range := 
LanguageRange any. + self + assert: ( range matches: 'en-Latn-US' asLanguageTag ); + assert: ( range matches: 'en' asLanguageTag ); + assert: ( range matches: 'es-ar' asLanguageTag ) +] + +{ #category : 'tests' } +LanguageRangeTest >> testAsLanguageRange [ + + self + assert: '*' asLanguageRange equals: LanguageRange any; + assert: LanguageRange any asLanguageRange equals: LanguageRange any; + assert: 'es-AR' asLanguageRange equals: ( LanguageRange from: #( 'es' 'AR' ) ) asLanguageRange +] + +{ #category : 'tests' } +LanguageRangeTest >> testComparison [ + + | range | + + range := LanguageRange from: #('en' 'US'). + self + assert: range equals: ( LanguageRange fromString: 'en-us' ); + assert: range hash equals: ( LanguageRange fromString: 'en-us' ) hash; + deny: range equals: LanguageRange any; + deny: range equals: ( LanguageRange from: #('en') ) +] + +{ #category : 'tests' } +LanguageRangeTest >> testCreation [ + + | range | + + range := LanguageRange from: #('en' 'Latn' 'us'). + self + assert: range subtags equals: #('en' 'Latn' 'US'); + assert: range printString equals: 'en-Latn-US' +] + +{ #category : 'tests' } +LanguageRangeTest >> testMatches [ + + | range | + + range := LanguageRange from: #('en' 'US'). + self + deny: ( range matches: 'en-Latn-US' asLanguageTag ); + assert: ( range matches: 'en-US' asLanguageTag ); + deny: ( range matches: 'en' asLanguageTag ); + assert: ( range matches: 'en-us-x-x-x' asLanguageTag ). + + range := LanguageRange from: #('en'). 
+ self + assert: ( range matches: 'en-Latn-US' asLanguageTag ); + assert: ( range matches: 'en-US' asLanguageTag ); + assert: ( range matches: 'en' asLanguageTag ); + assert: ( range matches: 'en-us-x-x-x' asLanguageTag ) +] + +{ #category : 'tests' } +LanguageRangeTest >> testSpecificity [ + + self + assert: '*' asLanguageRange specificity equals: 0; + assert: 'es' asLanguageRange specificity equals: 1; + assert: 'es-AR' asLanguageRange specificity equals: 2; + assert: 'hy-Latn-IT-arevela' asLanguageRange specificity equals: 4 +] diff --git a/source/Buoy-Localization-Tests/LanguageTagTest.class.st b/source/Buoy-Localization-Tests/LanguageTagTest.class.st new file mode 100644 index 0000000..066de0f --- /dev/null +++ b/source/Buoy-Localization-Tests/LanguageTagTest.class.st @@ -0,0 +1,125 @@ +" +A LanguageTagTest is a test class for testing the behavior of LanguageTag +" +Class { + #name : 'LanguageTagTest', + #superclass : 'TestCase', + #category : 'Buoy-Localization-Tests', + #package : 'Buoy-Localization-Tests' +} + +{ #category : 'tests' } +LanguageTagTest >> testCantCreateWhenLanguageCodeIsInvalid [ + + self + should: [ LanguageTag fromString: 'e' ] + raise: InstanceCreationFailed + withMessageText: 'ISO 639 language codes must be 2 or 3 letters.'; + should: [ LanguageTag fromString: 'e2' ] + raise: InstanceCreationFailed + withMessageText: 'ISO 639 language codes must consist only of letters.' +] + +{ #category : 'tests' } +LanguageTagTest >> testCantCreateWhenRegionIsInvalid [ + + self + should: [ LanguageTag fromString: 'en-A3' ] + raise: InstanceCreationFailed + withMessageText: 'Supported ISO 3166-1 codes must be 2 letters.' +] + +{ #category : 'tests' } +LanguageTagTest >> testCantCreateWhenScriptIsInvalid [ + + self + should: [ LanguageTag fromString: 'en-L123' ] + raise: InstanceCreationFailed + withMessageText: 'ISO 15924 script codes must be 4 letters.' 
+] + +{ #category : 'tests' } +LanguageTagTest >> testCreation [ + + | tag | + + tag := LanguageTag fromString: 'en-Latn-US'. + self assert: tag languageCode equals: 'en'. + tag + withScriptDo: [ :script | self assert: script equals: 'Latn' ]; + withRegionDo: [ :region | self assert: region equals: 'US' ]. + + self assert: tag subtags equals: #('en' 'Latn' 'US') +] + +{ #category : 'tests' } +LanguageTagTest >> testCreationWithExtensions [ + + | tag | + + tag := LanguageTag fromString: 'en-Latn-GB-boont-r-extended-sequence-x-private'. + self assert: tag languageCode equals: 'en'. + tag + withScriptDo: [ :script | self assert: script equals: 'Latn' ]; + withRegionDo: [ :region | self assert: region equals: 'GB' ]. + + self + assert: tag subtags + equals: #('en' 'Latn' 'GB' 'boont' 'r' 'extended' 'sequence' 'x' 'private') +] + +{ #category : 'tests' } +LanguageTagTest >> testCreationWithHorribleCase [ + + | tag | + + tag := LanguageTag fromString: 'Es-lAtN-aR'. + self assert: tag languageCode equals: 'es'. + tag + withScriptDo: [ :script | self assert: script equals: 'Latn' ]; + withRegionDo: [ :region | self assert: region equals: 'AR' ]. + + self assert: tag subtags equals: #('es' 'Latn' 'AR') +] + +{ #category : 'tests' } +LanguageTagTest >> testCreationWithOnlyLanguage [ + + | tag | + + tag := LanguageTag fromString: 'en'. + self assert: tag languageCode equals: 'en'. + tag + withScriptDo: [ :script | self fail ]; + withRegionDo: [ :region | self fail ]. + + self assert: tag subtags equals: #('en') +] + +{ #category : 'tests' } +LanguageTagTest >> testCreationWithRegionButNotScript [ + + | tag | + + tag := LanguageTag fromString: 'en-us'. + self assert: tag languageCode equals: 'en'. + tag + withScriptDo: [ :script | self fail ]; + withRegionDo: [ :region | self assert: region equals: 'US' ]. 
+ + self assert: tag subtags equals: #('en' 'US') +] + +{ #category : 'tests' } +LanguageTagTest >> testCreationWithScriptButNoRegion [ + + | tag | + + tag := LanguageTag fromString: 'es-latn'. + self assert: tag languageCode equals: 'es'. + tag + withScriptDo: [ :script | self assert: script equals: 'Latn' ]; + withRegionDo: [ :region | self fail ]. + + self assert: tag subtags equals: #('es' 'Latn') +] diff --git a/source/Buoy-Localization-Tests/StringEscapingRuleTest.class.st b/source/Buoy-Localization-Tests/StringEscapingRuleTest.class.st new file mode 100644 index 0000000..8c02bbf --- /dev/null +++ b/source/Buoy-Localization-Tests/StringEscapingRuleTest.class.st @@ -0,0 +1,23 @@ +" +A StringEscapingRuleTest is a test class for testing the behavior of StringEscapingRule +" +Class { + #name : 'StringEscapingRuleTest', + #superclass : 'TestCase', + #category : 'Buoy-Localization-Tests', + #package : 'Buoy-Localization-Tests' +} + +{ #category : 'tests' } +StringEscapingRuleTest >> testIsAbstract [ + + self assert: StringEscapingRule isAbstract +] + +{ #category : 'tests' } +StringEscapingRuleTest >> testPriority [ + + self + assert: ReverseSolidusEscapingRule new priority > ControlCharactersEscapingRule new priority; + assert: ControlCharactersEscapingRule new priority > UnicodeCharacterEscapingRule new priority +] diff --git a/source/Buoy-Localization-Tests/StringLocalizationExtensionsTest.class.st b/source/Buoy-Localization-Tests/StringLocalizationExtensionsTest.class.st new file mode 100644 index 0000000..f769827 --- /dev/null +++ b/source/Buoy-Localization-Tests/StringLocalizationExtensionsTest.class.st @@ -0,0 +1,176 @@ +Class { + #name : 'StringLocalizationExtensionsTest', + #superclass : 'TestCase', + #category : 'Buoy-Localization-Tests', + #package : 'Buoy-Localization-Tests' +} + +{ #category : 'private' } +StringLocalizationExtensionsTest >> assertUnescaping: string raisesAsError: errorMessage [ + + self should: [ string unescaped ] raise: 
AssertionFailed withMessageText: errorMessage +] + +{ #category : 'tests - escaping' } +StringLocalizationExtensionsTest >> testErrorsInUnescaped [ + + self assertUnescaping: '\💩' raisesAsError: 'There''s no escaping rule for "💩" (\u{1F4A9})'. + + self + assertUnescaping: '\ \\' raisesAsError: 'There''s no escaping rule for " " (\u{0020})'; + assertUnescaping: '\\\' raisesAsError: 'Missing escape sequence'; + assertUnescaping: '\\ \' raisesAsError: 'Missing escape sequence'. + + self + assertUnescaping: '\u' raisesAsError: 'Missing opening "{" in escaping sequence'; + assertUnescaping: '\u{' raisesAsError: 'Missing closing "}" in escaping sequence'; + assertUnescaping: '\u{010' raisesAsError: 'Missing closing "}" in escaping sequence'; + assertUnescaping: '\u{}' raisesAsError: 'Unexpected hexadecimal sequence ""'; + assertUnescaping: '\u{💩}' raisesAsError: 'Unexpected hexadecimal sequence "💩"'; + assertUnescaping: '\u{G}' raisesAsError: 'Unexpected hexadecimal sequence "G"'; + assertUnescaping: '\u{-1}' raisesAsError: 'Code point "-1" is out of range'; + assertUnescaping: '\u{FFFFFF}' raisesAsError: 'Code point "FFFFFF" is out of range' +] + +{ #category : 'tests - escaping' } +StringLocalizationExtensionsTest >> testEscaped [ + + self + assert: 'я' escaped equals: '\u{44F}'; + assert: 'café' escaped equals: 'cafe\u{301}'; + assert: 'ā' escaped equals: '\u{101}'; + assert: 'Ω' escaped equals: '\u{3A9}'; + assert: '🇷' escaped equals: '\u{1F1F7}'; + + assert: '{1} seems to be out of sync. Please fetch from "{2}" and try again.' escaped + equals: '{1} seems to be out of sync. Please fetch from "{2}" and try again.'; + + assert: 'Ṩ{10} {name and surname} \ \{ ∈ ⊕' expandMacros escaped + equals: 'S\u{307}\u{323}\l\t\r{10} {name and surname} \\ \\{ \u{2208} \u{2295}'; + + assert: '̣̇Walter ∈ ⊕' expandMacros escaped + equals: '\u{307}\u{323}\l\t\rWalter \u{2208} \u{2295}'; + + assert: '

Czech is in Czech čeština.

' escaped + equals: '

Czech is in Czech \u{10D}e\u{161}tina.

' +] + +{ #category : 'tests - escaping' } +StringLocalizationExtensionsTest >> testEscapedControlCharacters [ + + self + assert: '\r' equals: String cr escaped; + assert: '\l' equals: String lf escaped; + assert: '\t' equals: String tab escaped; + assert: '\a' equals: ( Character codePoint: 7 ) asString escaped; + assert: '\b' equals: Character backspace asString escaped; + assert: '\v' equals: ( Character codePoint: 11 ) asString escaped; + assert: '\f' equals: ( Character codePoint: 12 ) asString escaped; + assert: '\e' equals: Character escape asString escaped +] + +{ #category : 'tests - escaping' } +StringLocalizationExtensionsTest >> testEscapedReverseSolidus [ + + self + assert: '\\' equals: '\' escaped; + assert: '\\ \\' equals: '\ \' escaped +] + +{ #category : 'tests - escaping' } +StringLocalizationExtensionsTest >> testEscapedUnicode [ + + self + assert: '\u{44F}' equals: 'я' escaped; + assert: '\u{101}' equals: 'ā' escaped; + assert: '\u{3A9}' equals: 'Ω' escaped; + assert: '\u{1F1F7}' equals: '🇷' escaped; + assert: '\u{1F63B}' equals: '😻' escaped +] + +{ #category : 'tests - escaping' } +StringLocalizationExtensionsTest >> testEscapedWithoutEscapingSequence [ + + self + assert: 'aabb' escaped equals: 'aabb'; + assert: 'a + b' escaped equals: 'a + b'; + assert: 'café' escaped equals: 'café'; + assert: '¡vamos! ¿adonde?' escaped equals: '¡vamos! ¿adonde?' +] + +{ #category : 'tests - escaping' } +StringLocalizationExtensionsTest >> testUnescaped [ + + self + assert: '\u{044F}' unescaped equals: 'я'; + assert: 'cafe\u{0301}' unescaped equals: 'café'; + assert: '\u{101}' unescaped equals: 'ā'; + assert: '\u{03A9}' unescaped equals: 'Ω'; + assert: '\u{1F1F7}' unescaped equals: '🇷'; + + assert: '{1} seems to be out of sync. Please fetch from "{2}" and try again.' unescaped + equals: '{1} seems to be out of sync. 
Please fetch from "{2}" and try again.'; + + assert: 'S\u{0307}\u{0323}\n\t\r{10} {name and surname} \\ \\{ \u{2208} \u{2295}' unescaped + equals: 'Ṩ{10} {name and surname} \ \{ ∈ ⊕' expandMacros; + + assert: '\u{0307}\u{0323}\n\t\rWalter \u{2208} \u{2295}' unescaped + equals: '̣̇Walter ∈ ⊕' expandMacros; + + assert: '

Czech is in Czech \u{10D}e\u{161}tina.

' unescaped + equals: '

Czech is in Czech čeština.

' +] + +{ #category : 'tests - escaping' } +StringLocalizationExtensionsTest >> testUnescapedControlCharacters [ + + self + assert: '\r' unescaped equals: String cr; + assert: '\l' unescaped equals: String lf; + assert: '\t' unescaped equals: String tab; + assert: '\n' unescaped equals: OSPlatform current lineEnding; + assert: '\a' unescaped equals: ( Character codePoint: 7 ) asString; + assert: '\b' unescaped equals: Character backspace asString; + assert: '\v' unescaped equals: ( Character codePoint: 11 ) asString; + assert: '\f' unescaped equals: ( Character codePoint: 12 ) asString; + assert: '\e' unescaped equals: Character escape asString +] + +{ #category : 'tests - escaping' } +StringLocalizationExtensionsTest >> testUnescapedReverseSolidus [ + + self + assert: '\\' unescaped equals: '\'; + assert: '\\ \\' unescaped equals: '\ \'. + + self + should: [ '\\\' unescaped ] raise: AssertionFailed withMessageText: 'Missing escape sequence'; + should: [ '\ \\' unescaped ] + raise: AssertionFailed + withMessageText: 'There''s no escaping rule for " " (\u{0020})'; + should: [ '\\ \' unescaped ] raise: AssertionFailed withMessageText: 'Missing escape sequence' +] + +{ #category : 'tests - escaping' } +StringLocalizationExtensionsTest >> testUnescapedUnicode [ + + self + assert: '\u{044F}' unescaped equals: 'я'; + assert: '\u{44F}' unescaped equals: 'я'; + assert: '\u{101}' unescaped equals: 'ā'; + assert: '\u{03A9}' unescaped equals: 'Ω'; + assert: '\u{3A9}' unescaped equals: 'Ω'; + assert: '\u{1F1F7}' unescaped equals: '🇷'; + assert: '\u{41}' unescaped equals: 'A'; + assert: '\u{041}' unescaped equals: 'A'; + assert: '\u{0041}' unescaped equals: 'A'; + assert: '\u{1F63b}' unescaped equals: '😻' +] + +{ #category : 'tests - escaping' } +StringLocalizationExtensionsTest >> testUnescapedWithoutEscapingSequence [ + + self + assert: 'aabb' unescaped equals: 'aabb'; + assert: 'Ω + 💩' unescaped equals: 'Ω + 💩' +] diff --git a/source/Buoy-Localization-Tests/package.st 
b/source/Buoy-Localization-Tests/package.st new file mode 100644 index 0000000..e5cb464 --- /dev/null +++ b/source/Buoy-Localization-Tests/package.st @@ -0,0 +1 @@ +Package { #name : 'Buoy-Localization-Tests' } diff --git a/source/Buoy-Localization/ControlCharactersEscapingRule.class.st b/source/Buoy-Localization/ControlCharactersEscapingRule.class.st new file mode 100644 index 0000000..8dd3904 --- /dev/null +++ b/source/Buoy-Localization/ControlCharactersEscapingRule.class.st @@ -0,0 +1,81 @@ +" +I'm an escaping rule implementing escaping for control characters. + +The following escaping sequences are handled by my instances: +- `\a` BEL Bell +- `\b` BS Backspace +- `\e` ESC Escape +- `\f` FF Form Feed +- `\l` LF Line Feed +- `\n` OS EOL LF on Unix / CRLF on Windows (recognized when unescaping only; escaping never produces it) +- `\r` CR Carriage Return +- `\t` TAB Horizontal Tabulation +- `\v` VT Vertical Tabulation +" +Class { + #name : 'ControlCharactersEscapingRule', + #superclass : 'StringEscapingRule', + #category : 'Buoy-Localization', + #package : 'Buoy-Localization' +} + +{ #category : 'class initialization' } +ControlCharactersEscapingRule class >> initialize [ + "Register one instance of this rule with StringEscapingRule." + + StringEscapingRule registerRule: self new +] + +{ #category : 'escaping' } +ControlCharactersEscapingRule >> escape: character on: stream [ + "Write on stream the backslash sequence for character, testing each supported control character in turn; raiseError is reached only when none of them matches." + character codePoint == 7 ifTrue: [ ^ stream nextPutAll: '\a' ]. + character == Character backspace ifTrue: [ ^ stream nextPutAll: '\b' ]. + character == Character escape ifTrue: [ ^ stream nextPutAll: '\e' ]. + character codePoint == 12 ifTrue: [ ^ stream nextPutAll: '\f' ]. + character == Character lf ifTrue: [ ^ stream nextPutAll: '\l' ]. + character == Character cr ifTrue: [ ^ stream nextPutAll: '\r' ]. + character == Character tab ifTrue: [ ^ stream nextPutAll: '\t' ]. + character codePoint == 11 ifTrue: [ ^ stream nextPutAll: '\v' ]. 
+ self raiseError +] + +{ #category : 'testing' } +ControlCharactersEscapingRule >> handlesEscapeOf: character [ + "Answer true for code points 7 through 13 and for 27, which are exactly the characters escape:on: has a branch for." + ^ ( character codePoint between: 7 and: 13 ) or: [ character codePoint == 27 ] +] + +{ #category : 'testing' } +ControlCharactersEscapingRule >> handlesUnescapeOf: controlCharacter [ + "Answer true when controlCharacter is one of the letters a b e f l n r t v, the set unescape:from:on: has a branch for." + ^ 'abeflnrtv' includes: controlCharacter +] + +{ #category : 'accessing' } +ControlCharactersEscapingRule >> priority [ + "Answer 50. StringEscapingRule keeps its rules sorted by descending priority and uses the first one that handles a character." + ^ 50 +] + +{ #category : 'private' } +ControlCharactersEscapingRule >> raiseError [ + "Signal an error: the handles* tests accepted a character that escape:on: or unescape:from:on: has no branch for." + + self error: 'The control character escaping rule is mishandling some case' +] + +{ #category : 'escaping' } +ControlCharactersEscapingRule >> unescape: aCharacter from: sourceStream on: targetStream [ + "Write on targetStream the control character named by aCharacter; sourceStream is never read here. $n writes the line ending answered by LanguagePlatform current os." + aCharacter == $a then: [ ^ targetStream nextPut: ( Character codePoint: 7 ) ]. + aCharacter == $b then: [ ^ targetStream nextPut: Character backspace ]. + aCharacter == $e then: [ ^ targetStream nextPut: Character escape ]. + aCharacter == $f then: [ ^ targetStream nextPut: ( Character codePoint: 12 ) ]. + aCharacter == $l then: [ ^ targetStream lf ]. + aCharacter == $n then: [ ^ targetStream nextPutAll: LanguagePlatform current os lineEnding ]. + aCharacter == $r then: [ ^ targetStream cr ]. + aCharacter == $t then: [ ^ targetStream tab ]. + aCharacter == $v then: [ ^ targetStream nextPut: ( Character codePoint: 11 ) ]. + self raiseError +] diff --git a/source/Buoy-Localization/LanguageRange.class.st b/source/Buoy-Localization/LanguageRange.class.st new file mode 100644 index 0000000..f53a3b1 --- /dev/null +++ b/source/Buoy-Localization/LanguageRange.class.st @@ -0,0 +1,110 @@ +" +A language-range has the same syntax as a language-tag, or is the single character ""*"". + +A language-range matches a language-tag if it exactly equals the tag, +or if it exactly equals a prefix of the tag such that the first +character following the prefix is ""-"". + +The special range ""*"" matches any tag. 
A protocol which uses +language ranges may specify additional rules about the semantics of +""*""; for instance, HTTP/1.1 specifies that the range ""*"" matches only +languages not matched by any other range within an ""Accept-Language:"" +header. + +Reference: + + https://datatracker.ietf.org/doc/html/rfc3066 +" +Class { + #name : 'LanguageRange', + #superclass : 'Object', + #instVars : [ + 'languageTagOptional' + ], + #category : 'Buoy-Localization', + #package : 'Buoy-Localization' +} + +{ #category : 'instance creation' } +LanguageRange class >> any [ + "Answer the wildcard range: it prints as * and matches every language tag." + ^ self with: Optional unused +] + +{ #category : 'instance creation' } +LanguageRange class >> from: aSubtagCollection [ + "Answer a range over the tag that LanguageTag builds from aSubtagCollection." + ^ self with: ( Optional containing: ( LanguageTag from: aSubtagCollection ) ) +] + +{ #category : 'instance creation' } +LanguageRange class >> fromString: aString [ + "Answer the wildcard range for the string * and, for anything else, a range over the tag parsed from aString." + ^ aString = '*' + then: [ self any ] + otherwise: [ self with: ( Optional containing: ( LanguageTag fromString: aString ) ) ] +] + +{ #category : 'private - instance creation' } +LanguageRange class >> with: aLanguageTagOptional [ + "aLanguageTagOptional holds the language tag of the range, or is unused for the wildcard." + ^ self new initializeWith: aLanguageTagOptional +] + +{ #category : 'comparing' } +LanguageRange >> = anObject [ + "Compare by subtags; the wildcard takes part with the single subtag *." + ^ self equalityChecker + compare: #subtags; + checkAgainst: anObject +] + +{ #category : 'converting' } +LanguageRange >> asLanguageRange [ + + ^ self +] + +{ #category : 'converting' } +LanguageRange >> asString [ + + ^ self printString +] + +{ #category : 'comparing' } +LanguageRange >> hash [ + "Answer the hash of the wrapped tag, or the hash of $* for the wildcard." + ^ languageTagOptional withContentDo: [ :tag | tag hash ] ifUnused: [ $* hash ] +] + +{ #category : 'initialization' } +LanguageRange >> initializeWith: aLanguageTagOptional [ + + languageTagOptional := aLanguageTagOptional +] + +{ #category : 'testing' } +LanguageRange >> matches: aLanguageTag [ + "Answer true for the wildcard; otherwise answer whether the subtags of aLanguageTag begin with my subtags." + ^ languageTagOptional + withContentDo: [ :tag | aLanguageTag subtags beginsWith: tag subtags ] + ifUnused: [ true ] +] + +{ #category : 'printing' } +LanguageRange >> printOn: stream [ 
+ "Print the wrapped tag, or * for the wildcard." + languageTagOptional withContentDo: [ :tag | tag printOn: stream ] ifUnused: [ stream nextPut: $* ] +] + +{ #category : 'accessing' } +LanguageRange >> specificity [ + "Answer the number of subtags in the range; the wildcard answers 0." + ^ languageTagOptional withContentDo: [ :languageTag | languageTag subtags size ] ifUnused: [ 0 ] +] + +{ #category : 'accessing' } +LanguageRange >> subtags [ + "Answer the subtags of the wrapped tag, or an array holding only * for the wildcard." + ^ languageTagOptional withContentDo: [ :tag | tag subtags ] ifUnused: [ #('*') ] +] diff --git a/source/Buoy-Localization/LanguageTag.class.st b/source/Buoy-Localization/LanguageTag.class.st new file mode 100644 index 0000000..5158eae --- /dev/null +++ b/source/Buoy-Localization/LanguageTag.class.st @@ -0,0 +1,189 @@ +" +A language tag is used to label the language used by some information content. + +These tags can also be used to specify the user's preferences when selecting information +content or to label additional attributes of content and associated +resources. + +Sometimes language tags are used to indicate additional language +attributes of content. + +Reference: + + https://www.rfc-editor.org/rfc/rfc5646.html + +" +Class { + #name : 'LanguageTag', + #superclass : 'Object', + #instVars : [ + 'languageCode', + 'scriptOptional', + 'regionOptional', + 'subtags' + ], + #category : 'Buoy-Localization', + #package : 'Buoy-Localization' +} + +{ #category : 'instance creation' } +LanguageTag class >> from: aSubtagCollection [ + "Answer a tag built from a non-empty collection of subtag strings. Raises InstanceCreationFailed when the collection is empty or when assertIsValid rejects the language, script or region." + AssertionChecker + enforce: [ aSubtagCollection notEmpty ] + because: 'At least a sub tag is required.' + raising: InstanceCreationFailed. 
+ + ^ self new initializeFrom: aSubtagCollection +] + +{ #category : 'instance creation' } +LanguageTag class >> fromString: aString [ + "Answer a tag built from the pieces of aString separated by hyphens." + ^ self from: ( aString substrings: '-' ) +] + +{ #category : 'comparing' } +LanguageTag >> = anObject [ + "Compare by subtags, which initializeFrom: stores case-normalized." + ^ self equalityChecker + compare: #subtags; + checkAgainst: anObject +] + +{ #category : 'converting' } +LanguageTag >> asByteArray [ + + ^ self printString asByteArray +] + +{ #category : 'converting' } +LanguageTag >> asLanguageTag [ + + ^ self +] + +{ #category : 'printing' } +LanguageTag >> asString [ + + ^ self printString +] + +{ #category : 'private' } +LanguageTag >> assertIsValid [ + "Declare the checks for the language code and, when present, the script and the region, then run them with buildAndCheck; failures raise InstanceCreationFailed." + AssertionCheckerBuilder new + raising: InstanceCreationFailed; + checking: [ :asserter | + asserter + enforce: [ languageCode size between: 2 and: 3 ] + because: 'ISO 639 language codes must be 2 or 3 letters.'; + enforce: [ languageCode allSatisfy: #isLetter ] + because: 'ISO 639 language codes must consist only of letters.'. + self + withScriptDo: [ :script | + asserter + enforce: [ script size = 4 ] because: 'ISO 15924 script codes must be 4 letters.'; + enforce: [ script allSatisfy: #isLetter ] + because: 'ISO 15924 script codes must be 4 letters.' + ]; + withRegionDo: [ :region | + asserter + enforce: [ region size = 2 ] because: 'Supported ISO 3166-1 codes must be 2 letters.'; + enforce: [ region allSatisfy: #isLetter ] + because: 'Supported ISO 3166-1 codes must be 2 letters.' + ] + ]; + buildAndCheck +] + +{ #category : 'comparing' } +LanguageTag >> hash [ + "Combine the hashes of the normalized subtags, the same data = compares." + ^ self equalityHashCombinator combineHashesOfAll: subtags +] + +{ #category : 'initialization' } +LanguageTag >> initializeFrom: aSubtagCollection [ + "Keep a private lowercased copy of the subtags: RFC 5646 gives case no meaning, so variant, extension and private-use subtags are folded too; script and region get their conventional casing in the steps below." + subtags := ( aSubtagCollection collect: [ :subtag | subtag asLowercase ] ) asArray. + self + initializeLanguageCode; + initializeScript; + initializeRegion; + assertIsValid +] + +{ #category : 'private' } +LanguageTag >> initializeLanguageCode [ + "The first subtag is the language code, stored in lowercase." + subtags at: 1 put: subtags first asLowercase. 
+ languageCode := subtags first +] + +{ #category : 'private' } +LanguageTag >> initializeRegion [ + "Look for the region right after the script when a script was found, otherwise right after the language code." + regionOptional := Optional unused. + scriptOptional + withContentDo: [ self initializeRegionTaggedAt: 3 ] + ifUnused: [ self initializeRegionTaggedAt: 2 ] +] + +{ #category : 'private' } +LanguageTag >> initializeRegionTaggedAt: index [ + "Take the subtag at index as the region, stored in uppercase, only when it is two characters long; do nothing when there are fewer than index subtags." + [ | regionCandidate | + + regionCandidate := ( subtags at: index ) asUppercase. + regionCandidate size = 2 then: [ + subtags at: index put: regionCandidate. + regionOptional := Optional containing: regionCandidate + ] + ] unless: subtags size < index +] + +{ #category : 'private' } +LanguageTag >> initializeScript [ + "Take the second subtag as the script, stored capitalized, only when it is four characters long; do nothing when the tag has a single subtag." + scriptOptional := Optional unused. + + [ + | scriptCandidate | + scriptCandidate := subtags second asLowercase capitalized. + scriptCandidate size = 4 then: [ + subtags at: 2 put: scriptCandidate. + scriptOptional := Optional containing: scriptCandidate + ] + ] unless: subtags size = 1 +] + +{ #category : 'accessing' } +LanguageTag >> languageCode [ + + ^ languageCode +] + +{ #category : 'printing' } +LanguageTag >> printOn: stream [ + "Print the normalized subtags joined by hyphens." + subtags do: [ :subtag | stream nextPutAll: subtag ] separatedBy: [ stream nextPut: $- ] +] + +{ #category : 'accessing' } +LanguageTag >> subtags [ + + ^ subtags +] + +{ #category : 'accessing' } +LanguageTag >> withRegionDo: aBlock [ + "Evaluate aBlock with the region only when the tag has one." + regionOptional withContentDo: aBlock +] + +{ #category : 'accessing' } +LanguageTag >> withScriptDo: aBlock [ + "Evaluate aBlock with the script only when the tag has one." + scriptOptional withContentDo: aBlock +] diff --git a/source/Buoy-Localization/ReverseSolidusEscapingRule.class.st b/source/Buoy-Localization/ReverseSolidusEscapingRule.class.st new file mode 100644 index 0000000..a81be5d --- /dev/null +++ b/source/Buoy-Localization/ReverseSolidusEscapingRule.class.st @@ -0,0 +1,47 @@ +" +I'm an escaping rule implementing escaping for the Reverse Solidus character (the backslash) \\. 
+ +" +Class { + #name : 'ReverseSolidusEscapingRule', + #superclass : 'StringEscapingRule', + #category : 'Buoy-Localization', + #package : 'Buoy-Localization' +} + +{ #category : 'class initialization' } +ReverseSolidusEscapingRule class >> initialize [ + "Register one instance of this rule with StringEscapingRule." + + StringEscapingRule registerRule: self new +] + +{ #category : 'escaping' } +ReverseSolidusEscapingRule >> escape: character on: stream [ + "Write a doubled backslash; character is not inspected because handlesEscapeOf: only accepts the backslash." + stream nextPutAll: '\\' +] + +{ #category : 'testing' } +ReverseSolidusEscapingRule >> handlesEscapeOf: character [ + "Answer true only for the backslash." + ^ character == $\ +] + +{ #category : 'testing' } +ReverseSolidusEscapingRule >> handlesUnescapeOf: controlCharacter [ + "Answer true only when the character after the introducing backslash is itself a backslash." + ^ controlCharacter == $\ +] + +{ #category : 'accessing' } +ReverseSolidusEscapingRule >> priority [ + "Answer 100. StringEscapingRule keeps its rules sorted by descending priority and uses the first one that handles a character." + ^ 100 +] + +{ #category : 'escaping' } +ReverseSolidusEscapingRule >> unescape: character from: sourceStream on: targetStream [ + "Write a single backslash; nothing is read from sourceStream." + targetStream nextPut: $\ +] diff --git a/source/Buoy-Localization/StringEscapingRule.class.st b/source/Buoy-Localization/StringEscapingRule.class.st new file mode 100644 index 0000000..5b736ff --- /dev/null +++ b/source/Buoy-Localization/StringEscapingRule.class.st @@ -0,0 +1,89 @@ +Class { + #name : 'StringEscapingRule', + #superclass : 'Object', + #classVars : [ + 'Available' + ], + #category : 'Buoy-Localization', + #package : 'Buoy-Localization' +} + +{ #category : 'escaping' } +StringEscapingRule class >> escape: character on: targetStream [ + "Escape character with the first registered rule, highest priority first, that handles it; a character no rule handles is copied to targetStream unchanged." + ^ Available + detect: [ :rule | rule handlesEscapeOf: character ] + ifFound: [ :rule | rule escape: character on: targetStream ] + ifNone: [ targetStream nextPut: character ] +] + +{ #category : 'class initialization' } +StringEscapingRule class >> initialize [ + "Make sure the rule registry exists when the class is initialized." + + self initializeAvailableRules +] + +{ #category : 'class initialization' } +StringEscapingRule class >> initializeAvailableRules [ + "Create the registry, sorted by descending priority, only when it does not exist yet, so rules registered earlier are kept." + + Available ifNil: [ Available := SortedCollection sortUsing: #priority descending ] +] + +{ #category : 'testing' } +StringEscapingRule class >> isAbstract 
[ + "Only StringEscapingRule itself is abstract; its subclasses answer false." + ^ self = StringEscapingRule +] + +{ #category : 'rule management' } +StringEscapingRule class >> registerRule: anEscapingRule [ + "Add anEscapingRule to the registry, first dropping any rule of the same class so that running a class-side initialize again does not pile up duplicates." + + self initializeAvailableRules. + Available removeAll: ( Available select: [ :rule | rule class = anEscapingRule class ] ); add: anEscapingRule +] + +{ #category : 'escaping' } +StringEscapingRule class >> unescape: aCharacter from: sourceStream on: targetStream [ + "Unescape the sequence introduced by aCharacter with the first registered rule that handles it; signal AssertionFailed, naming the character and its code point, when no rule does." + ^ Available + detect: [ :rule | rule handlesUnescapeOf: aCharacter ] + ifFound: [ :rule | rule unescape: aCharacter from: sourceStream on: targetStream ] + ifNone: [ + AssertionFailed signal: ( 'There''s no escaping rule for "<1s>" (\u{<2s>})' + expandMacrosWith: aCharacter asString + with: ( aCharacter codePoint asHexStringPaddedTo: 4 ) ) + ] +] + +{ #category : 'escaping' } +StringEscapingRule >> escape: character on: stream [ + "Subclasses write the escaped form of character on stream." + self subclassResponsibility +] + +{ #category : 'testing' } +StringEscapingRule >> handlesEscapeOf: character [ + "Subclasses answer whether they can escape character." + ^ self subclassResponsibility +] + +{ #category : 'testing' } +StringEscapingRule >> handlesUnescapeOf: controlCharacter [ + "Subclasses answer whether they can unescape a sequence whose first character after the backslash is controlCharacter." + ^ self subclassResponsibility +] + +{ #category : 'accessing' } +StringEscapingRule >> priority [ + "Subclasses answer a number; rules with a higher priority are consulted first." + ^ self subclassResponsibility +] + +{ #category : 'escaping' } +StringEscapingRule >> unescape: aCharacter from: sourceStream on: targetStream [ + "Subclasses write the unescaped character on targetStream, reading any further input they need from sourceStream." + self subclassResponsibility +] diff --git a/source/Buoy-Localization/UnicodeCharacterEscapingRule.class.st b/source/Buoy-Localization/UnicodeCharacterEscapingRule.class.st new file mode 100644 index 0000000..bba35f4 --- /dev/null +++ b/source/Buoy-Localization/UnicodeCharacterEscapingRule.class.st @@ -0,0 +1,102 @@ +" +I'm an escaping rule implementing escaping for Unicode characters. 
+ +The following escaping sequences are handled by my instances: + +- `\u{HHHH}` Represents the character with the given hexadecimal Unicode code point +" +Class { + #name : 'UnicodeCharacterEscapingRule', + #superclass : 'StringEscapingRule', + #category : 'Buoy-Localization', + #package : 'Buoy-Localization' +} + +{ #category : 'class initialization' } +UnicodeCharacterEscapingRule class >> initialize [ + + + StringEscapingRule registerRule: self new +] + +{ #category : 'escaping' } +UnicodeCharacterEscapingRule >> escape: character on: stream [ + + stream nextPutAll: '\u{'. + character codePoint printOn: stream base: 16. + stream nextPut: $} +] + +{ #category : 'private' } +UnicodeCharacterEscapingRule >> extractExpressionFrom: stream [ + + | closingBracketWasFound expression openingBracket | + + AssertionChecker refuse: [ stream atEnd ] because: 'Missing opening "{" in escaping sequence'. + + openingBracket := stream next. + AssertionChecker + enforce: [ openingBracket == ${ ] + because: 'Missing opening "{" in escaping sequence'. + + closingBracketWasFound := false. + expression := stream upToAnyOf: '}' do: [ :char | closingBracketWasFound := true ]. + + AssertionChecker + enforce: [ closingBracketWasFound ] + because: 'Missing closing "}" in escaping sequence'. + + ^ expression +] + +{ #category : 'testing' } +UnicodeCharacterEscapingRule >> handlesEscapeOf: character [ + + | codePoint | + "Ignore the printable characters on the Basic Latin and Latin-1 Supplement sections of Unicode" + codePoint := character codePoint. 
+ + ^ ( ( codePoint between: 20 and: 126 ) or: [ codePoint between: 161 and: 255 ] ) not +] + +{ #category : 'testing' } +UnicodeCharacterEscapingRule >> handlesUnescapeOf: controlCharacter [ + + ^ controlCharacter == $u +] + +{ #category : 'private' } +UnicodeCharacterEscapingRule >> maximumUTFCode [ + + ^ 16r10FFFF +] + +{ #category : 'private' } +UnicodeCharacterEscapingRule >> parseCodePointFrom: stream [ + + | codePoint expression | + expression := self extractExpressionFrom: stream. + codePoint := Integer + readFromHex: expression + ifFail: [ + AssertionFailed signal: + ( 'Unexpected hexadecimal sequence "<1s>"' expandMacrosWith: expression ) ]. + AssertionChecker + enforce: [ codePoint between: 0 and: self maximumUTFCode ] + because: [ 'Code point "<1s>" is out of range' expandMacrosWith: expression ]. + ^ codePoint +] + +{ #category : 'accessing' } +UnicodeCharacterEscapingRule >> priority [ + + ^ 10 +] + +{ #category : 'escaping' } +UnicodeCharacterEscapingRule >> unescape: aCharacter from: sourceStream on: targetStream [ + + | codePoint | + codePoint := self parseCodePointFrom: sourceStream. + targetStream nextPut: ( Character codePoint: codePoint ) +] diff --git a/source/Buoy-Localization/package.st b/source/Buoy-Localization/package.st new file mode 100644 index 0000000..137a50c --- /dev/null +++ b/source/Buoy-Localization/package.st @@ -0,0 +1 @@ +Package { #name : 'Buoy-Localization' }