Skip to content

Commit

Permalink
Support for special symbols with Char type (#179)
Browse files Browse the repository at this point in the history
### What's done:
- Previously ktoml did not support special escaped characters when decoding the Char type
- Added extra tests for literal strings
- Moved logic related to the escaped characters to new utils file
  • Loading branch information
orchestr7 authored Jan 7, 2023
1 parent 90403d0 commit 2d3c02c
Show file tree
Hide file tree
Showing 12 changed files with 309 additions and 190 deletions.
130 changes: 76 additions & 54 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,17 @@ We are still developing and testing this library, so it has several limitations:
:white_check_mark: deserialization (with some parsing limitations) \
:white_check_mark: serialization (with tree-related limitations)

**Parsing** \
**Parsing and decoding** \
:white_check_mark: Table sections (single and dotted) \
:white_check_mark: Key-value pairs (single and dotted) \
:white_check_mark: Integer type \
:white_check_mark: Float type \
:white_check_mark: String type \
:white_check_mark: Float type \
:white_check_mark: Long/Integer/Byte/Short types \
:white_check_mark: Double/Float types \
:white_check_mark: Basic Strings \
:white_check_mark: Literal Strings \
:white_check_mark: Char type \
:white_check_mark: Boolean type \
:white_check_mark: Simple Arrays \
:white_check_mark: Comments \
:white_check_mark: Literal Strings \
:white_check_mark: Inline Tables \
:white_check_mark: Offset Date-Time (to `Instant` of [kotlinx-datetime](https://github.com/Kotlin/kotlinx-datetime)) \
:white_check_mark: Local Date-Time (to `LocalDateTime` of [kotlinx-datetime](https://github.com/Kotlin/kotlinx-datetime)) \
Expand Down Expand Up @@ -264,25 +264,26 @@ someBooleanProperty = true
gradle-libs-like-property = { id = "org.jetbrains.kotlin.jvm", version.ref = "kotlin" }

[table1]
# it can be null or nil, but don't forget to mark it with '?' in the code
# keep in mind, that null is prohibited by TOML spec, but it is very important in Kotlin, so we supported it
# see allowNullValues for a more strict enforcement of the TOML spec
property1 = null
property2 = 6
# check property3 in Table1 below. As it has the default value, it is not required and can be not provided

[table2]
someNumber = 5
[table2."akuleshov7.com"]
name = 'this is a "literal" string'
# empty lists are also supported
configurationList = ["a", "b", "c", null]
# null is prohibited by the TOML spec, but allowed in ktoml for nullable types
# so for 'property1' null value is ok. Use: property1 = null
property1 = 100
property2 = 6

# such redeclaration of table2
# is prohibited in toml specification;
# but ktoml is allowing it in non-strict mode:
[table2]
otherNumber = 5.56
[table2]
someNumber = 5
[table2."akuleshov7.com"]
name = 'this is a "literal" string'
# empty lists are also supported
configurationList = ["a", "b", "c"]

# such redeclaration of table2
# is prohibited in toml specification;
# but ktoml is allowing it in non-strict mode:
[table2]
otherNumber = 5.56
# use single quotes
charFromString = 'a'
charFromInteger = 123
```

can be deserialized to `MyClass`:
Expand All @@ -292,34 +293,45 @@ data class MyClass(
val someBooleanProperty: Boolean,
val table1: Table1,
val table2: Table2,
@SerialName("gradle-libs-like-property")
val kotlinJvm: GradlePlugin
@SerialName("gradle-libs-like-property")
val kotlinJvm: GradlePlugin
)

@Serializable
data class Table1(
// nullable values, from toml you can pass null/nil/empty value to this kind of a field
// nullable property, from toml input you can pass "null"/"nil"/"empty" value (no quotes needed) to this field
val property1: Long?,
// please note, that according to the specification of toml integer values should be represented with Long
val property2: Long,
// no need to pass this value as it has the default value and is NOT REQUIRED
val property3: Long = 5
// please note that, according to the TOML specification, integer values should be represented with Long,
// but we allow using Int/Short/etc. Just be careful with overflow
val property2: Byte,
// no need to pass this value in the input as it has the default value and so it is NOT REQUIRED
val property3: Short = 5
)

@Serializable
data class Table2(
val someNumber: Long,
@SerialName("akuleshov7.com")
val inlineTable: InlineTable,
val otherNumber: Double
val inlineTable: NestedTable,
val otherNumber: Double,
// A Java/Kotlin-style Char literal is not supported in TOML, because single quotes are used for literal strings.
// However, ktoml supports reading a Char both from a single-character string and from its integer code
val charFromString: Char,
val charFromInteger: Char
)

@Serializable
data class NestedTable(
val name: String,
@SerialName("configurationList")
val overriddenName: List<String?>
)

@Serializable
data class GradlePlugin(val id: String, val version: Version)

@Serializable
data class Version(val ref: String)

```

with the following code:
Expand All @@ -332,26 +344,29 @@ Translation of the example above to json-terminology:
```json
{
"someBooleanProperty": true,

"gradle-libs-like-property": {
"id": "org.jetbrains.kotlin.jvm",
"version": {
"ref": "kotlin"
}
},

"table1": {
"property1": 5,
"property1": 100,
"property2": 5
},
"table2": {
"someNumber": 5,

"otherNumber": 5.56,
"akuleshov7.com": {
"name": "my name",
"configurationList": [
"a",
"b",
"c"
],
"otherNumber": 5.56
}
},
"gradle-libs-like-property": {
"id": "org.jetbrains.kotlin.jvm",
"version": {
"ref": "kotlin"
]
}
}
}
Expand All @@ -364,22 +379,31 @@ The following example from above:

```toml
someBooleanProperty = true
# inline tables in gradle 'libs.versions.toml' notation
gradle-libs-like-property = { id = "org.jetbrains.kotlin.jvm", version.ref = "kotlin" }

# Comments can be added
# More comments can also be added
[table1]
property1 = null # At the end of lines too
property2 = 6
# null is prohibited by the TOML spec, but allowed in ktoml for nullable types
# so for 'property1' null value is ok. Use: property1 = null.
# Null can also be prohibited with 'allowNullValues = false'
property1 = 100
property2 = 6

[table2]
someNumber = 5

# Properties always appear before sub-tables, tables aren't redeclared
otherNumber = 5.56
[table2."akuleshov7.com"]
name = 'this is a "literal" string'
configurationList = ["a", "b", "c", null]
# empty lists are also supported
configurationList = ["a", "b", "c"]

# such redeclaration of table2
# is prohibited in toml specification;
# but ktoml is allowing it in non-strict mode:
[table2]
otherNumber = 5.56
# use single quotes
charFromString = 'a'
charFromInteger = 123
```

can be serialized from `MyClass`:
Expand Down Expand Up @@ -449,8 +473,6 @@ data class Version(val ref: String)
with the following code:

```kotlin
Toml.decodeFromString<MyClass>(/* your toml string */)
Toml.encodeToString<MyClass>(/* your encoded object */)
```
</details>


Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import com.akuleshov7.ktoml.utils.FloatingPointLimitsEnum
import com.akuleshov7.ktoml.utils.FloatingPointLimitsEnum.*
import com.akuleshov7.ktoml.utils.IntegerLimitsEnum
import com.akuleshov7.ktoml.utils.IntegerLimitsEnum.*
import com.akuleshov7.ktoml.utils.convertSpecialCharacters
import kotlinx.datetime.Instant
import kotlinx.datetime.LocalDate
import kotlinx.datetime.LocalDateTime
Expand Down Expand Up @@ -42,7 +43,7 @@ public abstract class TomlAbstractDecoder : AbstractDecoder() {
// converting to Char from a parsed Literal String (with single quotes: '')
is TomlLiteralString ->
try {
(value.content as String).single()
(value.content as String).convertSpecialCharacters(keyValue.lineNo).single()
} catch (ex: NoSuchElementException) {
throw IllegalTypeException("Empty value is not allowed for type [Char], " +
"please check the value: [${value.content}] or use [String] type for deserialization of " +
Expand All @@ -58,7 +59,8 @@ public abstract class TomlAbstractDecoder : AbstractDecoder() {
// all other toml tree types are not supported
else -> throw IllegalTypeException(
"Cannot decode the key [${keyValue.key.last()}] with the value [${keyValue.value.content}]" +
" and with the provided type [Char]. Please check the type in your Serializable class or it's nullability",
" and with the provided type [Char]. Please check the type" +
" in your Serializable class or it's nullability",
keyValue.lineNo
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@ import kotlinx.serialization.descriptors.elementNames

public sealed class TomlDecodingException(message: String) : SerializationException(message)

internal class ParseException(message: String, lineNo: Int) : TomlDecodingException("Line $lineNo: $message")
internal open class ParseException(message: String, lineNo: Int) : TomlDecodingException("Line $lineNo: $message")

/**
 * [ParseException] reported for an escape sequence that is not accepted.
 * The message embeds the offending sequence prefixed with a backslash, and the parent
 * [ParseException] prepends the line number to it.
 *
 * @param invalid text of the rejected escape sequence, without the leading backslash
 * @param lineNo number of the line that is reported in the message
 */
internal class UnknownEscapeSymbolsException(invalid: String, lineNo: Int) : ParseException(
"According to TOML documentation unknown" +
" escape symbols are not allowed. Please check: [\\$invalid]",
lineNo
)

internal class InternalDecodingException(message: String) : TomlDecodingException(message)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@ import com.akuleshov7.ktoml.TomlOutputConfig
import com.akuleshov7.ktoml.exceptions.ParseException
import com.akuleshov7.ktoml.exceptions.TomlWritingException
import com.akuleshov7.ktoml.parsers.trimQuotes
import com.akuleshov7.ktoml.utils.appendCodePointCompat
import com.akuleshov7.ktoml.utils.controlCharacterRegex
import com.akuleshov7.ktoml.utils.unescapedBackslashRegex
import com.akuleshov7.ktoml.utils.unescapedDoubleQuoteRegex
import com.akuleshov7.ktoml.utils.convertSpecialCharacters
import com.akuleshov7.ktoml.utils.escapeSpecialCharacters
import com.akuleshov7.ktoml.writers.TomlEmitter

/**
Expand Down Expand Up @@ -43,12 +41,6 @@ public class TomlBasicString internal constructor(
}

public companion object {
private const val COMPLEX_UNICODE_LENGTH = 8
private const val COMPLEX_UNICODE_PREFIX = 'U'
private const val HEX_RADIX = 16
private const val SIMPLE_UNICODE_LENGTH = 4
private const val SIMPLE_UNICODE_PREFIX = 'u'

private fun String.verifyAndTrimQuotes(lineNo: Int): Any =
when {
// ========= basic string ("abc") =======
Expand Down Expand Up @@ -80,103 +72,5 @@ public class TomlBasicString internal constructor(
}
return this
}

/**
 * Replaces backslash escape sequences in this string with the characters they denote.
 *
 * A backslash that is the very last character of the string is copied to the output as is.
 *
 * @param lineNo line number used in the error message
 * @return the string with all escape sequences decoded
 * @throws ParseException if a backslash is followed by an unsupported symbol
 */
private fun String.convertSpecialCharacters(lineNo: Int): String {
    val decoded = StringBuilder()
    var position = 0
    while (position < length) {
        val symbol = this[position]
        // plain character, or a backslash with nothing after it: copy it unchanged
        if (symbol != '\\' || position == lastIndex) {
            decoded.append(symbol)
            position++
            continue
        }

        val escaped = this[position + 1]
        // the backslash and the symbol right after it are always consumed
        var consumed = 2
        when (escaped) {
            't' -> decoded.append('\t')
            'b' -> decoded.append('\b')
            'r' -> decoded.append('\r')
            'n' -> decoded.append('\n')
            'f' -> decoded.append('\u000C')
            '\\' -> decoded.append('\\')
            '\'' -> decoded.append('\'')
            '"' -> decoded.append('"')
            // unicode escapes additionally consume the hex digits that follow the prefix
            SIMPLE_UNICODE_PREFIX, COMPLEX_UNICODE_PREFIX ->
                consumed += decoded.appendEscapedUnicode(this, escaped, position + 2, lineNo)
            else -> throw ParseException(
                "According to TOML documentation unknown" +
                        " escape symbols are not allowed. Please check: [\\$escaped]",
                lineNo
            )
        }
        position += consumed
    }
    return decoded.toString()
}

/**
 * Decodes the hexadecimal code of a unicode escape sequence and appends the resulting
 * code point to this builder.
 *
 * @param fullString string that contains the escape sequence
 * @param marker escape prefix; [SIMPLE_UNICODE_PREFIX] selects a code of [SIMPLE_UNICODE_LENGTH]
 * hex digits, any other value selects [COMPLEX_UNICODE_LENGTH] hex digits
 * @param codeStartIndex index in [fullString] of the first hex digit (right after the prefix)
 * @param lineNo line number used in the error message
 * @return number of hex digits that were consumed
 * @throws ParseException if the string is too short for the code, if the code is not made
 * of hex digits only, if it does not fit into an Int, or if the code point is rejected
 * by `appendCodePointCompat`
 */
private fun StringBuilder.appendEscapedUnicode(
    fullString: String,
    marker: Char,
    codeStartIndex: Int,
    lineNo: Int
): Int {
    val nbUnicodeChars = if (marker == SIMPLE_UNICODE_PREFIX) {
        SIMPLE_UNICODE_LENGTH
    } else {
        COMPLEX_UNICODE_LENGTH
    }
    val codeEndIndex = codeStartIndex + nbUnicodeChars
    if (codeEndIndex > fullString.length) {
        // not enough characters left for a complete code: report the whole tail, prefix included
        val invalid = fullString.substring(codeStartIndex - 1)
        throw ParseException(
            "According to TOML documentation unknown" +
                    " escape symbols are not allowed. Please check: [\\$invalid]",
            lineNo
        )
    }
    val hexCode = fullString.substring(codeStartIndex, codeEndIndex)

    fun invalidEscape() = ParseException(
        "According to TOML documentation unknown" +
                " escape symbols are not allowed. Please check: [\\$marker$hexCode]",
        lineNo
    )

    // String.toInt(radix) throws NumberFormatException on malformed or overflowing input and
    // accepts a leading sign, so the digits are validated explicitly and parsed without throwing:
    // the caller must always get a ParseException that carries the line number
    val onlyHexDigits = hexCode.all { it in '0'..'9' || it in 'a'..'f' || it in 'A'..'F' }
    val codePoint = (if (onlyHexDigits) hexCode.toIntOrNull(HEX_RADIX) else null)
        ?: throw invalidEscape()
    try {
        appendCodePointCompat(codePoint)
    } catch (e: IllegalArgumentException) {
        throw invalidEscape()
    }
    return nbUnicodeChars
}

/**
 * Escapes this string in three consecutive passes:
 * 1. every match of `controlCharacterRegex` becomes its short escape (`\t`, `\b`, `\n`, `\f`, `\r`)
 *    or, for any other matched character, a `\u` escape padded to [SIMPLE_UNICODE_LENGTH] hex digits;
 * 2. double quotes inside every match of `unescapedDoubleQuoteRegex` get a backslash prepended;
 * 3. every match of `unescapedBackslashRegex` is replaced with two backslashes.
 *
 * @return the escaped string
 */
private fun String.escapeSpecialCharacters(): String =
    replace(controlCharacterRegex) { found ->
        val symbol = found.value.single()
        when (symbol) {
            '\t' -> "\\t"
            '\b' -> "\\b"
            '\n' -> "\\n"
            '\u000C' -> "\\f"
            '\r' -> "\\r"
            else -> {
                // no short form exists: fall back to the zero-padded hex code
                val paddedHex = symbol.code
                    .toString(HEX_RADIX)
                    .padStart(SIMPLE_UNICODE_LENGTH, '0')
                "\\$SIMPLE_UNICODE_PREFIX$paddedHex"
            }
        }
    }
        .replace(unescapedDoubleQuoteRegex) { found -> found.value.replace("\"", "\\\"") }
        // escapeReplacement keeps the backslashes literal in the replacement pattern
        .replace(unescapedBackslashRegex, Regex.escapeReplacement("\\\\"))
}
}
Loading

0 comments on commit 2d3c02c

Please sign in to comment.