-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
207 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
// Package encoding provides interfaces and utilities for encoding and decoding data. | ||
package encoding | ||
|
||
import ( | ||
"bytes" | ||
"errors" | ||
"strconv" | ||
"strings" | ||
"unicode/utf8" | ||
) | ||
|
||
// ErrInvalidString is the sentinel error reported for input that is not a
// valid string. Compare against it with errors.Is.
var ErrInvalidString error = errors.New("invalid string")
|
||
type (
	// Encoder is the signature of a function that turns an arbitrary value
	// into its byte representation.
	Encoder func(any) ([]byte, error)

	// Decoder is the signature of a function that reads bytes and stores the
	// decoded result into the supplied value.
	Decoder func([]byte, any) error

	// Marshaler is implemented by types that know how to produce their own
	// byte representation.
	Marshaler interface {
		Marshal() ([]byte, error)
	}

	// Unmarshaler is implemented by types that know how to populate
	// themselves from a byte representation.
	Unmarshaler interface {
		Unmarshal([]byte) error
	}
)
|
||
// UnmarshalString decodes a string from byte slice data. | ||
// It supports both quoted strings and literal strings. | ||
// | ||
// Parameters: | ||
// - data: The byte slice containing the string to unmarshal. | ||
// - literalChar: The character used for literal strings. Use 0 for no literal strings. | ||
// - allowNewline: Whether newlines are allowed in literal strings. | ||
// | ||
// Returns: | ||
// - The unmarshaled string and nil error if successful. | ||
// - An empty string and an error if unmarshaling fails. | ||
func UnmarshalString(data []byte, literalChar byte, allowNewline bool) (string, error) { | ||
data = bytes.TrimSpace(data) | ||
if len(data) < 2 { | ||
return "", errors.New("string too short") | ||
} | ||
|
||
switch data[0] { | ||
case '"': | ||
return unquoteString(data) | ||
case literalChar: | ||
if literalChar == 0 || literalChar == '"' { | ||
break | ||
} | ||
return extractLiteralString(data, literalChar, allowNewline) | ||
} | ||
|
||
return "", ErrInvalidString | ||
} | ||
|
||
// unquoteString decodes a double-quoted string held in data, interpreting
// escape sequences with strconv.Unquote.
//
// data must include both the opening and the closing '"'. An error is
// returned when either quote is missing, when the raw bytes are not valid
// UTF-8, or when strconv.Unquote rejects the quoted text.
func unquoteString(data []byte) (string, error) {
	// The length guard keeps the index expressions below from panicking on
	// empty input.
	if len(data) < 2 || data[0] != '"' || data[len(data)-1] != '"' {
		return "", errors.New("mismatched quotes")
	}
	// strconv.Unquote would silently turn invalid bytes into U+FFFD; reject
	// them instead so the caller never receives altered data. Bytes written
	// as escapes (for example \xff) are plain ASCII here and still decode.
	if !utf8.Valid(data) {
		return "", errors.New("invalid UTF-8 in string")
	}
	return strconv.Unquote(string(data))
}
|
||
// extractLiteralString returns the text found between the leading and
// trailing literalChar delimiters of data. The contents are taken verbatim;
// no escape sequences are interpreted.
//
// Parameters:
//   - data: the delimited literal, including both delimiter bytes.
//   - literalChar: the delimiter byte expected at both ends of data.
//   - allowNewline: when false, a '\n' inside the literal is rejected.
//
// It returns "" and an error when data cannot hold both delimiters, when
// either delimiter is missing, when a newline is present but not allowed, or
// when the contents are not valid UTF-8.
func extractLiteralString(data []byte, literalChar byte, allowNewline bool) (string, error) {
	// Check the length first: with fewer than two bytes the index and slice
	// expressions below would panic.
	if len(data) < 2 || data[0] != literalChar || data[len(data)-1] != literalChar {
		return "", errors.New("mismatched quotes")
	}

	str := string(data[1 : len(data)-1])
	if !allowNewline && strings.ContainsRune(str, '\n') {
		return "", errors.New("newlines not allowed in literal string")
	}
	if !utf8.ValidString(str) {
		return "", errors.New("invalid UTF-8 in string")
	}
	return str, nil
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
package encoding | ||
|
||
import ( | ||
"testing" | ||
) | ||
|
||
func TestUnmarshalString(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
input []byte | ||
literalChar byte | ||
allowNewline bool | ||
want string | ||
wantErr bool | ||
}{ | ||
{"Empty input", []byte{}, 0, false, "", true}, | ||
{"Single character", []byte{'"'}, 0, false, "", true}, | ||
{"Valid quoted string", []byte(`"hello"`), 0, false, "hello", false}, | ||
{"Valid literal string", []byte(`'hello'`), '\'', false, "hello", false}, | ||
{"Quoted string with escapes", []byte(`"he\"llo"`), 0, false, `he"llo`, false}, | ||
{"Literal string with newline allowed", []byte("'hello\nworld'"), '\'', true, "hello\nworld", false}, | ||
{"Literal string with newline disallowed", []byte("'hello\nworld'"), '\'', false, "", true}, | ||
{"Invalid UTF-8 in literal string", []byte{'\'', 0xFF, '\''}, '\'', false, "", true}, | ||
{"Mismatched quotes in quoted string", []byte(`"hello`), 0, false, "", true}, | ||
{"Mismatched quotes in literal string", []byte(`'hello`), '\'', false, "", true}, | ||
{"Invalid string start", []byte(`hello`), 0, false, "", true}, | ||
{"Quoted string with literal char", []byte(`"'hello'"`), '\'', false, "'hello'", false}, | ||
{"Literal string with quote char", []byte(`'"hello"'`), '\'', false, `"hello"`, false}, | ||
{"Literal char is quote", []byte(`"hello"`), '"', false, "hello", false}, | ||
{"Whitespace before valid string", []byte(" 'hello'"), '\'', false, "hello", false}, | ||
{"Whitespace after valid string", []byte("'hello' "), '\'', false, "hello", false}, | ||
} | ||
|
||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
got, err := UnmarshalString(tt.input, tt.literalChar, tt.allowNewline) | ||
if (err != nil) != tt.wantErr { | ||
t.Errorf("UnmarshalString() error = %v, wantErr %v", err, tt.wantErr) | ||
return | ||
} | ||
if got != tt.want { | ||
t.Errorf("UnmarshalString() = %v, want %v", got, tt.want) | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.