Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement TakeTo #10

Merged
merged 1 commit into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 31 additions & 2 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,35 @@ func TakeUntil(predicate func(r rune) bool) Parser[string] {
}
}

// TakeTo returns a [Parser] that consumes characters until it first hits an exact string.
//
// An error will be returned if any of the following is true:
//   - the input is empty
//   - the input is not valid utf-8
//   - match is empty
//   - match is not valid utf-8
//   - match does not occur anywhere in the input
//
// The value will contain everything from the start of the input up to the first occurrence of
// match, and the remainder will contain the match and everything thereafter.
func TakeTo(match string) Parser[string] {
	return func(input string) (string, string, error) {
		if input == "" {
			return "", "", errors.New("TakeTo: input text is empty")
		}

		if !utf8.ValidString(input) {
			return "", "", errors.New("TakeTo: input not valid utf-8")
		}

		if match == "" {
			return "", "", errors.New("TakeTo: match must not be empty")
		}

		// A match that is not valid utf-8 (e.g. a lone continuation byte) can be
		// found in the middle of a multi-byte rune of an otherwise valid input.
		// Cutting there would hand back a value and remainder that are themselves
		// invalid utf-8, so reject it up front.
		if !utf8.ValidString(match) {
			return "", "", errors.New("TakeTo: match not valid utf-8")
		}

		// Both strings are valid utf-8 at this point, so the byte offset of the
		// first occurrence always falls on a rune boundary.
		start := strings.Index(input, match)
		if start == -1 {
			return "", "", fmt.Errorf("TakeTo: match (%s) not in input", match)
		}

		return input[:start], input[start:], nil
	}
}

// OneOf returns a [Parser] that recognises one of the provided characters from the start of input.
//
// If you want to match anything other than the provided char set, use [NoneOf].
Expand Down Expand Up @@ -322,7 +351,7 @@ func NoneOf(chars string) Parser[string] {
// passed in set of chars.
//
// If the input or chars is empty, an error will be returned.
// Likewise if none of the chars is present.
// Likewise if none of the chars are present.
func AnyOf(chars string) Parser[string] {
return func(input string) (string, string, error) {
if input == "" {
Expand All @@ -345,7 +374,7 @@ func AnyOf(chars string) Parser[string] {
}
}

// If we've broken the loop but end is still 0, there was no matches
// If we've broken the loop but end is still 0, there were no matches
// in the entire input
if end == 0 {
return "", "", fmt.Errorf("AnyOf: no match for any char in (%s) found in input", chars)
Expand Down
158 changes: 158 additions & 0 deletions parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,137 @@ func TestTakeUntil(t *testing.T) {
}
}

// TestTakeTo runs TakeTo over a table of inputs covering every error path
// (empty input, invalid utf-8, empty match, match absent) as well as matches
// at the start, middle and end of the input, repeated matches and multi-byte text.
func TestTakeTo(t *testing.T) {
	// testCase describes a single scenario. Fields left unset take their zero
	// value, i.e. an empty string or "no error expected".
	type testCase struct {
		name      string // Identifying test case name
		input     string // Entire input to be parsed
		match     string // The exact string to stop at
		value     string // The parsed value
		remainder string // The remaining unparsed input
		errMsg    string // The expected error message (if there is one)
		wantErr   bool   // Whether it should have returned an error
	}

	cases := []testCase{
		{
			name:    "empty input",
			match:   "something",
			wantErr: true,
			errMsg:  "TakeTo: input text is empty",
		},
		{
			name:    "bad utf8",
			input:   "\xf8\xa1\xa1\xa1\xa1",
			match:   "something",
			wantErr: true,
			errMsg:  "TakeTo: input not valid utf-8",
		},
		{
			name:    "empty input and match",
			wantErr: true,
			errMsg:  "TakeTo: input text is empty",
		},
		{
			name:    "empty match",
			input:   "some text",
			wantErr: true,
			errMsg:  "TakeTo: match must not be empty",
		},
		{
			name:    "no match",
			input:   "a long sentence",
			match:   "not here",
			wantErr: true,
			errMsg:  "TakeTo: match (not here) not in input",
		},
		{
			name:      "simple",
			input:     "lots of stuff KEYWORD more stuff",
			match:     "KEYWORD",
			value:     "lots of stuff ",
			remainder: "KEYWORD more stuff",
		},
		{
			name:      "match at end of input",
			input:     "blah blah lots of inputeof",
			match:     "eof",
			value:     "blah blah lots of input",
			remainder: "eof",
		},
		{
			name:      "match at start of input",
			input:     "eofblah blah lots of input",
			match:     "eof",
			remainder: "eofblah blah lots of input",
		},
		{
			name:      "multiple matches",
			input:     "blaheof blah eof lots of inputeof",
			match:     "eof",
			value:     "blah",
			remainder: "eof blah eof lots of inputeof",
		},
		{
			name:      "match utf8",
			input:     "abcdef語ç日ð本Ê語",
			match:     "語ç日",
			value:     "abcdef",
			remainder: "語ç日ð本Ê語",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			gotValue, gotRemainder, err := parser.TakeTo(tc.match)(tc.input)

			// An error must appear exactly when the case asks for one
			failed := err != nil
			if failed != tc.wantErr {
				t.Fatalf("\nGot error:\t%v\nWanted error:\t%v\n", err, tc.wantErr)
			}

			// When there is an error its text has to line up with the expectation
			if failed {
				if got := err.Error(); got != tc.errMsg {
					t.Fatalf("\nGot:\t%q\nWanted:\t%q\n", got, tc.errMsg)
				}
			}

			// Parsed value check
			if gotValue != tc.value {
				t.Errorf("\nGot:\t%q\nWanted:\t%q\n", gotValue, tc.value)
			}

			// Unparsed remainder check
			if gotRemainder != tc.remainder {
				t.Errorf("\nGot:\t%q\nWanted:\t%q\n", gotRemainder, tc.remainder)
			}
		})
	}
}

func TestOneOf(t *testing.T) {
tests := []struct {
name string // Identifying test case name
Expand Down Expand Up @@ -1345,6 +1476,18 @@ func BenchmarkTakeUntil(b *testing.B) {
}
}

// BenchmarkTakeTo measures building a TakeTo parser and running it once against
// a short input that contains the match; any parse error aborts the benchmark.
func BenchmarkTakeTo(b *testing.B) {
	const text = "some words KEYWORD the rest"

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Parser construction stays inside the loop so it is part of the timing.
		if _, _, err := parser.TakeTo("KEYWORD")(text); err != nil {
			b.Fatal(err)
		}
	}
}

func BenchmarkOneOf(b *testing.B) {
input := "abcdef"
chars := "abc"
Expand Down Expand Up @@ -1490,6 +1633,21 @@ func ExampleTakeUntil() {
// Remainder: " <- first whitespace is here"
}

func ExampleTakeTo() {
	// Build the parser first, then apply it to the text.
	untilKeyword := parser.TakeTo("KEYWORD")

	before, rest, err := untilKeyword("lots of stuff KEYWORD more stuff")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}

	fmt.Printf("Value: %q\n", before)
	fmt.Printf("Remainder: %q\n", rest)

	// Output: Value: "lots of stuff "
	// Remainder: "KEYWORD more stuff"
}

func ExampleOneOf() {
input := "abcdefg"

Expand Down
Loading