Skip to content

Commit

Permalink
Implement TakeTo (#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
FollowTheProcess authored Jan 25, 2024
1 parent 920e374 commit b378ddd
Show file tree
Hide file tree
Showing 2 changed files with 189 additions and 2 deletions.
33 changes: 31 additions & 2 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,35 @@ func TakeUntil(predicate func(r rune) bool) Parser[string] {
}
}

// TakeTo returns a [Parser] that consumes input up to, but not including, the first
// occurrence of the exact string match.
//
// On success the value is everything before the first occurrence of match (which is
// empty when the input starts with match), and the remainder begins with match itself
// followed by everything after it.
//
// An error is returned if the input is empty, the input is not valid utf-8, match is
// empty, or match does not appear anywhere in the input.
func TakeTo(match string) Parser[string] {
	return func(input string) (string, string, error) {
		// Guards are evaluated top to bottom, so an empty input is always reported
		// in preference to an empty match
		switch {
		case input == "":
			return "", "", errors.New("TakeTo: input text is empty")
		case !utf8.ValidString(input):
			return "", "", errors.New("TakeTo: input not valid utf-8")
		case match == "":
			return "", "", errors.New("TakeTo: match must not be empty")
		}

		before, _, found := strings.Cut(input, match)
		if !found {
			return "", "", fmt.Errorf("TakeTo: match (%s) not in input", match)
		}

		// The remainder is re-sliced from input rather than taken from Cut so that
		// it still starts with match instead of skipping over it
		return before, input[len(before):], nil
	}
}

// OneOf returns a [Parser] that recognises one of the provided characters from the start of input.
//
// If you want to match anything other than the provided char set, use [NoneOf].
Expand Down Expand Up @@ -322,7 +351,7 @@ func NoneOf(chars string) Parser[string] {
// passed in set of chars.
//
// If the input or chars is empty, an error will be returned.
// Likewise if none of the chars is present.
// Likewise if none of the chars are present.
func AnyOf(chars string) Parser[string] {
return func(input string) (string, string, error) {
if input == "" {
Expand All @@ -345,7 +374,7 @@ func AnyOf(chars string) Parser[string] {
}
}

// If we've broken the loop but end is still 0, there was no matches
// If we've broken the loop but end is still 0, there were no matches
// in the entire input
if end == 0 {
return "", "", fmt.Errorf("AnyOf: no match for any char in (%s) found in input", chars)
Expand Down
158 changes: 158 additions & 0 deletions parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,137 @@ func TestTakeUntil(t *testing.T) {
}
}

// TestTakeTo runs parser.TakeTo against a table of inputs covering each error path
// (empty input, invalid utf-8, empty match, match absent) and the successful cases
// (match in the middle, at the end, at the start, repeated, and multi-byte utf-8).
func TestTakeTo(t *testing.T) {
	cases := []struct {
		name          string // Name of the subtest
		input         string // Text handed to the parser
		match         string // Exact string the parser should stop at
		wantValue     string // Expected parsed value
		wantRemainder string // Expected unparsed remainder
		wantMsg       string // Expected error message, empty when no error is expected
		wantErr       bool   // Whether an error is expected at all
	}{
		{
			name:          "empty input",
			input:         "",
			match:         "something",
			wantValue:     "",
			wantRemainder: "",
			wantMsg:       "TakeTo: input text is empty",
			wantErr:       true,
		},
		{
			name:          "bad utf8",
			input:         "\xf8\xa1\xa1\xa1\xa1",
			match:         "something",
			wantValue:     "",
			wantRemainder: "",
			wantMsg:       "TakeTo: input not valid utf-8",
			wantErr:       true,
		},
		{
			name:          "empty input and match",
			input:         "",
			match:         "",
			wantValue:     "",
			wantRemainder: "",
			wantMsg:       "TakeTo: input text is empty",
			wantErr:       true,
		},
		{
			name:          "empty match",
			input:         "some text",
			match:         "",
			wantValue:     "",
			wantRemainder: "",
			wantMsg:       "TakeTo: match must not be empty",
			wantErr:       true,
		},
		{
			name:          "no match",
			input:         "a long sentence",
			match:         "not here",
			wantValue:     "",
			wantRemainder: "",
			wantMsg:       "TakeTo: match (not here) not in input",
			wantErr:       true,
		},
		{
			name:          "simple",
			input:         "lots of stuff KEYWORD more stuff",
			match:         "KEYWORD",
			wantValue:     "lots of stuff ",
			wantRemainder: "KEYWORD more stuff",
			wantMsg:       "",
			wantErr:       false,
		},
		{
			name:          "match at end of input",
			input:         "blah blah lots of inputeof",
			match:         "eof",
			wantValue:     "blah blah lots of input",
			wantRemainder: "eof",
			wantMsg:       "",
			wantErr:       false,
		},
		{
			name:          "match at start of input",
			input:         "eofblah blah lots of input",
			match:         "eof",
			wantValue:     "",
			wantRemainder: "eofblah blah lots of input",
			wantMsg:       "",
			wantErr:       false,
		},
		{
			name:          "multiple matches",
			input:         "blaheof blah eof lots of inputeof",
			match:         "eof",
			wantValue:     "blah",
			wantRemainder: "eof blah eof lots of inputeof",
			wantMsg:       "",
			wantErr:       false,
		},
		{
			name:          "match utf8",
			input:         "abcdef語ç日ð本Ê語",
			match:         "語ç日",
			wantValue:     "abcdef",
			wantRemainder: "語ç日ð本Ê語",
			wantMsg:       "",
			wantErr:       false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			gotValue, gotRemainder, err := parser.TakeTo(tc.match)(tc.input)

			// An error must be present exactly when the case asks for one
			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Fatalf("\nGot error:\t%v\nWanted error:\t%v\n", err, tc.wantErr)
			}

			// When an error did come back, its message must match exactly
			if gotErr && err.Error() != tc.wantMsg {
				t.Fatalf("\nGot:\t%q\nWanted:\t%q\n", err.Error(), tc.wantMsg)
			}

			// Value and remainder are checked independently so that a single run
			// reports both mismatches
			if gotValue != tc.wantValue {
				t.Errorf("\nGot:\t%q\nWanted:\t%q\n", gotValue, tc.wantValue)
			}

			if gotRemainder != tc.wantRemainder {
				t.Errorf("\nGot:\t%q\nWanted:\t%q\n", gotRemainder, tc.wantRemainder)
			}
		})
	}
}

func TestOneOf(t *testing.T) {
tests := []struct {
name string // Identifying test case name
Expand Down Expand Up @@ -1345,6 +1476,18 @@ func BenchmarkTakeUntil(b *testing.B) {
}
}

// BenchmarkTakeTo measures building a TakeTo parser and applying it to a short input
// that contains the match, failing the benchmark if the parser reports an error.
func BenchmarkTakeTo(b *testing.B) {
	const text = "some words KEYWORD the rest"

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// The parser is constructed inside the loop, so its creation is part of
		// what gets timed
		if _, _, err := parser.TakeTo("KEYWORD")(text); err != nil {
			b.Fatal(err)
		}
	}
}

func BenchmarkOneOf(b *testing.B) {
input := "abcdef"
chars := "abc"
Expand Down Expand Up @@ -1490,6 +1633,21 @@ func ExampleTakeUntil() {
// Remainder: " <- first whitespace is here"
}

// ExampleTakeTo shows TakeTo splitting the input immediately before the first
// occurrence of a keyword, leaving the keyword at the start of the remainder.
func ExampleTakeTo() {
	const text = "lots of stuff KEYWORD more stuff"

	takeToKeyword := parser.TakeTo("KEYWORD")

	before, rest, err := takeToKeyword(text)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}

	fmt.Printf("Value: %q\nRemainder: %q\n", before, rest)

	// Output: Value: "lots of stuff "
	// Remainder: "KEYWORD more stuff"
}

func ExampleOneOf() {
input := "abcdefg"

Expand Down

0 comments on commit b378ddd

Please sign in to comment.