From 1d823af5017b42327c98d6056e6efdc4d8c49f70 Mon Sep 17 00:00:00 2001
From: Jakob Borg <jakob@kastelo.net>
Date: Wed, 15 Jun 2022 10:05:05 +0200
Subject: [PATCH] Correctly handle non-ASCII runes in patterns (fixes #54)

When matching a row we calculate an index into the string, and this
index was counted in runes. However, Go slices strings by byte index,
so the two diverge as soon as the input contains a multi-byte rune
(e.g. "ö" is one rune but two bytes). This change tracks both, using
the rune count to determine the correct length and the byte index to
slice the string.
---
 glob_test.go | 10 ++++++++++
 match/row.go | 15 +++++++++------
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/glob_test.go b/glob_test.go
index 810036f..fb6b2ff 100644
--- a/glob_test.go
+++ b/glob_test.go
@@ -162,6 +162,16 @@ func TestGlob(t *testing.T) {
 
 		glob(true, pattern_prefix_suffix, fixture_prefix_suffix_match),
 		glob(false, pattern_prefix_suffix, fixture_prefix_suffix_mismatch),
+
+		glob(true, "155ö", "155ö"),
+		glob(true, "1?5ö", "155ö"), // <-
+		glob(true, "1?ö5", "15ö5"),
+		glob(true, "155helloö", "155helloö"),
+		glob(true, "1?5helloö", "155helloö"), // <-
+		glob(true, "1?ö5hello", "15ö5hello"),
+		glob(true, "1?5heöllo", "155heöllo"),
+		glob(true, "1ö?5", "1ö55"), // <-
+		glob(true, "ö1?5", "ö155"),
 	} {
 		t.Run("", func(t *testing.T) {
 			g := MustCompile(test.pattern, test.delimiters...)
diff --git a/match/row.go b/match/row.go
index 4379042..a34d0a5 100644
--- a/match/row.go
+++ b/match/row.go
@@ -2,6 +2,7 @@ package match
 
 import (
 	"fmt"
+	"unicode/utf8"
 )
 
 type Row struct {
@@ -23,19 +24,21 @@ func (self Row) matchAll(s string) bool {
 	for _, m := range self.Matchers {
 		length := m.Len()
 
-		var next, i int
-		for next = range s[idx:] {
-			i++
-			if i == length {
+		var runeCount, byteIdx int
+		var r rune
+		for _, r = range s[idx:] {
+			runeCount++
+			byteIdx += utf8.RuneLen(r)
+			if runeCount == length {
 				break
 			}
 		}
 
-		if i < length || !m.Match(s[idx:idx+next+1]) {
+		if runeCount < length || !m.Match(s[idx:idx+byteIdx]) {
 			return false
 		}
 
-		idx += next + 1
+		idx += byteIdx
 	}
 
 	return true