Skip to content

Commit

Permalink
Proper entity length handling
Browse files Browse the repository at this point in the history
  • Loading branch information
mymmrac committed May 3, 2023
1 parent 0c6b007 commit 62b15f8
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 75 deletions.
2 changes: 2 additions & 0 deletions internal/integration/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ import (
// Fixture names and URLs shared by the integration tests
const (
// img1Jpg is a fixture file name; the SendAudio "url" test passes it to open() for the thumbnail
img1Jpg = "img1.jpg"
// kittenMp3 is presumably a local audio fixture file name (its usage is not visible here, verify against callers)
kittenMp3 = "kitten.mp3"

// exampleMp3 is a remote MP3 URL; the SendAudio "url" test passes it to tu.FileFromURL
exampleMp3 = "https://file-examples.com/storage/fe0e4ffeec64469f8a2ba23/2017/11/file_example_MP3_700KB.mp3"
)

var (
Expand Down
64 changes: 46 additions & 18 deletions internal/integration/send_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,6 @@ func TestSendMessage(t *testing.T) {
assert.NotNil(t, msg)
})

t.Run("unicode-entities", func(t *testing.T) {
	// Build the message text and its entities from fragments that mix emoji, Cyrillic and CJK
	// characters, so that offsets and lengths span characters of different encoded widths
	text, entities := tu.MessageEntities(
		tu.Entity("😅").Italic(),
		tu.Entity(" test ").Bold(),
		tu.Entity("🌗").Italic(),
		tu.Entity(" Україна").Bold(),
		tu.Entity(" "),
		tu.Entity("\U0001FAE5 ").Italic(),
		tu.Entity("世界").Bold(),
	)
	msg, err := bot.SendMessage(tu.Message(tu.ID(chatID), text).WithEntities(entities...))
	require.NoError(t, err)

	// testify takes (expected, actual); keeping that order makes failure output label the values correctly
	assert.Equal(t, text, msg.Text)
	assert.Equal(t, entities, msg.Entities)
})

t.Run("new_line", func(t *testing.T) {
msg, err := bot.SendMessage(&telego.SendMessageParams{
ChatID: tu.ID(chatID),
Expand All @@ -76,6 +59,51 @@ func TestSendMessage(t *testing.T) {
assert.Equal(t, msg.Text, text)
assert.Equal(t, msg.Entities, entities)
})

t.Run("markdown_and_entities", func(t *testing.T) {
	// Expected plain text and entity list, built with the entity helpers
	text, entities := tu.MessageEntities(
		tu.Entity("😅").Italic(),
		tu.Entity(" test ").Bold(),
		tu.Entity("🌗").Italic(),
		tu.Entity(" Україна").Bold(),
		tu.Entity(" "),
		tu.Entity("\U0001FAE5 ").Italic(),
		tu.Entity("世界").Bold(),
	)

	// Send the same content written as MarkdownV2 and let Telegram produce the entities
	msg, err := bot.SendMessage(tu.Message(tu.ID(chatID), "_😅_* test *_🌗_* Україна* _\U0001FAE5 _*世界*").
		WithParseMode(telego.ModeMarkdownV2))
	require.NoError(t, err)
	require.NotNil(t, msg)

	// testify takes (expected, actual)
	assert.Equal(t, text, msg.Text)

	// Use require here: a failed assert would let the loop below run and index past the end of
	// msg.Entities, turning a readable length mismatch into an index-out-of-range panic
	require.Len(t, msg.Entities, len(entities))

	// Only entity types are compared; offsets and lengths returned by Telegram are not checked here
	for i := range entities {
		assert.Equal(t, entities[i].Type, msg.Entities[i].Type)
	}
})

t.Run("entities_check", func(t *testing.T) {
	// Compose one message out of plain and formatted fragments, including fragments with
	// leading/trailing whitespace and fragments that carry more than one entity
	params := tu.MessageWithEntities(tu.ID(chatID),
		tu.Entity("Lo").Strikethrough(),
		tu.Entity("rem").Underline(),
		tu.Entity(" ipsum "),
		tu.Entity("dolor").Strikethrough().Underline(),
		tu.Entity(" sit amet, consectetur adipiscing elit."),
		tu.Entity("\n"),
		tu.Entity("Praesent "),
		tu.Entity("sed mi blandit").Code(),
		tu.Entity(", tristique urna"),
		tu.Entity(" sit").TextLink("https://example.org"),
		tu.Entity(" amet,"),
		tu.Entity(" interdum ").Spoiler(),
		tu.Entity("justo."),
		tu.Entity("\n"),
		tu.Entity("\tMauris eget lobortis elit.").Pre(""),
		tu.Entity("\n"),
		tu.Entity(" Sed posuere pharetra\n justo ac commodo.").Code(),
		tu.Entity("\n"),
		tu.Entity("a ").Code(),
		tu.Entity("a"),
		tu.Entity("\n"),
		tu.Entity("a "),
		tu.Entity("a"),
	)

	// The subtest only checks that the request is accepted and a message comes back
	msg, err := bot.SendMessage(params)

	require.NoError(t, err)
	assert.NotNil(t, msg)
})
}

func TestSendPhoto(t *testing.T) {
Expand Down Expand Up @@ -133,7 +161,7 @@ func TestSendAudio(t *testing.T) {
t.Run("url", func(t *testing.T) {
msg, err := bot.SendAudio(&telego.SendAudioParams{
ChatID: tu.ID(chatID),
Audio: tu.FileFromURL("https://file-examples.com/storage/fe0e4ffeec64469f8a2ba23/2017/11/file_example_MP3_700KB.mp3"),
Audio: tu.FileFromURL(exampleMp3),
Caption: "SendAudio " + timeNow,
Thumbnail: telego.ToPtr(tu.File(open(img1Jpg))), // Expected to be not displayed
})
Expand Down
88 changes: 36 additions & 52 deletions telegoutil/message_entity.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,14 @@ package telegoutil
import (
"fmt"
"strings"
"unicode"
"unicode/utf16"

"github.com/mymmrac/telego"
)

// MessageEntityCollection represents text and slice of telego.MessageEntity associated with it
type MessageEntityCollection struct {
text string
entities []telego.MessageEntity
keepSpaces bool
text string
entities []telego.MessageEntity
}

// Entity creates new MessageEntityCollection with provided text and no entities
Expand Down Expand Up @@ -51,7 +48,7 @@ func (c MessageEntityCollection) SetOffset(offset int) {
func (c MessageEntityCollection) Mention() MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeMention,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
})
return c
}
Expand All @@ -60,7 +57,7 @@ func (c MessageEntityCollection) Mention() MessageEntityCollection {
func (c MessageEntityCollection) Hashtag() MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeHashtag,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
})
return c
}
Expand All @@ -69,7 +66,7 @@ func (c MessageEntityCollection) Hashtag() MessageEntityCollection {
func (c MessageEntityCollection) Cashtag() MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeCashtag,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
})
return c
}
Expand All @@ -78,7 +75,7 @@ func (c MessageEntityCollection) Cashtag() MessageEntityCollection {
func (c MessageEntityCollection) BotCommand() MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeBotCommand,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
})
return c
}
Expand All @@ -87,7 +84,7 @@ func (c MessageEntityCollection) BotCommand() MessageEntityCollection {
func (c MessageEntityCollection) URL() MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeURL,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
})
return c
}
Expand All @@ -96,7 +93,7 @@ func (c MessageEntityCollection) URL() MessageEntityCollection {
func (c MessageEntityCollection) Email() MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeEmail,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
})
return c
}
Expand All @@ -105,7 +102,7 @@ func (c MessageEntityCollection) Email() MessageEntityCollection {
func (c MessageEntityCollection) PhoneNumber() MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypePhoneNumber,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
})
return c
}
Expand All @@ -114,7 +111,7 @@ func (c MessageEntityCollection) PhoneNumber() MessageEntityCollection {
func (c MessageEntityCollection) Bold() MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeBold,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
})
return c
}
Expand All @@ -123,7 +120,7 @@ func (c MessageEntityCollection) Bold() MessageEntityCollection {
func (c MessageEntityCollection) Italic() MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeItalic,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
})
return c
}
Expand All @@ -132,7 +129,7 @@ func (c MessageEntityCollection) Italic() MessageEntityCollection {
func (c MessageEntityCollection) Underline() MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeUnderline,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
})
return c
}
Expand All @@ -141,7 +138,7 @@ func (c MessageEntityCollection) Underline() MessageEntityCollection {
func (c MessageEntityCollection) Strikethrough() MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeStrikethrough,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
})
return c
}
Expand All @@ -150,14 +147,13 @@ func (c MessageEntityCollection) Strikethrough() MessageEntityCollection {
func (c MessageEntityCollection) Spoiler() MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeSpoiler,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
})
return c
}

// Code assigns code entity and returns new collection
func (c MessageEntityCollection) Code() MessageEntityCollection {
c.keepSpaces = true
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeCode,
Length: UTF16TextLen(c.text),
Expand All @@ -167,7 +163,6 @@ func (c MessageEntityCollection) Code() MessageEntityCollection {

// Pre assigns pre entity with language and returns a new collection
func (c MessageEntityCollection) Pre(language string) MessageEntityCollection {
c.keepSpaces = true
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypePre,
Length: UTF16TextLen(c.text),
Expand All @@ -180,7 +175,7 @@ func (c MessageEntityCollection) Pre(language string) MessageEntityCollection {
func (c MessageEntityCollection) TextLink(url string) MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeTextLink,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
URL: url,
})
return c
Expand All @@ -190,7 +185,7 @@ func (c MessageEntityCollection) TextLink(url string) MessageEntityCollection {
func (c MessageEntityCollection) TextMention(user *telego.User) MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeTextMention,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
User: user,
})
return c
Expand All @@ -200,7 +195,7 @@ func (c MessageEntityCollection) TextMention(user *telego.User) MessageEntityCol
func (c MessageEntityCollection) TextMentionWithID(userID int64) MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeTextMention,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
User: &telego.User{ID: userID},
})
return c
Expand All @@ -210,53 +205,42 @@ func (c MessageEntityCollection) TextMentionWithID(userID int64) MessageEntityCo
func (c MessageEntityCollection) CustomEmoji(emojiID string) MessageEntityCollection {
c.entities = append(c.entities, telego.MessageEntity{
Type: telego.EntityTypeCustomEmoji,
Length: TrimmedUTF16TextLen(c.text),
Length: UTF16TextLen(c.text),
CustomEmojiID: emojiID,
})
return c
}

// MessageEntities converts entity collections into the text and slice of telego.MessageEntity associated with that text
// MessageEntities converts entity collections into the text and slice of [telego.MessageEntity] associated with that text
// Note: Entity length is not trimmed as described in docs on purpose: Telegram still handles all entities perfectly
// fine, but trimming their length actually limits what can be sent
func MessageEntities(entityCollections ...MessageEntityCollection) (string, []telego.MessageEntity) {
text := strings.Builder{}
var entities []telego.MessageEntity

for _, collection := range entityCollections {
spaceOffset := 0
collText := collection.Text()
if !collection.keepSpaces {
spaceOffset = leftSpaceCount(collText)
}

collection.SetOffset(UTF16TextLen(text.String()) + spaceOffset)
collection.SetOffset(UTF16TextLen(text.String()))
entities = append(entities, collection.Entities()...)

_, _ = text.WriteString(collText)
_, _ = text.WriteString(collection.Text())
}

return text.String(), entities
}

// leftSpaceCount returns the number of leading runes of the text that are whitespace (as defined by unicode.IsSpace)
func leftSpaceCount(text string) int {
	count := 0
	// Ranging over a string yields its runes in order, so the first non-space rune ends the count
	for _, r := range text {
		if !unicode.IsSpace(r) {
			return count
		}
		count++
	}
	return count
}

// UTF16TextLen returns length of a UTF-16 text
// Credit: https://core.telegram.org/api/entities#computing-entity-length
//
//nolint:gomnd
func UTF16TextLen(text string) int {
	// Entity offsets and lengths are measured in UTF-16 code units, so encode the runes and count the units
	codeUnits := utf16.Encode([]rune(text))
	return len(codeUnits)
}

// TrimmedUTF16TextLen returns length of a trimmed UTF-16 text
func TrimmedUTF16TextLen(text string) int {
return UTF16TextLen(strings.TrimSpace(text))
length := 0
for _, b := range []byte(text) {
if (b & 0xc0) != 0x80 {
length++
if b >= 0xf0 {
length++
}
}
}
return length
}

This comment has been minimized.

Copy link
@abakum

abakum May 4, 2023

Contributor

👍

10 changes: 5 additions & 5 deletions telegoutil/message_entity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ func TestEntityLength(t *testing.T) {
}, entities[0]) // "😅"
assert.Equal(t, telego.MessageEntity{
Type: telego.EntityTypeBold,
Offset: 3,
Length: 4,
Offset: 2,
Length: 6,
}, entities[1]) // " test "
assert.Equal(t, telego.MessageEntity{

This comment has been minimized.

Copy link
@abakum

abakum May 4, 2023

Contributor

"text":"😅* test *🌗","parse_mode":"MarkdownV2"}
"text":"\ud83d\ude05 test \ud83c\udf17","entities":[{"offset":3,"length":5,"type":"bold"}]}

sorry to bother you

Type: telego.EntityTypeItalic,
Expand All @@ -146,13 +146,13 @@ func TestEntityLength(t *testing.T) {
}, entities[2]) // "🌗"
assert.Equal(t, telego.MessageEntity{
Type: telego.EntityTypeBold,
Offset: 11,
Length: 7,
Offset: 10,
Length: 8,
}, entities[3]) // " Україна"
assert.Equal(t, telego.MessageEntity{
Type: telego.EntityTypeItalic,
Offset: 19,
Length: 2,
Length: 3,
}, entities[4]) // "\U0001FAE5 "
assert.Equal(t, telego.MessageEntity{
Type: telego.EntityTypeBold,
Expand Down

0 comments on commit 62b15f8

Please sign in to comment.