Skip to content

Commit

Permalink
added new TextstyleParser implementation
Browse files Browse the repository at this point in the history
* the old implementation was cumbersome to maintain and had some
  problems with nested formatting.

see #630
  • Loading branch information
bbernhard committed Dec 21, 2024
1 parent 54c884c commit f0a49e6
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 116 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ ARG GRAALVM_VERSION=21.0.0

ARG BUILD_VERSION_ARG=unset

FROM golang:1.22-bookworm AS buildcontainer
FROM golang:1.23-bookworm AS buildcontainer

ARG SIGNAL_CLI_VERSION
ARG LIBSIGNAL_CLIENT_VERSION
Expand Down
3 changes: 2 additions & 1 deletion src/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,8 @@ func (s *SignalClient) send(signalCliSendRequest ds.SignalCliSendRequest) (*Send

signalCliTextFormatStrings := []string{}
if signalCliSendRequest.TextMode != nil && *signalCliSendRequest.TextMode == "styled" {
signalCliSendRequest.Message, signalCliTextFormatStrings = utils.ParseMarkdownMessage(signalCliSendRequest.Message)
textstyleParser := utils.NewTextstyleParser(signalCliSendRequest.Message)
signalCliSendRequest.Message, signalCliTextFormatStrings = textstyleParser.Parse()
}

var groupId string = ""
Expand Down
224 changes: 124 additions & 100 deletions src/utils/textstyleparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package utils

import (
"strconv"
"unicode/utf16"
"unicode/utf8"
)

const (
Expand All @@ -18,119 +20,141 @@ const (
ItalicBegin = 1
ItalicEnd = 2
BoldBegin = 3
BoldEnd1 = 4
BoldEnd2 = 5
MonoSpaceBegin = 6
MonoSpaceEnd = 7
StrikethroughBegin = 8
StrikethroughEnd = 9
SpoilerBegin1 = 10
SpoilerBegin = 11
SpoilerEnd1 = 12
SpoilerEnd2 = 13
SpoilerBegin = 9
)

func getUtf16CharacterCount(s string) int {
stringLength := len(s)
if stringLength == 1 {
return 1
func getUtf16StringLength(s string) int {
runes := []rune(s) //turn string to slice

length := 0
for _, r := range runes {
length += utf16.RuneLen(r)
}
return stringLength / 2
return length
}

// TokenState describes a style marker that has been opened but not yet
// closed while parsing.
type TokenState struct {
	// BeginPos is the UTF-16 length of the output text at the moment the
	// marker was opened, i.e. the offset at which the styled range starts.
	BeginPos int
	// Token identifies the kind of marker (one of the *Begin constants).
	Token int
}

// Stack is a last-in, first-out collection of TokenState values; the last
// slice element is the top of the stack.
type Stack []TokenState

// Push places v on top of the stack.
func (s *Stack) Push(v TokenState) {
	current := *s
	*s = append(current, v)
}

// Pop removes the top element from the stack and returns it.
// It panics (index out of range) when the stack is empty, so callers are
// expected to check Empty first.
func (s *Stack) Pop() TokenState {
	last := len(*s) - 1
	top := (*s)[last]
	*s = (*s)[:last]
	return top
}

func getAdditionalCharacterCount(characterCount int) int {
additionalCharacterCount := characterCount - 1
if additionalCharacterCount > 0 {
return additionalCharacterCount
// Peek returns the top element without removing it.
// It panics (index out of range) when the stack is empty.
func (s *Stack) Peek() TokenState {
	return (*s)[len(*s)-1]
}

// Empty reports whether the stack holds no elements.
func (s *Stack) Empty() bool {
	return len(*s) == 0
}

func ParseMarkdownMessage(message string) (string, []string) {
textFormat := Normal
textFormatBegin := 0
textFormatLength := 0
numOfControlChars := 0
state := None
signalCliFormatStrings := []string{}
fullString := ""
lastChar := ""
additionalCharacterCount := 0

runes := []rune(message) //turn string to slice

for i, v := range runes { //iterate through rune
if v == '*' {
if state == ItalicBegin {
if lastChar == "*" {
state = BoldBegin
textFormat = Bold
textFormatBegin = i - numOfControlChars + additionalCharacterCount
textFormatLength = 0
additionalCharacterCount = 0
} else {
state = ItalicEnd
}
} else if state == None {
state = ItalicBegin
textFormat = Italic
textFormatBegin = i - numOfControlChars + additionalCharacterCount
textFormatLength = 0
} else if state == BoldBegin {
state = BoldEnd1
} else if state == BoldEnd1 {
state = BoldEnd2
}
numOfControlChars += 1
} else if v == '|' {
if state == None {
state = SpoilerBegin1
} else if state == SpoilerBegin1 && lastChar == "|" {
state = SpoilerBegin
textFormat = Spoiler
textFormatBegin = i - numOfControlChars + additionalCharacterCount
textFormatLength = 0
} else if state == SpoilerBegin {
state = SpoilerEnd1
} else if state == SpoilerEnd1 && lastChar == "|" {
state = SpoilerEnd2
}
numOfControlChars += 1
} else if v == '`' {
if state == None {
state = MonoSpaceBegin
textFormat = Monospace
textFormatBegin = i - numOfControlChars + additionalCharacterCount
textFormatLength = 0
} else if state == MonoSpaceBegin {
state = MonoSpaceEnd
}
numOfControlChars += 1
} else if v == '~' {
if state == None {
state = StrikethroughBegin
textFormat = Strikethrough
textFormatBegin = i - numOfControlChars + additionalCharacterCount
textFormatLength = 0
} else if state == StrikethroughBegin {
state = StrikethroughEnd
}
numOfControlChars += 1
// eof is the sentinel value returned by next once the whole input has been
// consumed. It is negative, so it cannot collide with a decoded rune.
const eof = -1

// TextstyleParser converts a message containing inline style markers
// (*, **, ~, `, ||) into plain text plus a list of style ranges.
type TextstyleParser struct {
	// input is the raw message being parsed.
	input string
	// pos is the byte offset in input of the next rune to read.
	pos int
	// width is the byte width of the rune most recently returned by next
	// (0 once the end of input has been reached).
	width int
	// tokens holds the style markers that are currently open.
	tokens Stack
	// fullString accumulates the message text with style markers removed.
	fullString string
	// signalCliFormatStrings collects the "start:length:STYLE" entries
	// emitted whenever a marker is closed.
	signalCliFormatStrings []string
}

// NewTextstyleParser returns a parser positioned at the start of input,
// with an empty token stack, empty output text and an empty (non-nil) list
// of format strings.
func NewTextstyleParser(input string) *TextstyleParser {
	parser := new(TextstyleParser) // pos, width and fullString start at their zero values
	parser.input = input
	parser.tokens = Stack{}
	parser.signalCliFormatStrings = make([]string, 0)
	return parser
}

// next consumes and returns the next rune of the input, advancing pos by
// the rune's byte width and remembering that width for backup. Once the
// input is exhausted it sets width to 0 and returns eof.
func (l *TextstyleParser) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}

	r, size := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = size
	l.pos += size
	return r
}

// backup un-reads the rune most recently returned by next by moving pos
// back by that rune's byte width. Since only a single width is remembered,
// it may be called at most once per call of next.
func (l *TextstyleParser) backup() {
	l.pos = l.pos - l.width
}

// peek reports the next rune of the input without consuming it: the rune
// is read with next and the read position is then moved back by its width.
// At the end of input it returns eof and leaves pos untouched.
func (l *TextstyleParser) peek() rune {
	upcoming := l.next()
	l.pos -= l.width // step back over the rune that was just read
	return upcoming
}

// handleToken processes one style marker of kind tokenType.
//
// If the innermost open marker has the same kind, the marker closes it and a
// "start:length:signalCliStylingType" entry is appended to the format
// strings, with start and length measured in UTF-16 code units of the output
// text. In every other case (no marker open, or the innermost one is of a
// different kind) the marker is treated as an opening marker and pushed
// together with the current output position.
func (l *TextstyleParser) handleToken(tokenType int, signalCliStylingType string) {
	currentPos := getUtf16StringLength(l.fullString)

	if l.tokens.Empty() || l.tokens.Peek().Token != tokenType {
		l.tokens.Push(TokenState{BeginPos: currentPos, Token: tokenType})
		return
	}

	beginPos := l.tokens.Pop().BeginPos
	l.signalCliFormatStrings = append(l.signalCliFormatStrings,
		strconv.Itoa(beginPos)+":"+strconv.Itoa(currentPos-beginPos)+":"+signalCliStylingType)
}

func (l *TextstyleParser) Parse() (string, []string) {
for {
c := l.next()
if c == eof {
break
}
lastChar = string(v)

if state == ItalicEnd || state == BoldEnd2 || state == MonoSpaceEnd || state == StrikethroughEnd || state == SpoilerEnd2 {
signalCliFormatStrings = append(signalCliFormatStrings, strconv.Itoa(textFormatBegin)+":"+strconv.Itoa(textFormatLength+additionalCharacterCount)+":"+textFormat)
state = None
textFormatBegin = 0
textFormatLength = 0
textFormat = Normal

nextRune := l.peek()

if c == '*' {
if nextRune == '*' { //Bold
l.next()
l.handleToken(BoldBegin, Bold)
} else { //Italic
l.handleToken(ItalicBegin, Italic)
}
} else if (c == '|') && (nextRune == '|') {
l.next()
l.handleToken(SpoilerBegin, Spoiler)
} else if c == '~' {
l.handleToken(StrikethroughBegin, Strikethrough)
} else if c == '`' {
l.handleToken(MonoSpaceBegin, Monospace)
} else {
l.fullString += string(c)
}
}

return fullString, signalCliFormatStrings
return l.fullString, l.signalCliFormatStrings
}
55 changes: 41 additions & 14 deletions src/utils/textstyleparser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,80 +15,107 @@ func expectFormatStringsEqual(t *testing.T, formatStrings1 []string, formatStrin
}
}

func TestSimpleMessage1(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("*italic*")
// TestSimpleItalicMessage checks that a message consisting only of an
// italic span is stripped of its markers and reported as one ITALIC range.
func TestSimpleItalicMessage(t *testing.T) {
	text, formats := NewTextstyleParser("*italic*").Parse()
	expectMessageEqual(t, text, "italic")
	expectFormatStringsEqual(t, formats, []string{"0:6:ITALIC"})
}

// TestSimpleBoldMessage checks that a message consisting only of a bold
// span is stripped of its markers and reported as one BOLD range.
func TestSimpleBoldMessage(t *testing.T) {
	text, formats := NewTextstyleParser("**bold**").Parse()
	expectMessageEqual(t, text, "bold")
	expectFormatStringsEqual(t, formats, []string{"0:4:BOLD"})
}

func TestSimpleMessage(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("*This is a italic message*")
textstyleParser := NewTextstyleParser("*This is a italic message*")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "This is a italic message")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:24:ITALIC"})
}

func TestBoldAndItalicMessage(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("This is a **bold** and *italic* message")
textstyleParser := NewTextstyleParser("This is a **bold** and *italic* message")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "This is a bold and italic message")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:4:BOLD", "19:6:ITALIC"})
}

func TestTwoBoldFormattedStrings(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("This is a **bold** and another **bold** message")
textstyleParser := NewTextstyleParser("This is a **bold** and another **bold** message")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "This is a bold and another bold message")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:4:BOLD", "27:4:BOLD"})
}

func TestStrikethrough(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("This is a ~strikethrough~ and a **bold** message")
textstyleParser := NewTextstyleParser("This is a ~strikethrough~ and a **bold** message")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "This is a strikethrough and a bold message")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:13:STRIKETHROUGH", "30:4:BOLD"})
}

func TestMonospace(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("This is a `monospace` and a **bold** message")
textstyleParser := NewTextstyleParser("This is a `monospace` and a **bold** message")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "This is a monospace and a bold message")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:9:MONOSPACE", "26:4:BOLD"})
}

func TestMulticharacterEmoji(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("👋abcdefg")
textstyleParser := NewTextstyleParser("👋abcdefg")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "👋abcdefg")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{})
}

func TestMulticharacterEmojiWithBoldText(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("👋**abcdefg**")
textstyleParser := NewTextstyleParser("👋**abcdefg**")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "👋abcdefg")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"2:7:BOLD"})
}

func TestMultipleMulticharacterEmoji(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("👋🏾abcdefg")
textstyleParser := NewTextstyleParser("👋🏾abcdefg")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "👋🏾abcdefg")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{})
}

func TestMultipleMulticharacterEmojiWithBoldText(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("👋🏾**abcdefg**")
textstyleParser := NewTextstyleParser("👋🏾**abcdefg**")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "👋🏾abcdefg")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"4:7:BOLD"})
}

func TestMulticharacterEmojiWithBoldText2(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("Test 👦🏿 via **signal** API")
textstyleParser := NewTextstyleParser("Test 👦🏿 via **signal** API")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "Test 👦🏿 via signal API")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"14:6:BOLD"})
}

func TestSpoiler(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("||this is a spoiler||")
textstyleParser := NewTextstyleParser("||this is a spoiler||")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "this is a spoiler")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:17:SPOILER"})
}

func TestSpoiler1(t *testing.T) {
message, signalCliFormatStrings := ParseMarkdownMessage("||this is a spoiler|| and another ||spoiler||")
textstyleParser := NewTextstyleParser("||this is a spoiler|| and another ||spoiler||")
message, signalCliFormatStrings := textstyleParser.Parse()
expectMessageEqual(t, message, "this is a spoiler and another spoiler")
expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:17:SPOILER", "30:7:SPOILER"})
}

// TestBoldTextInsideSpoiler checks nested formatting: a bold span that
// fills a spoiler span. The inner (bold) range is reported first because it
// is closed first.
func TestBoldTextInsideSpoiler(t *testing.T) {
	text, formats := NewTextstyleParser("||**this is a bold text inside a spoiler**||").Parse()
	expectMessageEqual(t, text, "this is a bold text inside a spoiler")
	expectFormatStringsEqual(t, formats, []string{"0:36:BOLD", "0:36:SPOILER"})
}

0 comments on commit f0a49e6

Please sign in to comment.