Skip to content

Commit

Permalink
The search ignore diacritics
Browse files Browse the repository at this point in the history
  • Loading branch information
BostX committed Jan 28, 2024
1 parent 38b9a17 commit c19b30b
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 22 deletions.
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ $(eval isGuix := $(shell command -v guix > /dev/null 2>&1 && echo t || echo f))
$(eval destDir := $(shell [ "${isGuix}" = t ] && echo $${dotf}/bin || echo ~/bin))
orgRoamLink := ${HOME}/org-roam

all: show-environment clean install-deps
all: show-environment clean install-deps test
[ ! -L "${orgRoamLink}" ] && ln -s ${dev}/notes/notes "${orgRoamLink}" || :
[ ! -d ${destDir} ] && mkdir ${destDir} || :
raco exe -o ${destDir}/search-notes main.rkt
Expand All @@ -32,3 +32,5 @@ install-deps:
# Remove the compiled-bytecode directories of the package and its scribblings.
# Declared phony so that a stray file named `clean` cannot mask the rule.
.PHONY: clean
clean:
	rm -rf ./compiled/ ./scribblings/compiled/

# Run `raco test` on the current directory.
# Declared phony so that a file or directory named `test` cannot make the
# target look up to date and silently skip the test run.
.PHONY: test
test:
	raco test ./
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Installation: run `make`. See also Makefile.
# Fix the 'loading code: version mismatch' error
rm -rf ./compiled/ ./scribblings/compiled/
raco pkg install --auto ansi-color
# raco test ./ # optionally
isGuix=$(command -v guix > /dev/null 2>&1 && echo t || echo f)
[ ${isGuix} = t ] && destDir=$dotf/bin || destDir=~/bin
[ ! -d ${destDir} ] && mkdir $destDir || :
Expand Down
96 changes: 76 additions & 20 deletions main.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@
;; TODO try #lang hackett - Haskell + Racket
;; https://lexi-lambda.github.io/hackett/index.html

(module+ test
(require rackunit
racket/match))

;; Notice
;; To install (from within the package directory):
;; $ raco pkg install
Expand All @@ -27,28 +23,88 @@
;; See the current version of the racket style guide here:
;; http://docs.racket-lang.org/style/index.html

;; Code here



(module+ test
;; Any code in this `test` submodule runs when this file is run using DrRacket
;; or with `raco test`. The code here does not run when this file is
;; required by another module.

(check-equal? (+ 2 2) 4))

(module+ main
;; (Optional) main submodule. Put code here if you need it to be executed when
;; this file is run using DrRacket or the `racket` executable. The code here
;; does not run when this file is required by another module. Documentation:
;; http://docs.racket-lang.org/guide/Module_Syntax.html#%28part._main-and-test%29

(provide regexp-normalize-match* regexp-normalize-split)

(require
;; (prefix-in com: "common.rkt")
"notes.rkt" ;; is used indeed
"notes-reader.rkt"
ansi-color)
ansi-color

;; for string-replace
racket/string
)

;; Maps a plain one-character string to a regexp bracket expression that
;; matches that letter together with its accented (precomposed) variants.
;; Characters without an entry are meant to be used literally by the callers
;; (they look keys up with the character itself as the default value).
;;
;; Every variant is a single precomposed code point, so the text being
;; searched has to be in a composed normalization form for these classes to
;; match.
;;
;; The table is a strict superset of the original Slovak/French/German set:
;; it additionally covers common Czech (ě, ů), Polish (ą, ć, ę, ń, ś, ź, ż),
;; Hungarian (ő, ű), Spanish/Portuguese (ñ, ã, õ), Nordic (å) and
;; macron/cedilla/grave forms.
(define diacritic-map
  (hash "a" "[aáäàâæãåāą]"
        "c" "[cčçć]"
        "d" "[dď]"
        "e" "[eéèêëěēę]"
        "i" "[iíîïìī]"
        "l" "[lĺľ]"
        "n" "[nňñń]"
        "o" "[oóôöœòõőō]"
        "r" "[rŕř]"
        "s" "[sšśş]"
        "t" "[tť]"
        "u" "[uúûüùůűū]"
        "y" "[yýÿ]"
        "z" "[zžźż]"
        "A" "[AÁÄÀÂÆÃÅĀĄ]"
        "C" "[CČÇĆ]"
        "D" "[DĎ]"
        "E" "[EÉÈÊËĚĒĘ]"
        "I" "[IÍÎÏÌĪ]"
        "L" "[LĹĽ]"
        "N" "[NŇÑŃ]"
        "O" "[OÓÔÖŒÒÕŐŌ]"
        "R" "[RŔŘ]"
        "S" "[SŠŚŞ]"
        "T" "[TŤ]"
        "U" "[UÚÛÜÙŮŰŪ]"
        "Y" "[YÝŸ]"
        "Z" "[ZŽŹŻ]"
        ;; German sharp S: an identity entry, kept so the key stays present.
        "ß" "ß"))

;; Returns `text` converted to Unicode Normalization Form C (NFC).
;;
;; NFC first decomposes every character canonically and then recomposes the
;; pieces, so a base letter followed by a combining accent (e.g. "e" plus
;; U+0301) comes out as the single precomposed character "é". Text that is
;; already composed is returned with the same contents.
(define string-normalize
  (lambda (text)
    (string-normalize-nfc text)))

;; Private helper shared by `regexp-normalize-match*` and
;; `regexp-normalize-split`.
;;
;; Compiles the regexp source string `rxs` into a regexp in which every plain
;; letter that has an entry in `diacritic-map` is replaced by the bracket
;; expression listing its accented variants.
;;
;; The pattern is NFC-normalized first, the same way the target text is, so an
;; accent typed in decomposed form can still match.
;;
;; Pieces of regexp syntax are copied verbatim instead of being expanded,
;; because inserting a bracket expression there would change the meaning of
;; the pattern or make it invalid:
;;   - a backslash together with the character it escapes,
;;   - `\p{...}` / `\P{...}` property classes,
;;   - everything inside an existing bracket expression `[...]`,
;;   - the mode flags following `(?`, as in `(?i:...)`.
;; Whenever the scanner is unsure it copies text unchanged, so the worst case
;; is a part of the pattern that stays diacritic-sensitive.
;;
;; Raises whatever `regexp` raises when the resulting pattern is invalid.
(define (diacritic-insensitive-regexp rxs)
  (define pat (string-normalize rxs))
  (define len (string-length pat))
  ;; Character at index `i`, or #f when `i` is past the end.
  (define (char-at i)
    (and (< i len) (string-ref pat i)))
  ;; Index just past the first `ch` at or after `i`, or `len` if there is none.
  (define (skip-past ch i)
    (cond ((>= i len) len)
          ((char=? (string-ref pat i) ch) (add1 i))
          (else (skip-past ch (add1 i)))))
  ;; Index of the first character at or after `i` that is not a mode flag.
  (define (skip-mode-flags i)
    (if (and (< i len) (memv (string-ref pat i) '(#\i #\s #\m #\-)))
        (skip-mode-flags (add1 i))
        i))
  (let loop ((i 0) (in-class? #f) (acc '()))
    ;; Copy pat[i, j) unchanged and continue scanning at `j`.
    (define (copy-to j class?)
      (loop j class? (cons (substring pat i j) acc)))
    (define ch (char-at i))
    (cond
      ((not ch)
       (regexp (string-append* (reverse acc))))
      ((char=? ch #\\)
       (if (and (memv (char-at (add1 i)) '(#\p #\P))
                (eqv? (char-at (+ i 2)) #\{))
           (copy-to (skip-past #\} (+ i 3)) in-class?)
           (copy-to (min len (+ i 2)) in-class?)))
      (in-class?
       ;; `]` closes the bracket expression; anything else stays a member.
       (copy-to (add1 i) (not (char=? ch #\]))))
      ((char=? ch #\[)
       ;; A leading `^` and a `]` directly after the opener belong to the
       ;; bracket expression and must not be taken for its end.
       (let* ((j (add1 i))
              (j (if (eqv? (char-at j) #\^) (add1 j) j))
              (j (if (eqv? (char-at j) #\]) (add1 j) j)))
         (copy-to j #t)))
      ((and (char=? ch #\() (eqv? (char-at (add1 i)) #\?))
       (copy-to (skip-mode-flags (+ i 2)) #f))
      (else
       (let ((s (string ch)))
         (loop (add1 i) #f (cons (hash-ref diacritic-map s s) acc)))))))

;; Like `regexp-match*`, but ignoring diacritics on the letters of the pattern.
;;
;; rxs        - regexp source string
;; target-str - text to search; it is NFC-normalized before matching
;;
;; Returns the list of matched substrings, taken from the normalized target.
(define (regexp-normalize-match* rxs target-str)
  (regexp-match* (diacritic-insensitive-regexp rxs)
                 (string-normalize target-str)))

;; Like `regexp-split`, but ignoring diacritics on the letters of the pattern.
;;
;; rxs        - regexp source string
;; target-str - text to split; it is NFC-normalized before splitting
;;
;; Returns the list of substrings of the normalized target that lie between
;; the matches.
(define (regexp-normalize-split rxs target-str)
  (regexp-split (diacritic-insensitive-regexp rxs)
                (string-normalize target-str)))

;; Two parameters, each initialised to the empty string.
;; NOTE(review): judging by the names they carry the search pattern and the
;; file path(s) to search; the code that sets and reads them is not visible
;; here - confirm against the command-line handling.
(define pattern-param (make-parameter ""))
(define filepaths-param (make-parameter ""))
Expand Down Expand Up @@ -164,10 +220,10 @@ racket main.rkt -e \"/home/bost/der/search-notes/main.rkt /home/bost/der/search-
(displayln first-file-string))
(colorize colorize-matches?
display-fn
(regexp-split regexp-split-match
relevant-file-strings-joined)
(regexp-match* regexp-split-match
relevant-file-strings-joined))
(regexp-normalize-split
regexp-split-match relevant-file-strings-joined)
(regexp-normalize-match*
regexp-split-match relevant-file-strings-joined))
(printf "\n\n")))
relevant-file-strings)))
(curry map
Expand Down
3 changes: 2 additions & 1 deletion scribblings/search-notes.scrbl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#lang scribble/manual
@require[@for-label[search-notes
@require[@for-label[
;; search-notes
racket/base]]

@title{search-notes}
Expand Down
31 changes: 31 additions & 0 deletions test.rkt
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#lang racket

(module+ test
  (require
   ;; main
   rackunit
   racket/match
   "main.rkt"
   (submod "main.rkt" main)
   )

  ;; Any code in this `test` submodule runs when this file is run using DrRacket
  ;; or with `raco test`. The code here does not run when this file is
  ;; required by another module.

  ;; (test-case "Test for add function"
  ;;   (check-equal? (+ 2 2) 4))

  ;; A plain-ASCII pattern must find the same places in a text with diacritics
  ;; as the ordinary regexp functions find in the accent-free version of it.
  (test-case "Test diacritics"
    (define sdiacr "jkl \n abčd \n xyz \n 123 \n ábc \n 567")
    (define splain "jkl \n abcd \n xyz \n 123 \n abc \n 567")
    (define rxs "abc")

    (check-equal? (length (regexp-normalize-match* rxs sdiacr))
                  (length (regexp-match* rxs splain)))
    ;; The matches keep their accents. The expected strings are written with
    ;; escapes so that they are precomposed regardless of how this file is
    ;; saved: "ab" + U+010D (č) and U+00E1 (á) + "bc".
    (check-equal? (regexp-normalize-match* rxs sdiacr)
                  (list "ab\u010D" "\u00E1bc"))
    (check-equal? (regexp-normalize-split rxs sdiacr)
                  (regexp-split rxs splain))
    (check-equal? (regexp-normalize-split rxs splain)
                  (regexp-split rxs splain))))

0 comments on commit c19b30b

Please sign in to comment.