Merge pull request #87 from m4rc1e/user-page
diffbrowsers: render a html page for user wordlists
m4rc1e authored Aug 18, 2023
2 parents 599bb00 + f522663 commit c1cbb82
Showing 9 changed files with 228 additions and 81 deletions.
16 changes: 13 additions & 3 deletions src/diffenator2/__init__.py
@@ -25,12 +25,13 @@ def ninja_proof(
filter_styles: str = "",
characters: str = ".*",
pt_size: int = 20,
user_wordlist: str = "",
):
if not os.path.exists(out):
os.mkdir(out)

if filter_styles:
_ninja_proof(fonts, out, imgs, styles, filter_styles, characters, pt_size)
_ninja_proof(fonts, out, imgs, styles, filter_styles, characters, pt_size, user_wordlist)
return

font_styles = get_font_styles(fonts, styles)
@@ -40,7 +41,7 @@ def ninja_proof(
o = os.path.join(out, filter_styles.replace("|", "-"))
if not os.path.exists(o):
os.mkdir(o)
_ninja_proof(fonts, o, imgs, styles, filter_styles, characters, pt_size)
_ninja_proof(fonts, o, imgs, styles, filter_styles, characters, pt_size, user_wordlist)


def _ninja_proof(
@@ -51,6 +52,7 @@ def _ninja_proof(
filter_styles: str = "",
characters=".*",
pt_size: int = 20,
user_wordlist = "",
):
w = Writer(open(NINJA_BUILD_FILE, "w", encoding="utf8"))
w.comment("Rules")
@@ -62,6 +64,8 @@
cmd += " --imgs"
if filter_styles:
cmd += f' --filter-styles "$filters"'
if user_wordlist:
cmd += f' --user-wordlist "$user_wordlist"'
w.rule("proofing", cmd)
w.newline()

@@ -78,6 +82,8 @@
variables["imgs"] = imgs
if filter_styles:
variables["filters"] = filter_styles
if user_wordlist:
variables["user_wordlist"] = user_wordlist
w.build(out, "proofing", variables=variables)
w.close()
ninja._program("ninja", [])
@@ -172,13 +178,15 @@ def _ninja_diff(
db_cmd += " --imgs"
if filter_styles:
db_cmd += ' --filter-styles "$filters"'
if user_wordlist:
db_cmd += ' --user-wordlist "$user_wordlist"'
w.rule("diffbrowsers", db_cmd)
w.newline()

w.comment("Run diffenator VF")
diff_cmd = f'_diffenator $font_before $font_after -t $threshold -o $out -ch "$characters"'
if user_wordlist:
diff_cmd += " --user-wordlist $user_wordlist"
diff_cmd += ' --user-wordlist "$user_wordlist"'
diff_inst_cmd = diff_cmd + " --coords $coords"
w.rule("diffenator", diff_cmd)
w.rule("diffenator-inst", diff_inst_cmd)
@@ -198,6 +206,8 @@
)
if filter_styles:
db_variables["filters"] = filter_styles
if user_wordlist:
db_variables["user_wordlist"] = user_wordlist
w.build(diffbrowsers_out, "diffbrowsers", variables=db_variables)
if diffenator:
matcher = FontMatcher(fonts_before, fonts_after)
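Note on the wiring above: the `--user-wordlist "$user_wordlist"` placeholder is only appended to a rule when a wordlist is supplied, and the concrete path is injected per build through the variables dict. A minimal sketch of that pattern, assuming a `ninja.ninja_syntax.Writer` import (not shown in this hunk) and with the base command and file names invented for illustration:

```python
import io
from ninja.ninja_syntax import Writer  # assumed import path; the hunk above doesn't show it

user_wordlist = "my_words.txt"  # hypothetical path handed down from ninja_proof()

buf = io.StringIO()
w = Writer(buf)

cmd = "_diffbrowsers proof $fonts -o $out"  # stand-in for the real proofing command
if user_wordlist:
    cmd += ' --user-wordlist "$user_wordlist"'  # placeholder, expanded per build statement
w.rule("proofing", cmd)
w.newline()

variables = {"fonts": "MyFont[wght].ttf", "out": "out"}
if user_wordlist:
    variables["user_wordlist"] = user_wordlist  # concrete value substituted for $user_wordlist
w.build("out", "proofing", variables=variables)

print(buf.getvalue())  # the generated build.ninja fragment
```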
3 changes: 2 additions & 1 deletion src/diffenator2/__main__.py
@@ -31,6 +31,7 @@ def main(**kwargs):
default="instances",
help="Show font instances, cross product or master styles"
)
universal_options_parser.add_argument("--user-wordlist", default=None)
proof_parser = subparsers.add_parser(
"proof",
parents=[universal_options_parser],
@@ -44,7 +45,6 @@
)
diff_parser.add_argument("--fonts-before", "-fb", nargs="+", required=True)
diff_parser.add_argument("--fonts-after", "-fa", nargs="+", required=True)
diff_parser.add_argument("--user-wordlist", default=None)
diff_parser.add_argument("--no-diffenator", default=False, action="store_true")
diff_parser.add_argument("--threshold", "-t", type=float, default=THRESHOLD)
args = parser.parse_args()
@@ -63,6 +63,7 @@ def main(**kwargs):
filter_styles=args.filter_styles,
characters=args.characters,
pt_size=args.pt_size,
user_wordlist=args.user_wordlist,
)
elif args.command == "diff":
fonts_before = [DFont(f) for f in args.fonts_before]
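The `--user-wordlist` option moves from the diff-only parser onto the shared `universal_options_parser`, so both the `proof` and `diff` subcommands now accept it. A stripped-down sketch of that argparse parents pattern (subcommand names kept, all other options omitted):

```python
import argparse

# Shared options live on a parent parser with add_help disabled...
universal = argparse.ArgumentParser(add_help=False)
universal.add_argument("--user-wordlist", default=None)

# ...and every subcommand inherits them via parents=[...].
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="command", required=True)
subparsers.add_parser("proof", parents=[universal])
subparsers.add_parser("diff", parents=[universal])

args = parser.parse_args(["proof", "--user-wordlist", "words.txt"])
print(args.command, args.user_wordlist)  # proof words.txt
```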
7 changes: 6 additions & 1 deletion src/diffenator2/_diffbrowsers.py
@@ -66,6 +66,9 @@ def main():
)
universal_options_parser.add_argument("--filter-styles", default=None)
universal_options_parser.add_argument("--characters", "-ch", default=".*")
universal_options_parser.add_argument(
"--user-wordlist", help="File of strings to visually compare", default=None
)

proof_parser = subparsers.add_parser(
"proof",
@@ -100,7 +103,8 @@ def main():
args.out,
filter_styles=args.filter_styles,
characters=characters,
pt_size=args.pt_size
pt_size=args.pt_size,
user_wordlist=args.user_wordlist,
)

elif args.command == "diff":
@@ -116,6 +120,7 @@
filter_styles=args.filter_styles,
characters=characters,
pt_size=args.pt_size,
user_wordlist=args.user_wordlist,
)

if args.imgs:
14 changes: 12 additions & 2 deletions src/diffenator2/html.py
@@ -10,6 +10,7 @@
from glyphsets import GFTestData
import re
from pathlib import Path
from diffenator2.shape import parse_wordlist


WIDTH_CLASS_TO_CSS = {
@@ -89,14 +90,15 @@ def filtered_font_sample_text(ttFont, characters):
return " ".join(sample_text)


def proof_rendering(styles, templates, dst="out", filter_styles=None, characters=set(), pt_size=20):
def proof_rendering(styles, templates, dst="out", filter_styles=None, characters=set(), pt_size=20, user_wordlist=None):
ttFont = styles[0].font.ttFont
font_faces = set(style.font.css_font_face for style in styles)
font_styles = [style.css_font_style for style in styles]
sample_text = filtered_font_sample_text(ttFont, characters)
test_strings = GFTestData.test_strings_in_font(ttFont)
characters = characters or [chr(c) for c in ttFont.getBestCmap()]
characters = list(sorted(characters))
user_words = None if not user_wordlist else parse_wordlist(user_wordlist)
_package(
templates,
dst,
@@ -106,10 +108,13 @@ def proof_rendering(styles, templates, dst="out", filter_styles=None, characters
characters=characters,
test_strings=test_strings,
pt_size=pt_size,
user_strings=user_words
)


def diff_rendering(matcher, templates, dst="out", filter_styles=None, characters=set(), pt_size=20):

def diff_rendering(matcher, templates, dst="out", filter_styles=None, characters=set(), pt_size=20, user_wordlist=None):
dFont = matcher.old_styles[0].font
ttFont = matcher.old_styles[0].font.ttFont
font_faces_old = set(style.font.css_font_face for style in matcher.old_styles)
font_styles_old = [style.css_font_style for style in matcher.old_styles]
@@ -121,6 +126,7 @@ def diff_rendering(matcher, templates, dst="out", filter_styles=None, characters
test_strings = GFTestData.test_strings_in_font(ttFont)
characters = characters or [chr(c) for c in ttFont.getBestCmap()]
characters = list(sorted(characters))
user_words = None if not user_wordlist else parse_wordlist(user_wordlist)
_package(
templates,
dst,
Expand All @@ -133,6 +139,7 @@ def diff_rendering(matcher, templates, dst="out", filter_styles=None, characters
characters=characters,
test_strings=test_strings,
pt_size=pt_size,
user_strings=user_words,
)


@@ -182,6 +189,9 @@ def _package(templates, dst, **kwargs):

# write docs
for template_fp in templates:
if "user_strings" in template_fp:
if kwargs["user_strings"] == None:
continue
env = Environment(
loader=FileSystemLoader(os.path.dirname(template_fp)),
)
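Two behaviours are added above: the parsed wordlist reaches the templates as `user_strings`, and `_package` skips any template whose path contains "user_strings" when no wordlist was supplied, so the extra page is only rendered on demand. An illustrative render of such a template; the markup and class names are invented (the repo's actual `diffbrowsers_user_strings.html` may look quite different) and only the `user_strings` variable name comes from the diff:

```python
from jinja2 import Template


class FakeWord:
    """Stand-in for the Word objects produced by parse_wordlist."""
    def __init__(self, string):
        self.string = string


# Invented markup for illustration only.
template = Template("""<html><body>
{% for word in user_strings %}
  <div class="user-string">{{ word.string }}</div>
{% endfor %}
</body></html>""")

print(template.render(user_strings=[FakeWord("hamburgefonstiv"), FakeWord("Handgloves")]))
```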
6 changes: 5 additions & 1 deletion src/diffenator2/screenshot.py
@@ -114,7 +114,11 @@ def __del__(self):
def screenshot_dir(
dir_fp: str,
out: str,
skip=["diffbrowsers_proofer.html", "diffenator.html"],
skip=[
"diffbrowsers_proofer.html",
"diffenator.html",
"diffbrowsers_user_strings.html"
],
):
"""Screenshot a folder of html docs. Walk the damn things"""
if not os.path.exists(out):
132 changes: 59 additions & 73 deletions src/diffenator2/shape.py
@@ -7,10 +7,11 @@
import os
from diffenator2 import THRESHOLD
from diffenator2.renderer import PixelDiffer
from diffenator2.template_elements import WordDiff, Glyph, GlyphDiff
from diffenator2.template_elements import Word, WordDiff, Glyph, GlyphDiff
from pkg_resources import resource_filename
import tqdm
from diffenator2.segmenting import textSegments
from collections import defaultdict


# Hashing strategies for elements of a Harfbuzz buffer
@@ -37,23 +38,6 @@ def gid_pos_hash(info, pos):
ot_to_dir = {None: "ltr", "arab": "rlt", "hebr": "rtl"}


@dataclass
class Word:
string: str
hb: str

@classmethod
def from_buffer(cls, word, buffer, hash_func=gid_pos_hash):
infos = buffer.glyph_infos
pos = buffer.glyph_positions
hb = "".join(hash_func(i, j) for i, j in zip(infos, pos))
return cls(word, hb)

def __eq__(self, other):
return (self.string, hb) == (other.string, other.hb)

def __hash__(self):
return hash((self.string, self.hb))


@dataclass
@@ -118,6 +102,20 @@ def test_font_words(font_a, font_b, skip_glyphs=set(), threshold=THRESHOLD):
return res


def parse_wordlist(fp):
from diffenator2.shape import Word as TemplateWord
results = []
with open(fp, encoding="utf8") as doc:
lines = doc.read().split("\n")
for line in lines:
items = line.split(",")
try:
results.append(TemplateWord(string=items[0], script=items[1], lang=items[2], ot_features={k: True for k in items[3:]}))
except IndexError:
results.append(TemplateWord(string=items[0], script="dflt", lang=None, ot_features={}))
return results


def test_words(
word_file,
font_a,
@@ -127,74 +125,62 @@ def test_words(
threshold=THRESHOLD,
):
res = set()
from collections import defaultdict

seen_gids = defaultdict(int)

differ = PixelDiffer(font_a, font_b)
with open(word_file, encoding="utf8") as doc:
sentences = doc.read().split("\n")
print(f"testing {len(sentences)} words")
word_total = len(sentences)
for i, line in tqdm.tqdm(enumerate(sentences), total=word_total):
items = line.split(",")
try:
sentence, script, lang, features = items[0], items[1], items[2], items[3:]
# for wordlists which just contain sentences
except IndexError:
sentence, script, lang, features = items[0], "dflt", None, []
features = {k: True for k in features}
word_list = parse_wordlist(word_file)
for i, word in tqdm.tqdm(enumerate(word_list), total=len(word_list)):
differ.set_script(word.script)
differ.set_lang(word.lang)
differ.set_features(word.ot_features)

differ.set_script(script)
differ.set_lang(lang)
differ.set_features(features)
# split sentences into individual script segments. This mimmics the
# same behaviour as dtp apps, web browsers etc
for segment, script, _, _, in textSegments(word.string)[0]:

# split sentences into individual script segments. This mimmics the
# same behaviour as dtp apps, web browsers etc
for segment, script, _, _, in textSegments(sentence)[0]:
if any(c.string in segment for c in skip_glyphs):
continue

if any(c.string in segment for c in skip_glyphs):
continue
if not segment:
continue

if not segment:
continue
buf_b = differ.renderer_b.shape(segment)
word_b = Word.from_buffer(segment, buf_b)

buf_b = differ.renderer_b.shape(segment)
word_b = Word.from_buffer(segment, buf_b)
gid_hashes = [hash_func(i, j) for i, j in zip(buf_b.glyph_infos, buf_b.glyph_positions)]
# I'm not entirely convinced this is a valid test; but it seems to
# work and speeds things up a lot...
if all(gid_hash in seen_gids for gid_hash in gid_hashes):
continue

gid_hashes = [hash_func(i, j) for i, j in zip(buf_b.glyph_infos, buf_b.glyph_positions)]
# I'm not entirely convinced this is a valid test; but it seems to
# work and speeds things up a lot...
if all(gid_hash in seen_gids for gid_hash in gid_hashes):
continue
buf_a = differ.renderer_a.shape(segment)
word_a = Word.from_buffer(segment, buf_a)

buf_a = differ.renderer_a.shape(segment)
word_a = Word.from_buffer(segment, buf_a)
# skip any words which cannot be shaped correctly
if any([g.codepoint == 0 for g in buf_a.glyph_infos+buf_b.glyph_infos]):
continue

# skip any words which cannot be shaped correctly
if any([g.codepoint == 0 for g in buf_a.glyph_infos+buf_b.glyph_infos]):
continue
pc, diff_map = differ.diff(segment)

pc, diff_map = differ.diff(segment)

for gid_hash in gid_hashes:
seen_gids[gid_hash] = True

if pc < threshold:
continue
res.add(
(
pc,
WordDiff(
sentence,
word_a.hb,
word_b.hb,
tuple(features.keys()),
ot_to_html_lang.get((script, lang)),
ot_to_dir.get(script, None),
"%.2f" % pc,
),
)
for gid_hash in gid_hashes:
seen_gids[gid_hash] = True

if pc < threshold:
continue
res.add(
(
pc,
WordDiff(
word.string,
word_a.hb,
word_b.hb,
tuple(word.ot_features.keys()),
ot_to_html_lang.get((script, word.lang)),
ot_to_dir.get(script, None),
"%.2f" % pc,
),
)
)
return [w[1] for w in sorted(res, key=lambda k: k[0], reverse=True)]
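For reference, the new `parse_wordlist` helper above accepts one entry per line, either a bare string or a comma-separated `string,script,lang,feature,...` record; bare strings fall back to the "dflt" script with no language or features. A small usage sketch — the file name and contents are hypothetical:

```python
from diffenator2.shape import parse_wordlist

# Write a tiny wordlist: a bare word and a fully qualified record.
with open("my_words.txt", "w", encoding="utf8") as doc:
    doc.write("hamburgefonstiv\n")      # bare word -> script "dflt", no lang, no features
    doc.write("Offices,latn,en,smcp")   # word, script tag, language, OpenType feature(s)

for word in parse_wordlist("my_words.txt"):
    print(word.string, word.script, word.lang, list(word.ot_features))
# hamburgefonstiv dflt None []
# Offices latn en ['smcp']
```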


