From b327f631a71993de11e1f18331e7977e59a49c28 Mon Sep 17 00:00:00 2001 From: Spikey Date: Fri, 10 May 2024 20:36:58 +0200 Subject: [PATCH 01/27] First steps with Idefics and a general experimentation workflow --- .gitignore | 9 +- _testbed/README.md | 55 + _testbed/experiments.ipynb | 6810 +++++++++++++++++++++++++++++++++ _testbed/experiments.py | 1995 ++++++++++ _testbed/helpers.ipynb | 835 ++++ _testbed/helpers.py | 372 ++ _testbed/ocr_idefics.py | 184 + _testbed/ocr_metric.ipynb | 276 ++ _testbed/ocr_metric.py | 64 + _testbed/requirements.txt | 5 + _testbed/test_idefics.ipynb | 1497 ++++++++ _testbed/test_tesseract.ipynb | 638 +++ 12 files changed, 12735 insertions(+), 5 deletions(-) create mode 100644 _testbed/README.md create mode 100644 _testbed/experiments.ipynb create mode 100644 _testbed/experiments.py create mode 100644 _testbed/helpers.ipynb create mode 100644 _testbed/helpers.py create mode 100644 _testbed/ocr_idefics.py create mode 100644 _testbed/ocr_metric.ipynb create mode 100644 _testbed/ocr_metric.py create mode 100644 _testbed/requirements.txt create mode 100644 _testbed/test_idefics.ipynb create mode 100644 _testbed/test_tesseract.ipynb diff --git a/.gitignore b/.gitignore index c94304f9..8fc7b143 100644 --- a/.gitignore +++ b/.gitignore @@ -167,9 +167,8 @@ cython_debug/ .idea/ *.sh -_testbed/cleaner/ -*/_testbed/experiment/cache/** -*/_testbed/experiment/source/** - -!.gitkeep \ No newline at end of file +.DS_Store +_testbed/media/** +_testbed/cleaner/** +_testbed/model/** diff --git a/_testbed/README.md b/_testbed/README.md new file mode 100644 index 00000000..702df030 --- /dev/null +++ b/_testbed/README.md @@ -0,0 +1,55 @@ +# PanelCleaner Testbed + +## Overview +The **PanelCleaner** testbed serves as a dedicated area for experimenting and testing new ideas with *PanelCleaner* using Jupyter Notebooks. Currently, it focuses on **OCR** technologies, primarily using **Tesseract** and **IDefics** models. 
The testbed also begins the development of an evaluation framework to support future experiments. This project utilizes the `nbdev` literate programming environment. + +## Installation +To get started with the notebooks, you'll need Jupyter Lab/Notebook or any Python IDE that supports Jupyter notebooks like *VSCode* or *Google Colab*. +The setup mostly shares the same requirements as PanelCleaner and its CLI, with a few additional dependencies. +Here’s how to set up your environment: +1. Activate a virtual environment. +2. Navigate to the `_testbed` directory: + ```bash + cd _testbed + ``` +3. Install the required dependencies: + ```bash + pip install -r requirements.txt + ``` +Note: Each notebook may require the installation of additional dependencies. + +## Google Colab Support +The notebooks are ready to use on Google Colab, allowing you to run them directly on the platform without any extra setup or local GPU rigs. +Instructions to use Google Colab are included in the notebooks (TBD). + +## Install Test Images +The test images are not included in the repository but can be downloaded from the following link: +- [Test images](https://drive.google.com/drive/folders/101_1_20240229) + +After downloading, place the test images in the [media](media) directory. If you want to use your own, each image should have a corresponding text file named after the image with a `_gt` suffix and the extension `.txt` (e.g. `page_01.jpg` -> `page_01_gt.txt`), which contains the ground truth data, one line per box (as calculated by PanelCleaner). Optionally, you can also include a `.json` file with the same name, specifying the language of the page: +```json +{ + "lang": "Spanish" +} +``` +If no language file is found, English will be used by default. In the near future, language detection will be automated. 
+ +## Introduction to nbdev +[nbdev](https://nbdev.fast.ai/) is a **literate programming** environment that allows you to develop a Python library in Jupyter Notebooks, integrating exploratory programming, code, tests, and documentation into a single cohesive workflow. Inspired by **Donald Knuth**'s concept of literate programming, this approach not only makes the development process more intuitive but also eases the maintenance and understanding of the codebase. + +## Notebooks (WIP) + +#### [helpers.ipynb](helpers.ipynb) +This notebook includes utility functions and helpers that support the experiments in other notebooks, streamlining repetitive tasks and data manipulation. + +#### [ocr_metric.ipynb](ocr_metric.ipynb) +This notebook focuses on defining and implementing metrics to evaluate the performance and accuracy of OCR engines, crucial for assessing the effectiveness of OCR technologies in various scenarios. It currently develops a basic metric for evaluating OCR models. In the near future, additional metrics will be added, such as precision and recall using Levenshtein distance (edit distance). More importantly, it will introduce a metric tailored to the unique characteristics of Comics/Manga OCR, a topic currently unexplored in technical literature. + +#### [experiments.ipynb](experiments.ipynb) +This notebook details the development of the evaluation framework used in other notebooks, with Tesseract as a case study to illustrate the evaluation process. It's a work in progress, and will be updated continuously. If you're only interested in visualizing the results of the experiments, go directly to `test_tesseract.ipynb` or `test_idefics.ipynb`, which are much shorter and more to the point. + +#### [test_tesseract.ipynb](test_tesseract.ipynb) +This notebook is dedicated to testing the Tesseract OCR engine, offering insights into its capabilities and limitations through hands-on experiments. 
+ +#### [test_idefics.ipynb](test_idefics.ipynb) +Similar to `test_tesseract.ipynb`, this notebook focuses on the IDefics LVM model, evaluating its performance and accuracy under different conditions. Here you can compare the results of the Tesseract OCR engine with the IDefics LVM model to see how the two compare in terms of accuracy and performance. diff --git a/_testbed/experiments.ipynb b/_testbed/experiments.ipynb new file mode 100644 index 00000000..6c8d3c81 --- /dev/null +++ b/_testbed/experiments.ipynb @@ -0,0 +1,6810 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp experiments" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# %reload_ext autoreload\n", + "# %autoreload 0\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# install (Colab)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# try: \n", + "# import fastcore as FC\n", + "# except ImportError: \n", + "# !pip install -q fastcore\n", + "# try:\n", + "# import rich\n", + "# except ImportError:\n", + "# !pip install -q rich\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: we're using the `testbed` branch of PanelCleaner.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install -q git+https://github.com/civvic/PanelCleaner.git@testbed" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing `Tesseract` OCR for Comics\n", + "> Accuracy Enhancements for OCR in `PanelCleaner`\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prologue" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "from __future__ import 
annotations\n", + "\n", + "import dataclasses\n", + "import difflib\n", + "import functools\n", + "import json\n", + "import shutil\n", + "from collections import defaultdict\n", + "from enum import Enum\n", + "from pathlib import Path\n", + "from typing import Any\n", + "from typing import Callable\n", + "from typing import cast\n", + "from typing import Mapping\n", + "from typing import Self\n", + "from typing import TypeAlias\n", + "\n", + "import fastcore.all as FC\n", + "import ipywidgets as W\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import pcleaner.config as cfg\n", + "import pcleaner.ctd_interface as ctm\n", + "import pcleaner.image_ops as ops\n", + "import pcleaner.ocr.ocr as ocr\n", + "import pcleaner.structures as st\n", + "import torch\n", + "from IPython.display import clear_output\n", + "from IPython.display import display\n", + "from IPython.display import HTML\n", + "from ipywidgets.widgets.interaction import show_inline_matplotlib_plots\n", + "from loguru import logger\n", + "from pcleaner.ocr.ocr_tesseract import TesseractOcr\n", + "from PIL import Image\n", + "from PIL import ImageFilter\n", + "from rich.console import Console\n", + "from tqdm.notebook import tqdm\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from helpers import *\n", + "from ocr_metric import *\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "\n", + "import fastcore.xtras # patch Path with some utils\n", + "import pcleaner.cli_utils as cli\n", + "import pcleaner.preprocessor as pp\n", + "import rich\n", + "from fastcore.test import * # type: ignore\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Helpers" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# 
pretty print by default\n", + "# %load_ext rich" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "console = Console(width=104, tab_size=4, force_jupyter=True)\n", + "cprint = console.print\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tesseract installation" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['tesseract 5.3.4',\n", + " ' leptonica-1.84.1',\n", + " ' libgif 5.2.1 : libjpeg 8d (libjpeg-turbo 3.0.0) : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : libwebp 1.4.0 : libopenjp2 2.5.2',\n", + " ' Found NEON',\n", + " ' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6',\n", + " ' Found libcurl/8.4.0 SecureTransport (LibreSSL/3.3.6) zlib/1.2.11 nghttp2/1.51.0']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out = !tesseract --version\n", + "out\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install jpn_vert tesseract lang\n", + "> It has much better results than the default `jpn` language model.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "Download from [tessdata_best](https://github.com/tesseract-ocr/tessdata_best), or from [here](https://groups.google.com/g/tesseract-ocr/c/FwjSZzoVgeg/m/u-zyFYQiBgAJ) trained for vertical Japanese text as found in manga.\n", + "\n", + "Note: I've not played much with this one, `manga-ocr` is surely a much better fit, but it can be educational to compare.\n", + "\n", + "I have copied models in my GDrive, and installed (in my Ubuntu, similar in Mac):\n", + "```bash\n", + "cd model\n", + "ln -s jpn_vert_tessdata_best.traineddata /usr/share/tesseract-ocr/5/tessdata/jpn_vert.traineddata\n", + "```" + ] + }, + { + "cell_type": "code", + 
"execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(Path('/opt/homebrew/share/tessdata'),\n", + " ['afr, amh, ara, asm, aze, aze_cyrl, bel, ben, bod, bos, bre, bul, cat, ceb, ces',\n", + " 'chi_sim, chi_sim_vert, chi_tra, chi_tra_vert, chr, cos, cym, dan, deu, div, dzo, ell, eng, enm, epo',\n", + " 'equ, est, eus, fao, fas, fil, fin, fra, frk, frm, fry, gla, gle, glg, grc',\n", + " 'guj, hat, heb, hin, hrv, hun, hye, iku, ind, isl, ita, ita_old, jav, jpn, jpn_vert',\n", + " 'kan, kat, kat_old, kaz, khm, kir, kmr, kor, kor_vert, lao, lat, lav, lit, ltz, mal',\n", + " 'mar, mkd, mlt, mon, mri, msa, mya, nep, nld, nor, oci, ori, osd, pan, pol',\n", + " 'por, pus, que, ron, rus, san, script/Arabic, script/Armenian, script/Bengali, script/Canadian_Aboriginal, script/Cherokee, script/Cyrillic, script/Devanagari, script/Ethiopic, script/Fraktur',\n", + " 'script/Georgian, script/Greek, script/Gujarati, script/Gurmukhi, script/HanS, script/HanS_vert, script/HanT, script/HanT_vert, script/Hangul, script/Hangul_vert, script/Hebrew, script/Japanese, script/Japanese_vert, script/Kannada, script/Khmer',\n", + " 'script/Lao, script/Latin, script/Malayalam, script/Myanmar, script/Oriya, script/Sinhala, script/Syriac, script/Tamil, script/Telugu, script/Thaana, script/Thai, script/Tibetan, script/Vietnamese, sin, slk',\n", + " 'slv, snd, snum, spa, spa_old, sqi, srp, srp_latn, sun, swa, swe, syr, tam, tat, tel',\n", + " 'tgk, tha, tir, ton, tur, uig, ukr, urd, uzb, uzb_cyrl, vie, yid, yor'])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out = !tesseract --list-langs\n", + "tessdata = Path(out[0].split('\"')[1])\n", + "tessdata, [', '.join(sub) for sub in [out[i:i + 15] for i in range(1, len(out), 15)]]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[\n",
+       "    Path('/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/eng_tessdata_best_410.traineddata'),\n",
+       "    Path('/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_vert_tessdata_best.traineddata'),\n",
+       "    Path('/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_tessdata_best.traineddata')\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\n", + " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/eng_tessdata_best_410.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_vert_tessdata_best.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_tessdata_best.traineddata'\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "langs = tessdata.ls()\n", + "cprint([p.resolve() for p in langs if 'eng' in p.name] + [p.resolve() for p in langs if 'jpn' in p.name])\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## OCR results clean-up" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "def remove_multiple_whitespaces(text):\n", + " return ' '.join(text.split())\n", + "\n", + " \n", + "def postprocess_ocr(text):\n", + " \"Basic postprocessing for English Tesseract OCR results.\"\n", + " return ' '.join(remove_multiple_whitespaces(text).splitlines()).capitalize()\n", + "\n", + "def accuracy_ocr_naive(text, ground_truth):\n", + " return sum(1 for a, b in zip(text, ground_truth) if a == b) / len(text)\n", + "\n", + "\n", + "def accuracy_ocr_difflib(text, ground_truth):\n", + " \"\"\"\n", + " Calculates the OCR accuracy based on the similarity between the OCR text and the ground truth text,\n", + " using difflib's SequenceMatcher to account for differences in a manner similar to git diffs.\n", + "\n", + " :param text: The OCR-generated text.\n", + " :param ground_truth: The ground truth text.\n", + " :return: A float representing the similarity ratio between the OCR text and the 
ground truth, \n", + " where 1.0 is identical.\n", + " \"\"\"\n", + " # Initialize the SequenceMatcher with the OCR text and the ground truth\n", + " matcher = difflib.SequenceMatcher(None, text, ground_truth)\n", + " \n", + " # Get the similarity ratio\n", + " similarity_ratio = matcher.ratio()\n", + " \n", + " return similarity_ratio" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Ground truth" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "def ground_truth_path(page_data: st.PageData):\n", + " path = Path(page_data.original_path)\n", + " return path.with_stem(path.stem + '_gt').with_suffix('.txt')\n", + "\n", + "\n", + "def read_ground_truth(page_data: st.PageData):\n", + " gts_path = ground_truth_path(page_data)\n", + " if gts_path.exists():\n", + " gts = gts_path.read_text(encoding=\"utf-8\").splitlines()\n", + " else:\n", + " gts = [\"\" for _ in range(len(page_data.boxes))]\n", + " return gts\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cropping" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "def dilate_by_fractional_pixel(image, dilation_fraction, filter_base_size=3):\n", + " \"\"\"\n", + " Dilates an image by a specified fractional pixel amount. 
The function calculates \n", + " the necessary scaling factor and filter size based on the desired dilation fraction.\n", + "\n", + " :param image: A PIL Image object (1-bit mode).\n", + " :param dilation_fraction: The desired fractional pixel amount for dilation (e.g., 0.2).\n", + " :param filter_base_size: The base size of the dilation filter to apply on the scaled image.\n", + " This size is adjusted based on the scaling factor to achieve the\n", + " desired dilation effect.\n", + " :return: A PIL Image object after dilation, converted back to grayscale.\n", + " \"\"\"\n", + " # Calculate the scale factor based on the desired dilation fraction\n", + " scale_factor = int(1 / dilation_fraction)\n", + " \n", + " # Adjust the filter size based on the scale factor\n", + " # This ensures the dilation effect is proportional to the desired fraction\n", + " filter_size = max(1, filter_base_size * scale_factor // 5)\n", + "\n", + " # Convert the image to grayscale for more nuanced intermediate values\n", + " image_gray = image.convert(\"L\")\n", + "\n", + " # Resize the image to a larger size using bicubic interpolation\n", + " larger_size = (int(image.width * scale_factor), int(image.height * scale_factor))\n", + " image_resized = image_gray.resize(larger_size, Image.BICUBIC)\n", + "\n", + " # Apply the dilation filter to the resized image\n", + " dilated_image = image_resized.filter(ImageFilter.MaxFilter(filter_size))\n", + "\n", + " # Resize the image back to its original size using bicubic interpolation\n", + " image_dilated_fractional_pixel = dilated_image.resize(image.size, Image.BICUBIC)\n", + "\n", + " return image_dilated_fractional_pixel\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "def extract_text(image, text_mask, box):\n", + " cropped_image = crop_box(box, image)\n", + " cropped_mask = crop_box(box, text_mask)\n", + " extracted = ops.extract_text(cropped_image, 
cropped_mask)\n", + " return cropped_image, cropped_mask, extracted\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lang\n", + "> language name to a language code \n", + "> every one has language codes: tesseract, comic-text-detector, earthlings...\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "_lang2pcleaner = {'English': st.DetectedLang.ENG, 'Japanese': st.DetectedLang.JA, 'Spanish': st.DetectedLang.ENG,\n", + " 'French':st.DetectedLang.ENG}\n", + "# _lang2tesseract = {'English': 'eng', 'Japanese': 'jpn'}\n", + "_lang2tesseract = {'English': 'eng', 'Japanese': 'jpn_vert', 'Spanish': 'spa', 'French': 'fra'}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def lang2pcleaner(lang: str):\n", + " return _lang2pcleaner[lang]\n", + "\n", + "def lang2tesseract(lang: str):\n", + " return _lang2tesseract[lang]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "# Tesseract experiments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PanelCleaner Configuration\n", + "> Adapt `PanelCleaner` `Config` current config to this notebook.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "config = cfg.load_config()\n", + "config.cache_dir = Path(\".\")\n", + "\n", + "cache_dir = config.get_cleaner_cache_dir()\n", + "\n", + "profile = config.current_profile\n", + "preprocessor_conf = profile.preprocessor\n", + "# Modify the profile to OCR all boxes.\n", + "# Make sure OCR is enabled.\n", + "preprocessor_conf.ocr_enabled = True\n", + "# Make sure the max size is infinite, so no boxes are skipped in the OCR process.\n", + "preprocessor_conf.ocr_max_size = 10**10\n", + "# Make sure the sus box min size is infinite, so all boxes with \"unknown\" language 
are skipped.\n", + "preprocessor_conf.suspicious_box_min_size = 10**10\n", + "# Set the OCR blacklist pattern to match everything, so all text gets reported in the analytics.\n", + "preprocessor_conf.ocr_blacklist_pattern = \".*\"\n", + "\n", + "gpu = torch.cuda.is_available() or torch.backends.mps.is_available()\n", + "model_path = config.get_model_path(gpu)\n", + "device = (\"mps\" if torch.backends.mps.is_available() else \"cuda\") if model_path.suffix == \".pt\" else \"cpu\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test images\n", + "> `IMAGE_PATHS` is a list of image file paths that are used as input for testing the OCR methods." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['00: Action_Comics_1960-01-00_(262).JPG',\n", + " '01: Adolf_Cap_01_008.jpg',\n", + " '02: Barnaby_v1-028.png',\n", + " '03: Barnaby_v1-029.png',\n", + " '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n", + " '05: Cannon-292.jpg',\n", + " '06: Contrato_con_Dios_028.jpg',\n", + " '07: Erase_una_vez_en_Francia_02_88.jpg',\n", + " '08: FOX_CHILLINTALES_T17_012.jpg',\n", + " '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n", + " '10: Galactus_12.jpg',\n", + " '11: INOUE_KYOUMEN_002.png',\n", + " '12: MCCALL_ROBINHOOD_T31_010.jpg',\n", + " '13: MCCAY_LITTLENEMO_090.jpg',\n", + " '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n", + " '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n", + " '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n", + " '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n", + " '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n", + " '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n", + " '20: Strange_Tales_172005.jpg',\n", + " '21: Strange_Tales_172021.jpg',\n", + " '22: Tarzan_014-21.JPG',\n", + " '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n", + " '24: 
Transformers_-_Unicron_000-004.jpg',\n", + " '25: Transformers_-_Unicron_000-016.jpg',\n", + " '26: WARE_ACME_024.jpg',\n", + " '27: Yoko_Tsuno_T01_1972-10.jpg',\n", + " '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n", + " '29: manga_0033.jpg',\n", + " '30: ronson-031.jpg',\n", + " '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "media_path = Path(\"media/\")\n", + "\n", + "IMAGE_PATHS = sorted(\n", + " [_ for _ in media_path.glob(\"*\") if _.is_file() and _.suffix.lower() in [\".jpg\", \".png\", \".jpeg\"]])\n", + "\n", + "[f\"{i:02}: {_.name}\" for i,_ in enumerate(IMAGE_PATHS)]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Results helper\n", + "> Dataclass helper to store and display results\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "@dataclasses.dataclass\n", + "class ResultOCR:\n", + " block_idx: int\n", + " image: Image.Image | None\n", + " ocr: str\n", + " page_data: st.PageData\n", + " gts: list[str]\n", + " description: str = dataclasses.field(default='', kw_only=True)\n", + "\n", + " def __post_init__(self): \n", + " if self.image is None:\n", + " cache_path = self.cache_path()\n", + " if cache_path.exists():\n", + " self.image = Image.open(cache_path)\n", + "\n", + " @property\n", + " def acc(self):\n", + " self._acc = accuracy_ocr_difflib(self.ocr, self.gts[self.block_idx])\n", + " return self._acc\n", + " @property\n", + " def suffix(self): return f\"{self.block_idx}_{self.description}\"\n", + "\n", + " def diff_tagged(self):\n", + " _, html2 = get_text_diffs_html(self.gts[self.block_idx], self.ocr, False)\n", + " return f\"{html2}\"\n", + " \n", + " def cache_path(self, suffix: str | None = None):\n", + " suffix = self.suffix + (('_'+suffix) if suffix else '')\n", + " parent = 
Path(self.page_data.image_path).parent\n", + " img_name = Path(self.page_data.original_path).stem\n", + " box_image_path = parent / f\"{img_name}_{suffix}.png\"\n", + " return box_image_path\n", + " \n", + " def cache_image(self, image: Image.Image | None = None, suffix: str | None = None):\n", + " image = image or (self.image if not suffix else None)\n", + " box_image_path = self.cache_path(suffix)\n", + " if image and not box_image_path.exists():\n", + " image.save(box_image_path)\n", + " return box_image_path\n", + "\n", + "\n", + " def as_html(self):\n", + " acc_html = f\"
{self.acc:.2f}\"\n", + " box_image_path = self.cache_image()\n", + " html1 = get_columns_html([[box_image_path], [self.ocr + acc_html]])\n", + " html_str1, html_str2 = get_text_diffs_html(self.gts[self.block_idx], self.ocr)\n", + " html2 = f\"
{html_str1}
{html_str2}
\"\n", + " return html1 + '\\n
\\n' + html2\n", + "\n", + " def __repr__(self): \n", + " return f\"{type(self).__name__}#block {self.block_idx:02}: {self.acc:.2f}||{self.ocr}\"\n", + " \n", + " def display(self): display(HTML(self.as_html()))\n", + " \n", + " def _ipython_display_(self): self.display()\n", + "\n", + " def to_dict(self):\n", + " d = dataclasses.asdict(self)\n", + " d['image'] = d['page_data'] = d['gts'] = None\n", + " return d\n", + "\n", + " # @classmethod\n", + " # def from_dict(cls, d: dict, page_data: st.PageData, gts: list[str]):\n", + " # return cls(**(d | {'page_data':page_data, 'gts':gts}))\n", + "\n", + "\n", + "@dataclasses.dataclass\n", + "class ResultOCRExtracted(ResultOCR):\n", + "\n", + " def __repr__(self): return super().__repr__()\n", + " def as_html(self):\n", + " html_str1, html_str2 = get_text_diffs_html(self.gts[self.block_idx], self.ocr)\n", + " diff_html = f\"
{html_str1}
{html_str2}
\"\n", + " cropped_image_path = self.cache_image(None, \"cropped\")\n", + " cropped_mask_path = self.cache_image(None, \"mask\")\n", + " result_path = self.cache_image()\n", + " return '\\n
\\n'.join([\n", + " get_image_grid_html([cropped_image_path, cropped_mask_path, result_path], 1, 3), \n", + " acc_as_html(self.acc), \n", + " diff_html\n", + " ])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CropMethod\n", + "> Box cropping methods.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class CropMethod(Enum):\n", + " INITIAL_BOX = 'Initial box'\n", + " DEFAULT = 'Default'\n", + " DEFAULT_GREY_PAD = 'Default, grey pad'\n", + " PADDED_4 = 'Padded 4px'\n", + " PADDED_8 = 'Padded 8px'\n", + " EXTRACTED_INIT_BOX = 'Extracted, init box'\n", + " PADDED_4_EXTRACTED = 'Padded 4, extracted'\n", + " PADDED_8_EXTRACTED = 'Padded 8, extracted'\n", + " PADDED_8_DILATION_1 = 'Padded 8, dilation 1'\n", + " PAD_8_FRACT_0_5 = 'Pad 8, fract. 0.5'\n", + " PAD_8_FRACT_0_2 = 'Pad 8, fract. 0.2'\n", + "\n", + " @classmethod\n", + " def __display_names__(cls):\n", + " return dict(\n", + " zip([_.value for _ in cls], \n", + " cls))\n", + "\n", + "\n", + "CM = CropMethod\n", + "\n", + "_IMAGE_METHODS = [CM.INITIAL_BOX, CM.DEFAULT, CM.DEFAULT_GREY_PAD, \n", + " CM.PADDED_4, CM.PADDED_8]\n", + "_EXTRACTED_METHODS = [CM.EXTRACTED_INIT_BOX, CM.PADDED_4_EXTRACTED, \n", + " CM.PADDED_8_EXTRACTED, CM.PADDED_8_DILATION_1, \n", + " CM.PAD_8_FRACT_0_5, CM.PAD_8_FRACT_0_2]\n", + "\n", + "\n", + "def crop_by_image(method: CM, \n", + " box: st.Box, \n", + " base: Image.Image, \n", + " preproc: cfg.PreprocessorConfig,\n", + " ):\n", + " image = None\n", + " match method:\n", + " case CM.INITIAL_BOX :\n", + " image = crop_box(box, base)\n", + " case CM.DEFAULT:\n", + " padded2_4 = (\n", + " box.pad(preproc.box_padding_initial, base.size).right_pad(\n", + " preproc.box_right_padding_initial, base.size))\n", + " image = crop_box(padded2_4, base)\n", + " case CM.DEFAULT_GREY_PAD:\n", + " image = crop_box(box, base)\n", + " image = ops.pad_image(image, 8, fill_color=(128, 
128, 128))\n", + " case CM.PADDED_4:\n", + " padded4 = box.pad(4, base.size)\n", + " image = crop_box(padded4, base)\n", + " case CM.PADDED_8:\n", + " padded4 = box.pad(8, base.size)\n", + " image = crop_box(padded4, base)\n", + " case _: pass\n", + " return image\n", + "\n", + "\n", + "def crop_by_extracted(method: CM, \n", + " box: st.Box, \n", + " base: Image.Image, \n", + " mask: Image.Image,\n", + " cropped_image_path: Path,\n", + " cropped_mask_path: Path,\n", + " dilated: dict[float, Image.Image]\n", + " ):\n", + " cropped_image, cropped_mask, image = None, None, None\n", + " if method in _EXTRACTED_METHODS:\n", + " if not cropped_image_path.exists() or not cropped_mask_path.exists():\n", + " match method:\n", + " case CM.EXTRACTED_INIT_BOX:\n", + " cropped_image, cropped_mask, image = extract_text(base, mask, box)\n", + " case CM.PADDED_4_EXTRACTED:\n", + " padded4 = box.pad(4, base.size)\n", + " cropped_image, cropped_mask, image = extract_text(base, mask, padded4)\n", + " case CM.PADDED_8_EXTRACTED:\n", + " padded8 = box.pad(8, base.size)\n", + " cropped_image, cropped_mask, image = extract_text(base, mask, padded8)\n", + " case CM.PADDED_8_DILATION_1:\n", + " padded8 = box.pad(8, base.size)\n", + " cropped_image, cropped_mask, image = extract_text(\n", + " base, dilated[1], padded8)\n", + " case CM.PAD_8_FRACT_0_5:\n", + " padded8 = box.pad(8, base.size)\n", + " cropped_image, cropped_mask, image = extract_text(\n", + " base, dilated[0.5], padded8)\n", + " case CM.PAD_8_FRACT_0_2:\n", + " padded8 = box.pad(8, base.size)\n", + " cropped_image, cropped_mask, image = extract_text(\n", + " base, dilated[0.2], padded8)\n", + " case _: pass\n", + "\n", + " return image, cropped_image, cropped_mask\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ResultSet\n", + "> tagged nested dict to store image results keyed by box, and crop method\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + 
"outputs": [], + "source": [ + "#| export\n", + "\n", + "SubjIdT: TypeAlias = int\n", + "ImgIdT = SubjIdT\n", + "BoxIdT: TypeAlias = int\n", + "\n", + "class ResultSet(dict[BoxIdT, dict[CropMethod, ResultOCR]]): ...\n", + "\n", + "class ResultSetDefault(defaultdict[BoxIdT, dict[CropMethod, ResultOCR]]): ...\n", + "\n", + "def results_to_dict(results: ResultSet) -> dict[BoxIdT, dict[str, str]]:\n", + " d = {}\n", + " for box, box_methods in results.items():\n", + " for method, result in box_methods.items():\n", + " if box not in d:\n", + " d[box] = {}\n", + " d[box][method.name] = result.ocr\n", + " return d\n", + "\n", + "def dict_to_results(\n", + " image_idx: ImgIdT, \n", + " results_dict: dict[BoxIdT, dict[str, str]],\n", + " result_factory: Callable\n", + " ) -> ResultSetDefault:\n", + " results = ResultSetDefault(dict[CropMethod, ResultOCR])\n", + " for box_idx, box_methods in results_dict.items():\n", + " box_idx = int(box_idx)\n", + " for method, ocr in box_methods.items():\n", + " m = CM[method]\n", + " results[box_idx][m] = result_factory(image_idx, box_idx, m, ocr)\n", + " return results\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ExperimentContext\n", + "> Utility class to maintain shared state across all experiments.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "# class ExperimentSubject(Protocol):\n", + "# @property\n", + "# def exp(self) -> 'ExperimentContext': ...\n", + "# @property\n", + "# def idx(self) -> SubjIdT: ...\n", + "# def setup(self,\n", + "# exp: 'ExperimentContext',\n", + "# idx: Any,\n", + "# *args, **kwargs\n", + "# ): ...\n", + "\n", + "\n", + "# class ExperimentContext(Protocol):\n", + "# def subject_factory(self) -> Callable[..., ExperimentSubject]: ...\n", + "# def normalize_idx(self, idx: Any) -> SubjIdT: ...\n", + "# def experiment_subject(self, idx: Any, /, \n", + "# create: bool = False, 
*args, **kwargs) -> ExperimentSubject | None: \n", + "# \"\"\"Get or create an `ExperimentSubject` for the given identifier. \n", + "# Returns `None` if `idx` is out of domain range.\n", + "# \"\"\"\n", + "# ...\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class ExperimentSubject:\n", + " exp: ExperimentContext\n", + " idx: SubjIdT\n", + "\n", + " def setup(self, exp: ExperimentContext, idx: Any, *args, **kwargs): \n", + " self.exp = exp\n", + " self.idx = cast(SubjIdT, exp.normalize_idx(idx))\n", + " return self\n", + "\n", + " def __new__(cls,\n", + " exp: ExperimentContext,\n", + " idx: Any,\n", + " *args, **kwargs):\n", + " self = exp.experiment_subject(idx)\n", + " if self is None:\n", + " self = super().__new__(cls)\n", + " self = exp.experiment_subject(idx, new_subject=self, *args, **kwargs)\n", + " if self is None:\n", + " raise ValueError(f\"Can't create new subject with idx: {idx}: out of range\")\n", + " return self\n", + "\n", + "\n", + "class ExperimentContext:\n", + " \"Class to maintain shared state across all file-based experiments within the experiment domain.\"\n", + "\n", + " subject_cls: Callable[..., ExperimentSubject]\n", + " def subject_factory(self) -> Callable[..., ExperimentSubject]: return type(self).subject_cls\n", + "\n", + " def normalize_idx(self, idx: int | str | Path) -> SubjIdT | None:\n", + " nidx = None\n", + " if isinstance(idx, int) and idx < len(self._paths):\n", + " nidx = idx\n", + " elif isinstance(idx, str):\n", + " try:\n", + " nidx = [_.name for _ in self._paths].index(idx)\n", + " except Exception:\n", + " pass\n", + " elif isinstance(idx, Path):\n", + " idx = idx.resolve()\n", + " if idx in self._paths:\n", + " nidx = self._paths.index(idx)\n", + " return nidx\n", + " \n", + " def path_from_idx(self, idx: int | str | Path):\n", + " _idx = self.normalize_idx(idx)\n", + " if _idx is None:\n", + " raise ValueError(f\"{_idx} not 
found in context.\")\n", + " path = Path(self._paths[_idx])\n", + " if not path.exists():\n", + " raise ValueError(f\"{path} not found in context.\")\n", + " return path\n", + " \n", + " @property\n", + " def count(self): return len(self._paths)\n", + " @property\n", + " def cache_dir(self): return Path(\".cache/\")\n", + " @functools.lru_cache()\n", + " def _cache_dir(self, idx: SubjIdT):\n", + " # create one folder for each image to cache and save results\n", + " path = self.path_from_idx(idx)\n", + " cache_dir = self.cache_dir / path.stem\n", + " cache_dir.mkdir(parents=True, exist_ok=True)\n", + " return cache_dir\n", + " def subject_cache_dir(self, idx: int | str | Path):\n", + " return self._cache_dir(idx)\n", + "\n", + " def empty_cache(self, idx: SubjIdT | None = None):\n", + " cache_dir = self.cache_dir\n", + " if idx is None:\n", + " shutil.rmtree(cache_dir, ignore_errors=True)\n", + " cache_dir.mkdir(parents=True, exist_ok=True)\n", + " else:\n", + " path = Path(self._paths[idx])\n", + " cache_dir = cache_dir / path.stem\n", + " for p in cache_dir.glob(\"*\"):\n", + " p.unlink(missing_ok=True)\n", + " if not any(cache_dir.iterdir()):\n", + " cache_dir.rmdir()\n", + "\n", + " def empty_cache_warn(self, idx: SubjIdT | None=None, *, warn: bool=True, out: W.Output | None=None):\n", + " def on_confirm_clicked(b):\n", + " try:\n", + " self.empty_cache(idx)\n", + " print(\"Cache cleared successfully.\")\n", + " except Exception as e:\n", + " print(f\"Failed to clear cache: {e}\")\n", + " finally:\n", + " for widget in confirmation_box.children:\n", + " widget.close()\n", + "\n", + " def on_cancel_clicked(b):\n", + " print(\"Cache clear cancelled.\")\n", + " for widget in confirmation_box.children:\n", + " widget.close()\n", + "\n", + " if out is None:\n", + " out = W.Output()\n", + " with out:\n", + " if FC.IN_NOTEBOOK:\n", + " confirm_button = W.Button(description=\"Confirm\")\n", + " cancel_button = W.Button(description=\"Cancel\")\n", + " 
confirm_button.on_click(on_confirm_clicked)\n", + " cancel_button.on_click(on_cancel_clicked)\n", + " label = W.Label('Are you sure you want to clear the cache? This action cannot be undone.')\n", + " confirmation_box = W.VBox([label, W.HBox([confirm_button, cancel_button])])\n", + " display(confirmation_box)\n", + " else:\n", + " on_confirm_clicked(None)\n", + "\n", + " def experiment_subject(self, idx: SubjIdT | str | Path, /, \n", + " new_subject: ExperimentSubject | None = None, *args, **kwargs) -> ExperimentSubject | None:\n", + " \"Cached subject. If provided, `new_subject` replaces value at the index.\"\n", + " if (nidx := self.normalize_idx(idx)) is None:\n", + " return None\n", + " if new_subject is None:\n", + " subject = self._subjects.get(nidx)\n", + " else:\n", + " new_subject.setup(self, nidx, *args, **kwargs)\n", + " self._subjects[nidx] = subject = new_subject\n", + " return subject\n", + "\n", + " def reset(self):\n", + " self._subjects.clear()\n", + " self._cache_dir.cache_clear()\n", + " \n", + " def __init__(self, paths: list[Path], root: Path | None = None):\n", + " self._root = (root or Path('.')).resolve()\n", + " self._paths = [p.resolve().relative_to(self._root) for p in paths]\n", + " self._subjects: dict[SubjIdT, ExperimentSubject] = {}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`ExperimentSubject`s are singletons" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "exp = ExperimentContext([Path('a'), Path('b')])\n", + "subj = exp.experiment_subject(5)\n", + "test_eq(subj, None)\n", + "\n", + "_ = exp.experiment_subject(1)\n", + "test_is(_, None)\n", + "\n", + "subj1 = ExperimentSubject(exp, 1)\n", + "_ = exp.experiment_subject(1)\n", + "test_eq(_ is not None, True)\n", + "test_is(_, subj1)\n", + "test_is(subj1, ExperimentSubject(exp, 1))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can't create 
# ImageContext
# > A utility class to maintain image state for an `OCRExperimentContext`.

ImgSpecT: TypeAlias = ImgIdT | str | Path


class ImageContext(ExperimentSubject):
    """
    Per-image state of an `ExperimentContext` used by the OCR experiments.

    Attributes (assigned by the `setup` method that is attached to this class
    elsewhere in the module):
        json_data (dict | None): JSON data loaded from the cached files.
        page_data (st.PageData): PanelCleaner page data.
        base_image (Image.Image): image opened from `page_data.image_path`.
        mask (Image.Image): image opened from `page_data.mask_path`.

    Lazily computed values:
        page_lang (str): language of the page, see `setup_page_lang`.
        gts (list[str]): ground truth read with `read_ground_truth`.
        mask_dilated1(): `mask` filtered with `ImageFilter.MaxFilter(3)`.
        mask_dilated05() / mask_dilated02(): `mask` processed by
            `dilate_by_fractional_pixel` with fraction 0.5 / 0.2.
        dilated(): the three masks above keyed by 1, 0.5 and 0.2.
    """
    exp: ExperimentContext
    idx: ImgIdT
    base_image: Image.Image
    mask: Image.Image
    json_data: dict | None
    page_data: st.PageData
    _page_lang: str
    _gts: list[str]
    _mask_dilated1: Image.Image | None
    _mask_dilated05: Image.Image | None
    _mask_dilated02: Image.Image | None

    def to_dict(self):
        "Serializable summary: image index and page language."
        return {
            'image_idx': self.idx,
            'page_lang': self.page_lang,
        }

    @property
    def image_idx(self):
        "Alias of `idx`."
        return self.idx

    @property
    def cache_dir(self):
        "Cache folder of this image, as provided by the experiment context."
        return self.exp.subject_cache_dir(self.idx)
    cache_dir_image = cache_dir

    @property
    def image_info(self):
        """Return `((w, h), size_in, size_cm, required_dpi)` for the base image.

        The last three values come from the module helpers
        `size(w, h, 'in', 300)`, `size(w, h, 'cm', 300)` and
        `dpi(w, h, 'Modern Age')`.
        """
        w, h = self.base_image.size
        print_size_in = size(w, h, 'in', 300)
        print_size_cm = size(w, h, 'cm', 300)
        required_dpi = dpi(w, h, 'Modern Age')
        return (w, h), print_size_in, print_size_cm, required_dpi

    @property
    def original_image_path(self):
        return Path(self.page_data.original_path)

    @property
    def image_path(self):
        return Path(self.page_data.image_path)

    @property
    def image_name(self):
        "File name of the original image."
        return self.original_image_path.name

    @property
    def image_size(self):
        return self.base_image.size

    @property
    def image_dim(self):
        "`size(...)` of the image with that helper's default arguments."
        return size(*self.image_size)

    @property
    def image_dpi(self):
        "`dpi(...)` of the image with that helper's default arguments."
        return dpi(*self.image_size)

    @property
    def image_print(self):
        "Tuple `(image_size, image_dim, image_dpi)`."
        return self.image_size, self.image_dim, self.image_dpi

    @property
    def image_name_rich(self):
        "Image name followed by its pixel size, dimensions and dpi."
        siz, dim, res = self.image_print
        return f'{self.image_name} - {siz[0]}x{siz[1]} px: {dim[0]:.2f}x{dim[1]:.2f}" @ {res:.2f} dpi'

    def setup_page_lang(self, page_lang: str | None = None):
        """Resolve the page language and keep the metadata file in sync.

        The metadata file is the original image path with a `.json` suffix.
        Its `lang` entry is used when `page_lang` is None or equal to it.
        Otherwise `page_lang` (or 'English' when None) is stored in the
        metadata and the file is rewritten.
        """
        meta_path = self.original_image_path.with_suffix('.json')
        # read_text / write_text close the file, unlike a bare open().
        metadata = json.loads(meta_path.read_text(encoding='utf-8')) if meta_path.exists() else {}
        if 'lang' in metadata and (page_lang is None or page_lang == metadata['lang']):
            self._page_lang = metadata['lang']
            return
        self._page_lang = metadata['lang'] = page_lang or 'English'
        meta_path.write_text(json.dumps(metadata, indent=2), encoding='utf-8')

    @property
    def page_lang(self):
        "Language of the page; resolved on first access if not set yet."
        if getattr(self, '_page_lang', None) is None:
            self.setup_page_lang()
        return self._page_lang

    @property
    def boxes(self):
        return self.page_data.boxes

    def setup_ground_truth(self):
        "Load the ground truth of the page with `read_ground_truth`."
        self._gts = read_ground_truth(self.page_data)

    @property
    def gts(self):
        "Ground truth of the page; loaded on first access if not set yet."
        if getattr(self, '_gts', None) is None:
            self.setup_ground_truth()
        return self._gts

    # NOTE(review): `lru_cache` on a method stores `self` in the cache keys, so
    # the cache keeps a reference to every instance it has seen.
    @functools.lru_cache(typed=True)
    def dilated_mask(self, fraction: float):
        "`mask` processed by `dilate_by_fractional_pixel`, memoized per fraction."
        return dilate_by_fractional_pixel(self.mask, fraction)

    def mask_dilated1(self):
        "`mask` filtered with `ImageFilter.MaxFilter(3)`, computed once."
        if getattr(self, '_mask_dilated1', None) is None:
            self._mask_dilated1 = self.mask.filter(ImageFilter.MaxFilter(3))
        return self._mask_dilated1

    def mask_dilated05(self):
        "`dilated_mask(0.5)`, computed once."
        if getattr(self, '_mask_dilated05', None) is None:
            self._mask_dilated05 = self.dilated_mask(0.5)
        return self._mask_dilated05

    def mask_dilated02(self):
        "`dilated_mask(0.2)`, computed once."
        if getattr(self, '_mask_dilated02', None) is None:
            self._mask_dilated02 = self.dilated_mask(0.2)
        return self._mask_dilated02

    def dilated(self):
        "The dilated masks keyed by 1, 0.5 and 0.2."
        return {1: self.mask_dilated1(),
                0.5: self.mask_dilated05(),
                0.2: self.mask_dilated02(),}

    def __new__(cls,
                exp: ExperimentContext,
                idx: ImgSpecT,
                *args, **kwargs) -> Self:
        # Only narrows the signature; creation and caching are inherited.
        return super().__new__(cls, exp, idx, *args, **kwargs)  # type: ignore
class OCRExperimentContext(ExperimentContext):
    """
    A utility class to maintain shared state across all experiments within OCR domain.
    This class encapsulates state necessary for conducting PanelCleaner OCR experiments.
    """

    config: cfg.Config
    image_paths: list[Path]
    # OCR engine -> Image index -> Box index -> Crop method -> Result
    _results: dict[str, dict[ImgIdT, ResultSet]]

    engines = {
        'Tesseract': cfg.OCREngine.TESSERACT,
        'Idefics': None,
        'manga-ocr': cfg.OCREngine.MANGAOCR}

    def __init__(self,
                 config: cfg.Config | None,
                 image_paths: list[Path]
                 ):
        """Create the context for `image_paths`.

        Entries of `image_paths` may also be strings (as stored by `to_dict`).
        When `config` is None the result of `get_config()` is used.
        """
        super().__init__([Path(p).resolve() for p in image_paths])
        self.config = config or type(self).get_config()
        self.image_paths = self._paths
        self._reset_results()
        self._images = self._subjects

    @classmethod
    def get_config(cls, cache_dir: Path | None = None) -> cfg.Config:
        "Load the configuration and tune the current profile for these experiments."
        config = cfg.load_config()
        config.cache_dir = cache_dir or Path(".")
        profile = config.current_profile
        preprocessor_conf = profile.preprocessor
        # Modify the profile to OCR all boxes.
        # Make sure OCR is enabled.
        preprocessor_conf.ocr_enabled = True
        # Make sure the max size is infinite, so no boxes are skipped in the OCR process.
        preprocessor_conf.ocr_max_size = 10**10
        # Make sure the sus box min size is infinite, so all boxes with "unknown" language are skipped.
        preprocessor_conf.suspicious_box_min_size = 10**10
        # Set the OCR blacklist pattern to match everything, so all text gets reported in the analytics.
        preprocessor_conf.ocr_blacklist_pattern = ".*"
        return config

    def to_dict(self):
        "Serializable description: image paths and configured cache folder."
        return {
            'image_paths': list(map(str, self.image_paths)),
            'cache_dir': str(self.config.cache_dir)
        }

    def to_json(self):
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json_data(cls, d: dict):
        "Inverse of `to_dict`."
        # The stored paths are strings; `__init__` needs `Path` objects.
        paths = [Path(p) for p in d['image_paths']]
        return cls(cls.get_config(Path(d['cache_dir'])), paths)

    @classmethod
    def from_json_path(cls, path: Path):
        "Build a context from a file holding the output of `to_json`."
        return cls.from_json_data(json.loads(path.read_text(encoding="utf-8")))

    @functools.lru_cache()
    def mocr(self, ocr_model: str, lang: str):
        """Return the OCR processor for `ocr_model` and `lang`, memoized.

        Raises:
            KeyError: `ocr_model` is not a key of `engines`.
        """
        engine = self.engines[ocr_model]
        ocr_processor = ocr.get_ocr_processor(True, engine)
        proc = ocr_processor[lang2pcleaner(lang)]
        if isinstance(proc, TesseractOcr):
            proc.lang = lang2tesseract(lang)
        return proc

    def ocr_box(self, result: ResultOCR, ocr_model: str, lang: str):
        "Run OCR on `result.image` and store the post-processed text in `result.ocr`."
        assert result.image is not None
        text = self.mocr(ocr_model, lang)(result.image)
        result.ocr = postprocess_ocr(text)
        return result

    @property
    def cache_dir(self):
        return self.config.get_cleaner_cache_dir()
    image_cache_dir = ExperimentContext.subject_cache_dir

    @functools.lru_cache()
    def _load_page_data(self, image_idx: int):
        """Return `(json_data, page_data)` for the image, generating them if needed.

        The raw JSON is looked up in the image cache folder. When missing, the
        text detector is run first, the generated files are renamed with
        `strip_uuid`, and the image/mask paths stored in the JSON are rewritten
        relative to the context root.

        Raises:
            RuntimeError: no raw JSON exists even after running the detector.
        """
        config = self.config
        cache_dir = self.image_cache_dir(image_idx)
        img_path = self.path_from_idx(image_idx)
        image_name = img_path.stem

        def find_raw_jsons():
            return [_ for _ in cache_dir.glob("*#raw.json") if image_name in _.stem]

        # read cached json
        jsons = find_raw_jsons()
        assert len(jsons) <= 1
        # generate text boxes if needed
        if not jsons:
            pfl = config.current_profile
            gpu = torch.cuda.is_available() or torch.backends.mps.is_available()
            model_path = config.get_model_path(gpu)
            ctm.model2annotations(pfl.general, pfl.text_detector, model_path, [img_path], cache_dir)
            # we don't need unique names for this tests, strip uuids
            for p in cache_dir.glob(f"*{image_name}*"):
                p.rename(strip_uuid(p))
            jsons = find_raw_jsons()
            if not jsons:
                raise RuntimeError(f"No raw json was generated for {img_path} in {cache_dir}")

            # adapt paths to be relative to this notebook
            this_path = self._root
            json_file_path = jsons[0]
            json_data = json.loads(json_file_path.read_text(encoding="utf-8"))
            json_data["image_path"] = str(strip_uuid(json_data["image_path"]).relative_to(this_path))
            json_data["mask_path"] = str(strip_uuid(json_data["mask_path"]).relative_to(this_path))
            # write_text closes the file, unlike json.dump(..., open(...)).
            json_file_path.write_text(json.dumps(json_data, indent=2), encoding="utf-8")
        else:
            json_file_path = jsons[0]
            json_data = json.loads(json_file_path.read_text(encoding="utf-8"))

        page_data = st.PageData(
            json_data["image_path"], json_data["mask_path"],
            json_data["original_path"], json_data["scale"],
            [st.Box(*data["xyxy"]) for data in json_data["blk_list"]],
            [], [], [])
        # Merge boxes that have mutually overlapping centers.
        page_data.resolve_total_overlaps()
        return json_data, page_data

    def page_data(self, image_idx: int):
        _, page_data = self._load_page_data(image_idx)
        return page_data

    def json_data(self, image_idx: int):
        json_data, _ = self._load_page_data(image_idx)
        return json_data

    def experiment_image(self, image_idx: ImgIdT | str | Path) -> ImageContext | None:
        "Cached image context."
        return cast(ImageContext, self.experiment_subject(image_idx))

    def update_results(self, ocr_model: str, img_idx: ImgIdT, results: ResultSetDefault):
        "Replace all results of `img_idx` for `ocr_model`."
        self._results[ocr_model][img_idx] = cast(ResultSet, results)

    def _result_from(self, image_idx: ImgIdT, box_idx: BoxIdT, method: CropMethod, ocr: str | None = None):
        "Create an empty result for the box/method, optionally with OCR text already set."
        img_ctx = ImageContext(self, image_idx)
        extracted = method in _EXTRACTED_METHODS
        result_cls = ResultOCRExtracted if extracted else ResultOCR
        result = result_cls(int(box_idx), None, '', img_ctx.page_data,
                            img_ctx.gts, description=f"{method.value}")
        if ocr is not None:
            result.ocr = ocr
        return result

    def result(self,
               ocr_model: str,
               image_idx: ImgIdT, box_idx: BoxIdT, method: CropMethod,
               ocr: bool = True,
               rebuild: bool = False) -> ResultOCR | None:
        """Return the result for (model, image, box, method), building it if needed.

        A stored result is returned unless `rebuild` is true. Otherwise the box
        is cropped according to `method`, the crops are cached through
        `result.cache_image`, OCR is run when `ocr` is true, and the result is
        stored before being returned.
        """
        img_ctx = ImageContext(self, image_idx)
        result = self._results[ocr_model][image_idx][box_idx].get(method)
        if not rebuild and result is not None:
            return result

        result = self._result_from(image_idx, box_idx, method)
        image, cropped_image, cropped_mask = result.image, None, None
        base_image = img_ctx.base_image
        box = img_ctx.boxes[box_idx]
        if image is None and method in _IMAGE_METHODS:
            image = crop_by_image(
                method, box, base_image, self.config.current_profile.preprocessor)

        if image is None and method in _EXTRACTED_METHODS:
            mask = img_ctx.mask
            # NOTE(review): called with None presumably just to obtain the
            # cache paths - confirm against `ResultOCR.cache_image`.
            cropped_image_path = result.cache_image(cropped_image, "cropped")
            cropped_mask_path = result.cache_image(cropped_mask, "mask")
            if not cropped_image_path.exists() or not cropped_mask_path.exists():
                image, cropped_image, cropped_mask = crop_by_extracted(
                    method, box, base_image, mask,
                    cropped_image_path, cropped_mask_path, img_ctx.dilated())

        assert image is not None
        if result.image is None:
            result.image = image
            result.cache_image()
        if cropped_image is not None:
            result.cache_image(cropped_image, "cropped")
        if cropped_mask is not None:
            result.cache_image(cropped_mask, "mask")

        if ocr:
            result = self.ocr_box(result, ocr_model, img_ctx.page_lang)
        self._results[ocr_model][image_idx][box_idx][method] = result
        return result

    def results(self, ocr_model: str | None = None, img_idx: ImgIdT | None = None):
        "All results, those of one model, or those of one image of one model."
        if ocr_model is None: return self._results
        if img_idx is None: return self._results[ocr_model]
        return self._results[ocr_model][img_idx]

    def model_results(self, ocr_model: str):
        return cast(dict[ImgIdT, ResultSet], self.results(ocr_model))

    def image_results(self, ocr_model: str, img_idx: ImgIdT):
        return cast(ResultSet, self.results(ocr_model, img_idx))

    def box_results(self, ocr_model: str, img_idx: ImgIdT, box_idx: BoxIdT):
        return cast(ResultSet, self.results(ocr_model, img_idx))[box_idx]

    def method_results(self, ocr_model: str, img_idx: ImgIdT, method: CropMethod):
        "Result of `method` for every box of the image (None where missing)."
        image_results = self.image_results(ocr_model, img_idx)
        return {i: box_results.get(method) for i, box_results in image_results.items()}

    def _reset_results(self):
        # Missing levels are created on access: model -> image -> box -> {}.
        results = defaultdict(lambda: defaultdict(lambda: ResultSetDefault(dict)))
        self._results = cast(dict[str, dict[ImgIdT, ResultSet]], results)

    def reset_results(self,
                      ocr_model: str | None = None,
                      image_idx: int | None = None,
                      box_idx: int | None = None,
                      method: CropMethod | None = None):
        """Delete stored results; every argument left as None acts as a wildcard.

        Containers that become empty are removed as well. With no filter at
        all, the whole store is re-created.
        """
        if ocr_model is None and image_idx is None and box_idx is None and method is None:
            self._reset_results()
            return
        results = self._results
        models = tuple(results.keys()) if ocr_model is None else [ocr_model] if ocr_model in results else []
        # The loop variables use their own names: reusing `box_idx` would
        # overwrite the filter and narrow every later image to a single box.
        for model in models:
            img_nodes = results[model]
            imgs = tuple(img_nodes.keys()) if image_idx is None else [image_idx] if image_idx in img_nodes else []
            for img in imgs:
                box_nodes = img_nodes[img]
                boxes = tuple(box_nodes.keys()) if box_idx is None else [box_idx] if box_idx in box_nodes else []
                for box in boxes:
                    if method is None:
                        del box_nodes[box]
                        continue
                    methods = box_nodes[box]
                    methods.pop(method, None)
                    if not methods:
                        del box_nodes[box]
                if not box_nodes:
                    del img_nodes[img]
            if not img_nodes:
                del results[model]

    def reset(self):
        "Forget subjects, results and every memoized page data / OCR processor."
        super().reset()
        self.reset_results()
        self._load_page_data.cache_clear()
        self.mocr.cache_clear()


@FC.patch_to(ImageContext)
def setup(self, exp: OCRExperimentContext, image_idx: ImgSpecT, page_lang: str | None = None):
    """Initialise an `ImageContext`: page data, language, images and ground truth.

    Returns `self`, like the base-class `setup`.
    """
    # The class is named explicitly: `super(type(self), self)` recurses forever
    # when `self` is an instance of a subclass of ImageContext.
    super(ImageContext, self).setup(exp, image_idx)
    self._mask_dilated1 = self._mask_dilated05 = self._mask_dilated02 = None
    self.json_data, self.page_data = exp._load_page_data(self.idx)
    self.setup_page_lang(page_lang)
    self.mask = Image.open(self.page_data.mask_path)
    self.base_image = Image.open(self.page_data.image_path)
    self.setup_ground_truth()
    return self
class ContextVisor:
    """Widget front-end for a context object.

    A visor owns a dict of `values`, one control per value, an output widget,
    and optionally nested visors (`ctxs`). `hdlrs` maps a value name to the
    visor whose `update_output` renders it.
    """
    ctx: Any
    values: dict[str, Any]

    _css = ''

    _ctxs: "dict[str, ContextVisor]"
    _hdlrs: "dict[str, ContextVisor]"

    def __init__(self,
                 ctx: Any,
                 values: dict[str, Any],
                 out: "W.Output | None" = None,
                 ctxs: "dict[str, ContextVisor] | None" = None,
                 hdlrs: "dict[str, ContextVisor] | None" = None,
                 ):
        self._ctxs = ctxs or {}
        self._hdlrs = hdlrs or {}
        self.ctx = ctx
        self._out = out
        self.values = values

    @property
    def w(self) -> "W.DOMWidget":
        "Top-level widget, built by `setup_ui` on first access."
        if getattr(self, '_w', None) is None:
            self._w = self.setup_ui()
        return self._w

    @property
    def out(self) -> "W.Output":
        "Output widget; created on first access when none was given."
        if getattr(self, '_out', None) is None:
            self._out = W.Output()
            self._out.clear_output(wait=True)
        return self._out  # type: ignore

    @property
    def controls(self) -> "dict[str, W.ValueWidget | W.fixed]":
        "Controls of this visor only, built by `setup_controls` on first access."
        if getattr(self, '_controls', None) is None:
            self._controls = self.setup_controls()
        return self._controls

    @property
    def all_controls(self) -> "dict[str, W.ValueWidget | W.fixed]":
        "Controls of the nested visors followed by this visor's own; own controls win on name clashes."
        if getattr(self, '_all_controls', None) is None:
            controls = {}
            for visor in self._ctxs.values():
                controls.update(visor.all_controls)
            controls.update(self.controls)
            self._all_controls = controls
        return self._all_controls

    @property
    def all_values(self):
        "Values of each nested visor under its name, own values under 'self' and also flattened at top level."
        return {**{k: v.values for k, v in (self._ctxs | {'self': self}).items()}, **self.values}

    @property
    def comps(self):
        "Nested visors by name."
        return self._ctxs

    def comp(self, k: str) -> "ContextVisor | None":
        return self._ctxs.get(k)

    def handler(self, k: str) -> "ContextVisor | None":
        "Visor registered to render the value `k`, if any."
        return self._hdlrs.get(k)

    @property
    def styler(self) -> "W.Output | None":
        "Zero-height output widget holding the style element, or None without CSS."
        if (stl := self.setup_style()) is None:
            return None
        if getattr(self, '_style', None) is None:
            self._style = W.Output(layout={'height': '0px'})
            with self._style:
                display(stl)
        return self._style

    def setup_style(self):
        "HTML element carrying `_css`, or None when `_css` is empty."
        if not self._css:
            return None
        # The tag name is interpolated rather than written literally so that
        # it cannot be lost by tooling that strips HTML tags from the source.
        tag = 'style'
        return HTML(f"<{tag}>{self._css}</{tag}>")

    def update_output(self, **kwargs):
        "Render the given values. The default implementation prints them."
        cprint(kwargs)

    def setup_controls(self) -> "dict[str, W.ValueWidget | W.fixed]":
        "Default controls: one label per value name."
        return {k: W.Label(value=k) for k in self.values}

    def hide(self):
        self.w.layout.visibility = 'hidden'

    def show(self):
        self.w.layout.visibility = 'visible'

    def setup_ui(self):
        "Horizontal box with the widgets of the nested visors, then the own controls."
        comps = [visor.w for visor in self._ctxs.values()]
        return W.HBox([*comps, *self.controls.values()])

    def setup_display(self):
        if getattr(self, '_w', None) is None:
            self._w = self.setup_ui()

    def _output(self, **kwargs):
        "Render `kwargs` inside `out`, grouped by the visor that handles each name."
        collator = defaultdict(dict)
        show_inline_matplotlib_plots()
        with self.out:
            clear_output(wait=True)
            for k, v in kwargs.items():
                # Names without a registered handler are rendered by this
                # visor. (This used to be `assert 0`, which is stripped when
                # Python runs with -O and then silently dropped the value.)
                comp = self.handler(k)
                collator[self if comp is None else comp][k] = v
            for comp, kw in collator.items():
                comp.update_output(**kw)
            show_inline_matplotlib_plots()

    def interactive_output(self):
        "Observe every control and re-render whenever one of them changes."
        controls = self.all_controls
        controls2names = {v: k for k, v in controls.items()}

        def observer(change):
            control_name = controls2names[change['owner']]
            kwargs = {control_name: change['new']}
            updated = self._update(**kwargs)
            self._output(**updated)

        for w in controls.values():
            w.observe(observer, 'value')

    def display(self, **kwargs):
        """Show the visor; on later calls only push `kwargs` into the controls.

        The first call builds the UI, wires the observers, applies `kwargs`,
        registers this visor as handler of every value that has none, renders
        all values and displays the widgets.
        """
        if getattr(self, '_w', None) is None:
            self.setup_display()
            self.interactive_output()
            self._update(**(self.values | kwargs))
            all_values = {}
            for comp in list(self.comps.values()) + [self]:
                all_values.update(comp.values)
            self._hdlrs = {k: self._hdlrs.get(k, self) for k in all_values}
            self._output(**all_values)
            styler = self.styler
            if styler:
                display(styler, self.w, self.out)
            else:
                display(self.w, self.out)
        else:
            self.update(**kwargs)

    def _ipython_display_(self):
        self.display()

    def _update(self, update_value: bool = True, **kwargs):
        """Return the entries of `kwargs` that differ from the current values.

        Nested visors are processed first. `None` entries are ignored. The
        stored values are modified only when `update_value` is true.
        """
        updated = {}
        for visor in self.comps.values():
            updated.update(visor._update(update_value=update_value, **kwargs))
        values = self.values
        my_vals = _pops_(kwargs, self.values.keys())
        for k, v in my_vals.items():
            if v is not None and v != values[k]:
                if update_value:
                    values[k] = v
                updated[k] = v
        return updated

    def update(self, **kwargs):
        "Assign the changed entries of `kwargs` to their controls."
        updated = self._update(update_value=False, **kwargs)
        controls = self.all_controls
        for k, v in updated.items():
            controls[k].value = v
"- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n", + "\n", + "Profile Editor: cursor\n", + "Cache Directory: .\n", + "Default Torch Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n", + "Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n", + "GUI Theme: System default\n", + "\n", + "--------------------\n", + "\n", + "Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n", + "System default cache directory: /Users/vic/Library/Caches/pcleaner\n" + ] + }, + { + "data": { + "text/html": [ + "
      cache_dir: Path('cleaner')\n",
+       "     model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n",
+       "         device: 'mps'\n",
+       "
\n" + ], + "text/plain": [ + " cache_dir: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'cleaner'\u001b[0m\u001b[1m)\u001b[0m\n", + " model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n", + " device: \u001b[32m'mps'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "CONFIG = OCRExperimentContext.get_config()\n", + "\n", + "gpu = torch.cuda.is_available() or torch.backends.mps.is_available()\n", + "model_path = CONFIG.get_model_path(gpu)\n", + "device = (\"mps\" if torch.backends.mps.is_available() else \"cuda\") if model_path.suffix == \".pt\" else \"cpu\"\n", + "\n", + "CONFIG.show()\n", + "cprint(\n", + " f\"{'cache_dir':>15}: {repr(cache_dir)}\\n\"\n", + " f\"{'model_path':>15}: {repr(model_path)}\\n\"\n", + " f\"{'device':>15}: {repr(device)}\")\n", + "\n", + "test_eq(CONFIG.cache_dir, Path(\".\"))\n", + "test_eq(CONFIG.current_profile.preprocessor.ocr_enabled, True)\n", + "test_eq(CONFIG.current_profile.preprocessor.ocr_max_size, 10**10)\n", + "test_eq(CONFIG.current_profile.preprocessor.suspicious_box_min_size, 10**10)\n", + "test_eq(CONFIG.current_profile.preprocessor.ocr_blacklist_pattern, \".*\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "CONTEXT = OCRExperimentContext(CONFIG, IMAGE_PATHS)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ImageSelector" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class ImageSelector(ContextVisor):\n", + " ctx: OCRExperimentContext\n", + "\n", + " @property\n", + " def image_ctx(self):\n", + " return ImageContext(self.ctx, self.values['image_idx'])\n", + " \n", + " def setup_controls(self):\n", + " paths = self.ctx.image_paths\n", + " w = W.Dropdown(\n", + " 
options={_.stem:i for i,_ in enumerate(paths)}, \n", + " value=self.values['image_idx'],\n", + " layout={'width': 'fit-content'},\n", + " style={'description_width': 'initial'})\n", + " return {'image_idx': w}\n", + "\n", + " def update(self, image_idx: ImgSpecT | None = None, **kwargs):\n", + " if image_idx is None: return\n", + " idx = self.ctx.normalize_idx(image_idx)\n", + " if idx is None: return\n", + " super().update(image_idx=idx, **kwargs)\n", + "\n", + "\n", + " def __init__(self, \n", + " ctx: OCRExperimentContext, /, \n", + " image_idx: ImgSpecT = 0, *, \n", + " out: W.Output | None=None):\n", + " idx = ctx.normalize_idx(image_idx)\n", + " assert idx is not None, f\"Image {image_idx} not found in experiment context\"\n", + " super().__init__(ctx, {'image_idx': idx}, out)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c033b2c76629478f8d7702e1f3f8666a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(Dropdown(index=2, layout=Layout(width='fit-content'), options={'Action_Comics_1960-01-00_(262)'…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d78cb5cf72c34bcab9274cfd77f585a5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cleanupwidgets('image_selector')\n", + "\n", + "image_selector = ImageSelector(CONTEXT, 2)\n", + "image_selector\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "image_selector.update(13)\n", + "test_eq(image_selector.values['image_idx'], 13)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# OCRContextVisor" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + 
"metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class OCRContextVisor(ContextVisor):\n", + " ctx: OCRExperimentContext\n", + " \n", + " def update_output(self, /, image_idx: ImgIdT, **kwargs):\n", + " img_path = self.ctx.path_from_idx(image_idx)\n", + " display_image_grid([img_path], 1, 1)\n", + "\n", + " def update(self, image_idx: ImgSpecT | None = None, **kwargs):\n", + " if image_idx is None: return\n", + " idx = self.ctx.normalize_idx(image_idx)\n", + " if idx is None: return\n", + " super().update(image_idx=idx, **kwargs)\n", + " \n", + " def __init__(self, \n", + " ctx: OCRExperimentContext, /, \n", + " image_idx: ImgSpecT = 0, *, \n", + " out: W.Output | None=None):\n", + " super().__init__(ctx, {}, out, \n", + " ctxs={'image_idx': ImageSelector(ctx, image_idx, out=self.out)})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3b40f82e57564fbcae7913d7a76fbc32", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(HBox(children=(Dropdown(index=2, layout=Layout(width='fit-content'), options={'Action_Comics_19…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "174fbbc4b6ff42ac8013984946e787e4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cleanupwidgets('ctx_visor')\n", + "\n", + "# ContextVisor(CONTEXT)\n", + "# ContextVisor(CONTEXT).display(3)\n", + "ctx_visor = OCRContextVisor(CONTEXT, 2)\n", + "ctx_visor\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "ctx_visor.update('Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Base image\n", + "> 
Change `BASE_IMAGE_IDX` to select a different base image to use in the examples below." + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "BASE_IMAGE_IDX: ImgIdT = cast(ImgIdT, CONTEXT.normalize_idx(\"Strange_Tales_172005.jpg\"))\n", + "# BASE_IMAGE_IDX = CONTEXT.normalize_idx(\"0033\")\n", + "# BASE_IMAGE_IDX = CONTEXT.normalize_idx(\"INOUE_KYOUMEN_002\")\n", + "# BASE_IMAGE_IDX = CONTEXT.normalize_idx(\"Action_Comics_1960-01-00_(262)\")\n", + "\n", + "assert BASE_IMAGE_IDX is not None\n", + "img_path = Path(CONTEXT.image_paths[BASE_IMAGE_IDX])\n", + "assert img_path.exists()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Empty cache\n", + "> Clear the image cache used profusely throughout the examples below." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "You will be warned before the cache is emptied." + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "# CONTEXT.empty_cache_warn()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "# CONTEXT.empty_cache_warn(BASE_IMAGE_IDX)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ImageContext of base image\n", + "> Creates the `ImageContext` for the base image.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If `PanelCleaner` page data is already cached, it is loaded from the cache.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "CONTEXT.reset()" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "page_lang = 'English'\n", + "# page_lang = 'Japanese'\n", + "# page_lang = 'Spanish'\n", + "# page_lang = 'French'\n", + "\n", + "IMAGE_CONTEXT = ImageContext(CONTEXT, BASE_IMAGE_IDX, page_lang=page_lang)\n", + "test_eq(IMAGE_CONTEXT.page_data is not None, True)\n", + "# cprint(IMAGE_CONTEXT.page_data.boxes)\n", + "RenderJSON(IMAGE_CONTEXT.json_data, 360, 2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "test_is(IMAGE_CONTEXT, ImageContext(CONTEXT, BASE_IMAGE_IDX))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Visualize image" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Strange_Tales_172005.jpg - 1275x1888 px: 4.25x6.29\" @ 188.32 dpi
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "page_data = IMAGE_CONTEXT.page_data\n", + "display_image_grid([page_data.image_path, page_data.mask_path], 1, 2, caption=IMAGE_CONTEXT.image_name_rich)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "_, out_path = page_boxes(page_data)\n", + "display_image_grid([out_path], 1, 1)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ModelSelector\n" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class OCRModel(Enum):\n", + " TESSERACT = 0\n", + " IDEFICS = 1\n", + " @staticmethod\n", + " def __display_names__() -> dict[str, OCRModel]:\n", + " return dict(\n", + " zip(\"Tesseract, Idefics\".split(', '), \n", + " OCRModel))\n", + "\n", + "\n", + "class ModelSelector(ContextVisor):\n", + " ctx: OCRExperimentContext\n", + " \n", + " def setup_controls(self):\n", + " options = self.models\n", + " w = W.Dropdown(\n", + " options=options, \n", + " value=self.values['model'],\n", + " layout={'width': 'fit-content'},\n", + " style={'description_width': 'initial'})\n", + " return {'model': w}\n", + "\n", + " def setup_ui(self):\n", + " ctls = self.controls\n", + " model_grp = W.HBox([ctls['model']])\n", + " model_grp.add_class('model_grp')\n", + " comps = []\n", + " for visor in self.comps.values():\n", + " comps.append(visor.setup_ui())\n", + " ui = W.HBox([*comps, model_grp])\n", + " return ui\n", + "\n", + " def __init__(self, \n", + " exp_ctx: OCRExperimentContext,\n", + " ocr_model: OCRModel | None=OCRModel.TESSERACT,\n", + " ocr_models: dict[str, OCRModel] | None = None,\n", + " out: W.Output | None = None\n", + " ):\n", + " self.models: dict[str, OCRModel] = ocr_models or OCRModel.__display_names__()\n", + " super().__init__(exp_ctx, \n", + " {'model': ocr_model or OCRModel.TESSERACT}, \n", + " out=out or self.out)#, ctxs=[exp_visor])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": 
"413f66f1d43a4d849e43a79ca9b16502", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(HBox(children=(Dropdown(layout=Layout(width='fit-content'), options={'Tesseract': 30}: {w} x {h} pixels\\n\"\n", + " f\"{'PIL Info DPI':>30}: {repr(img.info.get('dpi', None))}\\n\"\n", + " f\"{'Print Size 300 DPI':>30}: {print_size_in[0]:.3f} x {print_size_in[1]:.3f} in\"\n", + " f\" / {print_size_cm[0]:.3f} x {print_size_cm[1]:.3f} cm\\n\"\n", + " f\"Required DPI Modern Age format: {required_dpi:.3f} dpi \"\n", + " f\"({format[0]:.3f} x {format[1]:.3f} in)\")\n", + "\n", + "\n", + " def display_content(self, image_ctx: ImageContext, display_option: DisplayOptions):\n", + " page_data = image_ctx.page_data\n", + " if display_option in (DisplayOptions.ALL, DisplayOptions.PAGE_DATA):\n", + " self.image_info(image_ctx)\n", + " RenderJSON(image_ctx.json_data, 350, 2).display()\n", + " if display_option in (DisplayOptions.ALL, DisplayOptions.GROUND_TRUTH):\n", + " cprint(image_ctx.gts)\n", + " if display_option == DisplayOptions.IMAGE:\n", + " display_image_grid([page_data.image_path], 1, 1)\n", + " if display_option == DisplayOptions.MASK:\n", + " display_image_grid([page_data.mask_path], 1, 1)\n", + " if display_option in (DisplayOptions.ALL, DisplayOptions.IMAGE_MASK):\n", + " display_image_grid([page_data.image_path, page_data.mask_path], 1, 2)\n", + " if display_option in (DisplayOptions.ALL, DisplayOptions.BOXES):\n", + " _, out_path = page_boxes(page_data)\n", + " display_image_grid([out_path], 1, 1)\n", + "\n", + "\n", + " def setup_controls(self):\n", + " options = self.display_options or {**DisplayOptions.__display_names__()}\n", + " display_option_wdgt = W.Dropdown(\n", + " options=options, \n", + " value=self.values['display_option'],\n", + " layout={'width': '120px'},\n", + " style={'description_width': 'initial'})\n", + " return {'display_option': display_option_wdgt}\n", + "\n", + "\n", + " def setup_ui(self):\n", + " ctls = 
self.controls\n", + " display_option_grp = W.HBox([ctls['display_option']])\n", + " display_option_grp.add_class('display_option_grp')\n", + " comps = []\n", + " for visor in self.comps.values():\n", + " comps.append(visor.setup_ui())\n", + " ui = W.HBox([*comps, display_option_grp])\n", + " return ui\n", + "\n", + "\n", + " def __init__(self, \n", + " exp_ctx: OCRExperimentContext,\n", + " display_option: DisplayOptions | None=DisplayOptions.BOXES,\n", + " display_options: Mapping[str, DisplayOptions] | None = None,\n", + " out: W.Output | None = None\n", + " ):\n", + " self.display_options = display_options\n", + " super().__init__(exp_ctx, \n", + " {'display_option': display_option or DisplayOptions.BOXES}, \n", + " out=out or self.out)#, ctxs=[exp_visor])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "51b272e199e94fe69465b30306499d8a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(HBox(children=(Dropdown(index=2, layout=Layout(width='120px'), options={'Boxes': Width x Height: 1275 x 1888 pixels\n", + " PIL Info DPI: None\n", + " Print Size 300 DPI: 4.250 x 6.293 in / 10.795 x 15.985 cm\n", + "Required DPI Modern Age format: 188.324 dpi (6.625 x 10.250 in)\n", + "\n" + ], + "text/plain": [ + " Width x Height: \u001b[1;36m1275\u001b[0m x \u001b[1;36m1888\u001b[0m pixels\n", + " PIL Info DPI: \u001b[3;35mNone\u001b[0m\n", + " Print Size \u001b[1;36m300\u001b[0m DPI: \u001b[1;36m4.250\u001b[0m x \u001b[1;36m6.293\u001b[0m in \u001b[35m/\u001b[0m \u001b[1;36m10.795\u001b[0m x \u001b[1;36m15.985\u001b[0m cm\n", + "Required DPI Modern Age format: \u001b[1;36m188.324\u001b[0m dpi \u001b[1m(\u001b[0m\u001b[1;36m6.625\u001b[0m x \u001b[1;36m10.250\u001b[0m in\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "img = IMAGE_CONTEXT.base_image\n", + "w, h 
= img.size\n", + "\n", + "print_size_in = size(w, h, 'in', 300)\n", + "print_size_cm = size(w, h, 'cm', 300)\n", + "required_dpi = dpi(w, h, 'Modern Age')\n", + "format = PRINT_FORMATS['Modern Age']\n", + "cprint( f\"{'Width x Height':>30}: {w} x {h} pixels\\n\"\n", + " f\"{'PIL Info DPI':>30}: {repr(img.info.get('dpi', None))}\\n\"\n", + " f\"{'Print Size 300 DPI':>30}: {print_size_in[0]:.3f} x {print_size_in[1]:.3f} in\"\n", + " f\" / {print_size_cm[0]:.3f} x {print_size_cm[1]:.3f} cm\\n\"\n", + " f\"Required DPI Modern Age format: {required_dpi:.3f} dpi ({format[0]:.3f} x {format[1]:.3f} in)\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((1275, 1888),\n", + " (4.25, 6.293333333333333),\n", + " (10.795, 15.985066666666667),\n", + " 188.32397606994937)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
                Width x Height: 804 x 1241 pixels\n",
+       "                  PIL Info DPI: None\n",
+       "            Print Size 300 DPI: 2.680 x 4.137 in / 6.807 x 10.507 cm\n",
+       "Required DPI Modern Age format: 121.216 dpi (6.625 x 10.250 in)\n",
+       "
\n" + ], + "text/plain": [ + " Width x Height: \u001b[1;36m804\u001b[0m x \u001b[1;36m1241\u001b[0m pixels\n", + " PIL Info DPI: \u001b[3;35mNone\u001b[0m\n", + " Print Size \u001b[1;36m300\u001b[0m DPI: \u001b[1;36m2.680\u001b[0m x \u001b[1;36m4.137\u001b[0m in \u001b[35m/\u001b[0m \u001b[1;36m6.807\u001b[0m x \u001b[1;36m10.507\u001b[0m cm\n", + "Required DPI Modern Age format: \u001b[1;36m121.216\u001b[0m dpi \u001b[1m(\u001b[0m\u001b[1;36m6.625\u001b[0m x \u001b[1;36m10.250\u001b[0m in\u001b[1m)\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(IMAGE_CONTEXT.image_info)\n", + "img_visor.image_info()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Balloons and Captions Ground Truth\n", + "> The ground truth for the balloons and captions is read from a `.txt` file.\n", + "\n", + "The file is named `.gt.txt` and contains one entry per line, corresponding to each balloon or caption in the order found in PanelCleaner page data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.',\n", + " 'The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their master comes home--',\n", + " 'And one in need of some help, it would appear.',\n", + " 'Bambu-- we have a guest.',\n", + " '--and tonight, he comes most urgently, slamming open the oaken front doors!',\n", + " 'Tell me, master-- how may Bambu serve?',\n", + " 'Some blankets to keep her warm, Bambu-- and perhaps some dry clothes',\n", + " \"The echo of the old man's footsteps fades down the hall as...\",\n", + " 'How curious the whims of fate. 
Had I not chanced to stroll along the river tonight--',\n", + " 'As quickly as I can, master',\n", + " '--the girl would most surely be dead by now.',\n", + " 'Ghede has been generous. the Death God has given the girl a second chance at--',\n", + " \"Easy, girl-- there's nothing to scream about anymore.\",\n", + " \"You're among friends now, you're safe!\",\n", + " 'Continued after next page']" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "IMAGE_CONTEXT.gts\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Experiment\n", + "> Use `ExperimentOCR` to perform OCR on the page balloons given a `CropMethod` and a model (i.e., `'Tesseract'` or `'Idefics'`)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "\n", + "def trimmed_mean(data, trim_percent):\n", + " sorted_data = np.sort(data)\n", + " n = len(data)\n", + " trim_count = int(trim_percent * n)\n", + " trimmed_data = sorted_data[trim_count:-trim_count]\n", + " return np.mean(trimmed_data)\n", + "\n", + "def mad_based_outlier(points, threshold=3.5):\n", + " median = np.median(points)\n", + " diff = np.abs(points - median)\n", + " mad = np.median(diff)\n", + " modified_z_score = 0.6745 * diff / mad\n", + " return points[modified_z_score < threshold]\n", + "\n", + "def iqr_outlier_removal(data):\n", + " q1 = np.percentile(data, 25)\n", + " q3 = np.percentile(data, 75)\n", + " iqr = q3 - q1\n", + " lower_bound = q1 - 1.5 * iqr\n", + " upper_bound = q3 + 1.5 * iqr\n", + " return data[(data >= lower_bound) & (data <= upper_bound)]\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "@dataclasses.dataclass\n", + "class Experiment:\n", + " ctx: ExperimentContext\n", + "\n", + "\n", + "@dataclasses.dataclass\n", + "class ExperimentOCR(Experiment):\n", + " 
ctx: ImageContext\n", + " ocr_model: str\n", + "\n", + " @property\n", + " def img_ctx(self): return self.ctx\n", + " @property\n", + " def ctxs(self):\n", + " img_ctx = self.img_ctx\n", + " return cast(OCRExperimentContext, img_ctx.exp), img_ctx\n", + "\n", + " @classmethod\n", + " def file_path_of(cls, page_data: st.PageData, ocr_model: str):\n", + " return f\"{Path(page_data.original_path).stem}_{ocr_model}.json\"\n", + " \n", + " def file_path(self):\n", + " img_ctx = self.img_ctx\n", + " return type(self).file_path_of(img_ctx.page_data, self.ocr_model)\n", + " \n", + " def to_dict(self):\n", + " \"JSON serializable dict of the experiment\"\n", + " img_ctx = self.img_ctx\n", + " img_idx = img_ctx.image_idx\n", + " results = results_to_dict(self.results())\n", + " return {\n", + " 'image_name': img_ctx.image_name,\n", + " 'ocr_model': self.ocr_model, \n", + " 'results': results,\n", + " }\n", + "\n", + " def to_json(self, out_dir: Path | None = None):\n", + " img_ctx = self.img_ctx\n", + " fp = (out_dir or img_ctx.cache_dir_image) / self.file_path()\n", + " data = self.to_dict()\n", + " with open(fp, 'w') as f:\n", + " json.dump(data, f, indent=2)\n", + " return fp, data\n", + "\n", + " @classmethod\n", + " def from_json(cls, experiment: OCRExperimentContext, json_path: Path) -> Self:\n", + " try:\n", + " with open(json_path, 'r') as f:\n", + " data = json.load(f)\n", + " except Exception as e:\n", + " logger.error(f\"Error loading {json_path}: {e}\")\n", + " raise e\n", + " ocr_model = data['ocr_model']\n", + " img_ctx = ImageContext(experiment, data['image_name'])\n", + " results: ResultSetDefault = dict_to_results(\n", + " img_ctx.image_idx, \n", + " data['results'], \n", + " result_factory=experiment._result_from)\n", + " experiment.update_results(ocr_model, img_ctx.image_idx, results)\n", + " return cls(img_ctx, ocr_model)\n", + "\n", + " @classmethod\n", + " def from_image(cls, \n", + " ctx: OCRExperimentContext, \n", + " ocr_model: str, \n", + " 
image_idx: ImgSpecT):\n", + " idx = cast(ImgIdT, ctx.normalize_idx(image_idx))\n", + " img_ctx = ImageContext(ctx, idx)\n", + " if img_ctx is None:\n", + " raise ValueError(f\"Image {image_idx} not found in experiment context\")\n", + " fp = img_ctx.cache_dir / cls.file_path_of(img_ctx.page_data, ocr_model)\n", + " if fp.exists(): \n", + " return cast(Self, cls.from_json(cast(OCRExperimentContext, img_ctx.exp), fp))\n", + " return cls(img_ctx, ocr_model)\n", + "\n", + " @classmethod\n", + " def from_method(cls, \n", + " ctx: OCRExperimentContext, \n", + " ocr_model: str, \n", + " image_idx: ImgIdT | str | Path, \n", + " method: CropMethod):\n", + " experiment = cls.from_image(ctx, ocr_model, image_idx)\n", + " if experiment is None:\n", + " return None\n", + " return experiment.method_experiment(method)\n", + "\n", + " @classmethod\n", + " def saved_experiment(cls, \n", + " ctx: OCRExperimentContext, ocr_model: str, image_idx: ImgIdT | str | Path):\n", + " idx = ctx.normalize_idx(image_idx)\n", + " if idx is None: \n", + " logger.warning(f\"Image {image_idx} not found in experiment context\")\n", + " return None\n", + " return cls.from_image(ctx, ocr_model, idx)\n", + "\n", + " @classmethod\n", + " def saved_experiments(cls, ctx: OCRExperimentContext, ocr_model: str) -> list[Self]:\n", + " return [exp for i in range(len(ctx.image_paths))\n", + " if (exp := cls.from_image(ctx, ocr_model, i)) is not None]\n", + " \n", + "\n", + " def result(self, box_idx: BoxIdT, method: CropMethod, ocr: bool=True, rebuild: bool=False):\n", + " ctx, img_ctx = self.ctxs\n", + " return ctx.result(self.ocr_model, img_ctx.image_idx, box_idx, method, ocr, rebuild)\n", + "\n", + " def results(self):\n", + " ctx, img_ctx = self.ctxs\n", + " return cast(ResultSet, ctx.results(self.ocr_model, img_ctx.image_idx))\n", + "\n", + " def has_run(self):\n", + " \"at least one method has run\"\n", + " img_ctx = self.img_ctx\n", + " return len(self.results()) == len(img_ctx.page_data.boxes)\n", + " 
\n", + " def best_results(self):\n", + " img_ctx = self.img_ctx\n", + " results = self.results()\n", + " if len(results) < len(img_ctx.page_data.boxes): # at least one method has run\n", + " return None\n", + " best = []\n", + " for box_idx in results:\n", + " methods = results[box_idx]\n", + " best_method = max(methods, key=lambda m: methods[m].acc) # type: ignore\n", + " best.append((best_method, methods[best_method]))\n", + " return best\n", + "\n", + " def save_results_as_ground_truth(self, overwrite=False):\n", + " img_ctx = self.img_ctx\n", + " gts_path = ground_truth_path(img_ctx.page_data)\n", + " if overwrite or not gts_path.exists():\n", + " best_results = self.best_results()\n", + " if best_results:\n", + " tt = [r.ocr for m,r in best_results]\n", + " gts_path.write_text('\\n'.join(tt), encoding=\"utf-8\")\n", + " img_ctx.setup_ground_truth()\n", + " logger.info(f\"Ground truth data saved successfully to {gts_path}\")\n", + " return True\n", + " else:\n", + " logger.info(\"No best results available to save.\")\n", + " return False\n", + " else:\n", + " return False\n", + "\n", + " @property\n", + " def experiments(self):\n", + " if not hasattr(self, '_experiments'):\n", + " self._experiments = {}\n", + " return self._experiments\n", + " def method_experiment(self, method: CropMethod) -> ExperimentOCRMethod:\n", + " if method not in self.experiments:\n", + " self.experiments[method] = ExperimentOCRMethod(self, method)\n", + " return self.experiments[method]\n", + " \n", + "\n", + " def to_dataframe(self):\n", + " \"Dataframe with crop methods as columns and box ids as rows\"\n", + " methods = list(CropMethod.__members__.values())\n", + " experiments = [self.method_experiment(m) for m in methods]\n", + " accuracies = [[result.acc for result in exp.results()] for exp in experiments]\n", + " # transpose accuracies\n", + " accuracies = list(zip(*accuracies))\n", + " return pd.DataFrame(accuracies, columns=CropMethod.__display_names__())\n", + "\n", + " def 
plot_accuracies(self, \n", + " methods: list[CropMethod] | None = None, \n", + " ):\n", + " \"Plots a horizontal bar chart of the accuracies for a list of method experiments.\"\n", + " methods = methods or list(CropMethod.__members__.values())\n", + " experiments = [self.method_experiment(m) for m in methods]\n", + " if not experiments: return\n", + "\n", + " ctx, img_ctx = self.ctxs\n", + " page_data = img_ctx.page_data\n", + " model = self.ocr_model\n", + " accuracies = [[result.acc for result in exp.results()] for exp in experiments]\n", + " accuracies = [np.mean(a) for a in accuracies]\n", + " # accuracies = [np.mean([result.acc for result in exp.results()]) for exp in experiments]\n", + "\n", + " _, ax = plt.subplots(figsize=(10, 5))\n", + " \n", + " # Normalize the accuracies for color mapping\n", + " norm = plt.Normalize(min(accuracies), max(accuracies))\n", + " # Color map from red to green\n", + " cmap = plt.get_cmap('RdYlGn')\n", + " colors = cmap(norm(accuracies))\n", + "\n", + " ax.barh([m.value for m in methods], accuracies, color=colors)\n", + "\n", + " ax.set_xscale('log') # Set the x-axis to a logarithmic scale\n", + " ax.set_xlabel('Average Accuracy (log scale)', fontsize=12, fontweight='bold')\n", + "\n", + " ax.set_ylabel('Method', fontsize=12, fontweight='bold')\n", + " ax.set_yticks(range(len(methods)))\n", + " ax.set_yticklabels([f'{method.value} ({acc:.2f})' \n", + " for method, acc in zip(methods, accuracies)], fontsize=12)\n", + " max_acc_index = np.argmax(accuracies)\n", + " ax.get_yticklabels()[max_acc_index].set(color='blue', fontweight='bold')\n", + "\n", + " title_text = (f\"{page_data.original_path} - OCR model: {model}\")\n", + " ax.set_title(title_text, fontsize=12, fontweight='bold')\n", + "\n", + " plt.tight_layout()\n", + " plt.show()\n", + "\n", + "\n", + " def summary_box(self, box_idx: int):\n", + " results: list[tuple[CropMethod, ResultOCR]] = []\n", + " pb = tqdm(CropMethod.__members__.values(), leave=False, desc=f\"Box 
#{box_idx+1}\")\n", + " for m in pb:\n", + " r = cast(ResultOCR, self.result(box_idx, m))\n", + " results.append((m, r))\n", + " methods, images, ocrs, accs = zip(\n", + " *map(\n", + " lambda t: (t[0].value, t[1].cache_image(), t[1].diff_tagged(), acc_as_html(t[1].acc)), \n", + " results))\n", + " display_columns([methods, images, accs, ocrs], \n", + " headers=[\"Method\", f\"Box #{box_idx+1}\", \"Accuracy\", \"OCR\"])\n", + "\n", + "\n", + " def summary_method(self, method: CropMethod):\n", + " results = self.method_experiment(method).results()\n", + " methods, images, ocrs, accs = zip(\n", + " *map(\n", + " lambda r: (r.block_idx+1, r.cache_image(), r.diff_tagged(), acc_as_html(r.acc)), \n", + " results))\n", + " display_columns([methods, images, accs, ocrs], \n", + " headers=[\"Box #\", \"Box\", \"Accuracy\", f\"{method.value} OCR\"])\n", + "\n", + "\n", + " def display(self):\n", + " out = []\n", + " for method in CropMethod:\n", + " out.append(f\"---------- {method.value} ----------\")\n", + " results = self.method_experiment(method).results()\n", + " out.extend(results)\n", + " out.append('\\n')\n", + " cprint(*out, soft_wrap=True)\n", + "\n", + "\n", + " def reset(self, box_idx: int | None = None, method: CropMethod | None = None):\n", + " ctx, img_ctx = self.ctxs\n", + " ctx.reset_results(None, img_ctx.image_idx, box_idx, method)\n", + "\n", + " def perform_methods(self, \n", + " methods: CropMethod | list[CropMethod] | None = None, \n", + " box_idxs: BoxIdT | list[BoxIdT] | None = None,\n", + " rebuild: bool = False,\n", + " plot_acc: bool = False\n", + " ):\n", + " if methods is None:\n", + " methods = [*CropMethod.__members__.values()]\n", + " elif isinstance(methods, CropMethod):\n", + " methods = [methods]\n", + " if rebuild:\n", + " _methods = tqdm(methods, desc=\"Methods\")\n", + " else:\n", + " _methods = methods\n", + " for method in _methods:\n", + " method_exp = self.method_experiment(method)\n", + " if method_exp: \n", + " if rebuild:\n", + " 
method_exp(box_idxs, rebuild=rebuild)\n", + " if plot_acc:\n", + " self.plot_accuracies()\n", + "\n", + " def __call__(self, \n", + " box_idxs: BoxIdT | list[BoxIdT] | None = None,\n", + " methods: CropMethod | list[CropMethod] | None = None, \n", + " save: bool = True,\n", + " display=False, \n", + " rebuild: bool=False, \n", + " save_as_ground_truth=False):\n", + " self.perform_methods(methods, box_idxs, rebuild=rebuild)\n", + " if save_as_ground_truth:\n", + " self.save_results_as_ground_truth(overwrite=True)\n", + " if save:\n", + " self.to_json()\n", + " if display:\n", + " self.display()\n", + " \n", + "\n", + "@dataclasses.dataclass\n", + "class ExperimentOCRMethod:\n", + " ctx: ExperimentOCR\n", + " method: CropMethod\n", + "\n", + " @property\n", + " def exp_ctx(self): return self.ctx\n", + " @property\n", + " def img_ctx(self): return self.ctx.ctx\n", + " @property\n", + " def ctxs(self):\n", + " img_ctx = self.img_ctx\n", + " return cast(OCRExperimentContext, img_ctx.exp), img_ctx, self.ctx\n", + " \n", + " def result(self, box_idx: BoxIdT, ocr: bool=True, rebuild: bool=False) -> ResultOCR | None:\n", + " ctx, img_ctx, exp_ctx = self.ctxs\n", + " return ctx.result(exp_ctx.ocr_model, img_ctx.image_idx, box_idx, self.method, ocr, rebuild)\n", + "\n", + " def results(self, \n", + " box_idxs: BoxIdT | list[BoxIdT] | None = None, \n", + " ocr: bool=True, rebuild: bool=False) -> list[ResultOCR]:\n", + " ctx, img_ctx, exp_ctx = self.ctxs\n", + " if box_idxs is None:\n", + " box_idxs = list(range(len(img_ctx.boxes)))\n", + " elif isinstance(box_idxs, int):\n", + " box_idxs = [box_idxs]\n", + " model = exp_ctx.ocr_model\n", + " results = ctx.method_results(model, img_ctx.image_idx, self.method)\n", + " results = {i:results[i] if i in results else None for i in box_idxs}\n", + " pb = rebuild or not results or any(r is None for r in results.values())\n", + " if pb and len(results) > 2:\n", + " progress_bar = tqdm(list(results.keys()), desc=f\"{self.method.value} - 
{model}\")\n", + " else:\n", + " progress_bar = list(results.keys())\n", + " results = []\n", + " for i in progress_bar:\n", + " results.append(self.result(i, ocr, rebuild=rebuild))\n", + " return results\n", + "\n", + "\n", + " def get_results_html(self, \n", + " box_idxs: BoxIdT | list[BoxIdT] | None = None,\n", + " max_image_width: int | None = None): \n", + " _, img_ctx, exp_ctx = self.ctxs\n", + " results: list[ResultOCR] = self.results(box_idxs)\n", + " accs = np.array([r.acc for r in results])\n", + " mean_accuracy = np.mean(accs)\n", + " mean_trimmed = trimmed_mean(accs, 0.1)\n", + " # filtered_data = mad_based_outlier(accs)\n", + " # mean_mad = np.mean(filtered_data)\n", + " # filtered_data = iqr_outlier_removal(accs)\n", + " # mean_iqr = np.mean(filtered_data)\n", + " \n", + " descriptions, images, ocrs, accs = zip(*map(\n", + " lambda r: (\n", + " r.block_idx+1, \n", + " r.cache_image(), \n", + " r.diff_tagged(), \n", + " acc_as_html(r.acc)\n", + " ), results))\n", + " non_breakin_space = u'\\u00A0'\n", + " tmpl = \"{}\"\n", + " padded_s = lambda s,n: tmpl.format(s.rjust(n))\n", + " acc_fmt = f\"{mean_accuracy:.2f}/{mean_trimmed:.2f}\"\n", + " w, h = img_ctx.base_image.size\n", + " dim, _dpi = size(w, h), dpi(w, h)\n", + " dim_fmt = f\"{w}x{h} px: {dim[0]:.2f} x {dim[1]:.2f} in @ {_dpi:.2f} dpi\"\n", + " return '\\n
\\n'.join([\n", + " (\"
\"\n", + " f\"{padded_s('Page', 24)}: {img_ctx.page_data.original_path}
\"\n", + " f\"{padded_s('Size', 24)}: {dim_fmt}
\"\n", + " f\"{padded_s('Model', 24)}: {exp_ctx.ocr_model}
\"\n", + " f\"{padded_s('Crop Method', 24)}: {self.method.value}
\"\n", + " f\"{padded_s('Accuracy Mean/Trimmed', 24)}: {acc_fmt}\"\n", + " \"
\"), \n", + " get_columns_html(\n", + " [descriptions, images, accs, ocrs], \n", + " max_image_width, \n", + " headers=[\"Box #\", \"Image\", \"Accuracy\", \"OCR\"]),\n", + " ])\n", + "\n", + " def display(self, \n", + " box_idxs: BoxIdT | list[BoxIdT] | None = None, max_image_width: int | None = None):\n", + " display(HTML(self.get_results_html(box_idxs, max_image_width)))\n", + "\n", + "\n", + " def summary(self):\n", + " results = self.results()\n", + " methods, images, ocrs, accs = zip(\n", + " *map(\n", + " lambda r: (r.block_idx+1, r.cache_image(), r.diff_tagged(), acc_as_html(r.acc)), \n", + " results))\n", + " display_columns([methods, images, accs, ocrs], \n", + " headers=[\"Box #\", \"Box\", \"Accuracy\", f\"{self.method.value} OCR\"])\n", + "\n", + "\n", + " def reset(self):\n", + " _, _, exp_ctx = self.ctxs\n", + " exp_ctx.reset(method=self.method)\n", + " \n", + " def __call__(self, box_idxs: BoxIdT | list[BoxIdT] | None = None, display=False, rebuild=False):\n", + " if isinstance(box_idxs, int):\n", + " result = self.result(cast(BoxIdT, box_idxs), rebuild=rebuild)\n", + " if result is not None and display:\n", + " result.display()\n", + " else:\n", + " results = self.results(box_idxs, rebuild=rebuild)\n", + " if results and display:\n", + " self.display(box_idxs)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Box id\n", + "> change `BOX_IDX` to use any box to test crop methods" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "BOX_IDX = 0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Crop methods testing" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "CONTEXT.reset()\n", + "test_eq(CONTEXT.results(), {})\n", + "\n", + "image_experiment = ExperimentOCR(IMAGE_CONTEXT, 'Tesseract')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 
Single box results\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### comics_text_detector initial boxes + padding" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Initial box" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-05-10 20:25:26.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpcleaner.ocr.ocr_mangaocr\u001b[0m:\u001b[36m__new__\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mCreating the MangaOcr instance\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "
Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
0.90
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.⎕⎕

Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "method = CropMethod.INITIAL_BOX\n", + "\n", + "result = image_experiment.result(BOX_IDX, method, ocr=False)\n", + "assert result is not None\n", + "\n", + "image = result.image\n", + "assert image is not None\n", + "text = CONTEXT.mocr('Tesseract', page_lang)(image)\n", + "result.ocr = postprocess_ocr(text)\n", + "result\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### PanelCleaner default pad" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Eneowered by great gnarled cypress jrfes, the ancient manor ! alone on the eit of mew rce: eans, kept tipy by a white-haired ao han known only as
0.85
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orl⎕⎕⎕eans, kept tidy by a white-haired old man known only as Bambu.

Eneowered by great gnarled cypress jrfes, the ancient manor !⎕⎕⎕⎕⎕ alone on the eit⎕⎕⎕⎕⎕⎕ of mew rce: eans, kept tipy by a white-haired ao⎕⎕ han known only as⎕⎕⎕⎕⎕⎕⎕
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "method = CropMethod.DEFAULT\n", + "\n", + "result = image_experiment.result(BOX_IDX, method, ocr=False)\n", + "assert result is not None\n", + "\n", + "CONTEXT.ocr_box(result, 'Tesseract', page_lang)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### PanelCleaner default pad, grey pad" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as
0.95
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as⎕⎕⎕⎕⎕⎕⎕
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "method = CropMethod.DEFAULT_GREY_PAD\n", + "result = image_experiment.result(BOX_IDX, method)\n", + "result\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### padded, 4px" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as
0.88
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orl⎕⎕⎕eans, kept tidy by a white-haired old man known only as Bambu.

Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit⎕⎕⎕⎕⎕⎕ of mew rce: eans, kept tipy by a white-haired aolo man known only as⎕⎕⎕⎕⎕⎕⎕
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.result(BOX_IDX, CropMethod.PADDED_4)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### padded, 8px" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Enbonered by great gnarled cypress trees, the ancient manor stands alone on the pag hile of new orleans, kept tipy by a white-haired ao lo man known omy as
0.88
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Enbonered by great gnarled cypress trees, the ancient manor stands alone on the pag hile of new orleans, kept tipy by a white-haired aolo man known omy as⎕⎕⎕⎕⎕⎕⎕
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.result(BOX_IDX, CropMethod.PADDED_8)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extracted text" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Extracted text, initial box\n", + "\n", + "Unfortunately, the `comic-text_detector` does not remove letter holes from the text mask, despite using OpenCV. This oversight likely impacts the accuracy of the OCR results." + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
0.92
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu⎕⎕.

Fhbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans. kept tipy by a white-haire old man known only as bambi] .
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "method = CropMethod.EXTRACTED_INIT_BOX\n", + "# results[method] = IMAGE_CONTEXT.result(BOX_IDX, method)\n", + "# image = results[method].image\n", + "# assert image is not None\n", + "# results[method].ocr = postprocess_ocr(IMAGE_CONTEXT.mocr(image))\n", + "# display_extracted_result(None, None, results[method], IMAGE_CONTEXT.gts[BOX_IDX])\n", + "image_experiment.result(BOX_IDX, method)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### padded 4, extracted" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
0.91
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Enbonsred by great shale cypress trees, the anci⎕⎕⎕ manor stands alone on the [tskirts of new orleans, kept tidy by a white-haired old man known only as b8ambl .
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.result(BOX_IDX, CropMethod.PADDED_4_EXTRACTED)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### padded 8, extracted" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
0.94
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu⎕⎕.

Enbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as 8ambli .
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.result(BOX_IDX, CropMethod.PADDED_8_EXTRACTED)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### padded 8, dilation 1, extracted" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
0.61
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

O⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕utskirts of new orleans, kept tipy by a white-haired old man known only as sams .
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.result(BOX_IDX, CropMethod.PADDED_8_DILATION_1)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### padded 8, dilation 0.5, extracted" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
0.94
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as b8ambl .
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.result(BOX_IDX, CropMethod.PAD_8_FRACT_0_5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### padded 8, dilation 0.2, extracted" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
0.94
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as b8ambl .
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.result(BOX_IDX, CropMethod.PAD_8_FRACT_0_2)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Summary\n", + "> Use `ImageContext.summary_box` to display the results of the crop methods for OCR of a given box index.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2b5b75333766431bb85d2ae72ee47b50", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Box #1: 0%| | 0/11 [00:00MethodBox #1AccuracyOCRInitial box
0.90
Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
Default
0.85
Eneowered by great gnarled cypress jrfes, the ancient manor !⎕⎕⎕⎕⎕ alone on the eit⎕⎕⎕⎕⎕⎕ of mew rce: eans, kept tipy by a white-haired ao⎕⎕ han known only as⎕⎕⎕⎕⎕⎕⎕
Default, grey pad
0.95
Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as⎕⎕⎕⎕⎕⎕⎕
Padded 4px
0.88
Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit⎕⎕⎕⎕⎕⎕ of mew rce: eans, kept tipy by a white-haired aolo man known only as⎕⎕⎕⎕⎕⎕⎕
Padded 8px
0.88
Enbonered by great gnarled cypress trees, the ancient manor stands alone on the pag hile of new orleans, kept tipy by a white-haired aolo man known omy as⎕⎕⎕⎕⎕⎕⎕
Extracted, init box
0.92
Fhbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans. kept tipy by a white-haire old man known only as bambi] .
Padded 4, extracted
0.91
Enbonsred by great shale cypress trees, the anci⎕⎕⎕ manor stands alone on the [tskirts of new orleans, kept tidy by a white-haired old man known only as b8ambl .
Padded 8, extracted
0.94
Enbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as 8ambli .
Padded 8, dilation 1
0.61
O⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕utskirts of new orleans, kept tipy by a white-haired old man known only as sams .
Pad 8, fract. 0.5
0.94
Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as b8ambl .
Pad 8, fract. 0.2
0.94
Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as b8ambl .
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# methods, images, ocrs, accs = zip(\n", + "# *map(lambda t: (t[0].value, t[1].cache_image(), t[1].diff_tagged(), acc_as_html(t[1].acc)), \n", + "# IMAGE_CONTEXT.results[BOX_IDX].items()))\n", + "# display_columns([methods, images, accs, ocrs], headers=[\"Method\", \"Box\", \"Accuracy\", \"OCR\"])\n", + "\n", + "image_experiment.summary_box(BOX_IDX)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show result for any box # and any method" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Tonight, he comes host slamming open
0.61
\n", + "
\n", + "
--and tonight, he comes most urgently, slamming open the oaken front doors!

T⎕⎕⎕⎕⎕⎕onight, he comes host⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ slamming open⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.result(4, CropMethod.PADDED_8)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
0.67
\n", + "
\n", + "
Bambu-- we have a guest.

⎕⎕⎕⎕⎕⎕ we have a i=s7t.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.result(3, CropMethod.EXTRACTED_INIT_BOX)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ResultVisor" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class ResultVisor(ContextVisor):\n", + " ctx: ExperimentOCR\n", + " control_names: list[str] = ['all_boxes', 'box_idx', 'all_methods', 'method']\n", + "\n", + " _css = \"\"\"\n", + " .box_grp {\n", + " background-color: aliceblue;\n", + " }\n", + " .method_grp {\n", + " background-color: #ededed;\n", + " }\n", + " \"\"\"\n", + " \n", + " def best_results(self): \n", + " ll = self.ctx.best_results()\n", + " if ll:\n", + " cprint([(m.value, f\"{r.acc:.3f}\", r.ocr) for m,r in ll])\n", + "\n", + " def pd_to_html(self):\n", + " df = self.ctx.to_dataframe()\n", + " # set float precision\n", + " df = df.round(3)\n", + " # display floats with 3 decimal digits\n", + " df = df.applymap(lambda x: f\"{x:.3f}\")\n", + " # highlight max value in each row\n", + " stl = df.style.highlight_max(axis=0)\n", + " display(HTML(stl.to_html()))\n", + "\n", + " def update_output(self, **kwargs):\n", + " all_boxes: bool = self.values['all_boxes']\n", + " box_idx: int = self.values['box_idx']\n", + " all_methods: bool = self.values['all_methods']\n", + " method: CropMethod = self.values['method']\n", + "\n", + " # cprint(f\"all_boxes: {all_boxes}, box_idx: {box_idx}, all_methods: {all_methods}, method: {method}\")\n", + "\n", + " if all_boxes and all_methods:\n", + " self.ctx.plot_accuracies()\n", + " elif all_boxes:\n", + " self.ctx.summary_method(method)\n", + " elif all_methods:\n", + " self.ctx.summary_box(box_idx)\n", + " else:\n", + " result = self.ctx.result(box_idx, method)\n", + " if result is not None:\n", + " result.display()\n", + "\n", + " def setup_controls(self):\n", + " _, 
img_ctx = self.ctx.ctxs\n", + " values = self.values\n", + " box_wdgt = W.BoundedIntText(\n", + " value=values['box_idx'], min=0, max=len(img_ctx.boxes)-1, step=1,\n", + " disabled=values['all_boxes'],\n", + " layout={'width': '50px'},\n", + " style={'description_width': 'initial'})\n", + " methods_wdgt = W.Dropdown(\n", + " options=CropMethod.__display_names__(), \n", + " value=values['method'],\n", + " layout={'width': '150px'},\n", + " style={'description_width': 'initial'})\n", + " all_boxes_wdgt = W.Checkbox(label='All', value=values['all_boxes'], \n", + " description=\"all\", \n", + " layout={'width': 'initial'},\n", + " style={'description_width': '0px'})\n", + " all_methods_wdgt = W.Checkbox(label='All', value=values['all_methods'], \n", + " description=\"all\", \n", + " layout={'width': 'initial'},\n", + " style={'description_width': '0px'})\n", + " return {'all_boxes': all_boxes_wdgt, 'box_idx': box_wdgt, \n", + " 'all_methods': all_methods_wdgt, 'method': methods_wdgt}\n", + " \n", + " def setup_ui(self):\n", + " ctls = self.controls\n", + " _, img_ctx = self.ctx.ctxs\n", + " box_label = W.Label(\n", + " value=f\"Box # (of {len(img_ctx.boxes)}):\", \n", + " layout={'width': 'initial', 'padding': '0px 0px 0px 10px'})\n", + " method_label = W.Label(value='Method:', layout={'width': 'initial', 'padding': '0px 0px 0px 10px'})\n", + "\n", + " box_grp = W.HBox([box_label, ctls['all_boxes'], ctls['box_idx']])\n", + " box_grp.add_class('box_grp')\n", + " method_grp = W.HBox([method_label, ctls['all_methods'], ctls['method']])\n", + " method_grp.add_class('method_grp')\n", + " \n", + " return W.HBox([box_grp, method_grp])\n", + "\n", + " def __init__(self, \n", + " ctx: OCRExperimentContext | ExperimentOCR,\n", + " img_idx: int | str | Path | None = None,\n", + " all_boxes: bool = False,\n", + " box_idx: int = 0,\n", + " all_methods: bool = False,\n", + " method: CropMethod=CropMethod.INITIAL_BOX,\n", + " out: W.Output | None = None,\n", + " ):\n", + " if 
isinstance(ctx, OCRExperimentContext):\n", + " assert img_idx is not None, \"img_idx must be provided if ctx is an ExperimentContext\"\n", + " exp = ExperimentOCR.from_image(ctx, 'Tesseract', img_idx)\n", + " if not exp:\n", + " raise ValueError(f\"Image {img_idx} not found in experiment context\")\n", + " ctx = exp\n", + " else:\n", + " if not isinstance(ctx, ExperimentOCR):\n", + " raise ValueError(\"ctx must be an ExperimentOCR or OCRExperimentContext\")\n", + " \n", + " super().__init__(ctx, {'all_boxes': all_boxes, 'box_idx': box_idx, \n", + " 'all_methods': all_methods, 'method': method}, out=out or self.out)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "400781b58e3e43cb868c0c278fd3ecd2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output(layout=Layout(height='0px'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2f2917080eb943acb8b665e1fa607c13", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(HBox(children=(Label(value='Box # (of 15):', layout=Layout(padding='0px 0px 0px 10px', width='i…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "81027eadfa3d40b6a81efcd991f2379a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cleanupwidgets('result_visor')\n", + "\n", + "result_visor = ResultVisor(image_experiment)\n", + "result_visor\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ExperimentVisor\n" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class 
ExperimentVisor(ContextVisor):\n", + " ctx: ExperimentOCR\n", + "\n", + " def update_output(self, \n", + " image_idx: int | None = None,\n", + " **kwargs):\n", + " exp_ctx, img_ctx = self.ctx.ctxs\n", + " if image_idx is not None and image_idx != img_ctx.image_idx:\n", + " ctx = ImageContext(exp_ctx, image_idx)\n", + " assert ctx is not None\n", + " self.ctx.ctx = ctx\n", + " result_visor = self.comp('result_visor')\n", + " if result_visor is not None:\n", + " result_visor.update_output(**kwargs)\n", + "\n", + " def __init__(self, \n", + " ctx: OCRExperimentContext | ExperimentOCR,\n", + " img_idx: int | str | Path | None = None,\n", + " all_boxes: bool = False,\n", + " box_idx: int = 0,\n", + " all_methods: bool = False,\n", + " method: CropMethod=CropMethod.INITIAL_BOX,\n", + " out: W.Output | None = None,\n", + " ):\n", + " if isinstance(ctx, OCRExperimentContext):\n", + " assert img_idx is not None, \"img_idx must be provided if ctx is an ExperimentContext\"\n", + " exp = ExperimentOCR.from_image(ctx, 'Tesseract', img_idx)\n", + " if not exp:\n", + " raise ValueError(f\"Image {img_idx} not found in experiment context\")\n", + " ctx = exp\n", + " else:\n", + " if not issubclass(type(ctx), ExperimentOCR):\n", + " raise ValueError(\"ctx must be an ExperimentOCR or OCRExperimentContext\")\n", + " \n", + " exp_ctx, img_ctx = ctx.ctxs\n", + " out = out or self.out\n", + " image_selector = ImageSelector(exp_ctx, image_idx=img_ctx.image_idx, out=out)\n", + " result_visor = ResultVisor(ctx, out=out,\n", + " all_boxes=all_boxes, box_idx=box_idx, all_methods=all_methods, method=method)\n", + "\n", + " super().__init__(ctx, {}, out=out, \n", + " ctxs={'image_selector': image_selector, 'result_visor': result_visor},\n", + " hdlrs={'display_option': result_visor}\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": 
"1e40a9f23a674b48abd50720e6523c2c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(HBox(children=(Dropdown(index=20, layout=Layout(width='fit-content'), options={'Action_Comics_1…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5af1d96d5d9942fd8e895787ff50bbe5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cleanupwidgets('exp_visor')\n", + "\n", + "exp_visor = ExperimentVisor(image_experiment)\n", + "exp_visor\n" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "exp_visor.update(box_idx=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [], + "source": [ + "exp_visor.update(image_idx=0)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Method experiment\n", + "> perform method on one or more boxes" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [], + "source": [ + "CONTEXT.reset()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [], + "source": [ + "image_experiment = ExperimentOCR(IMAGE_CONTEXT, 'Tesseract')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Visualize summary of crop methods on a given box\n" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "151bc83d198e447d8932fe48aa7ef2e6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Box #2: 0%| | 0/11 [00:00MethodBox #2AccuracyOCRInitial box
0.93
The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~
Default
0.96
The house and the old man are alike in many ways; tall, proud, patient, contentel always to wait until their. master cones home ~~
Default, grey pad
0.96
The house and the old man are alike in many ways; tall, prolid, patient, contented always to wait until their.⎕* master cones home⎕~-
Padded 4px
0.93
The house and the oldman are alike in many ways; tall, proud, patient, contented a ways 0 wait until their. aster comes home ~~ | }
Padded 8px
0.99
The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their. master comes home⎕⎕
Extracted, init box
0.93
Ee house and the old man por alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home ~~
Padded 4, extracted
0.98
The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home--
Padded 8, extracted
0.98
The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home--
Padded 8, dilation 1
0.88
The house and the old man are alike in many ways, tall, proud, ⎕⎕⎕⎕⎕nt, contented live⎕⎕⎕⎕ walt gtie their, master comes home-=
Pad 8, fract. 0.5
0.97
The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until. their. master comes home ~~
Pad 8, fract. 0.2
0.97
The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home ~~
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.summary_box(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Results for any crop method" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Box #BoxAccuracyPadded 4px OCR
1
0.88
Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit⎕⎕⎕⎕⎕⎕ of mew rce: eans, kept tipy by a white-haired aolo man known only as⎕⎕⎕⎕⎕⎕⎕
2
0.93
The house and the oldman are alike in many ways; tall, proud, patient, contented a ways 0 wait until their. aster comes home ~~ | }
3
0.69
F and one in ee⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ would appear.
4
0.77
" bambli- we have a gliest.
5
0.55
P⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ comes⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ slamming open the caken⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕
6
0.57
Tel oe ⎕⎕⎕⎕er-- 5 ow a =⎕⎕⎕⎕ 7⎕⎕⎕⎕⎕
7
0.38
W⎕⎕e⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ and perhaps c oe /⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕
8
0.75
The⎕⎕⎕⎕⎕⎕⎕⎕ the old man⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕s fades down the hall sra⎕⎕⎕⎕
9
0.92
How curious the a⎕whims of fate. - had i not chanced to stroll along the river yl⎕tonight~-
10
0.79
A⎕⎕⎕ulckly as t can, masrer.
11
0.92
<the girl wolld⎕- most surely be dead by now.
12
0.88
Ghede has been generous. the oeath gop has given -⎕the girl. a second chance ye,⎕alem
13
0.67
Soe⎕⎕ ⎕⎕⎕⎕⎕⎕⎕⎕⎕ereke othing to scream ay⎕⎕⎕ anymore.
14
0.94
"you're among friends now. you're safe!
15
1.00
Continued after next page
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.summary_method(CropMethod.PADDED_4)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can use method experiment directly" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
0.90
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.⎕⎕

Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "method_experiment = cast(ExperimentOCRMethod, \n", + " ExperimentOCR.from_method(CONTEXT, 'Tesseract', IMAGE_CONTEXT.image_idx, CropMethod.INITIAL_BOX))\n", + "method_experiment(BOX_IDX, display=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "for all boxes" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Page: media/Strange_Tales_172005.jpg
Size: 1275x1888 px: 4.25 x 6.29 in @ 188.32 dpi
Model: Tesseract
Crop Method: Initial box
Accuracy Mean/Trimmed: 0.79/0.80
\n", + "
\n", + "
Box #ImageAccuracyOCR
1
0.90
Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
2
0.93
The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~
3
0.70
“and one in ee⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ would appear.
4
0.62
Re bambli-~ we have a⎕⎕⎕⎕⎕⎕⎕
5
0.70
T⎕⎕⎕⎕⎕⎕onight, he comes noost⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ slamming open the caken⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕
6
0.82
Tell me naster. how may bambli serve 7
7
0.56
£7⎕⎕ »⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ and perhaps some dry clothes...⎕7⎕/
8
0.81
The⎕⎕⎕⎕⎕⎕⎕⎕ the old man'⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕s fades down the hall as...⎕7
9
0.85
How curious the 4⎕fate.⎕whims of h⎕⎕⎕⎕⎕⎕ad t not chanced to stroll along the river yl⎕tonight ==
10
0.80
Fas oulckly as t ca, master.
11
0.91
<the girl would -⎕most slirely be dead by now.
12
0.47
A⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕th⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ the girl. a second chance ge⎕a ee yg adil
13
0.84
Ah⎕⎕⎕ girl--there's othing to scream nt⎕⎕t anymore.
14
0.93
You're among friends now. you're sale
15
1.00
Continued after next page
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "method_experiment(display=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "or selected boxes" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [], + "source": [ + "CONTEXT.reset()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Page: media/Strange_Tales_172005.jpg
Size: 1275x1888 px: 4.25 x 6.29 in @ 188.32 dpi
Model: Tesseract
Crop Method: Initial box
Accuracy Mean/Trimmed: 0.80/nan
\n", + "
\n", + "
Box #ImageAccuracyOCR
1
0.90
Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
2
0.93
The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~
7
0.56
£7⎕⎕ »⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ and perhaps some dry clothes...⎕7⎕/
10
0.80
Fas oulckly as t ca, master.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "method_experiment = cast(ExperimentOCRMethod, \n", + " ExperimentOCR.from_method(CONTEXT, 'Tesseract', IMAGE_CONTEXT.image_idx, CropMethod.INITIAL_BOX))\n", + "method_experiment([0, 1, 6, 9], display=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Full page results\n", + "> all methods on all boxes" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "CONTEXT.reset()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [], + "source": [ + "image_experiment = ExperimentOCR(IMAGE_CONTEXT, 'Tesseract')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Page: media/Strange_Tales_172005.jpg
Size: 1275x1888 px: 4.25 x 6.29 in @ 188.32 dpi
Model: Tesseract
Crop Method: Initial box
Accuracy Mean/Trimmed: 0.79/0.80
\n", + "
\n", + "
Box #ImageAccuracyOCR
1
0.90
Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
2
0.93
The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~
3
0.70
“and one in ee⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ would appear.
4
0.62
Re bambli-~ we have a⎕⎕⎕⎕⎕⎕⎕
5
0.70
T⎕⎕⎕⎕⎕⎕onight, he comes noost⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ slamming open the caken⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕
6
0.82
Tell me naster. how may bambli serve 7
7
0.56
£7⎕⎕ »⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ and perhaps some dry clothes...⎕7⎕/
8
0.81
The⎕⎕⎕⎕⎕⎕⎕⎕ the old man'⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕s fades down the hall as...⎕7
9
0.85
How curious the 4⎕fate.⎕whims of h⎕⎕⎕⎕⎕⎕ad t not chanced to stroll along the river yl⎕tonight ==
10
0.80
Fas oulckly as t ca, master.
11
0.91
<the girl would -⎕most slirely be dead by now.
12
0.47
A⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕th⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ the girl. a second chance ge⎕a ee yg adil
13
0.84
Ah⎕⎕⎕ girl--there's othing to scream nt⎕⎕t anymore.
14
0.93
You're among friends now. you're sale
15
1.00
Continued after next page
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "method = CropMethod.INITIAL_BOX\n", + "# method = CropMethod.DEFAULT\n", + "# method = CropMethod.PADDED_4\n", + "# method = CropMethod.PADDED_8\n", + "# method = CropMethod.EXTRACTED_INIT_BOX\n", + "# method = CropMethod.PAD_8_FRACT_0_5\n", + "# method = CropMethod.PAD_8_FRACT_0_2\n", + "\n", + "# image_experiment.method_experiment(CropMethod.INITIAL_BOX).results()\n", + "initial_box_exp = image_experiment.method_experiment(CropMethod.INITIAL_BOX)\n", + "initial_box_exp(display=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Other method" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[ResultOCR#block 00: 0.85||Eneowered by great gnarled cypress jrfes, the ancient manor ! alone on the eit of mew rce: eans, kept tipy by a white-haired ao han known only as,\n", + " ResultOCR#block 01: 0.96||The house and the old man are alike in many ways; tall, proud, patient, conten tel always to wait until their. master cones home ~~,\n", + " ResultOCR#block 02: 0.74||And one in ee would appear.,\n", + " ResultOCR#block 03: 0.41||Rir guest.,\n", + " ResultOCR#block 04: 0.59||=~and tonight, he comes host sane oo,\n", + " ResultOCR#block 05: 0.78||Tell me masts - how may bambli . serve 7 _,\n", + " ResultOCR#block 06: 0.48||R warm, bambli-~ and perhaps,\n", + " ResultOCR#block 07: 0.76||The the old mans fades down the hall s.00,\n", + " ResultOCR#block 08: 0.92||How curious the a whims of fate . 
had t not chanced to stroll along the river tonight~~ >,\n", + " ResultOCR#block 09: 0.50||Aulckly “master as t can,,\n", + " ResultOCR#block 10: 0.94||" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.perform_methods(plot_acc=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
---------- Initial box ----------\n",
+       "ResultOCR#block 00: 0.90||Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3\n",
+       "ResultOCR#block 01: 0.93||The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~\n",
+       "ResultOCR#block 02: 0.70||“and one in ee would appear.\n",
+       "ResultOCR#block 03: 0.62||Re bambli-~ we have a\n",
+       "ResultOCR#block 04: 0.70||Tonight, he comes noost slamming open the caken\n",
+       "ResultOCR#block 05: 0.82||Tell me naster. how may bambli serve 7\n",
+       "ResultOCR#block 06: 0.56||£7 » and perhaps some dry clothes... 7 /\n",
+       "ResultOCR#block 07: 0.81||The the old man's fades down the hall as... 7\n",
+       "ResultOCR#block 08: 0.85||How curious the 4 fate. whims of had t not chanced to stroll along the river yl tonight ==\n",
+       "ResultOCR#block 09: 0.80||Fas oulckly as t ca, master.\n",
+       "ResultOCR#block 10: 0.91||<the girl would - most slirely be dead by now.\n",
+       "ResultOCR#block 11: 0.47||Ath the girl. a second chance ge a ee yg adil\n",
+       "ResultOCR#block 12: 0.84||Ah girl--there's othing to scream ntt anymore .\n",
+       "ResultOCR#block 13: 0.93||You're among friends now. you're sale\n",
+       "ResultOCR#block 14: 1.00||Continued after next page\n",
+       "\n",
+       " ---------- Default ----------\n",
+       "ResultOCR#block 00: 0.85||Eneowered by great gnarled cypress jrfes, the ancient manor ! alone on the eit of mew rce: eans, kept tipy by a white-haired ao han known only as\n",
+       "ResultOCR#block 01: 0.96||The house and the old man are alike in many ways; tall, proud, patient, conten tel always to wait until their. master cones home ~~\n",
+       "ResultOCR#block 02: 0.74||And one in ee would appear.\n",
+       "ResultOCR#block 03: 0.41||Rir guest.\n",
+       "ResultOCR#block 04: 0.59||=~and tonight, he comes host sane oo\n",
+       "ResultOCR#block 05: 0.78||Tell me masts - how may bambli . serve 7 _\n",
+       "ResultOCR#block 06: 0.48||R warm, bambli-~ and perhaps\n",
+       "ResultOCR#block 07: 0.76||The the old mans fades down the hall s.00\n",
+       "ResultOCR#block 08: 0.92||How curious the a whims of fate . had t not chanced to stroll along the river tonight~~ >\n",
+       "ResultOCR#block 09: 0.50||Aulckly “master as t can,\n",
+       "ResultOCR#block 10: 0.94||<the girl would - most surely be dead by now.\n",
+       "ResultOCR#block 11: 0.50||Ath - the girl. a second chance ee oo tr tt\n",
+       "ResultOCR#block 12: 0.84||Oe girl--there's othing to scream nt anymore. 4\n",
+       "ResultOCR#block 13: 0.96||You're among friends now. youre safe!\n",
+       "ResultOCR#block 14: 1.00||Continued after next page\n",
+       "\n",
+       " ---------- Default, grey pad ----------\n",
+       "ResultOCR#block 00: 0.95||Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as\n",
+       "ResultOCR#block 01: 0.96||The house and the old man are alike in many ways; tall, prolid, patient, contented always to wait until their. * master cones home ~-\n",
+       "ResultOCR#block 02: 0.94||“and one in > need of some help, it would appear .\n",
+       "ResultOCR#block 03: 0.88||\" bambl-- we have a guest.\n",
+       "ResultOCR#block 04: 0.72||~~and tonight, he comes urgently, slanming open\n",
+       "ResultOCR#block 05: 0.86||Tell me, master: how may bambli serve 7\n",
+       "ResultOCR#block 06: 0.90||Some blankets to keep her. warm, bambli-- and perhaps some dry \\ clothes--7 /.\n",
+       "ResultOCR#block 07: 0.06||As.\n",
+       "ResultOCR#block 08: 0.91||How curious the d 7e . whims of fa; had i not chanced to stroll along the river tonight--\n",
+       "ResultOCR#block 09: 0.55||Ickl as t can\n",
+       "ResultOCR#block 10: 0.00||\n",
+       "ResultOCR#block 11: 0.85||Ghede has been generous. the death god has gen + the girl. a second chance te oe ato\" pd ate\n",
+       "ResultOCR#block 12: 0.95||Easy, girl--there's | nothing to scream about anyaore.\n",
+       "ResultOCR#block 13: 0.97||You're among friends now. you're safe!\n",
+       "ResultOCR#block 14: 0.54||“continued a\n",
+       "\n",
+       " ---------- Padded 4px ----------\n",
+       "ResultOCR#block 00: 0.88||Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as\n",
+       "ResultOCR#block 01: 0.93||The house and the oldman are alike in many ways; tall, proud, patient, contented a ways 0 wait until their. aster comes home ~~ | }\n",
+       "ResultOCR#block 02: 0.69||F and one in ee would appear.\n",
+       "ResultOCR#block 03: 0.77||\" bambli-— we have a gliest.\n",
+       "ResultOCR#block 04: 0.55||P comes slamming open the caken\n",
+       "ResultOCR#block 05: 0.57||Tel oe er-- 5 ow a = 7\n",
+       "ResultOCR#block 06: 0.38||We and perhaps c oe /\n",
+       "ResultOCR#block 07: 0.75||The the old mans fades down the hall sra\n",
+       "ResultOCR#block 08: 0.92||How curious the a whims of fate . - had i not chanced to stroll along the river yl tonight~-\n",
+       "ResultOCR#block 09: 0.79||Aulckly as t can, ‘masrer.\n",
+       "ResultOCR#block 10: 0.92||<the girl wolld - most surely be dead by now.\n",
+       "ResultOCR#block 11: 0.88||Ghede has been generous. the oeath gop has given - the girl. a second chance ye, alem\n",
+       "ResultOCR#block 12: 0.67||Soe er eke othing to scream ay anymore.\n",
+       "ResultOCR#block 13: 0.94||\"you're among friends now. you're safe!\n",
+       "ResultOCR#block 14: 1.00||Continued after next page\n",
+       "\n",
+       " ---------- Padded 8px ----------\n",
+       "ResultOCR#block 00: 0.88||Enbonered by great gnarled cypress trees, the ancient manor stands alone on the pag hile of new orleans, kept tipy by a white-haired ao lo man known omy as\n",
+       "ResultOCR#block 01: 0.99||The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their. master comes home\n",
+       "ResultOCR#block 02: 0.67||7 and one in ee would appear,\n",
+       "ResultOCR#block 03: 0.76||Zf mbl == we have a guest.\n",
+       "ResultOCR#block 04: 0.61||Tonight, he comes host slamming open\n",
+       "ResultOCR#block 05: 0.75||Yy i tell me master - how may bambli serve 7 _,\n",
+       "ResultOCR#block 06: 0.89||Some. blankets to keep he arm , bambli-= and perhaps some dry clothes. 2s\n",
+       "ResultOCR#block 07: 0.72||The the old mans fades down the hal. srl see\n",
+       "ResultOCR#block 08: 0.88||* how curious the p whims of fate . - had i not chanced . to stroll along _ the river 3 tonight-~\n",
+       "ResultOCR#block 09: 0.65||Tiie as t can, \\ master ,\n",
+       "ResultOCR#block 10: 0.86||The girl wolld - most slirely be - dead by now.\n",
+       "ResultOCR#block 11: 0.62||Ghede has been generous. : the crn son ue;\n",
+       "ResultOCR#block 12: 0.62||Soe er eke othing to scream hbolt anhore hr\n",
+       "ResultOCR#block 13: 0.92||” you're among friends now. you're safe!\n",
+       "ResultOCR#block 14: 0.94||“continued after next page\n",
+       "\n",
+       " ---------- Extracted, init box ----------\n",
+       "ResultOCRExtracted#block 00: 0.92||Fhbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans. kept tipy by a whi te-haire old man known only as bambi] .\n",
+       "ResultOCRExtracted#block 01: 0.93||Ee house and the old man por alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home ~~\n",
+       "ResultOCRExtracted#block 02: 0.73||And one in fee would appear.\n",
+       "ResultOCRExtracted#block 03: 0.67||— we have a i=s7t.\n",
+       "ResultOCRExtracted#block 04: 0.74||~and tonight, he comes urgently, slamming open\n",
+       "ResultOCRExtracted#block 05: 0.85||Tell me master how may bambli serve 7\n",
+       "ResultOCRExtracted#block 06: 0.93||Some blankets to keep her warm, banbli-- and perhaps. some dry clothes\n",
+       "ResultOCRExtracted#block 07: 0.77||The the old man's fades down the hall s.,00\n",
+       "ResultOCRExtracted#block 08: 0.77||Hin ef fare” had i not chanced to stroll along the river. tonigmt=~\n",
+       "ResultOCRExtracted#block 09: 0.85||Aulckly as t can, master,\n",
+       "ResultOCRExtracted#block 10: 1.00||--the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.51||Ath the girl a second chance ro\n",
+       "ResultOCRExtracted#block 12: 0.56||Cas ee, othing to scream pls aa .\n",
+       "ResultOCRExtracted#block 13: 0.95||You're among friends now. you're sale!\n",
+       "ResultOCRExtracted#block 14: 0.91||Continued af ext page\n",
+       "\n",
+       " ---------- Padded 4, extracted ----------\n",
+       "ResultOCRExtracted#block 00: 0.91||Enbonsred by great shale cypress trees, the anci manor stands alone on the [tskirts of new orleans, kept tidy by a whi te- haired old man known only as b8ambl .\n",
+       "ResultOCRExtracted#block 01: 0.98||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home --\n",
+       "ResultOCRExtracted#block 02: 0.73||And one in fee would appear.\n",
+       "ResultOCRExtracted#block 03: 0.83||Bambli we have a gliest.\n",
+       "ResultOCRExtracted#block 04: 0.74||=~and tonight, he comes urgently, slamming open\n",
+       "ResultOCRExtracted#block 05: 0.84||Tell me master. how may bambli serve 7\n",
+       "ResultOCRExtracted#block 06: 0.53||Warm, bambli-- and perhaps. som\n",
+       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sra\n",
+       "ResultOCRExtracted#block 08: 0.76||We sea had i not chanced to stroll along the river. tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.76||Alley as t can, master,\n",
+       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.92||Chepe has been generous. the peath god has given the girl a second chance amr\n",
+       "ResultOCRExtracted#block 12: 0.73||Cas gr theres othing to scream pissy tore .\n",
+       "ResultOCRExtracted#block 13: 0.95||You're among eriends now. you're safe!\n",
+       "ResultOCRExtracted#block 14: 0.91||Continued af ext page\n",
+       "\n",
+       " ---------- Padded 8, extracted ----------\n",
+       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as 8ambli .\n",
+       "ResultOCRExtracted#block 01: 0.98||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home --\n",
+       "ResultOCRExtracted#block 02: 0.70||And one in fee wolld appear.\n",
+       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
+       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
+       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve't\n",
+       "ResultOCRExtracted#block 06: 0.91||Some blankets to keep her warm, banbli-~ and perhaps. some dry clothes\n",
+       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sire\n",
+       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.77||Allckry as t can, master.\n",
+       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.94||Ghede has been generous. the peath god has given the girl a second chance po\n",
+       "ResultOCRExtracted#block 12: 0.74||Cas gr theres othing to scream pps hore .\n",
+       "ResultOCRExtracted#block 13: 0.42||You're safe § r\n",
+       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
+       "\n",
+       " ---------- Padded 8, dilation 1 ----------\n",
+       "ResultOCRExtracted#block 00: 0.61||Outskirts of new orleans, kept tipy by a white-haired old man known only as sams .\n",
+       "ResultOCRExtracted#block 01: 0.88||The house and the old man are alike in many ways, tall, proud, nt, contented live walt gtie their, master comes home -=\n",
+       "ResultOCRExtracted#block 02: 0.97||And one in need of some help, it wolld appear .\n",
+       "ResultOCRExtracted#block 03: 0.78||Bambli ~~ we have a gliest.\n",
+       "ResultOCRExtracted#block 04: 0.79||=and tonight, he comes most slamming open the front\n",
+       "ResultOCRExtracted#block 05: 0.86||Tell me, master: how may bambli serve 7\n",
+       "ResultOCRExtracted#block 06: 0.85||Gone blankets to keep her. warm, bambli-~ and perhaps some dry\n",
+       "ResultOCRExtracted#block 07: 0.73||The old man's footsteps the hall as.re\n",
+       "ResultOCRExtracted#block 08: 0.94||How curious the whims of fate . had i not chanced to stroll along the r/| ver tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.68||Aulckly as t can,\n",
+       "ResultOCRExtracted#block 10: 0.95||~<the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.75||Ee lh boe ene. the death gop has the girl. a second chance\n",
+       "ResultOCRExtracted#block 12: 0.92||Easy, girl--there's nothing to scream abolit anymo!\n",
+       "ResultOCRExtracted#block 13: 0.97||You're among friends now. you're safe!\n",
+       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
+       "\n",
+       " ---------- Pad 8, fract. 0.5 ----------\n",
+       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as b8ambl .\n",
+       "ResultOCRExtracted#block 01: 0.97||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until. their. master comes home ~~\n",
+       "ResultOCRExtracted#block 02: 0.78||And one in eee pe would appear.\n",
+       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
+       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
+       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve'7\n",
+       "ResultOCRExtracted#block 06: 0.94||Some blankets to keep her. warm, bambli-~ and perhaps. some dry clothes\n",
+       "ResultOCRExtracted#block 07: 0.73||The the old mans fades donn the hall sire\n",
+       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.81||Aulckry as t can, master.\n",
+       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.92||Ghede hag been generous. the peath god has given the girl a second chance po\n",
+       "ResultOCRExtracted#block 12: 0.76||Cas srl theres othing to scream seoit hore .\n",
+       "ResultOCRExtracted#block 13: 0.42||You're safe 4 ’\n",
+       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
+       "\n",
+       " ---------- Pad 8, fract. 0.2 ----------\n",
+       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as b8ambl .\n",
+       "ResultOCRExtracted#block 01: 0.97||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home ~~\n",
+       "ResultOCRExtracted#block 02: 0.77||And one in eet sve would appear.\n",
+       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
+       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
+       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve'7\n",
+       "ResultOCRExtracted#block 06: 0.94||Some blankets to keep her, warm, bambli-~ and perhaps. some dry clothes\n",
+       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sere\n",
+       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.81||Aulckry as t can, master.\n",
+       "ResultOCRExtracted#block 10: 0.95||~<the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.94||Ghede has been generous. the ceath god has given the girl a second chance po\n",
+       "ResultOCRExtracted#block 12: 0.67||Yi renee othing to scream seat anhore .\n",
+       "ResultOCRExtracted#block 13: 0.93||Youre among eriends now. you're safe!\n",
+       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
+       "\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "---------- Initial box ----------\n", + "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.90\u001b[0m||Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski\u001b[1m)\u001b[0m \u001b[1;36m2\u001b[0m of mew ce eans, kept tidy by a white-haired old man known only as bambs, \u001b[1;36m3\u001b[0m\n", + "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.93\u001b[0m||The house and the old man are alike in many ways; tall, prolid, patient, contented always \u001b[1;36m0\u001b[0m wait until. their. master cones mome ~~\n", + "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.70\u001b[0m||“and one in ee would appear.\n", + "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.62\u001b[0m||Re bambli-~ we have a\n", + "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.70\u001b[0m||Tonight, he comes noost slamming open the caken\n", + "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.82\u001b[0m||Tell me naster. how may bambli serve \u001b[1;36m7\u001b[0m\n", + "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.56\u001b[0m||£\u001b[1;36m7\u001b[0m » and perhaps some dry clothes\u001b[33m...\u001b[0m \u001b[1;36m7\u001b[0m \u001b[35m/\u001b[0m\n", + "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.81\u001b[0m||The the old man's fades down the hall as\u001b[33m...\u001b[0m \u001b[1;36m7\u001b[0m\n", + "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.85\u001b[0m||How curious the \u001b[1;36m4\u001b[0m fate. 
whims of had t not chanced to stroll along the river yl tonight ==\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.80\u001b[0m||Fas oulckly as t ca, master.\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.91\u001b[0m||\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.50\u001b[0m||Aulckly “master as t can,\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.94\u001b[0m|| need of some help, it would appear .\n", + "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.88\u001b[0m||\" bambl-- we have a guest.\n", + "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.72\u001b[0m||~~and tonight, he comes urgently, slanming open\n", + "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.86\u001b[0m||Tell me, master: how may bambli serve \u001b[1;36m7\u001b[0m\n", + "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.90\u001b[0m||Some blankets to keep her. warm, bambli-- and perhaps some dry \\ clothes-\u001b[1;36m-7\u001b[0m \u001b[35m/\u001b[0m\u001b[95m.\u001b[0m\n", + "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.06\u001b[0m||As.\n", + "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.91\u001b[0m||How curious the d 7e . whims of fa; had i not chanced to stroll along the river tonight--\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.55\u001b[0m||Ickl as t can\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", + "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.85\u001b[0m||Ghede has been generous. the death god has gen + the girl. a second chance te oe ato\" pd ate\n", + "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.95\u001b[0m||Easy, girl--there's | nothing to scream about anyaore.\n", + "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.97\u001b[0m||You're among friends now. 
you're safe!\n", + "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.54\u001b[0m||“continued a\n", + "\n", + " ---------- Padded 4px ----------\n", + "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.88\u001b[0m||Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as\n", + "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.93\u001b[0m||The house and the oldman are alike in many ways; tall, proud, patient, contented a ways \u001b[1;36m0\u001b[0m wait until their. aster comes home ~~ | \u001b[1m}\u001b[0m\n", + "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.69\u001b[0m||F and one in ee would appear.\n", + "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.77\u001b[0m||\" bambli-— we have a gliest.\n", + "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.55\u001b[0m||P comes slamming open the caken\n", + "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.57\u001b[0m||Tel oe er-- \u001b[1;36m5\u001b[0m ow a = \u001b[1;36m7\u001b[0m\n", + "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.38\u001b[0m||We and perhaps c oe \u001b[35m/\u001b[0m\n", + "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.75\u001b[0m||The the old mans fades down the hall sra\n", + "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.92\u001b[0m||How curious the a whims of fate . - had i not chanced to stroll along the river yl tonight~-\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.79\u001b[0m||Aulckly as t can, ‘masrer.\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.92\u001b[0m||" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.plot_accuracies()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "RenderJSON(image_experiment.to_dict(), 400, 2)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "best results" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[\n",
+       "    (\n",
+       "        'Default, grey pad',\n",
+       "        '0.953',\n",
+       "        'Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of \n",
+       "new orleans, kept tipy by a white-haired old man known only as'\n",
+       "    ),\n",
+       "    (\n",
+       "        'Padded 8px',\n",
+       "        '0.988',\n",
+       "        'The house and the old man are alike in many ways; tall, proud, patient, contented always to \n",
+       "wait until their. master comes home'\n",
+       "    ),\n",
+       "    ('Padded 8, dilation 1', '0.968', 'And one in need of some help, it wolld appear .'),\n",
+       "    ('Default, grey pad', '0.880', '\" bambl-- we have a guest.'),\n",
+       "    ('Padded 8, dilation 1', '0.794', '=and tonight, he comes most slamming open the front'),\n",
+       "    ('Default, grey pad', '0.857', 'Tell me, master: how may bambli serve 7'),\n",
+       "    (\n",
+       "        'Pad 8, fract. 0.5',\n",
+       "        '0.935',\n",
+       "        'Some blankets to keep her. warm, bambli-~ and perhaps. some dry clothes'\n",
+       "    ),\n",
+       "    ('Initial box', '0.811', \"The the old man's fades down the hall as... 7\"),\n",
+       "    (\n",
+       "        'Padded 8, extracted',\n",
+       "        '0.959',\n",
+       "        'How curious the whims of fate . had i not chanced to stroll along the river tonight-~'\n",
+       "    ),\n",
+       "    ('Extracted, init box', '0.846', 'Aulckly as t can, master,'),\n",
+       "    ('Extracted, init box', '1.000', '--the girl would most surely be dead by now.'),\n",
+       "    (\n",
+       "        'Padded 8, extracted',\n",
+       "        '0.935',\n",
+       "        'Ghede has been generous. the peath god has given the girl a second chance po'\n",
+       "    ),\n",
+       "    ('Default, grey pad', '0.953', \"Easy, girl--there's | nothing to scream about anyaore.\"),\n",
+       "    ('Default, grey pad', '0.974', \"You're among friends now. you're safe!\"),\n",
+       "    ('Initial box', '1.000', 'Continued after next page')\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\n", + " \u001b[1m(\u001b[0m\n", + " \u001b[32m'Default, grey pad'\u001b[0m,\n", + " \u001b[32m'0.953'\u001b[0m,\n", + " \u001b[32m'Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of \u001b[0m\n", + "\u001b[32mnew orleans, kept tipy by a white-haired old man known only as'\u001b[0m\n", + " \u001b[1m)\u001b[0m,\n", + " \u001b[1m(\u001b[0m\n", + " \u001b[32m'Padded 8px'\u001b[0m,\n", + " \u001b[32m'0.988'\u001b[0m,\n", + " \u001b[32m'The house and the old man are alike in many ways; tall, proud, patient, contented always to \u001b[0m\n", + "\u001b[32mwait until their. master comes home'\u001b[0m\n", + " \u001b[1m)\u001b[0m,\n", + " \u001b[1m(\u001b[0m\u001b[32m'Padded 8, dilation 1'\u001b[0m, \u001b[32m'0.968'\u001b[0m, \u001b[32m'And one in need of some help, it wolld appear .'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1m(\u001b[0m\u001b[32m'Default, grey pad'\u001b[0m, \u001b[32m'0.880'\u001b[0m, \u001b[32m'\" bambl-- we have a guest.'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1m(\u001b[0m\u001b[32m'Padded 8, dilation 1'\u001b[0m, \u001b[32m'0.794'\u001b[0m, \u001b[32m'=and tonight, he comes most slamming open the front'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1m(\u001b[0m\u001b[32m'Default, grey pad'\u001b[0m, \u001b[32m'0.857'\u001b[0m, \u001b[32m'Tell me, master: how may bambli serve 7'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1m(\u001b[0m\n", + " \u001b[32m'Pad 8, fract. 0.5'\u001b[0m,\n", + " \u001b[32m'0.935'\u001b[0m,\n", + " \u001b[32m'Some blankets to keep her. warm, bambli-~ and perhaps. some dry clothes'\u001b[0m\n", + " \u001b[1m)\u001b[0m,\n", + " \u001b[1m(\u001b[0m\u001b[32m'Initial box'\u001b[0m, \u001b[32m'0.811'\u001b[0m, \u001b[32m\"The the old man's fades down the hall as... 
7\"\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1m(\u001b[0m\n", + " \u001b[32m'Padded 8, extracted'\u001b[0m,\n", + " \u001b[32m'0.959'\u001b[0m,\n", + " \u001b[32m'How curious the whims of fate . had i not chanced to stroll along the river tonight-~'\u001b[0m\n", + " \u001b[1m)\u001b[0m,\n", + " \u001b[1m(\u001b[0m\u001b[32m'Extracted, init box'\u001b[0m, \u001b[32m'0.846'\u001b[0m, \u001b[32m'Aulckly as t can, master,'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1m(\u001b[0m\u001b[32m'Extracted, init box'\u001b[0m, \u001b[32m'1.000'\u001b[0m, \u001b[32m'--the girl would most surely be dead by now.'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1m(\u001b[0m\n", + " \u001b[32m'Padded 8, extracted'\u001b[0m,\n", + " \u001b[32m'0.935'\u001b[0m,\n", + " \u001b[32m'Ghede has been generous. the peath god has given the girl a second chance po'\u001b[0m\n", + " \u001b[1m)\u001b[0m,\n", + " \u001b[1m(\u001b[0m\u001b[32m'Default, grey pad'\u001b[0m, \u001b[32m'0.953'\u001b[0m, \u001b[32m\"Easy, girl--there's | nothing to scream about anyaore.\"\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1m(\u001b[0m\u001b[32m'Default, grey pad'\u001b[0m, \u001b[32m'0.974'\u001b[0m, \u001b[32m\"You're among friends now. 
you're safe!\"\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1m(\u001b[0m\u001b[32m'Initial box'\u001b[0m, \u001b[32m'1.000'\u001b[0m, \u001b[32m'Continued after next page'\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ll = image_experiment.best_results()\n", + "if ll:\n", + " cprint([(m.value, f\"{r.acc:.3f}\", r.ocr) for m,r in ll])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Perform experiments given a list of `CropMethod`s\n" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [], + "source": [ + "# methods = [*CropMethod.__members__.values()]\n", + "methods = [CropMethod.INITIAL_BOX, CropMethod.DEFAULT]\n", + "image_experiment.perform_methods(methods)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot the performance of the experiments" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# image_experiment.plot_accuracies(exps, IMAGE_CONTEXT)\n", + "image_experiment.plot_accuracies(methods)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Save the results to a file\n" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
cleaner/Strange_Tales_172005/Strange_Tales_172005_Tesseract.json\n",
+       "
\n" + ], + "text/plain": [ + "cleaner/Strange_Tales_172005/Strange_Tales_172005_Tesseract.json\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fp, json_results = image_experiment.to_json()\n", + "cprint(fp)\n", + "RenderJSON(json_results, 300, 2)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load the results from a file\n" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
---------- Initial box ----------\n",
+       "ResultOCR#block 00: 0.90||Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3\n",
+       "ResultOCR#block 01: 0.93||The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~\n",
+       "ResultOCR#block 02: 0.70||“and one in ee would appear.\n",
+       "ResultOCR#block 03: 0.62||Re bambli-~ we have a\n",
+       "ResultOCR#block 04: 0.70||Tonight, he comes noost slamming open the caken\n",
+       "ResultOCR#block 05: 0.82||Tell me naster. how may bambli serve 7\n",
+       "ResultOCR#block 06: 0.56||£7 » and perhaps some dry clothes... 7 /\n",
+       "ResultOCR#block 07: 0.81||The the old man's fades down the hall as... 7\n",
+       "ResultOCR#block 08: 0.85||How curious the 4 fate. whims of had t not chanced to stroll along the river yl tonight ==\n",
+       "ResultOCR#block 09: 0.80||Fas oulckly as t ca, master.\n",
+       "ResultOCR#block 10: 0.91||<the girl would - most slirely be dead by now.\n",
+       "ResultOCR#block 11: 0.47||Ath the girl. a second chance ge a ee yg adil\n",
+       "ResultOCR#block 12: 0.84||Ah girl--there's othing to scream ntt anymore .\n",
+       "ResultOCR#block 13: 0.93||You're among friends now. you're sale\n",
+       "ResultOCR#block 14: 1.00||Continued after next page\n",
+       "\n",
+       " ---------- Default ----------\n",
+       "ResultOCR#block 00: 0.85||Eneowered by great gnarled cypress jrfes, the ancient manor ! alone on the eit of mew rce: eans, kept tipy by a white-haired ao han known only as\n",
+       "ResultOCR#block 01: 0.96||The house and the old man are alike in many ways; tall, proud, patient, conten tel always to wait until their. master cones home ~~\n",
+       "ResultOCR#block 02: 0.74||And one in ee would appear.\n",
+       "ResultOCR#block 03: 0.41||Rir guest.\n",
+       "ResultOCR#block 04: 0.59||=~and tonight, he comes host sane oo\n",
+       "ResultOCR#block 05: 0.78||Tell me masts - how may bambli . serve 7 _\n",
+       "ResultOCR#block 06: 0.48||R warm, bambli-~ and perhaps\n",
+       "ResultOCR#block 07: 0.76||The the old mans fades down the hall s.00\n",
+       "ResultOCR#block 08: 0.92||How curious the a whims of fate . had t not chanced to stroll along the river tonight~~ >\n",
+       "ResultOCR#block 09: 0.50||Aulckly “master as t can,\n",
+       "ResultOCR#block 10: 0.94||<the girl would - most surely be dead by now.\n",
+       "ResultOCR#block 11: 0.50||Ath - the girl. a second chance ee oo tr tt\n",
+       "ResultOCR#block 12: 0.84||Oe girl--there's othing to scream nt anymore. 4\n",
+       "ResultOCR#block 13: 0.96||You're among friends now. youre safe!\n",
+       "ResultOCR#block 14: 1.00||Continued after next page\n",
+       "\n",
+       " ---------- Default, grey pad ----------\n",
+       "ResultOCR#block 00: 0.95||Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as\n",
+       "ResultOCR#block 01: 0.96||The house and the old man are alike in many ways; tall, prolid, patient, contented always to wait until their. * master cones home ~-\n",
+       "ResultOCR#block 02: 0.94||“and one in > need of some help, it would appear .\n",
+       "ResultOCR#block 03: 0.88||\" bambl-- we have a guest.\n",
+       "ResultOCR#block 04: 0.72||~~and tonight, he comes urgently, slanming open\n",
+       "ResultOCR#block 05: 0.86||Tell me, master: how may bambli serve 7\n",
+       "ResultOCR#block 06: 0.90||Some blankets to keep her. warm, bambli-- and perhaps some dry \\ clothes--7 /.\n",
+       "ResultOCR#block 07: 0.06||As.\n",
+       "ResultOCR#block 08: 0.91||How curious the d 7e . whims of fa; had i not chanced to stroll along the river tonight--\n",
+       "ResultOCR#block 09: 0.55||Ickl as t can\n",
+       "ResultOCR#block 10: 0.00||\n",
+       "ResultOCR#block 11: 0.85||Ghede has been generous. the death god has gen + the girl. a second chance te oe ato\" pd ate\n",
+       "ResultOCR#block 12: 0.95||Easy, girl--there's | nothing to scream about anyaore.\n",
+       "ResultOCR#block 13: 0.97||You're among friends now. you're safe!\n",
+       "ResultOCR#block 14: 0.54||“continued a\n",
+       "\n",
+       " ---------- Padded 4px ----------\n",
+       "ResultOCR#block 00: 0.88||Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as\n",
+       "ResultOCR#block 01: 0.93||The house and the oldman are alike in many ways; tall, proud, patient, contented a ways 0 wait until their. aster comes home ~~ | }\n",
+       "ResultOCR#block 02: 0.69||F and one in ee would appear.\n",
+       "ResultOCR#block 03: 0.77||\" bambli-— we have a gliest.\n",
+       "ResultOCR#block 04: 0.55||P comes slamming open the caken\n",
+       "ResultOCR#block 05: 0.57||Tel oe er-- 5 ow a = 7\n",
+       "ResultOCR#block 06: 0.38||We and perhaps c oe /\n",
+       "ResultOCR#block 07: 0.75||The the old mans fades down the hall sra\n",
+       "ResultOCR#block 08: 0.92||How curious the a whims of fate . - had i not chanced to stroll along the river yl tonight~-\n",
+       "ResultOCR#block 09: 0.79||Aulckly as t can, ‘masrer.\n",
+       "ResultOCR#block 10: 0.92||<the girl wolld - most surely be dead by now.\n",
+       "ResultOCR#block 11: 0.88||Ghede has been generous. the oeath gop has given - the girl. a second chance ye, alem\n",
+       "ResultOCR#block 12: 0.67||Soe er eke othing to scream ay anymore.\n",
+       "ResultOCR#block 13: 0.94||\"you're among friends now. you're safe!\n",
+       "ResultOCR#block 14: 1.00||Continued after next page\n",
+       "\n",
+       " ---------- Padded 8px ----------\n",
+       "ResultOCR#block 00: 0.88||Enbonered by great gnarled cypress trees, the ancient manor stands alone on the pag hile of new orleans, kept tipy by a white-haired ao lo man known omy as\n",
+       "ResultOCR#block 01: 0.99||The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their. master comes home\n",
+       "ResultOCR#block 02: 0.67||7 and one in ee would appear,\n",
+       "ResultOCR#block 03: 0.76||Zf mbl == we have a guest.\n",
+       "ResultOCR#block 04: 0.61||Tonight, he comes host slamming open\n",
+       "ResultOCR#block 05: 0.75||Yy i tell me master - how may bambli serve 7 _,\n",
+       "ResultOCR#block 06: 0.89||Some. blankets to keep he arm , bambli-= and perhaps some dry clothes. 2s\n",
+       "ResultOCR#block 07: 0.72||The the old mans fades down the hal. srl see\n",
+       "ResultOCR#block 08: 0.88||* how curious the p whims of fate . - had i not chanced . to stroll along _ the river 3 tonight-~\n",
+       "ResultOCR#block 09: 0.65||Tiie as t can, \\ master ,\n",
+       "ResultOCR#block 10: 0.86||The girl wolld - most slirely be - dead by now.\n",
+       "ResultOCR#block 11: 0.62||Ghede has been generous. : the crn son ue;\n",
+       "ResultOCR#block 12: 0.62||Soe er eke othing to scream hbolt anhore hr\n",
+       "ResultOCR#block 13: 0.92||” you're among friends now. you're safe!\n",
+       "ResultOCR#block 14: 0.94||“continued after next page\n",
+       "\n",
+       " ---------- Extracted, init box ----------\n",
+       "ResultOCRExtracted#block 00: 0.92||Fhbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans. kept tipy by a whi te-haire old man known only as bambi] .\n",
+       "ResultOCRExtracted#block 01: 0.93||Ee house and the old man por alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home ~~\n",
+       "ResultOCRExtracted#block 02: 0.73||And one in fee would appear.\n",
+       "ResultOCRExtracted#block 03: 0.67||— we have a i=s7t.\n",
+       "ResultOCRExtracted#block 04: 0.74||~and tonight, he comes urgently, slamming open\n",
+       "ResultOCRExtracted#block 05: 0.85||Tell me master how may bambli serve 7\n",
+       "ResultOCRExtracted#block 06: 0.93||Some blankets to keep her warm, banbli-- and perhaps. some dry clothes\n",
+       "ResultOCRExtracted#block 07: 0.77||The the old man's fades down the hall s.,00\n",
+       "ResultOCRExtracted#block 08: 0.77||Hin ef fare” had i not chanced to stroll along the river. tonigmt=~\n",
+       "ResultOCRExtracted#block 09: 0.85||Aulckly as t can, master,\n",
+       "ResultOCRExtracted#block 10: 1.00||--the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.51||Ath the girl a second chance ro\n",
+       "ResultOCRExtracted#block 12: 0.56||Cas ee, othing to scream pls aa .\n",
+       "ResultOCRExtracted#block 13: 0.95||You're among friends now. you're sale!\n",
+       "ResultOCRExtracted#block 14: 0.91||Continued af ext page\n",
+       "\n",
+       " ---------- Padded 4, extracted ----------\n",
+       "ResultOCRExtracted#block 00: 0.91||Enbonsred by great shale cypress trees, the anci manor stands alone on the [tskirts of new orleans, kept tidy by a whi te- haired old man known only as b8ambl .\n",
+       "ResultOCRExtracted#block 01: 0.98||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home --\n",
+       "ResultOCRExtracted#block 02: 0.73||And one in fee would appear.\n",
+       "ResultOCRExtracted#block 03: 0.83||Bambli we have a gliest.\n",
+       "ResultOCRExtracted#block 04: 0.74||=~and tonight, he comes urgently, slamming open\n",
+       "ResultOCRExtracted#block 05: 0.84||Tell me master. how may bambli serve 7\n",
+       "ResultOCRExtracted#block 06: 0.53||Warm, bambli-- and perhaps. som\n",
+       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sra\n",
+       "ResultOCRExtracted#block 08: 0.76||We sea had i not chanced to stroll along the river. tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.76||Alley as t can, master,\n",
+       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.92||Chepe has been generous. the peath god has given the girl a second chance amr\n",
+       "ResultOCRExtracted#block 12: 0.73||Cas gr theres othing to scream pissy tore .\n",
+       "ResultOCRExtracted#block 13: 0.95||You're among eriends now. you're safe!\n",
+       "ResultOCRExtracted#block 14: 0.91||Continued af ext page\n",
+       "\n",
+       " ---------- Padded 8, extracted ----------\n",
+       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as 8ambli .\n",
+       "ResultOCRExtracted#block 01: 0.98||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home --\n",
+       "ResultOCRExtracted#block 02: 0.70||And one in fee wolld appear.\n",
+       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
+       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
+       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve't\n",
+       "ResultOCRExtracted#block 06: 0.91||Some blankets to keep her warm, banbli-~ and perhaps. some dry clothes\n",
+       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sire\n",
+       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.77||Allckry as t can, master.\n",
+       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.94||Ghede has been generous. the peath god has given the girl a second chance po\n",
+       "ResultOCRExtracted#block 12: 0.74||Cas gr theres othing to scream pps hore .\n",
+       "ResultOCRExtracted#block 13: 0.42||You're safe § r\n",
+       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
+       "\n",
+       " ---------- Padded 8, dilation 1 ----------\n",
+       "ResultOCRExtracted#block 00: 0.61||Outskirts of new orleans, kept tipy by a white-haired old man known only as sams .\n",
+       "ResultOCRExtracted#block 01: 0.88||The house and the old man are alike in many ways, tall, proud, nt, contented live walt gtie their, master comes home -=\n",
+       "ResultOCRExtracted#block 02: 0.97||And one in need of some help, it wolld appear .\n",
+       "ResultOCRExtracted#block 03: 0.78||Bambli ~~ we have a gliest.\n",
+       "ResultOCRExtracted#block 04: 0.79||=and tonight, he comes most slamming open the front\n",
+       "ResultOCRExtracted#block 05: 0.86||Tell me, master: how may bambli serve 7\n",
+       "ResultOCRExtracted#block 06: 0.85||Gone blankets to keep her. warm, bambli-~ and perhaps some dry\n",
+       "ResultOCRExtracted#block 07: 0.73||The old man's footsteps the hall as.re\n",
+       "ResultOCRExtracted#block 08: 0.94||How curious the whims of fate . had i not chanced to stroll along the r/| ver tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.68||Aulckly as t can,\n",
+       "ResultOCRExtracted#block 10: 0.95||~<the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.75||Ee lh boe ene. the death gop has the girl. a second chance\n",
+       "ResultOCRExtracted#block 12: 0.92||Easy, girl--there's nothing to scream abolit anymo!\n",
+       "ResultOCRExtracted#block 13: 0.97||You're among friends now. you're safe!\n",
+       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
+       "\n",
+       " ---------- Pad 8, fract. 0.5 ----------\n",
+       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as b8ambl .\n",
+       "ResultOCRExtracted#block 01: 0.97||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until. their. master comes home ~~\n",
+       "ResultOCRExtracted#block 02: 0.78||And one in eee pe would appear.\n",
+       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
+       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
+       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve'7\n",
+       "ResultOCRExtracted#block 06: 0.94||Some blankets to keep her. warm, bambli-~ and perhaps. some dry clothes\n",
+       "ResultOCRExtracted#block 07: 0.73||The the old mans fades donn the hall sire\n",
+       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.81||Aulckry as t can, master.\n",
+       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.92||Ghede hag been generous. the peath god has given the girl a second chance po\n",
+       "ResultOCRExtracted#block 12: 0.76||Cas srl theres othing to scream seoit hore .\n",
+       "ResultOCRExtracted#block 13: 0.42||You're safe 4 ’\n",
+       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
+       "\n",
+       " ---------- Pad 8, fract. 0.2 ----------\n",
+       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as b8ambl .\n",
+       "ResultOCRExtracted#block 01: 0.97||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home ~~\n",
+       "ResultOCRExtracted#block 02: 0.77||And one in eet sve would appear.\n",
+       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
+       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
+       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve'7\n",
+       "ResultOCRExtracted#block 06: 0.94||Some blankets to keep her, warm, bambli-~ and perhaps. some dry clothes\n",
+       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sere\n",
+       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.81||Aulckry as t can, master.\n",
+       "ResultOCRExtracted#block 10: 0.95||~<the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.94||Ghede has been generous. the ceath god has given the girl a second chance po\n",
+       "ResultOCRExtracted#block 12: 0.67||Yi renee othing to scream seat anhore .\n",
+       "ResultOCRExtracted#block 13: 0.93||Youre among eriends now. you're safe!\n",
+       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
+       "\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "---------- Initial box ----------\n", + "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.90\u001b[0m||Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski\u001b[1m)\u001b[0m \u001b[1;36m2\u001b[0m of mew ce eans, kept tidy by a white-haired old man known only as bambs, \u001b[1;36m3\u001b[0m\n", + "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.93\u001b[0m||The house and the old man are alike in many ways; tall, prolid, patient, contented always \u001b[1;36m0\u001b[0m wait until. their. master cones mome ~~\n", + "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.70\u001b[0m||“and one in ee would appear.\n", + "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.62\u001b[0m||Re bambli-~ we have a\n", + "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.70\u001b[0m||Tonight, he comes noost slamming open the caken\n", + "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.82\u001b[0m||Tell me naster. how may bambli serve \u001b[1;36m7\u001b[0m\n", + "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.56\u001b[0m||£\u001b[1;36m7\u001b[0m » and perhaps some dry clothes\u001b[33m...\u001b[0m \u001b[1;36m7\u001b[0m \u001b[35m/\u001b[0m\n", + "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.81\u001b[0m||The the old man's fades down the hall as\u001b[33m...\u001b[0m \u001b[1;36m7\u001b[0m\n", + "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.85\u001b[0m||How curious the \u001b[1;36m4\u001b[0m fate. 
whims of had t not chanced to stroll along the river yl tonight ==\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.80\u001b[0m||Fas oulckly as t ca, master.\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.91\u001b[0m||\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.50\u001b[0m||Aulckly “master as t can,\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.94\u001b[0m|| need of some help, it would appear .\n", + "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.88\u001b[0m||\" bambl-- we have a guest.\n", + "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.72\u001b[0m||~~and tonight, he comes urgently, slanming open\n", + "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.86\u001b[0m||Tell me, master: how may bambli serve \u001b[1;36m7\u001b[0m\n", + "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.90\u001b[0m||Some blankets to keep her. warm, bambli-- and perhaps some dry \\ clothes-\u001b[1;36m-7\u001b[0m \u001b[35m/\u001b[0m\u001b[95m.\u001b[0m\n", + "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.06\u001b[0m||As.\n", + "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.91\u001b[0m||How curious the d 7e . whims of fa; had i not chanced to stroll along the river tonight--\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.55\u001b[0m||Ickl as t can\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", + "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.85\u001b[0m||Ghede has been generous. the death god has gen + the girl. a second chance te oe ato\" pd ate\n", + "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.95\u001b[0m||Easy, girl--there's | nothing to scream about anyaore.\n", + "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.97\u001b[0m||You're among friends now. 
you're safe!\n", + "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.54\u001b[0m||“continued a\n", + "\n", + " ---------- Padded 4px ----------\n", + "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.88\u001b[0m||Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as\n", + "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.93\u001b[0m||The house and the oldman are alike in many ways; tall, proud, patient, contented a ways \u001b[1;36m0\u001b[0m wait until their. aster comes home ~~ | \u001b[1m}\u001b[0m\n", + "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.69\u001b[0m||F and one in ee would appear.\n", + "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.77\u001b[0m||\" bambli-— we have a gliest.\n", + "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.55\u001b[0m||P comes slamming open the caken\n", + "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.57\u001b[0m||Tel oe er-- \u001b[1;36m5\u001b[0m ow a = \u001b[1;36m7\u001b[0m\n", + "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.38\u001b[0m||We and perhaps c oe \u001b[35m/\u001b[0m\n", + "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.75\u001b[0m||The the old mans fades down the hall sra\n", + "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.92\u001b[0m||How curious the a whims of fate . - had i not chanced to stroll along the river yl tonight~-\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.79\u001b[0m||Aulckly as t can, ‘masrer.\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.92\u001b[0m|| Page: media/Strange_Tales_172005.jpg
Size: 1275x1888 px: 4.25 x 6.29 in @ 188.32 dpi
Model: Tesseract
Crop Method: Pad 8, fract. 0.2
Accuracy Mean/Trimmed: 0.85/0.86
\n", + "
\n", + "
Box #ImageAccuracyOCR
1
0.94
Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as b8ambl .
2
0.97
The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home ~~
3
0.77
And one in eet⎕⎕⎕ sve⎕⎕⎕⎕⎕⎕⎕⎕⎕ would appear.
4
0.86
Bambl ~~ we have a guest.
5
0.64
=~and tonight, he comes ⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕slamming open urg⎕⎕⎕⎕en⎕⎕⎕⎕⎕tly,⎕⎕⎕⎕
6
0.82
Tell me master... how may bambli serve'7
7
0.94
Some blankets to keep her, warm, bambli-~ and perhaps. some dry clothes
8
0.75
The⎕⎕⎕⎕⎕⎕⎕⎕ the old man⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕s fades down the hall sere
9
0.96
How curious the whims of fate. had i not chanced to stroll along the river tonight-~
10
0.81
A⎕⎕⎕ulckry as t can, master.
11
0.95
~<the girl would most surely be dead by now.
12
0.94
Ghede has been generous. the ceath god has given the girl a second chance po⎕⎕
13
0.67
Yi⎕⎕⎕ ⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕renee othing to scream sea⎕⎕⎕t anhore.
14
0.93
Youre among eriends now. you're safe!
15
0.83
| continued af⎕⎕⎕ ext page
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "saved_exp = ExperimentOCR.saved_experiment(CONTEXT, 'Tesseract', IMAGE_CONTEXT.image_idx)\n", + "# saved_exp = ExperimentOCR.saved_experiment(IMAGE_CONTEXT, 'Tesseract', 'Action_Comics_1960-01-00_(262).JPG')\n", + "if saved_exp:\n", + " saved_exp.method_experiment(CropMethod.PAD_8_FRACT_0_2).display()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Perform experiments for selected boxes and methods" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
---------- Initial box ----------\n",
+       "ResultOCR#block 00: 0.90||Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3\n",
+       "ResultOCR#block 01: 0.93||The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~\n",
+       "ResultOCR#block 02: 0.70||“and one in ee would appear.\n",
+       "ResultOCR#block 03: 0.62||Re bambli-~ we have a\n",
+       "ResultOCR#block 04: 0.70||Tonight, he comes noost slamming open the caken\n",
+       "ResultOCR#block 05: 0.82||Tell me naster. how may bambli serve 7\n",
+       "ResultOCR#block 06: 0.56||£7 » and perhaps some dry clothes... 7 /\n",
+       "ResultOCR#block 07: 0.81||The the old man's fades down the hall as... 7\n",
+       "ResultOCR#block 08: 0.85||How curious the 4 fate. whims of had t not chanced to stroll along the river yl tonight ==\n",
+       "ResultOCR#block 09: 0.80||Fas oulckly as t ca, master.\n",
+       "ResultOCR#block 10: 0.91||<the girl would - most slirely be dead by now.\n",
+       "ResultOCR#block 11: 0.47||Ath the girl. a second chance ge a ee yg adil\n",
+       "ResultOCR#block 12: 0.84||Ah girl--there's othing to scream ntt anymore .\n",
+       "ResultOCR#block 13: 0.93||You're among friends now. you're sale\n",
+       "ResultOCR#block 14: 1.00||Continued after next page\n",
+       "\n",
+       " ---------- Default ----------\n",
+       "ResultOCR#block 00: 0.85||Eneowered by great gnarled cypress jrfes, the ancient manor ! alone on the eit of mew rce: eans, kept tipy by a white-haired ao han known only as\n",
+       "ResultOCR#block 01: 0.96||The house and the old man are alike in many ways; tall, proud, patient, conten tel always to wait until their. master cones home ~~\n",
+       "ResultOCR#block 02: 0.74||And one in ee would appear.\n",
+       "ResultOCR#block 03: 0.41||Rir guest.\n",
+       "ResultOCR#block 04: 0.59||=~and tonight, he comes host sane oo\n",
+       "ResultOCR#block 05: 0.78||Tell me masts - how may bambli . serve 7 _\n",
+       "ResultOCR#block 06: 0.48||R warm, bambli-~ and perhaps\n",
+       "ResultOCR#block 07: 0.76||The the old mans fades down the hall s.00\n",
+       "ResultOCR#block 08: 0.92||How curious the a whims of fate . had t not chanced to stroll along the river tonight~~ >\n",
+       "ResultOCR#block 09: 0.50||Aulckly “master as t can,\n",
+       "ResultOCR#block 10: 0.94||<the girl would - most surely be dead by now.\n",
+       "ResultOCR#block 11: 0.50||Ath - the girl. a second chance ee oo tr tt\n",
+       "ResultOCR#block 12: 0.84||Oe girl--there's othing to scream nt anymore. 4\n",
+       "ResultOCR#block 13: 0.96||You're among friends now. youre safe!\n",
+       "ResultOCR#block 14: 1.00||Continued after next page\n",
+       "\n",
+       " ---------- Default, grey pad ----------\n",
+       "ResultOCR#block 00: 0.95||Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as\n",
+       "ResultOCR#block 01: 0.96||The house and the old man are alike in many ways; tall, prolid, patient, contented always to wait until their. * master cones home ~-\n",
+       "ResultOCR#block 02: 0.94||“and one in > need of some help, it would appear .\n",
+       "ResultOCR#block 03: 0.88||\" bambl-- we have a guest.\n",
+       "ResultOCR#block 04: 0.72||~~and tonight, he comes urgently, slanming open\n",
+       "ResultOCR#block 05: 0.86||Tell me, master: how may bambli serve 7\n",
+       "ResultOCR#block 06: 0.90||Some blankets to keep her. warm, bambli-- and perhaps some dry \\ clothes--7 /.\n",
+       "ResultOCR#block 07: 0.06||As.\n",
+       "ResultOCR#block 08: 0.91||How curious the d 7e . whims of fa; had i not chanced to stroll along the river tonight--\n",
+       "ResultOCR#block 09: 0.55||Ickl as t can\n",
+       "ResultOCR#block 10: 0.00||\n",
+       "ResultOCR#block 11: 0.85||Ghede has been generous. the death god has gen + the girl. a second chance te oe ato\" pd ate\n",
+       "ResultOCR#block 12: 0.95||Easy, girl--there's | nothing to scream about anyaore.\n",
+       "ResultOCR#block 13: 0.97||You're among friends now. you're safe!\n",
+       "ResultOCR#block 14: 0.54||“continued a\n",
+       "\n",
+       " ---------- Padded 4px ----------\n",
+       "ResultOCR#block 00: 0.88||Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as\n",
+       "ResultOCR#block 01: 0.93||The house and the oldman are alike in many ways; tall, proud, patient, contented a ways 0 wait until their. aster comes home ~~ | }\n",
+       "ResultOCR#block 02: 0.69||F and one in ee would appear.\n",
+       "ResultOCR#block 03: 0.77||\" bambli-— we have a gliest.\n",
+       "ResultOCR#block 04: 0.55||P comes slamming open the caken\n",
+       "ResultOCR#block 05: 0.57||Tel oe er-- 5 ow a = 7\n",
+       "ResultOCR#block 06: 0.38||We and perhaps c oe /\n",
+       "ResultOCR#block 07: 0.75||The the old mans fades down the hall sra\n",
+       "ResultOCR#block 08: 0.92||How curious the a whims of fate . - had i not chanced to stroll along the river yl tonight~-\n",
+       "ResultOCR#block 09: 0.79||Aulckly as t can, ‘masrer.\n",
+       "ResultOCR#block 10: 0.92||<the girl wolld - most surely be dead by now.\n",
+       "ResultOCR#block 11: 0.88||Ghede has been generous. the oeath gop has given - the girl. a second chance ye, alem\n",
+       "ResultOCR#block 12: 0.67||Soe er eke othing to scream ay anymore.\n",
+       "ResultOCR#block 13: 0.94||\"you're among friends now. you're safe!\n",
+       "ResultOCR#block 14: 1.00||Continued after next page\n",
+       "\n",
+       " ---------- Padded 8px ----------\n",
+       "ResultOCR#block 00: 0.88||Enbonered by great gnarled cypress trees, the ancient manor stands alone on the pag hile of new orleans, kept tipy by a white-haired ao lo man known omy as\n",
+       "ResultOCR#block 01: 0.99||The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their. master comes home\n",
+       "ResultOCR#block 02: 0.67||7 and one in ee would appear,\n",
+       "ResultOCR#block 03: 0.76||Zf mbl == we have a guest.\n",
+       "ResultOCR#block 04: 0.61||Tonight, he comes host slamming open\n",
+       "ResultOCR#block 05: 0.75||Yy i tell me master - how may bambli serve 7 _,\n",
+       "ResultOCR#block 06: 0.89||Some. blankets to keep he arm , bambli-= and perhaps some dry clothes. 2s\n",
+       "ResultOCR#block 07: 0.72||The the old mans fades down the hal. srl see\n",
+       "ResultOCR#block 08: 0.88||* how curious the p whims of fate . - had i not chanced . to stroll along _ the river 3 tonight-~\n",
+       "ResultOCR#block 09: 0.65||Tiie as t can, \\ master ,\n",
+       "ResultOCR#block 10: 0.86||The girl wolld - most slirely be - dead by now.\n",
+       "ResultOCR#block 11: 0.62||Ghede has been generous. : the crn son ue;\n",
+       "ResultOCR#block 12: 0.62||Soe er eke othing to scream hbolt anhore hr\n",
+       "ResultOCR#block 13: 0.92||” you're among friends now. you're safe!\n",
+       "ResultOCR#block 14: 0.94||“continued after next page\n",
+       "\n",
+       " ---------- Extracted, init box ----------\n",
+       "ResultOCRExtracted#block 00: 0.92||Fhbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans. kept tipy by a whi te-haire old man known only as bambi] .\n",
+       "ResultOCRExtracted#block 01: 0.93||Ee house and the old man por alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home ~~\n",
+       "ResultOCRExtracted#block 02: 0.73||And one in fee would appear.\n",
+       "ResultOCRExtracted#block 03: 0.67||— we have a i=s7t.\n",
+       "ResultOCRExtracted#block 04: 0.74||~and tonight, he comes urgently, slamming open\n",
+       "ResultOCRExtracted#block 05: 0.85||Tell me master how may bambli serve 7\n",
+       "ResultOCRExtracted#block 06: 0.93||Some blankets to keep her warm, banbli-- and perhaps. some dry clothes\n",
+       "ResultOCRExtracted#block 07: 0.77||The the old man's fades down the hall s.,00\n",
+       "ResultOCRExtracted#block 08: 0.77||Hin ef fare” had i not chanced to stroll along the river. tonigmt=~\n",
+       "ResultOCRExtracted#block 09: 0.85||Aulckly as t can, master,\n",
+       "ResultOCRExtracted#block 10: 1.00||--the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.51||Ath the girl a second chance ro\n",
+       "ResultOCRExtracted#block 12: 0.56||Cas ee, othing to scream pls aa .\n",
+       "ResultOCRExtracted#block 13: 0.95||You're among friends now. you're sale!\n",
+       "ResultOCRExtracted#block 14: 0.91||Continued af ext page\n",
+       "\n",
+       " ---------- Padded 4, extracted ----------\n",
+       "ResultOCRExtracted#block 00: 0.91||Enbonsred by great shale cypress trees, the anci manor stands alone on the [tskirts of new orleans, kept tidy by a whi te- haired old man known only as b8ambl .\n",
+       "ResultOCRExtracted#block 01: 0.98||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home --\n",
+       "ResultOCRExtracted#block 02: 0.73||And one in fee would appear.\n",
+       "ResultOCRExtracted#block 03: 0.83||Bambli we have a gliest.\n",
+       "ResultOCRExtracted#block 04: 0.74||=~and tonight, he comes urgently, slamming open\n",
+       "ResultOCRExtracted#block 05: 0.84||Tell me master. how may bambli serve 7\n",
+       "ResultOCRExtracted#block 06: 0.53||Warm, bambli-- and perhaps. som\n",
+       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sra\n",
+       "ResultOCRExtracted#block 08: 0.76||We sea had i not chanced to stroll along the river. tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.76||Alley as t can, master,\n",
+       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.92||Chepe has been generous. the peath god has given the girl a second chance amr\n",
+       "ResultOCRExtracted#block 12: 0.73||Cas gr theres othing to scream pissy tore .\n",
+       "ResultOCRExtracted#block 13: 0.95||You're among eriends now. you're safe!\n",
+       "ResultOCRExtracted#block 14: 0.91||Continued af ext page\n",
+       "\n",
+       " ---------- Padded 8, extracted ----------\n",
+       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as 8ambli .\n",
+       "ResultOCRExtracted#block 01: 0.98||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home --\n",
+       "ResultOCRExtracted#block 02: 0.70||And one in fee wolld appear.\n",
+       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
+       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
+       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve't\n",
+       "ResultOCRExtracted#block 06: 0.91||Some blankets to keep her warm, banbli-~ and perhaps. some dry clothes\n",
+       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sire\n",
+       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.77||Allckry as t can, master.\n",
+       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.94||Ghede has been generous. the peath god has given the girl a second chance po\n",
+       "ResultOCRExtracted#block 12: 0.74||Cas gr theres othing to scream pps hore .\n",
+       "ResultOCRExtracted#block 13: 0.42||You're safe § r\n",
+       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
+       "\n",
+       " ---------- Padded 8, dilation 1 ----------\n",
+       "ResultOCRExtracted#block 00: 0.61||Outskirts of new orleans, kept tipy by a white-haired old man known only as sams .\n",
+       "ResultOCRExtracted#block 01: 0.88||The house and the old man are alike in many ways, tall, proud, nt, contented live walt gtie their, master comes home -=\n",
+       "ResultOCRExtracted#block 02: 0.97||And one in need of some help, it wolld appear .\n",
+       "ResultOCRExtracted#block 03: 0.78||Bambli ~~ we have a gliest.\n",
+       "ResultOCRExtracted#block 04: 0.79||=and tonight, he comes most slamming open the front\n",
+       "ResultOCRExtracted#block 05: 0.86||Tell me, master: how may bambli serve 7\n",
+       "ResultOCRExtracted#block 06: 0.85||Gone blankets to keep her. warm, bambli-~ and perhaps some dry\n",
+       "ResultOCRExtracted#block 07: 0.73||The old man's footsteps the hall as.re\n",
+       "ResultOCRExtracted#block 08: 0.94||How curious the whims of fate . had i not chanced to stroll along the r/| ver tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.68||Aulckly as t can,\n",
+       "ResultOCRExtracted#block 10: 0.95||~<the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.75||Ee lh boe ene. the death gop has the girl. a second chance\n",
+       "ResultOCRExtracted#block 12: 0.92||Easy, girl--there's nothing to scream abolit anymo!\n",
+       "ResultOCRExtracted#block 13: 0.97||You're among friends now. you're safe!\n",
+       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
+       "\n",
+       " ---------- Pad 8, fract. 0.5 ----------\n",
+       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as b8ambl .\n",
+       "ResultOCRExtracted#block 01: 0.97||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until. their. master comes home ~~\n",
+       "ResultOCRExtracted#block 02: 0.78||And one in eee pe would appear.\n",
+       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
+       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
+       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve'7\n",
+       "ResultOCRExtracted#block 06: 0.94||Some blankets to keep her. warm, bambli-~ and perhaps. some dry clothes\n",
+       "ResultOCRExtracted#block 07: 0.73||The the old mans fades donn the hall sire\n",
+       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.81||Aulckry as t can, master.\n",
+       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.92||Ghede hag been generous. the peath god has given the girl a second chance po\n",
+       "ResultOCRExtracted#block 12: 0.76||Cas srl theres othing to scream seoit hore .\n",
+       "ResultOCRExtracted#block 13: 0.42||You're safe 4 ’\n",
+       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
+       "\n",
+       " ---------- Pad 8, fract. 0.2 ----------\n",
+       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as b8ambl .\n",
+       "ResultOCRExtracted#block 01: 0.97||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home ~~\n",
+       "ResultOCRExtracted#block 02: 0.77||And one in eet sve would appear.\n",
+       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
+       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
+       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve'7\n",
+       "ResultOCRExtracted#block 06: 0.94||Some blankets to keep her, warm, bambli-~ and perhaps. some dry clothes\n",
+       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sere\n",
+       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
+       "ResultOCRExtracted#block 09: 0.81||Aulckry as t can, master.\n",
+       "ResultOCRExtracted#block 10: 0.95||~<the girl would most surely be dead by now.\n",
+       "ResultOCRExtracted#block 11: 0.94||Ghede has been generous. the ceath god has given the girl a second chance po\n",
+       "ResultOCRExtracted#block 12: 0.67||Yi renee othing to scream seat anhore .\n",
+       "ResultOCRExtracted#block 13: 0.93||Youre among eriends now. you're safe!\n",
+       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
+       "\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "---------- Initial box ----------\n", + "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.90\u001b[0m||Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski\u001b[1m)\u001b[0m \u001b[1;36m2\u001b[0m of mew ce eans, kept tidy by a white-haired old man known only as bambs, \u001b[1;36m3\u001b[0m\n", + "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.93\u001b[0m||The house and the old man are alike in many ways; tall, prolid, patient, contented always \u001b[1;36m0\u001b[0m wait until. their. master cones mome ~~\n", + "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.70\u001b[0m||“and one in ee would appear.\n", + "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.62\u001b[0m||Re bambli-~ we have a\n", + "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.70\u001b[0m||Tonight, he comes noost slamming open the caken\n", + "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.82\u001b[0m||Tell me naster. how may bambli serve \u001b[1;36m7\u001b[0m\n", + "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.56\u001b[0m||£\u001b[1;36m7\u001b[0m » and perhaps some dry clothes\u001b[33m...\u001b[0m \u001b[1;36m7\u001b[0m \u001b[35m/\u001b[0m\n", + "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.81\u001b[0m||The the old man's fades down the hall as\u001b[33m...\u001b[0m \u001b[1;36m7\u001b[0m\n", + "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.85\u001b[0m||How curious the \u001b[1;36m4\u001b[0m fate. 
whims of had t not chanced to stroll along the river yl tonight ==\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.80\u001b[0m||Fas oulckly as t ca, master.\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.91\u001b[0m||\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.50\u001b[0m||Aulckly “master as t can,\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.94\u001b[0m|| need of some help, it would appear .\n", + "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.88\u001b[0m||\" bambl-- we have a guest.\n", + "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.72\u001b[0m||~~and tonight, he comes urgently, slanming open\n", + "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.86\u001b[0m||Tell me, master: how may bambli serve \u001b[1;36m7\u001b[0m\n", + "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.90\u001b[0m||Some blankets to keep her. warm, bambli-- and perhaps some dry \\ clothes-\u001b[1;36m-7\u001b[0m \u001b[35m/\u001b[0m\u001b[95m.\u001b[0m\n", + "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.06\u001b[0m||As.\n", + "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.91\u001b[0m||How curious the d 7e . whims of fa; had i not chanced to stroll along the river tonight--\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.55\u001b[0m||Ickl as t can\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", + "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.85\u001b[0m||Ghede has been generous. the death god has gen + the girl. a second chance te oe ato\" pd ate\n", + "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.95\u001b[0m||Easy, girl--there's | nothing to scream about anyaore.\n", + "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.97\u001b[0m||You're among friends now. 
you're safe!\n", + "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.54\u001b[0m||“continued a\n", + "\n", + " ---------- Padded 4px ----------\n", + "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.88\u001b[0m||Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as\n", + "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.93\u001b[0m||The house and the oldman are alike in many ways; tall, proud, patient, contented a ways \u001b[1;36m0\u001b[0m wait until their. aster comes home ~~ | \u001b[1m}\u001b[0m\n", + "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.69\u001b[0m||F and one in ee would appear.\n", + "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.77\u001b[0m||\" bambli-— we have a gliest.\n", + "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.55\u001b[0m||P comes slamming open the caken\n", + "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.57\u001b[0m||Tel oe er-- \u001b[1;36m5\u001b[0m ow a = \u001b[1;36m7\u001b[0m\n", + "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.38\u001b[0m||We and perhaps c oe \u001b[35m/\u001b[0m\n", + "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.75\u001b[0m||The the old mans fades down the hall sra\n", + "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.92\u001b[0m||How curious the a whims of fate . - had i not chanced to stroll along the river yl tonight~-\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.79\u001b[0m||Aulckly as t can, ‘masrer.\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.92\u001b[0m||---------- Initial box ----------\n", + "ResultOCR#block 00: 0.90||Suddenly.\n", + "ResultOCR#block 01: 0.81||>gasp!z everything's w- whirling around me!t can't stand sr rea\n", + "ResultOCR#block 02: 0.81||Clark!i'm falling' “help! help! 
—\n", + "ResultOCR#block 03: 0.67||I-i'm bs \"passing ohhh\n", + "ResultOCR#block 04: 0.92||Action comics\n", + "ResultOCR#block 05: 1.00||Then, seconds later...\n", + "ResultOCR#block 06: 0.89||Great caesars ghost! { this /s black magic! we've been transportel to the weirdest world, tit ever saw!\n", + "ResultOCR#block 07: 0.91||...|t certainly isn't our earth, perry. look at the size of those bees.\n", + "ResultOCR#block 08: 0.88||Watch out, clark!\n", + "ResultOCR#block 09: 0.73||Owwww.\n", + "ResultOCR#block 10: 0.85||Yet the bee's stinger went ws. right through my uniform and penetrated my skin! that means. the fabric of pe ay costume has become dj} satie fued 1,\n", + "ResultOCR#block 11: 0.87||Hurry. let's beat it before we get stung, foo = aii\n", + "ResultOCR#block 12: 0.86||Ggreat guns!...2g, .pa/n i feel n pain! as superman, i should be > invulnerable! 1 have unbreakabl ‘skin! under my clark kent clothes, im wearing an woestructisle superman uniform ! =\n", + "ResultOCR#block 13: 1.00||Abruptly...\n", + "ResultOCR#block 14: 0.77||Great caesar's ghost. he's spinning a web of g/ant, sr strands --\n", + "ResultOCR#block 15: 0.89||I-i feel the heat of the sun...the pain of the bee-sting ... the heavy weight of my pack! every human discomfort... good grief! i've lost all my super-powers! i've become an ordinary mortal in this world. /\n", + "ResultOCR#block 16: 0.84||Enormous spider- like creature is going berserk, as if the sight of us excited him (into mad spinning get back! that 2\n", + "\n", + " ---------- Default ----------\n", + "ResultOCR#block 00: 1.00||Suddenly...\n", + "ResultOCR#block 01: 0.80||Gasp! everything's w- whi rns atound me!i can't stand we a\n", + "ResultOCR#block 02: 0.87||Clark!i'm falling! help! help\n", + "ResultOCR#block 03: 0.45||I-i'm se ou\n", + "ResultOCR#block 04: 0.92||Action comics\n", + "ResultOCR#block 05: 0.95||Then, seconds later.\n", + "ResultOCR#block 06: 0.92||Great caesar's ghost! this /s black magic! 
we've been transported, to the weirdest world, i ever saw!\n", + "ResultOCR#block 07: 0.93||...|t certainly isn't our earth, perry look at the size of those bees!\n", + "ResultOCR#block 08: 0.88||Watch out, clark)\n", + "ResultOCR#block 09: 0.67||Owwww.,\n", + "ResultOCR#block 10: 0.97||Yet the bee's stinger went = right through my uniform ando penetrated my skin! that mean the fabric of my superman costume has become ordinary, cloth! £\n", + "ResultOCR#block 11: 0.89||Hurry. let's beat it before we get stung, tod yi\n", + "ResultOCR#block 12: 0.88||Ggreat guns!...2gasp/z...pain i feel n fan! as superman, i should be invulnerable! 1 have unbreakable skin! under my clark kent clothes, i'm wearing an /noestruct/ele superman uniform !\n", + "ResultOCR#block 13: 1.00||Abruptly...\n", + "ResultOCR#block 14: 0.76||Great caesars ° ghost! he's spinning & web of g/a, silk strands\n", + "ResultOCR#block 15: 0.89||I-i feel the heat of the sum...the pain of the bee-sting... the heavy weight of my { pack! every human discomfort... good grief! i've lost all my super -powersx ve become an ordinary mortal in this world! j\n", + "ResultOCR#block 16: 0.70||Like creature is going berserk, as if the sight of us excited him into mad spinning, get back! that \\ enormous spider-\n", + "\n", + " ---------- Default, grey pad ----------\n", + "ResultOCR#block 00: 0.78||Sudden.\n", + "ResultOCR#block 01: 0.82||5gasp!z everything's | w- whirling around] me!t can't stand ue a\n", + "ResultOCR#block 02: 0.57||Ark!i'm falling\n", + "ResultOCR#block 03: 0.85||I-t'm eq \"passing out... ohhh.\n", + "ResultOCR#block 04: 0.92||Action comics\n", + "ResultOCR#block 05: 0.00||\n", + "ResultOCR#block 06: 0.88||‘great caesar's ghost! || this /§ black magic! 
we've been transportel to the weirdest world, i ever saw.\n", + "ResultOCR#block 07: 0.87||\"...it certainly isn't our | earth, perry look at the| \\s|ze of those bees.\n", + "ResultOCR#block 08: 0.88||Watch out, clark!\n", + "ResultOCR#block 09: 0.73||Owwww.\n", + "ResultOCR#block 10: 0.90||Yet the bee's stinger went ~ right through my uniform and penetrated my skin! that means. the fabric of sera ry costume vis become din a s clots! a\n", + "ResultOCR#block 11: 0.80||Hurry. let's ) beat it before] we get stung, k 00! __j\n", + "ResultOCR#block 12: 0.86||Ggreat guns!...3gasp/=... rain t feel fan! as superman, i should be invulnerable® 1 have unbreakabli skin! under my clark kent clothes, ia wearing an /noestryct/iele supe iperman uniform !\n", + "ResultOCR#block 13: 0.90||Abruptly.\n", + "ResultOCR#block 14: 0.92||Great caesar's | ghost! he's spinning k web of giant, silk strands -- as tough as steel! ne]\n", + "ResultOCR#block 15: 0.89||\\i-t feel the heat of the sun...the pain of the bee-sting... the heavy weight of my & pack! every human discomfort... good grief! i've lost all my super-powers i've become an ordinary mortal in this world. /.\n", + "ResultOCR#block 16: 0.94||(get back! that enormous spider- like creature 1 § going berserk, as if the sight of us excited him into mad spinning]\n", + "\n", + " ---------- Padded 4px ----------\n", + "ResultOCR#block 00: 0.90||Suddenly.\n", + "ResultOCR#block 01: 0.83||Fgasp/= everything's whirling around me!i can't stand ea\n", + "ResultOCR#block 02: 0.68||Ie lottie eto clark!i'm falling help! help!\n", + "ResultOCR#block 03: 0.55||I-i'm yr a ohh hh.\n", + "ResultOCR#block 04: 0.92||Action comics\n", + "ResultOCR#block 05: 0.74||Then, seconds\n", + "ResultOCR#block 06: 0.62||Great caesar's ghost! \\ this /§ black magic! i ever saw!\n", + "ResultOCR#block 07: 0.87||T certainly isn't our earth, perry! 
look at the \\s|ze of those bees.\n", + "ResultOCR#block 08: 0.76||#3 watch out, clark!\n", + "ResultOCR#block 09: 0.73||Owwww.\n", + "ResultOCR#block 10: 0.95||\"yet the bee's stinger went ~~ right through my uniform and enetrated my skin! that means. the fabric of my superman costume has become ordinary, clot! &\n", + "ResultOCR#block 11: 0.86||Hurry! let's beat it b=fore we get stung, ¢ tool: puma\n", + "ResultOCR#block 12: 0.86||‘ggreat guns!...3gasp/5... pain i feel pain? as superman, i should be invilnerable | t ne unbreakasle gkin! under my clark kent clothes, tih wearing an indestructible superman uniform\n", + "ResultOCR#block 13: 1.00||Abruptly...\n", + "ResultOCR#block 14: 0.91||Great caesars ghost. he's spinning a web of g/ant, | silk strands as tough as steel!\n", + "ResultOCR#block 15: 0.89||I-i feel the heat of the sun...the pain of the bee-sting... the heavy weight of my { pack! every human discomfort... good grief! i've lost all \\my super-powersx ve become an ordinary mortal in this world.\n", + "ResultOCR#block 16: 0.81||(get back! that enormous spider- as if the sight of us excited him [into mad spinning\n", + "\n", + " ---------- Padded 8px ----------\n", + "ResultOCR#block 00: 0.00||\n", + "ResultOCR#block 01: 0.76||Ega5p/% everything s\\ w- whirling around me! t can't stand ue... slee\n", + "ResultOCR#block 02: 0.41||\\ help! help’\n", + "ResultOCR#block 03: 0.86||-i'm passing qut... ohh hh.\n", + "ResultOCR#block 04: 0.92||Action comics\n", + "ResultOCR#block 05: 0.00||\n", + "ResultOCR#block 06: 0.18||I ever saw/\n", + "ResultOCR#block 07: 0.86||It certainly isn't our earth, perry! poor a the size of thos!\n", + "ResultOCR#block 08: 0.62||“watch out” ial ae clark!\n", + "ResultOCR#block 09: 0.73||Owwww.\n", + "ResultOCR#block 10: 0.98||Yet the bee's stinger went right through my uniform and \\penetrated my skin! that means. the fabric of my superman costume has become ordinar! cloth!\n", + "ResultOCR#block 11: 0.93||Hurry. 
let's beat it before we get stung, too!\n", + "ResultOCR#block 12: 0.88||Ggreat guns!...2gasp/z...pain i feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! under my clark kent clothes, i'm wearing an /noestruct/ele $ superman uniform ! &\n", + "ResultOCR#block 13: 0.00||\n", + "ResultOCR#block 14: 0.91||Great caesai ghost. he's spinning a web of g/ant, silk strands -- as tough as steel!\n", + "ResultOCR#block 15: 0.88||I-i feel the heat of the sun...the pain of the bee-sting... the heavy weight of my ! every human discomfort... good grief! i've lost all \\my super-powers! ‘ve become an ordinary mortal in this world. x\n", + "ResultOCR#block 16: 0.60||As if the sight of us excited him {into mad spinning\n", + "\n", + " ---------- Extracted, init box ----------\n", + "ResultOCRExtracted#block 00: 0.90||Suddenly.\n", + "ResultOCRExtracted#block 01: 0.83||Fgasp!z everything § w- whirling around me!i can't stand ue.\n", + "ResultOCRExtracted#block 02: 0.89||Clark!i'm falling! help! help!\n", + "ResultOCRExtracted#block 03: 0.71||I-i'm passing ohhh?\n", + "ResultOCRExtracted#block 04: 0.92||Action comics\n", + "ResultOCRExtracted#block 05: 0.98||Then, seconds later..\n", + "ResultOCRExtracted#block 06: 0.91||Great caesar's ghost! this /s black magic! we've been transportel to the weirdest world tit ever saw.\n", + "ResultOCRExtracted#block 07: 0.90||...it certainly isnt our earth, perry. look at the size of those bees.\n", + "ResultOCRExtracted#block 08: 0.88||Watch out, clark!\n", + "ResultOCRExtracted#block 09: 0.62||Oowwwww.\n", + "ResultOCRExtracted#block 10: 0.99||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", + "ResultOCRExtracted#block 11: 0.93||Hurry. let's beat it before we get stung, too!\n", + "ResultOCRExtracted#block 12: 0.88||Ggreat guns!...2g/ pain t feel pain? as superman, i should be invulnerable! 1 have unbreakabl skin! 
under my clark kent clothes, i'm wearing an /noestruct/ble superman uniform *\n", + "ResultOCRExtracted#block 13: 0.96||Abruptly. ..\n", + "ResultOCRExtracted#block 14: 0.80||Great caesar's ghost. he's spinning a web of g/ant, silk strands --\n", + "ResultOCRExtracted#block 15: 0.89||I-i feel the heat of the sun. the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! t've lost all my super-powers. ve become an ordinary mortal im this world.\n", + "ResultOCRExtracted#block 16: 0.97||Get back. that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning\n", + "\n", + " ---------- Padded 4, extracted ----------\n", + "ResultOCRExtracted#block 00: 1.00||Suddenly...\n", + "ResultOCRExtracted#block 01: 0.83||Sgasp/z everything s w- whirling around me!i can't stand ue.\n", + "ResultOCRExtracted#block 02: 0.87||Clark! i'm falling’ help! help!\n", + "ResultOCRExtracted#block 03: 0.78||I-i'm passing ohhhh.\n", + "ResultOCRExtracted#block 04: 0.92||Action comics\n", + "ResultOCRExtracted#block 05: 0.98||Then, seconds later..\n", + "ResultOCRExtracted#block 06: 0.93||Great caesar's ghost! this as black magic! we've been transported to the weirdest world i ever saw!\n", + "ResultOCRExtracted#block 07: 0.90||...it certainly isnt our earth, perry. look at the size of those bees.\n", + "ResultOCRExtracted#block 08: 0.88||Watch out, clark!\n", + "ResultOCRExtracted#block 09: 0.67||Owwww.,\n", + "ResultOCRExtracted#block 10: 0.99||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", + "ResultOCRExtracted#block 11: 0.96||Hurry! let's beat it before we get stung, too!\n", + "ResultOCRExtracted#block 12: 0.89||Ggreat guns!...2gasp/:... pain t feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! 
under my clark kent clothes, i'm wearing an /noestruct/ble superman uniform !\n", + "ResultOCRExtracted#block 13: 1.00||Abruptly...\n", + "ResultOCRExtracted#block 14: 0.94||Great caesar's ghost! he's spinning a web of g/ant, \"silk strands -- as tough as steel!\n", + "ResultOCRExtracted#block 15: 0.89||I-i feel the heat of the sun., the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! t've lost all my super-powers. ve become an ordinary mortal in this world.\n", + "ResultOCRExtracted#block 16: 0.97||Get back! that enormous spider- like creature is going berserk, 25 if the sight of us excited him into mad spinning\n", + "\n", + " ---------- Padded 8, extracted ----------\n", + "ResultOCRExtracted#block 00: 0.91||Suddemly...\n", + "ResultOCRExtracted#block 01: 0.85||Sgasp/z everything s w- whirling around me!i can't stand up.\n", + "ResultOCRExtracted#block 02: 0.84||Clark! i'm falling’ _ help! help!\n", + "ResultOCRExtracted#block 03: 0.73||I-i'm passing ohhha.\n", + "ResultOCRExtracted#block 04: 0.92||Action comics\n", + "ResultOCRExtracted#block 05: 1.00||Then, seconds later...\n", + "ResultOCRExtracted#block 06: 0.90||Great caesar's ghost! f this /§ black magic! : we've been transported to the weirdest world i ever saw.\n", + "ResultOCRExtracted#block 07: 0.90||...it certainly isnt our earth, perry. look at the size of those bees.\n", + "ResultOCRExtracted#block 08: 0.88||Watch out, clark!\n", + "ResultOCRExtracted#block 09: 0.67||Owwww.,\n", + "ResultOCRExtracted#block 10: 0.99||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", + "ResultOCRExtracted#block 11: 0.96||Hurry! let's beat it before we get stung, too!\n", + "ResultOCRExtracted#block 12: 0.89||Ggreat guns!...2gasp/:... pain t feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! 
under my clark kent clothes, i'm wearing an /noestruct/ble superman uniform !\n", + "ResultOCRExtracted#block 13: 0.96||Abruptly. ..\n", + "ResultOCRExtracted#block 14: 0.93||Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!\n", + "ResultOCRExtracted#block 15: 0.89||I-t feel the heat of the sun., the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! t've lost all my super-powers. ve become an ordinary mortal in this world.\n", + "ResultOCRExtracted#block 16: 0.95||Get back! that enormous spider- like creature is going berserk, 25 if the sight of us excited him into mad spinning g [\n", + "\n", + " ---------- Padded 8, dilation 1 ----------\n", + "ResultOCRExtracted#block 00: 1.00||Suddenly...\n", + "ResultOCRExtracted#block 01: 0.83||Gasp! everything s w-whirling around wel] i can't stand\n", + "ResultOCRExtracted#block 02: 0.86||Clark!i'm falling! . help! help!\n", + "ResultOCRExtracted#block 03: 0.73||I-i'm passing ohhha.\n", + "ResultOCRExtracted#block 04: 0.92||Action comics\n", + "ResultOCRExtracted#block 05: 0.95||Then, seconds later.\n", + "ResultOCRExtracted#block 06: 0.91||Great caesar's ghost! e this /5 black magic! we've been transported to the weirdest world i ever saw/\n", + "ResultOCRExtracted#block 07: 0.91||...|it certainly isnt our earth, perry” look at the size of those bees!\n", + "ResultOCRExtracted#block 08: 0.88||Watch out, clark!\n", + "ResultOCRExtracted#block 09: 0.73||Owwww.\n", + "ResultOCRExtracted#block 10: 0.94||Yet the bee's stinger we right through my tform and penetrated my skin! that means. the fabric of my superman othe has become ordinary clot?\n", + "ResultOCRExtracted#block 11: 0.96||Hurry! let's beat it before we get stung, too!\n", + "ResultOCRExtracted#block 12: 0.82||Fain! as superman, i should be invulnerable! 1 have unbreakabl skin! 
under my clark kent clothes, i'm wearing an indestructible superman uniform !\n", + "ResultOCRExtracted#block 13: 0.00||\n", + "ResultOCRExtracted#block 14: 0.67||Great caesars ghost! he's spinning _ a web of g/ant,\n", + "ResultOCRExtracted#block 15: 0.86||I-i feel the heat of the sun. the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! i've lost all my super-powers. ve ie an ordinary mortal in this world.\n", + "ResultOCRExtracted#block 16: 0.96||Get back! that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning’ g [\n", + "\n", + " ---------- Pad 8, fract. 0.5 ----------\n", + "ResultOCRExtracted#block 00: 0.91||Suddemly...\n", + "ResultOCRExtracted#block 01: 0.87||Gasp/z everything s w- whirling around me!i can't stand up.\n", + "ResultOCRExtracted#block 02: 0.84||Clark! i'm falling’ _ help! help!\n", + "ResultOCRExtracted#block 03: 0.78||I-i'm passing ohhhh.\n", + "ResultOCRExtracted#block 04: 0.92||Action comics\n", + "ResultOCRExtracted#block 05: 1.00||Then, seconds later...\n", + "ResultOCRExtracted#block 06: 0.90||Great caesar's ghost! f this /§ black magic! : we've been transported to the weirdest world i ever saw.\n", + "ResultOCRExtracted#block 07: 0.92||...it certainly isnt our earth, perry! look at the size of those bees.\n", + "ResultOCRExtracted#block 08: 0.88||Watch out, clark!\n", + "ResultOCRExtracted#block 09: 0.67||Owwww.,\n", + "ResultOCRExtracted#block 10: 0.99||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", + "ResultOCRExtracted#block 11: 0.96||Hurry! let's beat it before we get stung, too!\n", + "ResultOCRExtracted#block 12: 0.90||Ggreat guns!...2gasp/:...pain i feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! 
under my clark kent clothes, i'm wearing an /nodestruct/ible superman uniform !\n", + "ResultOCRExtracted#block 13: 0.96||Abruptly. ..\n", + "ResultOCRExtracted#block 14: 0.93||Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!\n", + "ResultOCRExtracted#block 15: 0.89||I-i feel the heat of the sun. the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! i've lost all my super-powers. ve become an ordinary mortal in this world.\n", + "ResultOCRExtracted#block 16: 0.95||Get back! that enormous spider- like creature is going berserk, 25 if the sight of us excited him into mad spinning g [\n", + "\n", + " ---------- Pad 8, fract. 0.2 ----------\n", + "ResultOCRExtracted#block 00: 1.00||Suddenly...\n", + "ResultOCRExtracted#block 01: 0.87||Gasp/z everything s w- whirling around me!i can't stand up.\n", + "ResultOCRExtracted#block 02: 0.84||Clark! i'm falling’ _ help! help!\n", + "ResultOCRExtracted#block 03: 0.73||I-i'm passing ohhha.\n", + "ResultOCRExtracted#block 04: 0.92||Action comics\n", + "ResultOCRExtracted#block 05: 1.00||Then, seconds later...\n", + "ResultOCRExtracted#block 06: 0.90||Great caesar's ghost! f this /§ black magic! : we've been transported to the weirdest world i ever saw!\n", + "ResultOCRExtracted#block 07: 0.92||...it certainly isnt our earth, perry! look at the size of those bees.\n", + "ResultOCRExtracted#block 08: 0.88||Watch out, clark!\n", + "ResultOCRExtracted#block 09: 0.67||Owwww.,\n", + "ResultOCRExtracted#block 10: 0.99||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", + "ResultOCRExtracted#block 11: 0.96||Hurry! let's beat it before we get stung, too!\n", + "ResultOCRExtracted#block 12: 0.91||Ggreat guns!...2gasp/:... pain i feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! 
under my clark kent clothes, i'm wearing an indestructible superman uniform !\n", + "ResultOCRExtracted#block 13: 0.96||Abruptly....\n", + "ResultOCRExtracted#block 14: 0.93||Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!\n", + "ResultOCRExtracted#block 15: 0.89||I-i feel the heat of the sun., the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! i've lost all my super-powers. ve become an ordinary mortal in this world.\n", + "ResultOCRExtracted#block 16: 0.96||Get back! that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning gs [\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "---------- Initial box ----------\n", + "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.90\u001b[0m||Suddenly.\n", + "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.81\u001b[0m||>gasp!z everything's w- whirling around me!t can't stand sr rea\n", + "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.81\u001b[0m||Clark!i'm falling' “help! help! —\n", + "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.67\u001b[0m||I-i'm bs \"passing ohhh\n", + "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", + "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m1.00\u001b[0m||Then, seconds later\u001b[33m...\u001b[0m\n", + "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.89\u001b[0m||Great caesars ghost! \u001b[1m{\u001b[0m this \u001b[35m/\u001b[0m\u001b[95ms\u001b[0m black magic! we've been transportel to the weirdest world, tit ever saw!\n", + "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.91\u001b[0m||\u001b[33m...\u001b[0m|t certainly isn't our earth, perry. 
look at the size of those bees.\n", + "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.73\u001b[0m||Owwww.\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.85\u001b[0m||Yet the bee's stinger went ws. right through my uniform and penetrated my skin! that means. the fabric of pe ay costume has become dj\u001b[1m}\u001b[0m satie fued \u001b[1;36m1\u001b[0m,\n", + "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.87\u001b[0m||Hurry. let's beat it before we get stung, foo = aii\n", + "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.86\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2g, .pa/n i feel n pain! as superman, i should be > invulnerable! \u001b[1;36m1\u001b[0m have unbreakabl ‘skin! under my clark kent clothes, im wearing an woestructisle superman uniform ! =\n", + "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m1.00\u001b[0m||Abruptly\u001b[33m...\u001b[0m\n", + "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.77\u001b[0m||Great caesar's ghost. he's spinning a web of g/ant, sr strands --\n", + "ResultOCR#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-i feel the heat of the sun\u001b[33m...\u001b[0mthe pain of the bee-sting \u001b[33m...\u001b[0m the heavy weight of my pack! every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all my super-powers! i've become an ordinary mortal in this world. \u001b[35m/\u001b[0m\n", + "ResultOCR#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.84\u001b[0m||Enormous spider- like creature is going berserk, as if the sight of us excited him \u001b[1m(\u001b[0minto mad spinning get back! that \u001b[1;36m2\u001b[0m\n", + "\n", + " ---------- Default ----------\n", + "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m1.00\u001b[0m||Suddenly\u001b[33m...\u001b[0m\n", + "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.80\u001b[0m||Gasp! 
everything's w- whi rns atound me!i can't stand we a\n", + "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.87\u001b[0m||Clark!i'm falling! help! help\n", + "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.45\u001b[0m||I-i'm se ou\n", + "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", + "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.95\u001b[0m||Then, seconds later.\n", + "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.92\u001b[0m||Great caesar's ghost! this \u001b[35m/\u001b[0m\u001b[95ms\u001b[0m black magic! we've been transported, to the weirdest world, i ever saw!\n", + "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.93\u001b[0m||\u001b[33m...\u001b[0m|t certainly isn't our earth, perry look at the size of those bees!\n", + "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark\u001b[1m)\u001b[0m\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.67\u001b[0m||Owwww.,\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.97\u001b[0m||Yet the bee's stinger went = right through my uniform ando penetrated my skin! that mean the fabric of my superman costume has become ordinary, cloth! £\n", + "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.89\u001b[0m||Hurry. let's beat it before we get stung, tod yi\n", + "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.88\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2gasp/z\u001b[33m...\u001b[0mpain i feel n fan! as superman, i should be invulnerable! \u001b[1;36m1\u001b[0m have unbreakable skin! under my clark kent clothes, i'm wearing an \u001b[35m/noestruct/\u001b[0m\u001b[95mele\u001b[0m superman uniform !\n", + "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m1.00\u001b[0m||Abruptly\u001b[33m...\u001b[0m\n", + "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.76\u001b[0m||Great caesars ° ghost! 
he's spinning & web of g/a, silk strands\n", + "ResultOCR#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-i feel the heat of the sum\u001b[33m...\u001b[0mthe pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my \u001b[1m{\u001b[0m pack! every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all my super -powersx ve become an ordinary mortal in this world! j\n", + "ResultOCR#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.70\u001b[0m||Like creature is going berserk, as if the sight of us excited him into mad spinning, get back! that \\ enormous spider-\n", + "\n", + " ---------- Default, grey pad ----------\n", + "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.78\u001b[0m||Sudden.\n", + "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.82\u001b[0m||5gasp!z everything's | w- whirling around\u001b[1m]\u001b[0m me!t can't stand ue a\n", + "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.57\u001b[0m||Ark!i'm falling\n", + "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.85\u001b[0m||I-t'm eq \"passing out\u001b[33m...\u001b[0m ohhh.\n", + "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", + "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", + "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.88\u001b[0m||‘great caesar's ghost! || this \u001b[35m/\u001b[0m§ black magic! we've been transportel to the weirdest world, i ever saw.\n", + "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.87\u001b[0m||\"\u001b[33m...\u001b[0mit certainly isn't our | earth, perry look at the| \\s|ze of those bees.\n", + "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.73\u001b[0m||Owwww.\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.90\u001b[0m||Yet the bee's stinger went ~ right through my uniform and penetrated my skin! that means. 
the fabric of sera ry costume vis become din a s clots! a\n", + "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.80\u001b[0m||Hurry. let's \u001b[1m)\u001b[0m beat it before\u001b[1m]\u001b[0m we get stung, k \u001b[1;36m00\u001b[0m! __j\n", + "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.86\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m3gasp/=\u001b[33m...\u001b[0m rain t feel fan! as superman, i should be invulnerable® \u001b[1;36m1\u001b[0m have unbreakabli skin! under my clark kent clothes, ia wearing an \u001b[35m/noestryct/\u001b[0m\u001b[95miele\u001b[0m supe iperman uniform !\n", + "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.90\u001b[0m||Abruptly.\n", + "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.92\u001b[0m||Great caesar's | ghost! he's spinning k web of giant, silk strands -- as tough as steel! ne\u001b[1m]\u001b[0m\n", + "ResultOCR#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||\\i-t feel the heat of the sun\u001b[33m...\u001b[0mthe pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my & pack! every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all my super-powers i've become an ordinary mortal in this world. \u001b[35m/\u001b[0m\u001b[95m.\u001b[0m\n", + "ResultOCR#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.94\u001b[0m||\u001b[1m(\u001b[0mget back! that enormous spider- like creature \u001b[1;36m1\u001b[0m § going berserk, as if the sight of us excited him into mad spinning\u001b[1m]\u001b[0m\n", + "\n", + " ---------- Padded 4px ----------\n", + "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.90\u001b[0m||Suddenly.\n", + "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.83\u001b[0m||Fgasp/= everything's whirling around me!i can't stand ea\n", + "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.68\u001b[0m||Ie lottie eto clark!i'm falling help! 
help!\n", + "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.55\u001b[0m||I-i'm yr a ohh hh.\n", + "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", + "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.74\u001b[0m||Then, seconds\n", + "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.62\u001b[0m||Great caesar's ghost! \\ this \u001b[35m/\u001b[0m§ black magic! i ever saw!\n", + "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.87\u001b[0m||T certainly isn't our earth, perry! look at the \\s|ze of those bees.\n", + "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.76\u001b[0m||#\u001b[1;36m3\u001b[0m watch out, clark!\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.73\u001b[0m||Owwww.\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.95\u001b[0m||\"yet the bee's stinger went ~~ right through my uniform and enetrated my skin! that means. the fabric of my superman costume has become ordinary, clot! &\n", + "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.86\u001b[0m||Hurry! let's beat it \u001b[33mb\u001b[0m=\u001b[35mfore\u001b[0m we get stung, ¢ tool: puma\n", + "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.86\u001b[0m||‘ggreat guns!\u001b[33m...\u001b[0m3gasp/\u001b[1;36m5\u001b[0m\u001b[33m...\u001b[0m pain i feel pain? as superman, i should be invilnerable | t ne unbreakasle gkin! under my clark kent clothes, tih wearing an indestructible superman uniform\n", + "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m1.00\u001b[0m||Abruptly\u001b[33m...\u001b[0m\n", + "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.91\u001b[0m||Great caesars ghost. he's spinning a web of g/ant, | silk strands as tough as steel!\n", + "ResultOCR#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-i feel the heat of the sun\u001b[33m...\u001b[0mthe pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my \u001b[1m{\u001b[0m pack! 
every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all \\my super-powersx ve become an ordinary mortal in this world.\n", + "ResultOCR#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.81\u001b[0m||\u001b[1m(\u001b[0mget back! that enormous spider- as if the sight of us excited him \u001b[1m[\u001b[0minto mad spinning\n", + "\n", + " ---------- Padded 8px ----------\n", + "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", + "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.76\u001b[0m||Ega5p/% everything s\\ w- whirling around me! t can't stand ue\u001b[33m...\u001b[0m slee\n", + "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.41\u001b[0m||\\ help! help’\n", + "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.86\u001b[0m||-i'm passing qut\u001b[33m...\u001b[0m ohh hh.\n", + "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", + "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", + "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.18\u001b[0m||I ever saw/\n", + "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.86\u001b[0m||It certainly isn't our earth, perry! poor a the size of thos!\n", + "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.62\u001b[0m||“watch out” ial ae clark!\n", + "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.73\u001b[0m||Owwww.\n", + "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.98\u001b[0m||Yet the bee's stinger went right through my uniform and \\penetrated my skin! that means. the fabric of my superman costume has become ordinar! cloth!\n", + "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.93\u001b[0m||Hurry. let's beat it before we get stung, too!\n", + "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.88\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2gasp/z\u001b[33m...\u001b[0mpain i feel fan! as superman, i should be invulnerable! \u001b[1;36m1\u001b[0m have unbreakabl skin! 
under my clark kent clothes, i'm wearing an \u001b[35m/noestruct/\u001b[0m\u001b[95mele\u001b[0m $ superman uniform ! &\n", + "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", + "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.91\u001b[0m||Great caesai ghost. he's spinning a web of g/ant, silk strands -- as tough as steel!\n", + "ResultOCR#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.88\u001b[0m||I-i feel the heat of the sun\u001b[33m...\u001b[0mthe pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my ! every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all \\my super-powers! ‘ve become an ordinary mortal in this world. x\n", + "ResultOCR#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.60\u001b[0m||As if the sight of us excited him \u001b[1m{\u001b[0minto mad spinning\n", + "\n", + " ---------- Extracted, init box ----------\n", + "ResultOCRExtracted#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.90\u001b[0m||Suddenly.\n", + "ResultOCRExtracted#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.83\u001b[0m||Fgasp!z everything § w- whirling around me!i can't stand ue.\n", + "ResultOCRExtracted#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.89\u001b[0m||Clark!i'm falling! help! help!\n", + "ResultOCRExtracted#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.71\u001b[0m||I-i'm passing ohhh?\n", + "ResultOCRExtracted#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", + "ResultOCRExtracted#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.98\u001b[0m||Then, seconds later..\n", + "ResultOCRExtracted#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.91\u001b[0m||Great caesar's ghost! this \u001b[35m/\u001b[0m\u001b[95ms\u001b[0m black magic! we've been transportel to the weirdest world tit ever saw.\n", + "ResultOCRExtracted#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.90\u001b[0m||\u001b[33m...\u001b[0mit certainly isnt our earth, perry. 
look at the size of those bees.\n", + "ResultOCRExtracted#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", + "ResultOCRExtracted#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.62\u001b[0m||Oowwwww.\n", + "ResultOCRExtracted#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.99\u001b[0m||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", + "ResultOCRExtracted#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.93\u001b[0m||Hurry. let's beat it before we get stung, too!\n", + "ResultOCRExtracted#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.88\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2g/ pain t feel pain? as superman, i should be invulnerable! \u001b[1;36m1\u001b[0m have unbreakabl skin! under my clark kent clothes, i'm wearing an \u001b[35m/noestruct/\u001b[0m\u001b[95mble\u001b[0m superman uniform *\n", + "ResultOCRExtracted#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.96\u001b[0m||Abruptly. ..\n", + "ResultOCRExtracted#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.80\u001b[0m||Great caesar's ghost. he's spinning a web of g/ant, silk strands --\n", + "ResultOCRExtracted#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-i feel the heat of the sun. the pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my pack! every human discomfort\u001b[33m...\u001b[0m good grief! t've lost all my super-powers. ve become an ordinary mortal im this world.\n", + "ResultOCRExtracted#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.97\u001b[0m||Get back. 
that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning\n", + "\n", + " ---------- Padded \u001b[1;36m4\u001b[0m, extracted ----------\n", + "ResultOCRExtracted#block \u001b[1;36m00\u001b[0m: \u001b[1;36m1.00\u001b[0m||Suddenly\u001b[33m...\u001b[0m\n", + "ResultOCRExtracted#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.83\u001b[0m||Sgasp/z everything s w- whirling around me!i can't stand ue.\n", + "ResultOCRExtracted#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.87\u001b[0m||Clark! i'm falling’ help! help!\n", + "ResultOCRExtracted#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.78\u001b[0m||I-i'm passing ohhhh.\n", + "ResultOCRExtracted#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", + "ResultOCRExtracted#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.98\u001b[0m||Then, seconds later..\n", + "ResultOCRExtracted#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.93\u001b[0m||Great caesar's ghost! this as black magic! we've been transported to the weirdest world i ever saw!\n", + "ResultOCRExtracted#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.90\u001b[0m||\u001b[33m...\u001b[0mit certainly isnt our earth, perry. look at the size of those bees.\n", + "ResultOCRExtracted#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", + "ResultOCRExtracted#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.67\u001b[0m||Owwww.,\n", + "ResultOCRExtracted#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.99\u001b[0m||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", + "ResultOCRExtracted#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.96\u001b[0m||Hurry! let's beat it before we get stung, too!\n", + "ResultOCRExtracted#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.89\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2gasp/:\u001b[33m...\u001b[0m pain t feel fan! as superman, i should be invulnerable! 
\u001b[1;36m1\u001b[0m have unbreakabl skin! under my clark kent clothes, i'm wearing an \u001b[35m/noestruct/\u001b[0m\u001b[95mble\u001b[0m superman uniform !\n", + "ResultOCRExtracted#block \u001b[1;36m13\u001b[0m: \u001b[1;36m1.00\u001b[0m||Abruptly\u001b[33m...\u001b[0m\n", + "ResultOCRExtracted#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.94\u001b[0m||Great caesar's ghost! he's spinning a web of g/ant, \"silk strands -- as tough as steel!\n", + "ResultOCRExtracted#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-i feel the heat of the sun., the pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my pack! every human discomfort\u001b[33m...\u001b[0m good grief! t've lost all my super-powers. ve become an ordinary mortal in this world.\n", + "ResultOCRExtracted#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.97\u001b[0m||Get back! that enormous spider- like creature is going berserk, \u001b[1;36m25\u001b[0m if the sight of us excited him into mad spinning\n", + "\n", + " ---------- Padded \u001b[1;36m8\u001b[0m, extracted ----------\n", + "ResultOCRExtracted#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.91\u001b[0m||Suddemly\u001b[33m...\u001b[0m\n", + "ResultOCRExtracted#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.85\u001b[0m||Sgasp/z everything s w- whirling around me!i can't stand up.\n", + "ResultOCRExtracted#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.84\u001b[0m||Clark! i'm falling’ _ help! help!\n", + "ResultOCRExtracted#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.73\u001b[0m||I-i'm passing ohhha.\n", + "ResultOCRExtracted#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", + "ResultOCRExtracted#block \u001b[1;36m05\u001b[0m: \u001b[1;36m1.00\u001b[0m||Then, seconds later\u001b[33m...\u001b[0m\n", + "ResultOCRExtracted#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.90\u001b[0m||Great caesar's ghost! f this \u001b[35m/\u001b[0m§ black magic! 
: we've been transported to the weirdest world i ever saw.\n", + "ResultOCRExtracted#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.90\u001b[0m||\u001b[33m...\u001b[0mit certainly isnt our earth, perry. look at the size of those bees.\n", + "ResultOCRExtracted#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", + "ResultOCRExtracted#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.67\u001b[0m||Owwww.,\n", + "ResultOCRExtracted#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.99\u001b[0m||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", + "ResultOCRExtracted#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.96\u001b[0m||Hurry! let's beat it before we get stung, too!\n", + "ResultOCRExtracted#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.89\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2gasp/:\u001b[33m...\u001b[0m pain t feel fan! as superman, i should be invulnerable! \u001b[1;36m1\u001b[0m have unbreakabl skin! under my clark kent clothes, i'm wearing an \u001b[35m/noestruct/\u001b[0m\u001b[95mble\u001b[0m superman uniform !\n", + "ResultOCRExtracted#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.96\u001b[0m||Abruptly. ..\n", + "ResultOCRExtracted#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.93\u001b[0m||Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!\n", + "ResultOCRExtracted#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-t feel the heat of the sun., the pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my pack! every human discomfort\u001b[33m...\u001b[0m good grief! t've lost all my super-powers. ve become an ordinary mortal in this world.\n", + "ResultOCRExtracted#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.95\u001b[0m||Get back! 
that enormous spider- like creature is going berserk, \u001b[1;36m25\u001b[0m if the sight of us excited him into mad spinning g \u001b[1m[\u001b[0m\n", + "\n", + " ---------- Padded \u001b[1;36m8\u001b[0m, dilation \u001b[1;36m1\u001b[0m ----------\n", + "ResultOCRExtracted#block \u001b[1;36m00\u001b[0m: \u001b[1;36m1.00\u001b[0m||Suddenly\u001b[33m...\u001b[0m\n", + "ResultOCRExtracted#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.83\u001b[0m||Gasp! everything s w-whirling around wel\u001b[1m]\u001b[0m i can't stand\n", + "ResultOCRExtracted#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.86\u001b[0m||Clark!i'm falling! . help! help!\n", + "ResultOCRExtracted#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.73\u001b[0m||I-i'm passing ohhha.\n", + "ResultOCRExtracted#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", + "ResultOCRExtracted#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.95\u001b[0m||Then, seconds later.\n", + "ResultOCRExtracted#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.91\u001b[0m||Great caesar's ghost! e this \u001b[35m/\u001b[0m\u001b[95m5\u001b[0m black magic! we've been transported to the weirdest world i ever saw/\n", + "ResultOCRExtracted#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.91\u001b[0m||\u001b[33m...\u001b[0m|it certainly isnt our earth, perry” look at the size of those bees!\n", + "ResultOCRExtracted#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", + "ResultOCRExtracted#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.73\u001b[0m||Owwww.\n", + "ResultOCRExtracted#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.94\u001b[0m||Yet the bee's stinger we right through my tform and penetrated my skin! that means. the fabric of my superman othe has become ordinary clot?\n", + "ResultOCRExtracted#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.96\u001b[0m||Hurry! let's beat it before we get stung, too!\n", + "ResultOCRExtracted#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.82\u001b[0m||Fain! 
as superman, i should be invulnerable! \u001b[1;36m1\u001b[0m have unbreakabl skin! under my clark kent clothes, i'm wearing an indestructible superman uniform !\n", + "ResultOCRExtracted#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", + "ResultOCRExtracted#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.67\u001b[0m||Great caesars ghost! he's spinning _ a web of g/ant,\n", + "ResultOCRExtracted#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.86\u001b[0m||I-i feel the heat of the sun. the pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my pack! every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all my super-powers. ve ie an ordinary mortal in this world.\n", + "ResultOCRExtracted#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.96\u001b[0m||Get back! that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning’ g \u001b[1m[\u001b[0m\n", + "\n", + " ---------- Pad \u001b[1;36m8\u001b[0m, fract. \u001b[1;36m0.5\u001b[0m ----------\n", + "ResultOCRExtracted#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.91\u001b[0m||Suddemly\u001b[33m...\u001b[0m\n", + "ResultOCRExtracted#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.87\u001b[0m||Gasp/z everything s w- whirling around me!i can't stand up.\n", + "ResultOCRExtracted#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.84\u001b[0m||Clark! i'm falling’ _ help! help!\n", + "ResultOCRExtracted#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.78\u001b[0m||I-i'm passing ohhhh.\n", + "ResultOCRExtracted#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", + "ResultOCRExtracted#block \u001b[1;36m05\u001b[0m: \u001b[1;36m1.00\u001b[0m||Then, seconds later\u001b[33m...\u001b[0m\n", + "ResultOCRExtracted#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.90\u001b[0m||Great caesar's ghost! f this \u001b[35m/\u001b[0m§ black magic! 
: we've been transported to the weirdest world i ever saw.\n", + "ResultOCRExtracted#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.92\u001b[0m||\u001b[33m...\u001b[0mit certainly isnt our earth, perry! look at the size of those bees.\n", + "ResultOCRExtracted#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", + "ResultOCRExtracted#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.67\u001b[0m||Owwww.,\n", + "ResultOCRExtracted#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.99\u001b[0m||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", + "ResultOCRExtracted#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.96\u001b[0m||Hurry! let's beat it before we get stung, too!\n", + "ResultOCRExtracted#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.90\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2gasp/:\u001b[33m...\u001b[0mpain i feel fan! as superman, i should be invulnerable! \u001b[1;36m1\u001b[0m have unbreakabl skin! under my clark kent clothes, i'm wearing an \u001b[35m/nodestruct/\u001b[0m\u001b[95mible\u001b[0m superman uniform !\n", + "ResultOCRExtracted#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.96\u001b[0m||Abruptly. ..\n", + "ResultOCRExtracted#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.93\u001b[0m||Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!\n", + "ResultOCRExtracted#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-i feel the heat of the sun. the pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my pack! every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all my super-powers. ve become an ordinary mortal in this world.\n", + "ResultOCRExtracted#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.95\u001b[0m||Get back! 
that enormous spider- like creature is going berserk, \u001b[1;36m25\u001b[0m if the sight of us excited him into mad spinning g \u001b[1m[\u001b[0m\n", + "\n", + " ---------- Pad \u001b[1;36m8\u001b[0m, fract. \u001b[1;36m0.2\u001b[0m ----------\n", + "ResultOCRExtracted#block \u001b[1;36m00\u001b[0m: \u001b[1;36m1.00\u001b[0m||Suddenly\u001b[33m...\u001b[0m\n", + "ResultOCRExtracted#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.87\u001b[0m||Gasp/z everything s w- whirling around me!i can't stand up.\n", + "ResultOCRExtracted#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.84\u001b[0m||Clark! i'm falling’ _ help! help!\n", + "ResultOCRExtracted#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.73\u001b[0m||I-i'm passing ohhha.\n", + "ResultOCRExtracted#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", + "ResultOCRExtracted#block \u001b[1;36m05\u001b[0m: \u001b[1;36m1.00\u001b[0m||Then, seconds later\u001b[33m...\u001b[0m\n", + "ResultOCRExtracted#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.90\u001b[0m||Great caesar's ghost! f this \u001b[35m/\u001b[0m§ black magic! : we've been transported to the weirdest world i ever saw!\n", + "ResultOCRExtracted#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.92\u001b[0m||\u001b[33m...\u001b[0mit certainly isnt our earth, perry! look at the size of those bees.\n", + "ResultOCRExtracted#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", + "ResultOCRExtracted#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.67\u001b[0m||Owwww.,\n", + "ResultOCRExtracted#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.99\u001b[0m||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", + "ResultOCRExtracted#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.96\u001b[0m||Hurry! 
let's beat it before we get stung, too!\n", + "ResultOCRExtracted#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.91\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2gasp/:\u001b[33m...\u001b[0m pain i feel fan! as superman, i should be invulnerable! \u001b[1;36m1\u001b[0m have unbreakabl skin! under my clark kent clothes, i'm wearing an indestructible superman uniform !\n", + "ResultOCRExtracted#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.96\u001b[0m||Abruptly\u001b[33m...\u001b[0m.\n", + "ResultOCRExtracted#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.93\u001b[0m||Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!\n", + "ResultOCRExtracted#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-i feel the heat of the sun., the pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my pack! every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all my super-powers. ve become an ordinary mortal in this world.\n", + "ResultOCRExtracted#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.96\u001b[0m||Get back! that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning gs \u001b[1m[\u001b[0m\n", + "\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_action_experiment: ExperimentOCR = cast(ExperimentOCR, ExperimentOCR.from_image(\n", + " CONTEXT, 'Tesseract', 'Action_Comics_1960-01-00_(262).JPG'))\n", + "image_action_experiment.display()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_action_experiment.plot_accuracies()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Great caesars ghost. he's spinning a web of g/ant, | silk strands as tough as steel!
0.91
\n", + "
\n", + "
Great Caesar's Ghost! He's spinning a web of giant,⎕⎕ silk strands-- as tough as steel!

Great caesars ghost. he's spinning a web of g/ant,⎕| silk strands⎕⎕ as tough as steel!
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_action_experiment.result(14, CropMethod.PADDED_4)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_action_experiment.perform_methods(plot_acc=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Everything Everywhere All at Once\n" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [], + "source": [ + "CONTEXT.reset()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ExperimentsVisor" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class ExperimentsVisor(ContextVisor):\n", + " ctx: OCRExperimentContext\n", + "\n", + " def update_output(self, \n", + " model: OCRModel | None = None,\n", + " image_idx: ImgIdT | None = None,\n", + " display_option: DisplayOptions | None = None, \n", + " **kwargs):\n", + " model_selector, image_selector, content_selector, result_visor = self._comps()\n", + " if model is not None:\n", + " exp_ctx = result_visor.ctx\n", + " exp_ctx.ocr_model = list(model_selector.models.keys())[model.value]\n", + " result_visor.ctx = exp_ctx\n", + " if image_idx is not None:\n", + " img_ctx = ImageContext(self.ctx, image_idx)\n", + " result_visor.ctx.ctx = img_ctx\n", + " display_option = content_selector.values['display_option']\n", + " if display_option is not None and display_option != DisplayOptions.RESULTS:\n", + " result_visor.hide()\n", + " if display_option == DisplayOptions.BEST_RESULTS:\n", + " result_visor.best_results()\n", + " elif display_option == DisplayOptions.DATAFRAME:\n", + " result_visor.pd_to_html()\n", + " else:\n", + " content_selector.display_content(image_selector.image_ctx, display_option)\n", + " else:\n", + " result_visor.show()\n", + " result_visor.update_output(**kwargs)\n", + "\n", + " def _comps(self):\n", + " cc = self.comps\n", + " msel: ModelSelector = cc['model_selector'] # type: ignore\n", + " isel: ImageSelector = 
cc['image_selector'] # type: ignore\n", + " cs: ContentSelector = cc['content_selector'] # type: ignore\n", + " rv: ResultVisor = cc['result_visor'] # type: ignore\n", + " return msel, isel, cs, rv\n", + "\n", + " def setup_ui(self):\n", + " ctls = self.controls.values()\n", + " msw, isw, csw, rvw = [_.w for _ in self._comps()]\n", + " return W.VBox([W.HBox([msw, isw, csw, *ctls]), rvw,])\n", + "\n", + " def __init__(self, \n", + " ctx: OCRExperimentContext,\n", + " image_idx: ImgIdT | str | Path = 0,\n", + " ocr_model: OCRModel = OCRModel.TESSERACT,\n", + " display_option: DisplayOptions = DisplayOptions.RESULTS,\n", + " all_boxes: bool = False,\n", + " box_idx: int = 0,\n", + " all_methods: bool = False,\n", + " method: CropMethod=CropMethod.INITIAL_BOX,\n", + " ocr_models: dict[str, OCRModel] = {'Tesseract': OCRModel.TESSERACT},\n", + " out: W.Output | None = None,\n", + " ):\n", + " if not isinstance(ctx, OCRExperimentContext):\n", + " raise ValueError(\"ctx must be an OCRExperimentContext\")\n", + " exp = ExperimentOCR.from_image(ctx, 'Tesseract', image_idx)\n", + " if not exp:\n", + " raise ValueError(f\"Image {image_idx} not found in experiment context\")\n", + " \n", + " out = out or self.out\n", + " model_selector = ModelSelector(ctx, ocr_model=ocr_model, \n", + " ocr_models=ocr_models, out=out)\n", + " image_selector = ImageSelector(ctx, image_idx=image_idx, out=out)\n", + " content_selector = ContentSelector(ctx, display_option=display_option, out=out)\n", + " result_visor = ResultVisor(exp, out=out,\n", + " all_boxes=all_boxes, box_idx=box_idx, all_methods=all_methods, method=method)\n", + "\n", + " super().__init__(ctx, {}, out=out, \n", + " ctxs={'model_selector': model_selector, 'image_selector': image_selector, 'content_selector': content_selector, \n", + " 'result_visor': result_visor}\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualize all" + ] + }, + { + "cell_type": "code", + "execution_count": 
122, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c722001cdbcf444da9b56a366c6723e1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HBox(children=(HBox(children=(HBox(children=(Dropdown(layout=Layout(width='fit-content'), optio…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8a95135fcb03495f94fb726416f3df54", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# tesseract_experiment = ExperimentsVisor(CONTEXT)\n", + "tesseract_experiment = ExperimentsVisor(CONTEXT, BASE_IMAGE_IDX)\n", + "\n", + "test_eq(tesseract_experiment.all_values, {\n", + " 'image_selector': {'image_idx': 20},\n", + " 'content_selector': {'display_option': DisplayOptions.RESULTS},\n", + " 'result_visor': {\n", + " 'all_boxes': False,\n", + " 'box_idx': 0,\n", + " 'all_methods': False,\n", + " 'method': CropMethod.INITIAL_BOX,\n", + " },\n", + " 'model_selector': {'model': OCRModel.TESSERACT},\n", + " 'self': {}\n", + "})\n", + "\n", + "tesseract_experiment\n" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [], + "source": [ + "tesseract_experiment.update(display_option=DisplayOptions.BOXES)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Colophon\n", + "----\n" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [], + "source": [ + "import fastcore.all as FC\n", + "from nbdev.export import nb_export\n" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [], + "source": [ + "if FC.IN_NOTEBOOK:\n", + " nb_export('experiments.ipynb', '.')\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + 
"language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/_testbed/experiments.py b/_testbed/experiments.py new file mode 100644 index 00000000..27d56ba6 --- /dev/null +++ b/_testbed/experiments.py @@ -0,0 +1,1995 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: experiments.ipynb. + +# %% experiments.ipynb 7 +from __future__ import annotations + +import dataclasses +import difflib +import functools +import json +import shutil +from collections import defaultdict +from enum import Enum +from pathlib import Path +from typing import Any +from typing import Callable +from typing import cast +from typing import Mapping +from typing import Self +from typing import TypeAlias + +import fastcore.all as FC +import ipywidgets as W +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import pcleaner.config as cfg +import pcleaner.ctd_interface as ctm +import pcleaner.image_ops as ops +import pcleaner.ocr.ocr as ocr +import pcleaner.structures as st +import torch +from IPython.display import clear_output +from IPython.display import display +from IPython.display import HTML +from ipywidgets.widgets.interaction import show_inline_matplotlib_plots +from loguru import logger +from pcleaner.ocr.ocr_tesseract import TesseractOcr +from PIL import Image +from PIL import ImageFilter +from rich.console import Console +from tqdm.notebook import tqdm + + +# %% auto 0 +__all__ = ['CM', 'SubjIdT', 'ImgIdT', 'BoxIdT', 'ImgSpecT', 'remove_multiple_whitespaces', 'postprocess_ocr', + 'accuracy_ocr_naive', 'accuracy_ocr_difflib', 'ground_truth_path', 'read_ground_truth', + 'dilate_by_fractional_pixel', 'extract_text', 'lang2pcleaner', 'lang2tesseract', 'ResultOCR', + 
# %% experiments.ipynb 20
def remove_multiple_whitespaces(text):
    "Collapse every run of whitespace (spaces, tabs, line breaks) into one space and trim both ends."
    return ' '.join(text.split())


def postprocess_ocr(text):
    "Basic postprocessing for English Tesseract OCR results."
    # `str.split()` already consumes every line-break character, so the collapsed
    # text is a single line; only the capitalisation step is left to do.
    return remove_multiple_whitespaces(text).capitalize()


def accuracy_ocr_naive(text, ground_truth):
    """
    Position-by-position accuracy of `text` against `ground_truth`.

    Counts the positions (up to the shorter of the two strings) where both
    strings hold the same character and divides by `len(text)`.

    :param text: The OCR-generated text.
    :param ground_truth: The ground truth text.
    :return: A float in [0, 1]. For empty `text` the result is 1.0 when the
             ground truth is empty as well, 0.0 otherwise.
    """
    if not text:
        # Empty OCR output used to raise ZeroDivisionError.
        return 0.0 if ground_truth else 1.0
    matches = sum(1 for a, b in zip(text, ground_truth) if a == b)
    # NOTE: the denominator is the OCR length, so a correct but truncated OCR
    # result still scores 1.0; `accuracy_ocr_difflib` does not have this bias.
    return matches / len(text)
def accuracy_ocr_difflib(text, ground_truth):
    """
    Calculates the OCR accuracy based on the similarity between the OCR text and the ground truth text,
    using difflib's SequenceMatcher to account for differences in a manner similar to git diffs.

    :param text: The OCR-generated text.
    :param ground_truth: The ground truth text.
    :return: A float representing the similarity ratio between the OCR text and the ground truth,
             where 1.0 is identical.
    """
    return difflib.SequenceMatcher(None, text, ground_truth).ratio()


# %% experiments.ipynb 22
def ground_truth_path(page_data: st.PageData):
    "Path of the ground-truth file: `<original image stem>_gt.txt`, next to the original image."
    path = Path(page_data.original_path)
    return path.with_stem(path.stem + '_gt').with_suffix('.txt')


def read_ground_truth(page_data: st.PageData):
    """
    Read the ground-truth lines of a page, one entry per text box.

    :param page_data: Page whose `original_path` locates the ground-truth file and
                      whose `boxes` define how many entries are expected.
    :return: A list of strings. Missing entries (no file, or a file with fewer lines
             than boxes) are filled with empty strings so that indexing by box never fails.
    """
    gts_path = ground_truth_path(page_data)
    gts = gts_path.read_text(encoding="utf-8").splitlines() if gts_path.exists() else []
    # Pad short files: editors commonly strip trailing blank lines, which would
    # otherwise surface later as an IndexError when a result looks up its box.
    gts += [""] * (len(page_data.boxes) - len(gts))
    return gts


# %% experiments.ipynb 24
def dilate_by_fractional_pixel(image, dilation_fraction, filter_base_size=3):
    """
    Dilates an image by a specified fractional pixel amount. The function calculates
    the necessary scaling factor and filter size based on the desired dilation fraction.

    :param image: A PIL Image object (1-bit mode).
    :param dilation_fraction: The desired fractional pixel amount for dilation (e.g., 0.2).
                              Must be in the interval (0, 1].
    :param filter_base_size: The base size of the dilation filter to apply on the scaled image.
                             This size is adjusted based on the scaling factor to achieve the
                             desired dilation effect.
    :return: A PIL Image object after dilation, in grayscale ("L") mode.
    :raises ValueError: If `dilation_fraction` is outside (0, 1].
    """
    if not 0 < dilation_fraction <= 1:
        # 0 would divide by zero and values above 1 would give a scale factor of 0.
        raise ValueError(f"dilation_fraction must be in (0, 1], got {dilation_fraction}")

    # Calculate the scale factor based on the desired dilation fraction
    scale_factor = int(1 / dilation_fraction)

    # Adjust the filter size based on the scale factor
    # This ensures the dilation effect is proportional to the desired fraction
    filter_size = max(1, filter_base_size * scale_factor // 5)
    # Pillow rank filters only accept odd kernel sizes; round even sizes up
    # (the sizes produced for fractions 0.5 and 0.2 are already odd).
    filter_size |= 1

    # Convert the image to grayscale for more nuanced intermediate values
    image_gray = image.convert("L")

    # Resize the image to a larger size using bicubic interpolation
    larger_size = (int(image.width * scale_factor), int(image.height * scale_factor))
    image_resized = image_gray.resize(larger_size, Image.BICUBIC)

    # Apply the dilation filter to the resized image
    dilated_image = image_resized.filter(ImageFilter.MaxFilter(filter_size))

    # Resize the image back to its original size using bicubic interpolation
    return dilated_image.resize(image.size, Image.BICUBIC)


# %% experiments.ipynb 25
def extract_text(image, text_mask, box):
    "Crop `image` and `text_mask` to `box` and return `(cropped_image, cropped_mask, extracted)`."
    cropped_image = crop_box(box, image)
    cropped_mask = crop_box(box, text_mask)
    extracted = ops.extract_text(cropped_image, cropped_mask)
    return cropped_image, cropped_mask, extracted


# %% experiments.ipynb 27
# Page language name -> PanelCleaner detected-language enum.
_lang2pcleaner = {'English': st.DetectedLang.ENG, 'Japanese': st.DetectedLang.JA, 'Spanish': st.DetectedLang.ENG,
                  'French':st.DetectedLang.ENG}
# _lang2tesseract = {'English': 'eng', 'Japanese': 'jpn'}
# Page language name -> Tesseract language code.
_lang2tesseract = {'English': 'eng', 'Japanese': 'jpn_vert', 'Spanish': 'spa', 'French': 'fra'}


# %% experiments.ipynb 28
def lang2pcleaner(lang: str):
    "PanelCleaner language for a page language name; raises `KeyError` for unknown names."
    return _lang2pcleaner[lang]

def lang2tesseract(lang: str):
    "Tesseract language code for a page language name; raises `KeyError` for unknown names."
    return _lang2tesseract[lang]
self.image is None: + cache_path = self.cache_path() + if cache_path.exists(): + self.image = Image.open(cache_path) + + @property + def acc(self): + self._acc = accuracy_ocr_difflib(self.ocr, self.gts[self.block_idx]) + return self._acc + @property + def suffix(self): return f"{self.block_idx}_{self.description}" + + def diff_tagged(self): + _, html2 = get_text_diffs_html(self.gts[self.block_idx], self.ocr, False) + return f"{html2}" + + def cache_path(self, suffix: str | None = None): + suffix = self.suffix + (('_'+suffix) if suffix else '') + parent = Path(self.page_data.image_path).parent + img_name = Path(self.page_data.original_path).stem + box_image_path = parent / f"{img_name}_{suffix}.png" + return box_image_path + + def cache_image(self, image: Image.Image | None = None, suffix: str | None = None): + image = image or (self.image if not suffix else None) + box_image_path = self.cache_path(suffix) + if image and not box_image_path.exists(): + image.save(box_image_path) + return box_image_path + + + def as_html(self): + acc_html = f"
{self.acc:.2f}" + box_image_path = self.cache_image() + html1 = get_columns_html([[box_image_path], [self.ocr + acc_html]]) + html_str1, html_str2 = get_text_diffs_html(self.gts[self.block_idx], self.ocr) + html2 = f"
{html_str1}
{html_str2}
" + return html1 + '\n
\n' + html2 + + def __repr__(self): + return f"{type(self).__name__}#block {self.block_idx:02}: {self.acc:.2f}||{self.ocr}" + + def display(self): display(HTML(self.as_html())) + + def _ipython_display_(self): self.display() + + def to_dict(self): + d = dataclasses.asdict(self) + d['image'] = d['page_data'] = d['gts'] = None + return d + + # @classmethod + # def from_dict(cls, d: dict, page_data: st.PageData, gts: list[str]): + # return cls(**(d | {'page_data':page_data, 'gts':gts})) + + +@dataclasses.dataclass +class ResultOCRExtracted(ResultOCR): + + def __repr__(self): return super().__repr__() + def as_html(self): + html_str1, html_str2 = get_text_diffs_html(self.gts[self.block_idx], self.ocr) + diff_html = f"
{html_str1}
{html_str2}
" + cropped_image_path = self.cache_image(None, "cropped") + cropped_mask_path = self.cache_image(None, "mask") + result_path = self.cache_image() + return '\n
# %% experiments.ipynb 37
class CropMethod(Enum):
    "Cropping / text-extraction strategies compared by the OCR experiments; values are display labels."
    INITIAL_BOX = 'Initial box'
    DEFAULT = 'Default'
    DEFAULT_GREY_PAD = 'Default, grey pad'
    PADDED_4 = 'Padded 4px'
    PADDED_8 = 'Padded 8px'
    EXTRACTED_INIT_BOX = 'Extracted, init box'
    PADDED_4_EXTRACTED = 'Padded 4, extracted'
    PADDED_8_EXTRACTED = 'Padded 8, extracted'
    PADDED_8_DILATION_1 = 'Padded 8, dilation 1'
    PAD_8_FRACT_0_5 = 'Pad 8, fract. 0.5'
    PAD_8_FRACT_0_2 = 'Pad 8, fract. 0.2'

    @classmethod
    def __display_names__(cls):
        "Mapping from display label to enum member, in declaration order."
        return {member.value: member for member in cls}


CM = CropMethod

# Methods that crop the page image directly.
_IMAGE_METHODS = [CM.INITIAL_BOX, CM.DEFAULT, CM.DEFAULT_GREY_PAD,
                  CM.PADDED_4, CM.PADDED_8]
# Methods that extract the text through a (possibly dilated) mask.
_EXTRACTED_METHODS = [CM.EXTRACTED_INIT_BOX, CM.PADDED_4_EXTRACTED,
                      CM.PADDED_8_EXTRACTED, CM.PADDED_8_DILATION_1,
                      CM.PAD_8_FRACT_0_5, CM.PAD_8_FRACT_0_2]


def crop_by_image(method: CM,
                  box: st.Box,
                  base: Image.Image,
                  preproc: cfg.PreprocessorConfig,
                  ):
    "Crop `base` around `box` according to an image-based `method`; returns `None` for any other method."
    if method == CM.INITIAL_BOX:
        return crop_box(box, base)
    if method == CM.DEFAULT:
        # PanelCleaner's default: initial padding plus extra padding on the right.
        default_box = box.pad(preproc.box_padding_initial, base.size)
        default_box = default_box.right_pad(preproc.box_right_padding_initial, base.size)
        return crop_box(default_box, base)
    if method == CM.DEFAULT_GREY_PAD:
        return ops.pad_image(crop_box(box, base), 8, fill_color=(128, 128, 128))
    if method == CM.PADDED_4:
        return crop_box(box.pad(4, base.size), base)
    if method == CM.PADDED_8:
        return crop_box(box.pad(8, base.size), base)
    return None


def crop_by_extracted(method: CM,
                      box: st.Box,
                      base: Image.Image,
                      mask: Image.Image,
                      cropped_image_path: Path,
                      cropped_mask_path: Path,
                      dilated: dict[float, Image.Image]
                      ):
    """
    Extract the text of `box` through a mask according to an extraction-based `method`.

    Returns `(image, cropped_image, cropped_mask)`. All three are `None` when `method`
    is not an extraction method or when both cached crop files already exist.
    """
    nothing = (None, None, None)
    if method not in _EXTRACTED_METHODS:
        return nothing
    if cropped_image_path.exists() and cropped_mask_path.exists():
        return nothing

    # method -> (box padding in px or None for the raw box, key into `dilated` or None for the plain mask)
    recipes = {
        CM.EXTRACTED_INIT_BOX: (None, None),
        CM.PADDED_4_EXTRACTED: (4, None),
        CM.PADDED_8_EXTRACTED: (8, None),
        CM.PADDED_8_DILATION_1: (8, 1),
        CM.PAD_8_FRACT_0_5: (8, 0.5),
        CM.PAD_8_FRACT_0_2: (8, 0.2),
    }
    padding, dilation = recipes[method]
    target_box = box if padding is None else box.pad(padding, base.size)
    # Only the dilated mask that is actually needed gets looked up.
    text_mask = mask if dilation is None else dilated[dilation]
    cropped_image, cropped_mask, image = extract_text(base, text_mask, target_box)
    return image, cropped_image, cropped_mask



# %% experiments.ipynb 39
SubjIdT: TypeAlias = int
ImgIdT = SubjIdT
BoxIdT: TypeAlias = int

class ResultSet(dict[BoxIdT, dict[CropMethod, ResultOCR]]):
    "OCR results indexed by box id, then by crop method."

class ResultSetDefault(defaultdict[BoxIdT, dict[CropMethod, ResultOCR]]):
    "`defaultdict` flavour of `ResultSet`."
def results_to_dict(results: ResultSet) -> dict[BoxIdT, dict[str, str]]:
    "Flatten results into `{box: {method name: ocr text}}`; boxes without any result are left out."
    return {
        box_idx: {method.name: result.ocr for method, result in per_method.items()}
        for box_idx, per_method in results.items()
        if per_method
    }

def dict_to_results(
        image_idx: ImgIdT,
        results_dict: dict[BoxIdT, dict[str, str]],
        result_factory: Callable
    ) -> ResultSetDefault:
    """
    Rebuild a result set from the plain dict produced by `results_to_dict`.

    Box keys are converted with `int()`, method names are looked up in `CropMethod`, and each
    result object is created by `result_factory(image_idx, box_idx, method, ocr)`.
    """
    rebuilt = ResultSetDefault(dict[CropMethod, ResultOCR])
    for raw_box_idx, ocr_by_method_name in results_dict.items():
        box_idx = int(raw_box_idx)
        for method_name, ocr_text in ocr_by_method_name.items():
            crop_method = CM[method_name]
            rebuilt[box_idx][crop_method] = result_factory(image_idx, box_idx, crop_method, ocr_text)
    return rebuilt



# %% experiments.ipynb 41
# class ExperimentSubject(Protocol):
#     @property
#     def exp(self) -> 'ExperimentContext': ...
#     @property
#     def idx(self) -> SubjIdT: ...
#     def setup(self,
#             exp: 'ExperimentContext',
#             idx: Any,
#             *args, **kwargs
#         ): ...


# class ExperimentContext(Protocol):
#     def subject_factory(self) -> Callable[..., ExperimentSubject]: ...
#     def normalize_idx(self, idx: Any) -> SubjIdT: ...
#     def experiment_subject(self, idx: Any, /,
#             create: bool = False, *args, **kwargs) -> ExperimentSubject | None:
#         """Get or create an `ExperimentSubject` for the given identifier.
#         Returns `None` if `idx` is out of domain range.
#         """
#         ...
# %% experiments.ipynb 42
class ExperimentSubject:
    """
    One subject (e.g. an image) of an experiment, cached per index by its `ExperimentContext`.

    Instantiating the class returns the subject already cached for `idx`, or creates,
    sets up and caches a new one.
    """
    exp: ExperimentContext
    idx: SubjIdT

    def setup(self, exp: ExperimentContext, idx: Any, *args, **kwargs):
        "Bind the subject to its context and normalized index. Returns `self`."
        self.exp = exp
        self.idx = cast(SubjIdT, exp.normalize_idx(idx))
        return self

    def __new__(cls,
            exp: ExperimentContext,
            idx: Any,
            *args, **kwargs):
        self = exp.experiment_subject(idx)
        if self is None:
            self = super().__new__(cls)
            # `new_subject` is passed positionally: as a keyword it would collide
            # with the first element of `*args` and raise TypeError.
            self = exp.experiment_subject(idx, self, *args, **kwargs)
            if self is None:
                raise ValueError(f"Can't create new subject with idx: {idx}: out of range")
        return self


class ExperimentContext:
    "Class to maintain shared state across all file-based experiments within the experiment domain."

    subject_cls: Callable[..., ExperimentSubject]
    def subject_factory(self) -> Callable[..., ExperimentSubject]: return type(self).subject_cls

    def normalize_idx(self, idx: int | str | Path) -> SubjIdT | None:
        """
        Translate a subject specification into an index of the context's path list.

        :param idx: An integer index (negative values count from the end), a file name,
                    or a path (absolute, relative to the current directory, or already
                    relative to the context root).
        :return: The index, or `None` when `idx` does not designate a known subject.
        """
        count = len(self._paths)
        if isinstance(idx, int):
            # Reject both ends of the range so callers never hit an IndexError.
            return idx if -count <= idx < count else None
        if isinstance(idx, str):
            names = [p.name for p in self._paths]
            return names.index(idx) if idx in names else None
        if isinstance(idx, Path):
            candidates = []
            try:
                # Paths are stored relative to the root (see __init__), so the
                # resolved path has to be re-based before it can match.
                candidates.append(idx.resolve().relative_to(self._root))
            except ValueError:
                pass  # outside the root
            candidates.append(idx)
            for candidate in candidates:
                if candidate in self._paths:
                    return self._paths.index(candidate)
        return None

    def path_from_idx(self, idx: int | str | Path):
        """
        Return the stored (root-relative) path of a subject.

        :raises ValueError: If `idx` is unknown or the path does not exist.
        """
        _idx = self.normalize_idx(idx)
        if _idx is None:
            raise ValueError(f"{idx} not found in context.")
        path = Path(self._paths[_idx])
        # NOTE(review): existence is checked as-is, i.e. relative to the current
        # working directory, not to the context root - confirm this is intended.
        if not path.exists():
            raise ValueError(f"{path} not found in context.")
        return path

    @property
    def count(self): return len(self._paths)
    @property
    def cache_dir(self): return Path(".cache/")
    # NOTE: the cache is shared by all instances and keeps them alive; entries are
    # dropped by `reset()` and `empty_cache()`.
    @functools.lru_cache()
    def _cache_dir(self, idx: SubjIdT):
        # create one folder for each image to cache and save results
        path = self.path_from_idx(idx)
        cache_dir = self.cache_dir / path.stem
        cache_dir.mkdir(parents=True, exist_ok=True)
        return cache_dir
    def subject_cache_dir(self, idx: int | str | Path):
        "Cache directory of a subject; created on first use."
        return self._cache_dir(idx)

    def empty_cache(self, idx: SubjIdT | None = None):
        "Delete the cached files of one subject, or the whole cache when `idx` is `None`."
        cache_dir = self.cache_dir
        if idx is None:
            shutil.rmtree(cache_dir, ignore_errors=True)
            cache_dir.mkdir(parents=True, exist_ok=True)
        else:
            path = Path(self._paths[idx])
            cache_dir = cache_dir / path.stem
            if cache_dir.is_dir():
                for p in cache_dir.glob("*"):
                    # Sub-directories cannot be unlinked; leave them in place.
                    if p.is_file() or p.is_symlink():
                        p.unlink(missing_ok=True)
                if not any(cache_dir.iterdir()):
                    cache_dir.rmdir()
        # Directories were removed: forget memoized results so that
        # `subject_cache_dir` creates them again on next use.
        self._cache_dir.cache_clear()

    def empty_cache_warn(self, idx: SubjIdT | None=None, *, warn: bool=True, out: W.Output | None=None):
        """
        Empty the cache, asking for confirmation first when running in a notebook.

        :param idx: Subject whose cache is emptied; `None` empties everything.
        :param warn: When false, the cache is emptied without asking.
        :param out: Output widget receiving the prompt and messages; a new one is created if omitted.
        """
        confirmation_box = None

        def close_prompt():
            # There is no prompt when the cache is emptied without confirmation.
            if confirmation_box is not None:
                for widget in confirmation_box.children:
                    widget.close()

        def on_confirm_clicked(b):
            try:
                self.empty_cache(idx)
                print("Cache cleared successfully.")
            except Exception as e:
                print(f"Failed to clear cache: {e}")
            finally:
                close_prompt()

        def on_cancel_clicked(b):
            print("Cache clear cancelled.")
            close_prompt()

        if out is None:
            out = W.Output()
        with out:
            if warn and FC.IN_NOTEBOOK:
                confirm_button = W.Button(description="Confirm")
                cancel_button = W.Button(description="Cancel")
                confirm_button.on_click(on_confirm_clicked)
                cancel_button.on_click(on_cancel_clicked)
                label = W.Label('Are you sure you want to clear the cache? This action cannot be undone.')
                confirmation_box = W.VBox([label, W.HBox([confirm_button, cancel_button])])
                display(confirmation_box)
            else:
                on_confirm_clicked(None)

    def experiment_subject(self, idx: SubjIdT | str | Path, /,
            new_subject: ExperimentSubject | None = None, *args, **kwargs) -> ExperimentSubject | None:
        "Cached subject. If provided, `new_subject` replaces value at the index."
        if (nidx := self.normalize_idx(idx)) is None:
            return None
        if new_subject is None:
            subject = self._subjects.get(nidx)
        else:
            new_subject.setup(self, nidx, *args, **kwargs)
            self._subjects[nidx] = subject = new_subject
        return subject

    def reset(self):
        "Forget all cached subjects and memoized cache directories."
        self._subjects.clear()
        self._cache_dir.cache_clear()

    def __init__(self, paths: list[Path], root: Path | None = None):
        self._root = (root or Path('.')).resolve()
        # Subject paths are stored relative to the root.
        self._paths = [p.resolve().relative_to(self._root) for p in paths]
        self._subjects: dict[SubjIdT, ExperimentSubject] = {}
# %% experiments.ipynb 48
ImgSpecT: TypeAlias = ImgIdT | str | Path

class ImageContext(ExperimentSubject):
    """
    A utility class to maintain image state for an ExperimentContext.
    This class encapsulates state necessary for conducting OCR experiments.

    Attributes:
        json_data (dict): JSON data loaded from cached files.
        page_data (st.PageData): PanelCleaner page data.
        base_image (Image.Image): The base image loaded from the page data.
        mask (Image.Image): The mask image used for text detection.

    Lazily computed values:
        page_lang: Language of the page, read from (or written to) the `.json` file
            next to the original image; defaults to English.
        gts: Ground truth text of the page, one entry per box.
        dilated(): The text mask dilated by 1, 0.5 and 0.2 pixels, keyed by amount.
    """
    exp: ExperimentContext
    idx: ImgIdT
    base_image: Image.Image
    mask: Image.Image
    json_data: dict | None
    page_data: st.PageData
    # ocr_model: str
    # mocr: ocr.OCRModel
    # postprocess_ocr: Callable[..., str]
    _page_lang: str
    _gts: list[str]
    _mask_dilated1: Image.Image | None
    _mask_dilated05: Image.Image | None
    _mask_dilated02: Image.Image | None


    # # these methods will be set downstream, declared here to make the type checker happy
    # def result(self: Self,
    #         box_idx: int, method: CropMethod, ocr: bool = True, reset: bool=False) -> ResultOCR: ...
    # def summary_box(self: Self, box_idx: int): ...

    def to_dict(self):
        return {
            'image_idx': self.idx,
            'page_lang': self.page_lang,
        }

    @property
    def image_idx(self): return self.idx
    @property
    def cache_dir(self):
        return self.exp.subject_cache_dir(self.idx)
    cache_dir_image = cache_dir

    @property
    def image_info(self):
        "`((width, height), print size in inches, print size in cm, required dpi)` of the base image."
        img = self.base_image
        w, h = img.size
        print_size_in = size(w, h, 'in', 300)
        print_size_cm = size(w, h, 'cm', 300)
        required_dpi = dpi(w, h, 'Modern Age')
        return (w, h), print_size_in, print_size_cm, required_dpi

    @property
    def original_image_path(self): return Path(self.page_data.original_path)
    @property
    def image_path(self): return Path(self.page_data.image_path)
    @property
    def image_name(self): return self.original_image_path.name
    @property
    def image_size(self): return self.base_image.size
    @property
    def image_dim(self):return size(*self.image_size)
    @property
    def image_dpi(self): return dpi(*self.image_size)
    @property
    def image_print(self):
        return self.image_size, self.image_dim, self.image_dpi
    @property
    def image_name_rich(self):
        siz, dim, res = self.image_print
        return f"{self.image_name} - {siz[0]}x{siz[1]} px: {dim[0]:.2f}x{dim[1]:.2f}\" @ {res:.2f} dpi"

    def setup_page_lang(self, page_lang: str | None = None):
        """
        Set the page language, keeping the `.json` file next to the original image in sync.

        The stored language wins when `page_lang` is `None` or equal to it; otherwise
        `page_lang` (or English when nothing is known) is used and written to the file.
        """
        path = Path(self.page_data.original_path).with_suffix('.json')
        metadata = {}
        if path.exists():
            with path.open(encoding='utf-8') as f:
                metadata = json.load(f)
        if 'lang' in metadata and (page_lang == metadata['lang'] or page_lang is None):
            self._page_lang = metadata['lang']
            return
        self._page_lang = metadata['lang'] = page_lang or 'English'
        # Close the file explicitly so the data is flushed before anyone reads it back.
        with path.open('w', encoding='utf-8') as f:
            json.dump(metadata, f, indent=2)
    @property
    def page_lang(self):
        # `getattr` also covers instances whose attribute was never initialised.
        if getattr(self, '_page_lang', None) is None:
            self.setup_page_lang()
        return self._page_lang

    @property
    def boxes(self): return self.page_data.boxes

    def setup_ground_truth(self):
        self._gts = read_ground_truth(self.page_data)
    @property
    def gts(self):
        if getattr(self, '_gts', None) is None:
            self.setup_ground_truth()
        return self._gts

    # NOTE: `lru_cache` on a method keeps every instance (and its images) alive
    # for as long as the cache entry exists.
    @functools.lru_cache(typed=True)
    def dilated_mask(self, fraction: float):
        return dilate_by_fractional_pixel(self.mask, fraction)

    def mask_dilated1(self):
        "Text mask dilated by one pixel (3x3 max filter)."
        if getattr(self, '_mask_dilated1', None) is None:
            self._mask_dilated1 = self.mask.filter(ImageFilter.MaxFilter(3))
        return self._mask_dilated1

    def mask_dilated05(self):
        "Text mask dilated by half a pixel."
        if getattr(self, '_mask_dilated05', None) is None:
            self._mask_dilated05 = self.dilated_mask(0.5)
        return self._mask_dilated05

    def mask_dilated02(self):
        "Text mask dilated by 0.2 pixels."
        if getattr(self, '_mask_dilated02', None) is None:
            self._mask_dilated02 = self.dilated_mask(0.2)
        return self._mask_dilated02

    def dilated(self):
        "Dilated text masks keyed by dilation amount in pixels."
        return {1: self.mask_dilated1(),
                0.5: self.mask_dilated05(),
                0.2: self.mask_dilated02(),}

    def __new__(cls,
            exp: ExperimentContext,
            idx: ImgSpecT,
            *args, **kwargs) -> Self:
        return super().__new__(cls, exp, idx, *args, **kwargs)  # type: ignore
+ """ + + config: cfg.Config + image_paths: list[Path] + # OCR engine -> Image index -> Box index -> Crop method -> Result + _results: dict[str, dict[ImgIdT, ResultSet]] + + + engines = { + 'Tesseract': cfg.OCREngine.TESSERACT, + 'Idefics': None, + 'manga-ocr': cfg.OCREngine.MANGAOCR} + + # subject_cls: ImageContext + # def subject_factory(self) -> Callable[..., ExperimentSubject]: return type(self).subject_cls + + @classmethod + def get_config(cls, cache_dir: Path | None = None) -> cfg.Config: + config = cfg.load_config() + config.cache_dir = cache_dir or Path(".") + profile = config.current_profile + preprocessor_conf = profile.preprocessor + # Modify the profile to OCR all boxes. + # Make sure OCR is enabled. + preprocessor_conf.ocr_enabled = True + # Make sure the max size is infinite, so no boxes are skipped in the OCR process. + preprocessor_conf.ocr_max_size = 10**10 + # Make sure the sus box min size is infinite, so all boxes with "unknown" language are skipped. + preprocessor_conf.suspicious_box_min_size = 10**10 + # Set the OCR blacklist pattern to match everything, so all text gets reported in the analytics. 
+ preprocessor_conf.ocr_blacklist_pattern = ".*" + return config + + def to_dict(self): + return { + 'image_paths': list(map(str, self.image_paths)), + 'cache_dir': str(self.config.cache_dir) + } + def to_json(self): + return json.dumps(self.to_dict(), indent=2) + @classmethod + def from_json_data(cls, d: dict): + return cls(cls.get_config(Path(d['cache_dir'])), d['image_paths']) + @classmethod + def from_json_path(cls, path: Path): + return cls.from_json_data(json.loads(path.read_text())) + + + @functools.lru_cache() + def mocr(self, ocr_model: str, lang: str): + engine = self.engines[ocr_model] + ocr_processor = ocr.get_ocr_processor(True, engine) + proc = ocr_processor[lang2pcleaner(lang)] + if isinstance(proc, TesseractOcr): + proc.lang = lang2tesseract(lang) + return proc + + def ocr_box(self, result: ResultOCR, ocr_model: str, lang: str): + assert result.image is not None + text = self.mocr(ocr_model, lang)(result.image) + result.ocr = postprocess_ocr(text) + return result + + @property + def cache_dir(self): return self.config.get_cleaner_cache_dir() + image_cache_dir = ExperimentContext.subject_cache_dir + + @functools.lru_cache() + def _load_page_data(self, image_idx: int): + config = self.config + cache_dir = self.image_cache_dir(image_idx) + img_path = self.path_from_idx(image_idx) + image_name = img_path.stem + # read cached json + jsons = [_ for _ in cache_dir.glob("*#raw.json") if image_name in _.stem] + assert len(jsons) <= 1 + # generate text boxes if needed + if not jsons: + pfl = config.current_profile + gpu = torch.cuda.is_available() or torch.backends.mps.is_available() + model_path = config.get_model_path(gpu) + ctm.model2annotations(pfl.general, pfl.text_detector, model_path, [img_path], cache_dir) + # we don't need unique names for this tests, strip uuids + for p in cache_dir.glob(f"*{image_name}*"): + p.rename(strip_uuid(p)) + jsons = [_ for _ in cache_dir.glob("*#raw.json") if image_name in _.stem] + + # adapt paths to be relative to this 
notebook + this_path = self._root + json_file_path = jsons[0] + json_data = json.loads(json_file_path.read_text(encoding="utf-8")) + json_data["image_path"] = str(strip_uuid(json_data["image_path"]).relative_to(this_path)) + json_data["mask_path"] = str(strip_uuid(json_data["mask_path"]).relative_to(this_path)) + json.dump(json_data, open(json_file_path, "w"), indent=2) + else: + json_file_path = jsons[0] + json_data = json.loads(json_file_path.read_text(encoding="utf-8")) + + page_data = st.PageData( + json_data["image_path"], json_data["mask_path"], + json_data["original_path"], json_data["scale"], + [st.Box(*data["xyxy"]) for data in json_data["blk_list"]], + [], [], []) + # Merge boxes that have mutually overlapping centers. + page_data.resolve_total_overlaps() + return json_data, page_data + + def page_data(self, image_idx: int): + _, page_data = self._load_page_data(image_idx) + return page_data + def json_data(self, image_idx: int): + json_data, _ = self._load_page_data(image_idx) + return json_data + + def experiment_image(self, image_idx: ImgIdT | str | Path) -> ImageContext | None: + "Cached image context." 
+ return cast(ImageContext, self.experiment_subject(image_idx)) + + def update_results(self, ocr_model: str, img_idx: ImgIdT, results: ResultSetDefault): + self._results[ocr_model][img_idx] = cast(ResultSet, results) + + + def _result_from(self, image_idx: ImgIdT, box_idx: BoxIdT, method: CropMethod, ocr: str | None = None): + img_ctx = ImageContext(self, image_idx) + extracted = method in _EXTRACTED_METHODS + result_cls = ResultOCRExtracted if extracted else ResultOCR + result = result_cls(int(box_idx), None, '', img_ctx.page_data, + img_ctx.gts, description=f"{method.value}") + if ocr is not None: + result.ocr = ocr + return result + + def result(self, + ocr_model: str, + image_idx: ImgIdT, box_idx: BoxIdT, method: CropMethod, + ocr: bool=True, + rebuild: bool=False) -> ResultOCR | None: + img_ctx = ImageContext(self, image_idx) + result = self._results[ocr_model][image_idx][box_idx].get(method) + if not rebuild and result is not None: + return result + + result = self._result_from(image_idx, box_idx, method) + image, cropped_image, cropped_mask = result.image, None, None + base_image = img_ctx.base_image + box = img_ctx.boxes[box_idx] + if image is None and method in _IMAGE_METHODS: + image = crop_by_image( + method, box, base_image, self.config.current_profile.preprocessor) + + if image is None and method in _EXTRACTED_METHODS: + mask = img_ctx.mask + cropped_image_path = result.cache_image(cropped_image, "cropped") + cropped_mask_path = result.cache_image(cropped_mask, "mask") + if not cropped_image_path.exists() or not cropped_mask_path.exists(): + image, cropped_image, cropped_mask = crop_by_extracted( + method, box, base_image, mask, + cropped_image_path, cropped_mask_path, img_ctx.dilated()) + + assert image is not None + if result.image is None: + result.image = image + result.cache_image() + if cropped_image is not None: + result.cache_image(cropped_image, "cropped") + if cropped_mask is not None: + result.cache_image(cropped_mask, "mask") + + if ocr: + 
result = self.ocr_box(result, ocr_model, img_ctx.page_lang) + self._results[ocr_model][image_idx][box_idx][method] = result + return result + + def results(self, ocr_model: str | None = None, img_idx: ImgIdT | None = None): + if ocr_model is None: return self._results + if img_idx is None: return self._results[ocr_model] + return self._results[ocr_model][img_idx] + def model_results(self, ocr_model: str): + return cast(dict[ImgIdT, ResultSet], self.results(ocr_model)) + def image_results(self, ocr_model: str, img_idx: ImgIdT): + return cast(ResultSet, self.results(ocr_model, img_idx)) + def box_results(self, ocr_model: str, img_idx: ImgIdT, box_idx: BoxIdT): + return cast(ResultSet, self.results(ocr_model, img_idx))[box_idx] + def method_results(self, ocr_model: str, img_idx: ImgIdT, method: CropMethod): + image_results = self.image_results(ocr_model, img_idx) + return {i: box_results.get(method) for i,box_results in image_results.items()} + + def _reset_results(self): + results = defaultdict(lambda: defaultdict(lambda: ResultSetDefault(dict))) + self._results = cast(dict[str, dict[ImgIdT, ResultSet]], results) + def reset_results(self, + ocr_model: str | None = None, + image_idx: int | None = None, + box_idx: int | None = None, + method: CropMethod | None = None): + if ocr_model is None and image_idx is None and box_idx is None and method is None: + self._reset_results() + return + results = self._results + models = tuple(results.keys()) if ocr_model is None else [ocr_model] if ocr_model in results else [] + for ocr_model in models: + img_nodes = results[ocr_model] + imgs = tuple(img_nodes.keys()) if image_idx is None else [image_idx] if image_idx in img_nodes else [] + for img_idx in imgs: + box_nodes = img_nodes[img_idx] + boxes = tuple(box_nodes.keys()) if box_idx is None else [box_idx] if box_idx in box_nodes else [] + for box_idx in boxes: + if method is None: + del box_nodes[box_idx] + else: + methods = box_nodes[box_idx] + if method in methods: + del 
methods[method] + if not box_nodes[box_idx]: + del box_nodes[box_idx] + if not img_nodes[img_idx]: + del img_nodes[img_idx] + if not results[ocr_model]: + del results[ocr_model] + def reset(self): + super().reset() + self.reset_results() + self._load_page_data.cache_clear() + self.mocr.cache_clear() + + def __init__(self, + config: cfg.Config | None, + image_paths: list[Path] + ): + super().__init__(list(map(lambda p: p.resolve(), image_paths))) + self.config = config or type(self).get_config() + self.image_paths = self._paths + self._reset_results() + self._images = self._subjects + + +# %% experiments.ipynb 51 +@FC.patch_to(ImageContext) +def setup(self, exp: OCRExperimentContext, image_idx: ImgSpecT, page_lang: str | None = None): + super(type(self), self).setup(exp, image_idx) + self._mask_dilated1 = self._mask_dilated05 = self._mask_dilated02 = None + # if ocr_model not in exp.engines: + # raise ValueError(f"OCR model {ocr_model} not supported.") + # self.ocr_model = ocr_model + # self.idx = exp.normalize_idx(image_idx) + self.json_data, self.page_data = exp._load_page_data(self.idx) + self.setup_page_lang(page_lang) + self.mask = Image.open(self.page_data.mask_path) + self.base_image = Image.open(self.page_data.image_path) + self.setup_ground_truth() + + + +# %% experiments.ipynb 54 +class ContextVisor: + ctx: Any + # control_names: list[str] + values: dict[str, Any] + + _css = '' + + _ctxs: dict[str, ContextVisor] + _hdlrs: dict[str, ContextVisor] + + @property + def w(self) -> W.DOMWidget: + if getattr(self, '_w', None) is None: + self._w = self.setup_ui() + return self._w + @property + def out(self) -> W.Output: + if getattr(self, '_out', None) is None: + self._out = W.Output() + self._out.clear_output(wait=True) + return self._out # type: ignore + @property + def controls(self) -> dict[str, W.ValueWidget | W.fixed]: + if getattr(self, '_controls', None) is None: + self._controls = self.setup_controls() + return self._controls + @property + def 
all_controls(self) -> dict[str, W.ValueWidget | W.fixed]: + if getattr(self, '_all_controls', None) is None: + controls = {} + for visor in self._ctxs.values(): + controls.update(visor.all_controls) + controls.update(self.controls) + self._all_controls = controls + return self._all_controls + + @property + def all_values(self): + return {**{k:v.values for k,v in (self._ctxs | {'self': self}).items()}, **self.values} + + @property + def comps(self): return self._ctxs + def comp(self, k: str) -> ContextVisor | None: + return self._ctxs.get(k) + def handler(self, k: str) -> ContextVisor | None: + return self._hdlrs.get(k) + + @property + def styler(self) -> W.Output | None: + if (stl := self.setup_style()) is None: + return None + if getattr(self, '_style', None) is None: + self._style = W.Output(layout={'height': '0px'}) + with self._style: + display(stl) + return self._style + def setup_style(self): + return HTML(f"") if self._css else None + + def update_output(self, **kwargs): + cprint(kwargs) + + def setup_controls(self) -> dict[str, W.ValueWidget | W.fixed]: + return {k: W.Label(value=k) for k,v in self.values.items()} + + def hide(self): + self.w.layout.visibility = 'hidden' + def show(self): + self.w.layout.visibility = 'visible' + + def setup_ui(self): + comps = [] + for visor in self._ctxs.values(): + comps.append(visor.w) + return W.HBox([*comps, *self.controls.values()]) + + def setup_display(self): + if getattr(self, '_w', None) is None: + self._w = self.setup_ui() + + + def _output(self, **kwargs): + collator = defaultdict(dict) + show_inline_matplotlib_plots() + with self.out: + clear_output(wait=True) + for k,v in kwargs.items(): + if (comp := self.handler(k)) is not None: + collator[comp][k] = v + else: + assert 0 + # self.update_output(**{k: v}) + for comp, kw in collator.items(): + comp.update_output(**kw) + show_inline_matplotlib_plots() + def interactive_output(self): + controls = self.all_controls + controls2names = {v:k for k,v in 
controls.items()} + def observer(change): + control_name = controls2names[change['owner']] + kwargs = {control_name: change['new']} + updated = self._update(**kwargs) + self._output(**updated) + for w in controls.values(): + w.observe(observer, 'value') + def display(self, **kwargs): + if getattr(self, '_w', None) is None: + self.setup_display() + self.interactive_output() + self._update(**(self.values | kwargs)) + all_values= {} + for comp in list(self.comps.values()) + [self]: all_values.update(comp.values) + self._hdlrs = {k:self._hdlrs.get(k, self) for k in all_values} + self._output(**all_values) + display(self.styler, self.w, self.out) if self.styler else display(self.w, self.out) + else: + self.update(**kwargs) + def _ipython_display_(self): self.display() + + def _update(self, update_value: bool=True, **kwargs): + updated = {} + for visor in self.comps.values(): + updated.update(visor._update(update_value=update_value, **kwargs)) + values = self.values + my_vals = _pops_(kwargs, self.values.keys()) + for k,v in my_vals.items(): + if v is not None and v != values[k]: + if update_value: values[k] = v + updated[k] = v + return updated + def update(self, **kwargs): + updated = self._update(update_value=False, **kwargs) + controls = self.all_controls + for k in updated: + controls[k].value = updated[k] + # self._output(**updated) + + def __init__(self, + ctx: Any, + values: dict[str, Any], + out: W.Output | None = None, + ctxs: dict[str, ContextVisor] | None = None, + hdlrs: dict[str, ContextVisor] | None = None, + ): + self._ctxs = ctxs or {} + self._hdlrs = hdlrs or {} + self.ctx = ctx + self._out = out + self.values = values + + + +# %% experiments.ipynb 62 +class ImageSelector(ContextVisor): + ctx: OCRExperimentContext + + @property + def image_ctx(self): + return ImageContext(self.ctx, self.values['image_idx']) + + def setup_controls(self): + paths = self.ctx.image_paths + w = W.Dropdown( + options={_.stem:i for i,_ in enumerate(paths)}, + 
value=self.values['image_idx'], + layout={'width': 'fit-content'}, + style={'description_width': 'initial'}) + return {'image_idx': w} + + def update(self, image_idx: ImgSpecT | None = None, **kwargs): + if image_idx is None: return + idx = self.ctx.normalize_idx(image_idx) + if idx is None: return + super().update(image_idx=idx, **kwargs) + + + def __init__(self, + ctx: OCRExperimentContext, /, + image_idx: ImgSpecT = 0, *, + out: W.Output | None=None): + idx = ctx.normalize_idx(image_idx) + assert idx is not None, f"Image {image_idx} not found in experiment context" + super().__init__(ctx, {'image_idx': idx}, out) + + +# %% experiments.ipynb 66 +class OCRContextVisor(ContextVisor): + ctx: OCRExperimentContext + + def update_output(self, /, image_idx: ImgIdT, **kwargs): + img_path = self.ctx.path_from_idx(image_idx) + display_image_grid([img_path], 1, 1) + + def update(self, image_idx: ImgSpecT | None = None, **kwargs): + if image_idx is None: return + idx = self.ctx.normalize_idx(image_idx) + if idx is None: return + super().update(image_idx=idx, **kwargs) + + def __init__(self, + ctx: OCRExperimentContext, /, + image_idx: ImgSpecT = 0, *, + out: W.Output | None=None): + super().__init__(ctx, {}, out, + ctxs={'image_idx': ImageSelector(ctx, image_idx, out=self.out)}) + + +# %% experiments.ipynb 84 +class OCRModel(Enum): + TESSERACT = 0 + IDEFICS = 1 + @staticmethod + def __display_names__() -> dict[str, OCRModel]: + return dict( + zip("Tesseract, Idefics".split(', '), + OCRModel)) + + +class ModelSelector(ContextVisor): + ctx: OCRExperimentContext + + def setup_controls(self): + options = self.models + w = W.Dropdown( + options=options, + value=self.values['model'], + layout={'width': 'fit-content'}, + style={'description_width': 'initial'}) + return {'model': w} + + def setup_ui(self): + ctls = self.controls + model_grp = W.HBox([ctls['model']]) + model_grp.add_class('model_grp') + comps = [] + for visor in self.comps.values(): + comps.append(visor.setup_ui()) + 
ui = W.HBox([*comps, model_grp]) + return ui + + def __init__(self, + exp_ctx: OCRExperimentContext, + ocr_model: OCRModel | None=OCRModel.TESSERACT, + ocr_models: dict[str, OCRModel] | None = None, + out: W.Output | None = None + ): + self.models: dict[str, OCRModel] = ocr_models or OCRModel.__display_names__() + super().__init__(exp_ctx, + {'model': ocr_model or OCRModel.TESSERACT}, + out=out or self.out)#, ctxs=[exp_visor]) + + +# %% experiments.ipynb 87 +class DisplayOptions(Enum): + BOXES = 0 + IMAGE = 1 + MASK = 2 + IMAGE_MASK = 3 + PAGE_DATA = 4 + GROUND_TRUTH = 5 + ALL = 6 + RESULTS = 7 + BEST_RESULTS = 8 + DATAFRAME = 9 + + @staticmethod + def __display_names__(): + return dict( + zip("Boxes, Image, Mask, Image & Mask, Page data, Ground truth, All, Results, " + "Best results, Dataframe".split(', '), + DisplayOptions)) + + +class ContentSelector(ContextVisor): + ctx: OCRExperimentContext + + def image_info(self, image_ctx: ImageContext): + img = image_ctx.base_image + (w, h), print_size_in, print_size_cm, required_dpi = image_ctx.image_info + format = PRINT_FORMATS['Modern Age'] + cprint( f"{'Width x Height':>30}: {w} x {h} pixels\n" + f"{'PIL Info DPI':>30}: {repr(img.info.get('dpi', None))}\n" + f"{'Print Size 300 DPI':>30}: {print_size_in[0]:.3f} x {print_size_in[1]:.3f} in" + f" / {print_size_cm[0]:.3f} x {print_size_cm[1]:.3f} cm\n" + f"Required DPI Modern Age format: {required_dpi:.3f} dpi " + f"({format[0]:.3f} x {format[1]:.3f} in)") + + + def display_content(self, image_ctx: ImageContext, display_option: DisplayOptions): + page_data = image_ctx.page_data + if display_option in (DisplayOptions.ALL, DisplayOptions.PAGE_DATA): + self.image_info(image_ctx) + RenderJSON(image_ctx.json_data, 350, 2).display() + if display_option in (DisplayOptions.ALL, DisplayOptions.GROUND_TRUTH): + cprint(image_ctx.gts) + if display_option == DisplayOptions.IMAGE: + display_image_grid([page_data.image_path], 1, 1) + if display_option == DisplayOptions.MASK: + 
display_image_grid([page_data.mask_path], 1, 1) + if display_option in (DisplayOptions.ALL, DisplayOptions.IMAGE_MASK): + display_image_grid([page_data.image_path, page_data.mask_path], 1, 2) + if display_option in (DisplayOptions.ALL, DisplayOptions.BOXES): + _, out_path = page_boxes(page_data) + display_image_grid([out_path], 1, 1) + + + def setup_controls(self): + options = self.display_options or {**DisplayOptions.__display_names__()} + display_option_wdgt = W.Dropdown( + options=options, + value=self.values['display_option'], + layout={'width': '120px'}, + style={'description_width': 'initial'}) + return {'display_option': display_option_wdgt} + + + def setup_ui(self): + ctls = self.controls + display_option_grp = W.HBox([ctls['display_option']]) + display_option_grp.add_class('display_option_grp') + comps = [] + for visor in self.comps.values(): + comps.append(visor.setup_ui()) + ui = W.HBox([*comps, display_option_grp]) + return ui + + + def __init__(self, + exp_ctx: OCRExperimentContext, + display_option: DisplayOptions | None=DisplayOptions.BOXES, + display_options: Mapping[str, DisplayOptions] | None = None, + out: W.Output | None = None + ): + self.display_options = display_options + super().__init__(exp_ctx, + {'display_option': display_option or DisplayOptions.BOXES}, + out=out or self.out)#, ctxs=[exp_visor]) + + +# %% experiments.ipynb 91 +class ImageContextVisor(ContextVisor): + ctx: ImageContext + # control_names: list[str] = ['display_option'] + + _css = """ + .display_option_grp { + background-color: lightblue; + } + """ + + def image_info(self): + content_selector = cast(ContentSelector, self.comp('display_option')) + content_selector.image_info(self.ctx) + + def update_output(self, + display_option: DisplayOptions | None = None, + image_idx: ImgIdT | None = None, + **kwargs): + content_selector = cast(ContentSelector, self.comp('display_option')) + if image_idx is not None and image_idx != self.ctx.image_idx: + ctx = ImageContext(self.ctx.exp, 
image_idx) + assert ctx is not None + self.ctx = ctx + display_option = content_selector.values['display_option'] + if display_option is None: + return + content_selector.display_content(self.ctx, display_option) + + def update(self, + display_option: DisplayOptions | None=None, + image_idx: ImgSpecT | None=None, + **kwargs): + if image_idx is not None: + if (idx := self.ctx.exp.normalize_idx(image_idx)) is not None: + kwargs['image_idx'] = idx + super().update(display_option=display_option, **kwargs) + + def __init__(self, + exp_ctx: OCRExperimentContext, + img_idx: ImgIdT | str | Path | ImageContext, + display_option: DisplayOptions=DisplayOptions.BOXES, + display_options: Mapping[str, DisplayOptions] | None = None, + out: W.Output | None = None + ): + if isinstance(img_idx, ImageContext): + ctx = img_idx + else: + assert exp_ctx is not None, "exp_ctx must be provided if img_idx is not an ImageContext" + ctx = ImageContext(exp_ctx, img_idx) + assert ctx is not None, f"Image {img_idx} not found in experiment context" + if display_options is None: + display_options = {**DisplayOptions.__display_names__()} + del display_options['Results'] + out = out or self.out + content_selector = ContentSelector(exp_ctx, + display_option=display_option, display_options=display_options, out=out) + image_selector = ImageSelector(exp_ctx, ctx.image_idx, out=out) + super().__init__(ctx, {}, out=out, + ctxs={'image_idx': image_selector, 'display_option': content_selector}) + + +# %% experiments.ipynb 104 +def trimmed_mean(data, trim_percent): + sorted_data = np.sort(data) + n = len(data) + trim_count = int(trim_percent * n) + trimmed_data = sorted_data[trim_count:-trim_count] + return np.mean(trimmed_data) + +def mad_based_outlier(points, threshold=3.5): + median = np.median(points) + diff = np.abs(points - median) + mad = np.median(diff) + modified_z_score = 0.6745 * diff / mad + return points[modified_z_score < threshold] + +def iqr_outlier_removal(data): + q1 = np.percentile(data, 
25) + q3 = np.percentile(data, 75) + iqr = q3 - q1 + lower_bound = q1 - 1.5 * iqr + upper_bound = q3 + 1.5 * iqr + return data[(data >= lower_bound) & (data <= upper_bound)] + + +# %% experiments.ipynb 105 +@dataclasses.dataclass +class Experiment: + ctx: ExperimentContext + + +@dataclasses.dataclass +class ExperimentOCR(Experiment): + ctx: ImageContext + ocr_model: str + + @property + def img_ctx(self): return self.ctx + @property + def ctxs(self): + img_ctx = self.img_ctx + return cast(OCRExperimentContext, img_ctx.exp), img_ctx + + @classmethod + def file_path_of(cls, page_data: st.PageData, ocr_model: str): + return f"{Path(page_data.original_path).stem}_{ocr_model}.json" + + def file_path(self): + img_ctx = self.img_ctx + return type(self).file_path_of(img_ctx.page_data, self.ocr_model) + + def to_dict(self): + "JSON serializable dict of the experiment" + img_ctx = self.img_ctx + img_idx = img_ctx.image_idx + results = results_to_dict(self.results()) + return { + 'image_name': img_ctx.image_name, + 'ocr_model': self.ocr_model, + 'results': results, + } + + def to_json(self, out_dir: Path | None = None): + img_ctx = self.img_ctx + fp = (out_dir or img_ctx.cache_dir_image) / self.file_path() + data = self.to_dict() + with open(fp, 'w') as f: + json.dump(data, f, indent=2) + return fp, data + + @classmethod + def from_json(cls, experiment: OCRExperimentContext, json_path: Path) -> Self: + try: + with open(json_path, 'r') as f: + data = json.load(f) + except Exception as e: + logger.error(f"Error loading {json_path}: {e}") + raise e + ocr_model = data['ocr_model'] + img_ctx = ImageContext(experiment, data['image_name']) + results: ResultSetDefault = dict_to_results( + img_ctx.image_idx, + data['results'], + result_factory=experiment._result_from) + experiment.update_results(ocr_model, img_ctx.image_idx, results) + return cls(img_ctx, ocr_model) + + @classmethod + def from_image(cls, + ctx: OCRExperimentContext, + ocr_model: str, + image_idx: ImgSpecT): + idx = 
cast(ImgIdT, ctx.normalize_idx(image_idx)) + img_ctx = ImageContext(ctx, idx) + if img_ctx is None: + raise ValueError(f"Image {image_idx} not found in experiment context") + fp = img_ctx.cache_dir / cls.file_path_of(img_ctx.page_data, ocr_model) + if fp.exists(): + return cast(Self, cls.from_json(cast(OCRExperimentContext, img_ctx.exp), fp)) + return cls(img_ctx, ocr_model) + + @classmethod + def from_method(cls, + ctx: OCRExperimentContext, + ocr_model: str, + image_idx: ImgIdT | str | Path, + method: CropMethod): + experiment = cls.from_image(ctx, ocr_model, image_idx) + if experiment is None: + return None + return experiment.method_experiment(method) + + @classmethod + def saved_experiment(cls, + ctx: OCRExperimentContext, ocr_model: str, image_idx: ImgIdT | str | Path): + idx = ctx.normalize_idx(image_idx) + if idx is None: + logger.warning(f"Image {image_idx} not found in experiment context") + return None + return cls.from_image(ctx, ocr_model, idx) + + @classmethod + def saved_experiments(cls, ctx: OCRExperimentContext, ocr_model: str) -> list[Self]: + return [exp for i in range(len(ctx.image_paths)) + if (exp := cls.from_image(ctx, ocr_model, i)) is not None] + + + def result(self, box_idx: BoxIdT, method: CropMethod, ocr: bool=True, rebuild: bool=False): + ctx, img_ctx = self.ctxs + return ctx.result(self.ocr_model, img_ctx.image_idx, box_idx, method, ocr, rebuild) + + def results(self): + ctx, img_ctx = self.ctxs + return cast(ResultSet, ctx.results(self.ocr_model, img_ctx.image_idx)) + + def has_run(self): + "at least one method has run" + img_ctx = self.img_ctx + return len(self.results()) == len(img_ctx.page_data.boxes) + + def best_results(self): + img_ctx = self.img_ctx + results = self.results() + if len(results) < len(img_ctx.page_data.boxes): # at least one method has run + return None + best = [] + for box_idx in results: + methods = results[box_idx] + best_method = max(methods, key=lambda m: methods[m].acc) # type: ignore + 
best.append((best_method, methods[best_method])) + return best + + def save_results_as_ground_truth(self, overwrite=False): + img_ctx = self.img_ctx + gts_path = ground_truth_path(img_ctx.page_data) + if overwrite or not gts_path.exists(): + best_results = self.best_results() + if best_results: + tt = [r.ocr for m,r in best_results] + gts_path.write_text('\n'.join(tt), encoding="utf-8") + img_ctx.setup_ground_truth() + logger.info(f"Ground truth data saved successfully to {gts_path}") + return True + else: + logger.info("No best results available to save.") + return False + else: + return False + + @property + def experiments(self): + if not hasattr(self, '_experiments'): + self._experiments = {} + return self._experiments + def method_experiment(self, method: CropMethod) -> ExperimentOCRMethod: + if method not in self.experiments: + self.experiments[method] = ExperimentOCRMethod(self, method) + return self.experiments[method] + + + def to_dataframe(self): + "Dataframe with crop methods as columns and box ids as rows" + methods = list(CropMethod.__members__.values()) + experiments = [self.method_experiment(m) for m in methods] + accuracies = [[result.acc for result in exp.results()] for exp in experiments] + # transpose accuracies + accuracies = list(zip(*accuracies)) + return pd.DataFrame(accuracies, columns=CropMethod.__display_names__()) + + def plot_accuracies(self, + methods: list[CropMethod] | None = None, + ): + "Plots a horizontal bar chart of the accuracies for a list of method experiments." 
+ methods = methods or list(CropMethod.__members__.values()) + experiments = [self.method_experiment(m) for m in methods] + if not experiments: return + + ctx, img_ctx = self.ctxs + page_data = img_ctx.page_data + model = self.ocr_model + accuracies = [[result.acc for result in exp.results()] for exp in experiments] + accuracies = [np.mean(a) for a in accuracies] + # accuracies = [np.mean([result.acc for result in exp.results()]) for exp in experiments] + + _, ax = plt.subplots(figsize=(10, 5)) + + # Normalize the accuracies for color mapping + norm = plt.Normalize(min(accuracies), max(accuracies)) + # Color map from red to green + cmap = plt.get_cmap('RdYlGn') + colors = cmap(norm(accuracies)) + + ax.barh([m.value for m in methods], accuracies, color=colors) + + ax.set_xscale('log') # Set the x-axis to a logarithmic scale + ax.set_xlabel('Average Accuracy (log scale)', fontsize=12, fontweight='bold') + + ax.set_ylabel('Method', fontsize=12, fontweight='bold') + ax.set_yticks(range(len(methods))) + ax.set_yticklabels([f'{method.value} ({acc:.2f})' + for method, acc in zip(methods, accuracies)], fontsize=12) + max_acc_index = np.argmax(accuracies) + ax.get_yticklabels()[max_acc_index].set(color='blue', fontweight='bold') + + title_text = (f"{page_data.original_path} - OCR model: {model}") + ax.set_title(title_text, fontsize=12, fontweight='bold') + + plt.tight_layout() + plt.show() + + + def summary_box(self, box_idx: int): + results: list[tuple[CropMethod, ResultOCR]] = [] + pb = tqdm(CropMethod.__members__.values(), leave=False, desc=f"Box #{box_idx+1}") + for m in pb: + r = cast(ResultOCR, self.result(box_idx, m)) + results.append((m, r)) + methods, images, ocrs, accs = zip( + *map( + lambda t: (t[0].value, t[1].cache_image(), t[1].diff_tagged(), acc_as_html(t[1].acc)), + results)) + display_columns([methods, images, accs, ocrs], + headers=["Method", f"Box #{box_idx+1}", "Accuracy", "OCR"]) + + + def summary_method(self, method: CropMethod): + results = 
self.method_experiment(method).results() + methods, images, ocrs, accs = zip( + *map( + lambda r: (r.block_idx+1, r.cache_image(), r.diff_tagged(), acc_as_html(r.acc)), + results)) + display_columns([methods, images, accs, ocrs], + headers=["Box #", "Box", "Accuracy", f"{method.value} OCR"]) + + + def display(self): + out = [] + for method in CropMethod: + out.append(f"---------- {method.value} ----------") + results = self.method_experiment(method).results() + out.extend(results) + out.append('\n') + cprint(*out, soft_wrap=True) + + + def reset(self, box_idx: int | None = None, method: CropMethod | None = None): + ctx, img_ctx = self.ctxs + ctx.reset_results(None, img_ctx.image_idx, box_idx, method) + + def perform_methods(self, + methods: CropMethod | list[CropMethod] | None = None, + box_idxs: BoxIdT | list[BoxIdT] | None = None, + rebuild: bool = False, + plot_acc: bool = False + ): + if methods is None: + methods = [*CropMethod.__members__.values()] + elif isinstance(methods, CropMethod): + methods = [methods] + if rebuild: + _methods = tqdm(methods, desc="Methods") + else: + _methods = methods + for method in _methods: + method_exp = self.method_experiment(method) + if method_exp: + if rebuild: + method_exp(box_idxs, rebuild=rebuild) + if plot_acc: + self.plot_accuracies() + + def __call__(self, + box_idxs: BoxIdT | list[BoxIdT] | None = None, + methods: CropMethod | list[CropMethod] | None = None, + save: bool = True, + display=False, + rebuild: bool=False, + save_as_ground_truth=False): + self.perform_methods(methods, box_idxs, rebuild=rebuild) + if save_as_ground_truth: + self.save_results_as_ground_truth(overwrite=True) + if save: + self.to_json() + if display: + self.display() + + +@dataclasses.dataclass +class ExperimentOCRMethod: + ctx: ExperimentOCR + method: CropMethod + + @property + def exp_ctx(self): return self.ctx + @property + def img_ctx(self): return self.ctx.ctx + @property + def ctxs(self): + img_ctx = self.img_ctx + return 
cast(OCRExperimentContext, img_ctx.exp), img_ctx, self.ctx + + def result(self, box_idx: BoxIdT, ocr: bool=True, rebuild: bool=False) -> ResultOCR | None: + ctx, img_ctx, exp_ctx = self.ctxs + return ctx.result(exp_ctx.ocr_model, img_ctx.image_idx, box_idx, self.method, ocr, rebuild) + + def results(self, + box_idxs: BoxIdT | list[BoxIdT] | None = None, + ocr: bool=True, rebuild: bool=False) -> list[ResultOCR]: + ctx, img_ctx, exp_ctx = self.ctxs + if box_idxs is None: + box_idxs = list(range(len(img_ctx.boxes))) + elif isinstance(box_idxs, int): + box_idxs = [box_idxs] + model = exp_ctx.ocr_model + results = ctx.method_results(model, img_ctx.image_idx, self.method) + results = {i:results[i] if i in results else None for i in box_idxs} + pb = rebuild or not results or any(r is None for r in results.values()) + if pb and len(results) > 2: + progress_bar = tqdm(list(results.keys()), desc=f"{self.method.value} - {model}") + else: + progress_bar = list(results.keys()) + results = [] + for i in progress_bar: + results.append(self.result(i, ocr, rebuild=rebuild)) + return results + + + def get_results_html(self, + box_idxs: BoxIdT | list[BoxIdT] | None = None, + max_image_width: int | None = None): + _, img_ctx, exp_ctx = self.ctxs + results: list[ResultOCR] = self.results(box_idxs) + accs = np.array([r.acc for r in results]) + mean_accuracy = np.mean(accs) + mean_trimmed = trimmed_mean(accs, 0.1) + # filtered_data = mad_based_outlier(accs) + # mean_mad = np.mean(filtered_data) + # filtered_data = iqr_outlier_removal(accs) + # mean_iqr = np.mean(filtered_data) + + descriptions, images, ocrs, accs = zip(*map( + lambda r: ( + r.block_idx+1, + r.cache_image(), + r.diff_tagged(), + acc_as_html(r.acc) + ), results)) + non_breakin_space = u'\u00A0' + tmpl = "{}" + padded_s = lambda s,n: tmpl.format(s.rjust(n)) + acc_fmt = f"{mean_accuracy:.2f}/{mean_trimmed:.2f}" + w, h = img_ctx.base_image.size + dim, _dpi = size(w, h), dpi(w, h) + dim_fmt = f"{w}x{h} px: {dim[0]:.2f} x 
{dim[1]:.2f} in @ {_dpi:.2f} dpi" + return '\n
\n'.join([ + ("
" + f"{padded_s('Page', 24)}: {img_ctx.page_data.original_path}
" + f"{padded_s('Size', 24)}: {dim_fmt}
" + f"{padded_s('Model', 24)}: {exp_ctx.ocr_model}
" + f"{padded_s('Crop Method', 24)}: {self.method.value}
" + f"{padded_s('Accuracy Mean/Trimmed', 24)}: {acc_fmt}" + "
"), + get_columns_html( + [descriptions, images, accs, ocrs], + max_image_width, + headers=["Box #", "Image", "Accuracy", "OCR"]), + ]) + + def display(self, + box_idxs: BoxIdT | list[BoxIdT] | None = None, max_image_width: int | None = None): + display(HTML(self.get_results_html(box_idxs, max_image_width))) + + + def summary(self): + results = self.results() + methods, images, ocrs, accs = zip( + *map( + lambda r: (r.block_idx+1, r.cache_image(), r.diff_tagged(), acc_as_html(r.acc)), + results)) + display_columns([methods, images, accs, ocrs], + headers=["Box #", "Box", "Accuracy", f"{self.method.value} OCR"]) + + + def reset(self): + _, _, exp_ctx = self.ctxs + exp_ctx.reset(method=self.method) + + def __call__(self, box_idxs: BoxIdT | list[BoxIdT] | None = None, display=False, rebuild=False): + if isinstance(box_idxs, int): + result = self.result(cast(BoxIdT, box_idxs), rebuild=rebuild) + if result is not None and display: + result.display() + else: + results = self.results(box_idxs, rebuild=rebuild) + if results and display: + self.display(box_idxs) + + +# %% experiments.ipynb 141 +class ResultVisor(ContextVisor): + ctx: ExperimentOCR + control_names: list[str] = ['all_boxes', 'box_idx', 'all_methods', 'method'] + + _css = """ + .box_grp { + background-color: aliceblue; + } + .method_grp { + background-color: #ededed; + } + """ + + def best_results(self): + ll = self.ctx.best_results() + if ll: + cprint([(m.value, f"{r.acc:.3f}", r.ocr) for m,r in ll]) + + def pd_to_html(self): + df = self.ctx.to_dataframe() + # set float precision + df = df.round(3) + # display floats with 3 decimal digits + df = df.applymap(lambda x: f"{x:.3f}") + # highlight max value in each row + stl = df.style.highlight_max(axis=0) + display(HTML(stl.to_html())) + + def update_output(self, **kwargs): + all_boxes: bool = self.values['all_boxes'] + box_idx: int = self.values['box_idx'] + all_methods: bool = self.values['all_methods'] + method: CropMethod = self.values['method'] + + # 
cprint(f"all_boxes: {all_boxes}, box_idx: {box_idx}, all_methods: {all_methods}, method: {method}") + + if all_boxes and all_methods: + self.ctx.plot_accuracies() + elif all_boxes: + self.ctx.summary_method(method) + elif all_methods: + self.ctx.summary_box(box_idx) + else: + result = self.ctx.result(box_idx, method) + if result is not None: + result.display() + + def setup_controls(self): + _, img_ctx = self.ctx.ctxs + values = self.values + box_wdgt = W.BoundedIntText( + value=values['box_idx'], min=0, max=len(img_ctx.boxes)-1, step=1, + disabled=values['all_boxes'], + layout={'width': '50px'}, + style={'description_width': 'initial'}) + methods_wdgt = W.Dropdown( + options=CropMethod.__display_names__(), + value=values['method'], + layout={'width': '150px'}, + style={'description_width': 'initial'}) + all_boxes_wdgt = W.Checkbox(label='All', value=values['all_boxes'], + description="all", + layout={'width': 'initial'}, + style={'description_width': '0px'}) + all_methods_wdgt = W.Checkbox(label='All', value=values['all_methods'], + description="all", + layout={'width': 'initial'}, + style={'description_width': '0px'}) + return {'all_boxes': all_boxes_wdgt, 'box_idx': box_wdgt, + 'all_methods': all_methods_wdgt, 'method': methods_wdgt} + + def setup_ui(self): + ctls = self.controls + _, img_ctx = self.ctx.ctxs + box_label = W.Label( + value=f"Box # (of {len(img_ctx.boxes)}):", + layout={'width': 'initial', 'padding': '0px 0px 0px 10px'}) + method_label = W.Label(value='Method:', layout={'width': 'initial', 'padding': '0px 0px 0px 10px'}) + + box_grp = W.HBox([box_label, ctls['all_boxes'], ctls['box_idx']]) + box_grp.add_class('box_grp') + method_grp = W.HBox([method_label, ctls['all_methods'], ctls['method']]) + method_grp.add_class('method_grp') + + return W.HBox([box_grp, method_grp]) + + def __init__(self, + ctx: OCRExperimentContext | ExperimentOCR, + img_idx: int | str | Path | None = None, + all_boxes: bool = False, + box_idx: int = 0, + all_methods: bool = 
False, + method: CropMethod=CropMethod.INITIAL_BOX, + out: W.Output | None = None, + ): + if isinstance(ctx, OCRExperimentContext): + assert img_idx is not None, "img_idx must be provided if ctx is an ExperimentContext" + exp = ExperimentOCR.from_image(ctx, 'Tesseract', img_idx) + if not exp: + raise ValueError(f"Image {img_idx} not found in experiment context") + ctx = exp + else: + if not isinstance(ctx, ExperimentOCR): + raise ValueError("ctx must be an ExperimentOCR or OCRExperimentContext") + + super().__init__(ctx, {'all_boxes': all_boxes, 'box_idx': box_idx, + 'all_methods': all_methods, 'method': method}, out=out or self.out) + + +# %% experiments.ipynb 144 +class ExperimentVisor(ContextVisor): + ctx: ExperimentOCR + + def update_output(self, + image_idx: int | None = None, + **kwargs): + exp_ctx, img_ctx = self.ctx.ctxs + if image_idx is not None and image_idx != img_ctx.image_idx: + ctx = ImageContext(exp_ctx, image_idx) + assert ctx is not None + self.ctx.ctx = ctx + result_visor = self.comp('result_visor') + if result_visor is not None: + result_visor.update_output(**kwargs) + + def __init__(self, + ctx: OCRExperimentContext | ExperimentOCR, + img_idx: int | str | Path | None = None, + all_boxes: bool = False, + box_idx: int = 0, + all_methods: bool = False, + method: CropMethod=CropMethod.INITIAL_BOX, + out: W.Output | None = None, + ): + if isinstance(ctx, OCRExperimentContext): + assert img_idx is not None, "img_idx must be provided if ctx is an ExperimentContext" + exp = ExperimentOCR.from_image(ctx, 'Tesseract', img_idx) + if not exp: + raise ValueError(f"Image {img_idx} not found in experiment context") + ctx = exp + else: + if not issubclass(type(ctx), ExperimentOCR): + raise ValueError("ctx must be an ExperimentOCR or OCRExperimentContext") + + exp_ctx, img_ctx = ctx.ctxs + out = out or self.out + image_selector = ImageSelector(exp_ctx, image_idx=img_ctx.image_idx, out=out) + result_visor = ResultVisor(ctx, out=out, + all_boxes=all_boxes, 
box_idx=box_idx, all_methods=all_methods, method=method) + + super().__init__(ctx, {}, out=out, + ctxs={'image_selector': image_selector, 'result_visor': result_visor}, + hdlrs={'display_option': result_visor} + ) + + +# %% experiments.ipynb 198 +class ExperimentsVisor(ContextVisor): + ctx: OCRExperimentContext + + def update_output(self, + model: OCRModel | None = None, + image_idx: ImgIdT | None = None, + display_option: DisplayOptions | None = None, + **kwargs): + model_selector, image_selector, content_selector, result_visor = self._comps() + if model is not None: + exp_ctx = result_visor.ctx + exp_ctx.ocr_model = list(model_selector.models.keys())[model.value] + result_visor.ctx = exp_ctx + if image_idx is not None: + img_ctx = ImageContext(self.ctx, image_idx) + result_visor.ctx.ctx = img_ctx + display_option = content_selector.values['display_option'] + if display_option is not None and display_option != DisplayOptions.RESULTS: + result_visor.hide() + if display_option == DisplayOptions.BEST_RESULTS: + result_visor.best_results() + elif display_option == DisplayOptions.DATAFRAME: + result_visor.pd_to_html() + else: + content_selector.display_content(image_selector.image_ctx, display_option) + else: + result_visor.show() + result_visor.update_output(**kwargs) + + def _comps(self): + cc = self.comps + msel: ModelSelector = cc['model_selector'] # type: ignore + isel: ImageSelector = cc['image_selector'] # type: ignore + cs: ContentSelector = cc['content_selector'] # type: ignore + rv: ResultVisor = cc['result_visor'] # type: ignore + return msel, isel, cs, rv + + def setup_ui(self): + ctls = self.controls.values() + msw, isw, csw, rvw = [_.w for _ in self._comps()] + return W.VBox([W.HBox([msw, isw, csw, *ctls]), rvw,]) + + def __init__(self, + ctx: OCRExperimentContext, + image_idx: ImgIdT | str | Path = 0, + ocr_model: OCRModel = OCRModel.TESSERACT, + display_option: DisplayOptions = DisplayOptions.RESULTS, + all_boxes: bool = False, + box_idx: int = 0, + 
all_methods: bool = False, + method: CropMethod=CropMethod.INITIAL_BOX, + ocr_models: dict[str, OCRModel] = {'Tesseract': OCRModel.TESSERACT}, + out: W.Output | None = None, + ): + if not isinstance(ctx, OCRExperimentContext): + raise ValueError("ctx must be an OCRExperimentContext") + exp = ExperimentOCR.from_image(ctx, 'Tesseract', image_idx) + if not exp: + raise ValueError(f"Image {image_idx} not found in experiment context") + + out = out or self.out + model_selector = ModelSelector(ctx, ocr_model=ocr_model, + ocr_models=ocr_models, out=out) + image_selector = ImageSelector(ctx, image_idx=image_idx, out=out) + content_selector = ContentSelector(ctx, display_option=display_option, out=out) + result_visor = ResultVisor(exp, out=out, + all_boxes=all_boxes, box_idx=box_idx, all_methods=all_methods, method=method) + + super().__init__(ctx, {}, out=out, + ctxs={'model_selector': model_selector, 'image_selector': image_selector, 'content_selector': content_selector, + 'result_visor': result_visor} + ) + diff --git a/_testbed/helpers.ipynb b/_testbed/helpers.ipynb new file mode 100644 index 00000000..32a6fa6d --- /dev/null +++ b/_testbed/helpers.ipynb @@ -0,0 +1,835 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp helpers" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# %reload_ext autoreload\n", + "# %autoreload 0\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# install (Colab)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# try: \n", + "# import fastcore as FC\n", + "# except ImportError: \n", + "# !pip install -q fastcore\n", + "# try:\n", + "# import rich\n", + "# except ImportError:\n", + "# !pip install -q rich\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + 
"outputs": [], + "source": [ + "# !pip install -q git+https://github.com/civvic/PanelCleaner.git@basic-tesseract" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing `Tesseract` OCR for Comics\n", + "> Accuracy Enhancements for OCR in `PanelCleaner`\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prologue" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "from __future__ import annotations\n", + "\n", + "import base64\n", + "import json\n", + "import re\n", + "import sys\n", + "import uuid\n", + "from importlib import resources\n", + "from io import BytesIO\n", + "from pathlib import Path\n", + "from typing import Any\n", + "from typing import Iterable\n", + "from typing import Mapping\n", + "from typing import Sequence\n", + "\n", + "import pcleaner.data\n", + "import pcleaner.structures as st\n", + "from IPython.display import clear_output\n", + "from IPython.display import display\n", + "from IPython.display import HTML\n", + "from PIL import Image\n", + "from PIL import ImageDraw\n", + "from PIL import ImageFont\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "\n", + "import fastcore.xtras # patch Path with some utils\n", + "import ipywidgets as W\n", + "import rich\n", + "from fastcore.test import * # type: ignore\n", + "from rich.console import Console\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Helpers" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# pretty print by default\n", + "# %load_ext rich" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "console = Console(width=104, tab_size=4, force_jupyter=True)\n", + "cprint = console.print\n" + ] + }, 
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## dict helpers: _pops_\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "_all_ = ['_pops_', '_pops_values_', '_gets_']\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def _pops_(d: dict, ks: Iterable) -> dict: \n", + " \"Pops `ks` keys from `d` and returns them in a dict. Note: `d` is changed in-place.\"\n", + " return {k:d.pop(k) for k in ks if k in d}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "test_eq(_pops_({'a': 1, 'b': 2, 'c': 3}, ['a', 'b']), {'a': 1, 'b': 2})\n", + "test_eq(_pops_({'a': 1, 'b': 2, 'c': 3}, ['d']), {})\n", + "test_eq(_pops_({'a': 1, 'b': 2, 'c': 3}, ['a', 'c', 'd']), {'a': 1, 'c': 3})\n", + "test_eq(_pops_({}, ['a']), {})\n", + "test_eq(_pops_({'a': 1}, ['a', 'a']), {'a': 1})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def _pops_values_(d: dict, ks: Iterable) -> tuple:\n", + " \"Pops `ks` keys from `d` and returns them as a tuple. 
Note: `d` is changed in-place.\"\n", + " return tuple(d.pop(k, None) for k in ks)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "test_eq(_pops_values_({'a': 1, 'b': 2, 'c': 3}, ['a', 'b']), (1, 2))\n", + "test_eq(_pops_values_({'a': 1, 'b': 2, 'c': 3}, ['d']), (None,))\n", + "test_eq(_pops_values_({'a': 1, 'b': 2, 'c': 3}, ['a', 'c', 'd']), (1, 3, None))\n", + "test_eq(_pops_values_({}, ['a']), (None,))\n", + "test_eq(_pops_values_({'a': 1}, ['a', 'a']), (1, None))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def _gets_(d: Mapping[str, Any], ks: Iterable):\n", + " \"Fetches values from a mapping for a given list of keys, returning `None` for missing keys.\"\n", + " return (d.get(k, None) for k in ks)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "test_eq(_gets_({'a': 1, 'b': 2}, ('a', 'c', 'b')), [1, None, 2])\n", + "test_eq(_gets_({'a': 1, 'b': 2}, ()), [])\n", + "a, b = _gets_({'a': 1, 'b': 2}, ('b', 'a'))\n", + "test_eq((a, b), (2, 1))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## cleanupwidget\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "def _get_globals(mod: str):\n", + " if hasattr(sys, '_getframe'):\n", + " glb = sys._getframe(2).f_globals\n", + " else:\n", + " glb = sys.modules[mod].__dict__\n", + " return glb\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# _all_ = ['_get_globals']\n", + "def _gtest():\n", + " return _get_globals(__name__)\n", + "g1 = _gtest()\n", + "g2 = globals()\n", + "test_eq(g1, g2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def 
cleanupwidgets(*ws, mod: str|None=None, clear=True):\n", + " glb = _get_globals(mod or __name__)\n", + " if clear: clear_output(wait=True)\n", + " for w in ws:\n", + " _w = glb.get(w) if isinstance(w, str) else w\n", + " if _w:\n", + " try: _w.close() # type: ignore\n", + " except: pass\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "_b = W.Button()\n", + "test_ne(_b.comm, None)\n", + "cleanupwidgets('_b')\n", + "test_is(_b.comm, None)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Collapsable JSON in a notebook cell" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class RenderJSON(object):\n", + " def __init__(self, json_data, max_height=200, init_level=0):\n", + " if isinstance(json_data, (Sequence, Mapping)):\n", + " s = json.dumps(json_data)\n", + " elif hasattr(json_data, 'to_dict'):\n", + " s = json.dumps(json_data.to_dict())\n", + " elif hasattr(json_data, 'to_json'):\n", + " s = json_data.to_json()\n", + " else:\n", + " s = json_data\n", + " self.json_str = s\n", + " self.uuid = str(uuid.uuid4())\n", + " self.max_height = max_height\n", + " self.init_level = init_level\n", + "\n", + " def display(self):\n", + " html_content = f\"\"\"\n", + "
\n", + "
\n", + " \n", + "
\n", + " \"\"\"\n", + " display(HTML(html_content))\n", + "\n", + " def _ipython_display_(self):\n", + " self.display()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "json_data = {\n", + " \"name\": \"Petronila\",\n", + " \"age\": 28,\n", + " \"interests\": [\"reading\", \"cycling\", \"technology\"],\n", + " \"education\": {\n", + " \"bachelor\": \"Computer Science\",\n", + " \"master\": \"Data Science\",\n", + " \"phd\": \"Not enrolled\"\n", + " }\n", + "}\n", + "\n", + "RenderJSON(json_data, init_level=1).display()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualize boxes on the page image\n", + "> adapted from `PageData.visualize` but returns the image instead of saving it." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "def page_boxes(self: st.PageData, out_dir: Path | None = None) -> tuple[Image.Image, Path]:\n", + " \"\"\"\n", + " Visualize the boxes on an image.\n", + " Typically, this would be used to check where on the original image the\n", + " boxes are located.\n", + "\n", + " :param image_path: The path to the image to visualize the boxes on.\n", + " \"\"\"\n", + " image_path = Path(self.image_path)\n", + " image = Image.open(image_path)\n", + " draw = ImageDraw.Draw(image)\n", + " data_path = resources.files(pcleaner.data)\n", + " font_path = str(data_path / \"LiberationSans-Regular.ttf\")\n", + " # Figure out the optimal font size based on the image size. E.g. 
30 for a 1600px image.\n", + " font_size = int(image.size[0] / 50) + 5\n", + "\n", + " for index, box in enumerate(self.boxes):\n", + " draw.rectangle(box.as_tuple, outline=\"green\")\n", + " # Draw the box number, with a white background, respecting font size.\n", + " draw.text(\n", + " (box.x1 + 4, box.y1),\n", + " str(index + 1),\n", + " fill=\"green\",\n", + " font=ImageFont.truetype(font_path, font_size),\n", + " stroke_fill=\"white\",\n", + " stroke_width=3,\n", + " )\n", + "\n", + " for box in self.extended_boxes:\n", + " draw.rectangle(box.as_tuple, outline=\"red\")\n", + " for box in self.merged_extended_boxes:\n", + " draw.rectangle(box.as_tuple, outline=\"purple\")\n", + " for box in self.reference_boxes:\n", + " draw.rectangle(box.as_tuple, outline=\"blue\")\n", + "\n", + " # Save the image.\n", + " extension = \"_boxes\"\n", + " out_path = image_path.with_stem(image_path.stem + extension)\n", + " if out_dir is not None:\n", + " out_dir.mkdir(parents=True, exist_ok=True)\n", + " out_path = out_dir / image_path.name\n", + " image.save(out_path)\n", + "\n", + " return image, out_path" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Simple crop" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "def crop_box(box: st.Box, image: Image.Image) -> Image.Image:\n", + " return image.crop(box.as_tuple)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Print size & resolution" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "PRINT_FORMATS = {\n", + " 'Golden Age': (7.75, 10.5), # (1930s-40s) \n", + " 'Siver Age': (7, 10.375), # (1950s-60s)\n", + " 'Modern Age': (6.625,10.25), # North American comic books\n", + " 'Magazine': (8.5, 11), \n", + " 'Digest': (5.5, 8.5), \n", + " 'Manga': (5.0, 7.5),\n", + "}\n", + "\n", + "\n", + "def 
size(w: int, h: int, unit: str = 'in', dpi: float = 300.) -> tuple:\n", + " \"\"\"\n", + " Calculate the print size of an image in inches or centimeters.\n", + "\n", + " Args:\n", + " w (int): Width of the image in pixels.\n", + " h (int): Height of the image in pixels.\n", + " unit (str): Unit of measurement ('in' for inches, 'cm' for centimeters).\n", + " dpi (float): Dots per inch (resolution).\n", + "\n", + " Returns:\n", + " tuple: Width and height of the image in the specified unit.\n", + " \"\"\"\n", + " if unit == 'cm':\n", + " return (w / dpi * 2.54, h / dpi * 2.54)\n", + " else: # default to inches\n", + " return (w / dpi, h / dpi)\n", + "\n", + "\n", + "def dpi(w: int, h: int, print_format: str = 'Modern Age') -> float:\n", + " \"\"\"\n", + " Calculate the dpi (dots per inch) needed to print an image at a specified format size.\n", + "\n", + " Args:\n", + " w (int): Width of the image in pixels.\n", + " h (int): Height of the image in pixels.\n", + " print_format (str): Print format as defined in the formats dictionary.\n", + "\n", + " Returns:\n", + " float: Required dpi to achieve the desired print format size.\n", + " \"\"\"\n", + " # Default to 'Modern Age' if format not found\n", + " format_size = PRINT_FORMATS.get(print_format, PRINT_FORMATS['Modern Age'])\n", + " width_inch, height_inch = format_size\n", + " dpi_w = w / width_inch\n", + " dpi_h = h / height_inch\n", + " return (dpi_w + dpi_h) / 2 # Average dpi for width and height\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Show images and texts on HTML tables" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "def get_image_html(image: Image.Image | Path | str, max_width: int | None = None):\n", + " \"\"\"\n", + " Converts a PIL image to an HTML image tag containing the image as a base64 blob.\n", + "\n", + " :param image: A PIL Image object.\n", + " :param max_size: A PIL 
Image object.\n", + " :return: A string containing an HTML tag with the image.\n", + " \"\"\"\n", + " style = f' style=\"max-width: {max_width}px;\"' if max_width is not None else ''\n", + " if isinstance(image, (Path, str)):\n", + " return f''\n", + " else:\n", + " buffered = BytesIO()\n", + " image.save(buffered, format='PNG')\n", + " img_str = base64.b64encode(buffered.getvalue()).decode()\n", + " return f''\n", + "\n", + "\n", + "def get_columns_html(\n", + " columns: list[list], max_image_width: int | None = None, headers: list[str] | None = None\n", + "):\n", + " if not all(len(col) == len(columns[0]) for col in columns):\n", + " raise ValueError(\"All columns must have the same length.\")\n", + "\n", + " # Calculate the maximum width of images in each column\n", + " max_widths = []\n", + " for col_index in range(len(columns)):\n", + " max_col_width = 0\n", + " for item in columns[col_index]:\n", + " if isinstance(item, (Image.Image, Path)):\n", + " if isinstance(item, (Path, str)):\n", + " item = Image.open(item)\n", + " width, _ = item.size\n", + " max_col_width = max(max_col_width, width)\n", + " if max_col_width > 0:\n", + " max_widths.append(\n", + " f\"{min(max_col_width, max_image_width)}px\"\n", + " if max_image_width is not None else \n", + " f\"{max_col_width}px\"\n", + " )\n", + " else:\n", + " max_widths.append('auto')\n", + "\n", + " html_str = \"\"\n", + "\n", + " # Apply calculated column widths using and elements\n", + " html_str += \"\"\n", + " for width in max_widths:\n", + " html_str += f\"\"\n", + " html_str += \"\"\n", + "\n", + " if headers:\n", + " if len(headers) != len(columns):\n", + " raise ValueError(\"Headers list must match the number of columns.\")\n", + " html_str += (\n", + " \"\"\n", + " + \"\".join(\n", + " f\"\"\n", + " for header in headers\n", + " )\n", + " + \"\"\n", + " )\n", + "\n", + " for row_items in zip(*columns):\n", + " html_str += \"\"\n", + " for i, item in enumerate(row_items):\n", + " if isinstance(item, 
(Image.Image, Path)):\n", + " img_html = get_image_html(item, max_width=max_image_width)\n", + " html_str += f\"\"\n", + " else: # Assume the item is a string\n", + " style = \"font-weight: bold;\" if i == 0 else \"\"\n", + " html_str += f\"\"\n", + " html_str += \"\"\n", + "\n", + " html_str += \"
{header}
{img_html}{item}
\"\n", + " return html_str\n", + "\n", + "\n", + "def display_columns(\n", + " columns: list[list], max_image_width: int | None = None, headers: list[str] | None = None\n", + "):\n", + " \"\"\"\n", + " Displays a table with any combination of columns, which can be lists of strings or lists \n", + " of PIL Image objects, within a Jupyter notebook cell.\n", + "\n", + " :param columns: A list of lists, where each sublist represents a column in the table. \n", + " Each sublist can contain either strings or PIL Image objects.\n", + " :param max_image_width: The maximum size of the images in pixels. This controls the max-height \n", + " of the images.\n", + " :param headers: A list of header labels for the table. If None, no headers are displayed.\n", + " \"\"\"\n", + " return display(HTML(get_columns_html(columns, max_image_width, headers)))\n", + "\n", + "\n", + "def get_image_grid_html(\n", + " images: list[Image.Image | Path | str],\n", + " rows: int,\n", + " columns: int,\n", + " titles: list[str] | None = None,\n", + " max_image_width: int | None = None,\n", + " caption: str | None = None\n", + "):\n", + " if titles and len(titles) != len(images):\n", + " raise ValueError(\"Titles list must match the number of images if provided.\")\n", + "\n", + " html_str = \"\"\n", + "\n", + " if caption:\n", + " html_str += (f\"\")\n", + "\n", + " image_index = 0\n", + " for row in range(rows):\n", + " html_str += \"\"\n", + " for col in range(columns):\n", + " if image_index < len(images):\n", + " img_html = get_image_html(images[image_index], max_width=max_image_width)\n", + " title_html = (\n", + " f\"
{titles[image_index]}
\"\n", + " if titles\n", + " else \"\"\n", + " )\n", + " html_str += f\"\"\n", + " else:\n", + " html_str += \"\" # Empty cell if no more images\n", + " image_index += 1\n", + " html_str += \"\"\n", + "\n", + " html_str += \"
{caption}
{title_html}{img_html}
\"\n", + " return html_str\n", + "\n", + "\n", + "def display_image_grid(\n", + " images: list[Image.Image | Path | str],\n", + " rows: int,\n", + " columns: int,\n", + " titles: list[str] | None = None,\n", + " max_image_width: int | None = None,\n", + " caption: str | None = None,\n", + "):\n", + " \"\"\"\n", + " Displays a grid of images in a HTML table within a Jupyter notebook cell.\n", + "\n", + " :param images: A list of PIL Image objects to be displayed.\n", + " :param rows: The number of rows in the grid.\n", + " :param columns: The number of columns in the grid.\n", + " :param titles: An optional list of titles for each image. If provided, it must match the length \n", + " of the images list.\n", + " :param max_image_width: The maximum width of the images in pixels.\n", + " \"\"\"\n", + " display(HTML(get_image_grid_html(images, rows, columns, titles, max_image_width, caption)))\n", + "\n", + "\n", + "def acc_as_html(acc):\n", + " return f\"
{acc:.2f}
\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## UUIDs" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "def strip_uuid(p: Path | str):\n", + " _p: Path = p if isinstance(p, Path) else Path(p)\n", + " new_stem = re.sub(r'(?i)[a-f0-9]{8}-([a-f0-9]{4}-){3}[a-f0-9]{12}', '', _p.stem).strip('_')\n", + " return _p.with_stem(new_stem)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Path('a/b/c/Strange Tales 172_boxes.png')" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "strip_uuid(Path(\"a/b/c/ac265dc1-51a0-46ca-9101-7195cbad33f2_Strange Tales 172_boxes.png\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Other" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "# Deep copy a defaultdict of defaultdicts to a dict of dicts if it is not already a dict\n", + "def defaultdict_to_dict(d) -> dict:\n", + " if not isinstance(d, defaultdict):\n", + " return d\n", + " return {k: defaultdict_to_dict(v) for k, v in d.items()}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "aaa\n" + ] + } + ], + "source": [ + "print('aaa')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Colophon\n", + "----\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "import fastcore.all as FC\n", + "from nbdev.export import nb_export\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "if FC.IN_NOTEBOOK:\n", + " nb_export('helpers.ipynb', '.')\n" + ] + } + ], + "metadata": { + 
"kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/_testbed/helpers.py b/_testbed/helpers.py new file mode 100644 index 00000000..a7095701 --- /dev/null +++ b/_testbed/helpers.py @@ -0,0 +1,372 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: helpers.ipynb. + +# %% helpers.ipynb 7 +from __future__ import annotations + +import base64 +import json +import re +import sys +import uuid +from importlib import resources +from io import BytesIO +from pathlib import Path +from typing import Any +from typing import Iterable +from typing import Mapping +from typing import Sequence + +import pcleaner.data +import pcleaner.structures as st +from IPython.display import clear_output +from IPython.display import display +from IPython.display import HTML +from PIL import Image +from PIL import ImageDraw +from PIL import ImageFont + + +# %% auto 0 +__all__ = ['PRINT_FORMATS', 'cleanupwidgets', 'RenderJSON', 'page_boxes', 'crop_box', 'size', 'dpi', 'get_image_html', + 'get_columns_html', 'display_columns', 'get_image_grid_html', 'display_image_grid', 'acc_as_html', + 'strip_uuid', '_pops_', '_pops_values_', '_gets_'] + +# %% helpers.ipynb 13 +_all_ = ['_pops_', '_pops_values_', '_gets_'] + + +# %% helpers.ipynb 14 +def _pops_(d: dict, ks: Iterable) -> dict: + "Pops `ks` keys from `d` and returns them in a dict. Note: `d` is changed in-place." + return {k:d.pop(k) for k in ks if k in d} + + +# %% helpers.ipynb 16 +def _pops_values_(d: dict, ks: Iterable) -> tuple: + "Pops `ks` keys from `d` and returns them as a tuple. Note: `d` is changed in-place." 
+ return tuple(d.pop(k, None) for k in ks) + + +# %% helpers.ipynb 18 +def _gets_(d: Mapping[str, Any], ks: Iterable): + "Fetches values from a mapping for a given list of keys, returning `None` for missing keys." + return (d.get(k, None) for k in ks) + + +# %% helpers.ipynb 21 +def _get_globals(mod: str): + if hasattr(sys, '_getframe'): + glb = sys._getframe(2).f_globals + else: + glb = sys.modules[mod].__dict__ + return glb + + +# %% helpers.ipynb 23 +def cleanupwidgets(*ws, mod: str|None=None, clear=True): + glb = _get_globals(mod or __name__) + if clear: clear_output(wait=True) + for w in ws: + _w = glb.get(w) if isinstance(w, str) else w + if _w: + try: _w.close() # type: ignore + except: pass + + +# %% helpers.ipynb 26 +class RenderJSON(object): + def __init__(self, json_data, max_height=200, init_level=0): + if isinstance(json_data, (Sequence, Mapping)): + s = json.dumps(json_data) + elif hasattr(json_data, 'to_dict'): + s = json.dumps(json_data.to_dict()) + elif hasattr(json_data, 'to_json'): + s = json_data.to_json() + else: + s = json_data + self.json_str = s + self.uuid = str(uuid.uuid4()) + self.max_height = max_height + self.init_level = init_level + + def display(self): + html_content = f""" +
+
+ +
+ """ + display(HTML(html_content)) + + def _ipython_display_(self): + self.display() + +# %% helpers.ipynb 29 +def page_boxes(self: st.PageData, out_dir: Path | None = None) -> tuple[Image.Image, Path]: + """ + Visualize the boxes on an image. + Typically, this would be used to check where on the original image the + boxes are located. + + :param image_path: The path to the image to visualize the boxes on. + """ + image_path = Path(self.image_path) + image = Image.open(image_path) + draw = ImageDraw.Draw(image) + data_path = resources.files(pcleaner.data) + font_path = str(data_path / "LiberationSans-Regular.ttf") + # Figure out the optimal font size based on the image size. E.g. 30 for a 1600px image. + font_size = int(image.size[0] / 50) + 5 + + for index, box in enumerate(self.boxes): + draw.rectangle(box.as_tuple, outline="green") + # Draw the box number, with a white background, respecting font size. + draw.text( + (box.x1 + 4, box.y1), + str(index + 1), + fill="green", + font=ImageFont.truetype(font_path, font_size), + stroke_fill="white", + stroke_width=3, + ) + + for box in self.extended_boxes: + draw.rectangle(box.as_tuple, outline="red") + for box in self.merged_extended_boxes: + draw.rectangle(box.as_tuple, outline="purple") + for box in self.reference_boxes: + draw.rectangle(box.as_tuple, outline="blue") + + # Save the image. 
+ extension = "_boxes" + out_path = image_path.with_stem(image_path.stem + extension) + if out_dir is not None: + out_dir.mkdir(parents=True, exist_ok=True) + out_path = out_dir / image_path.name + image.save(out_path) + + return image, out_path + +# %% helpers.ipynb 31 +def crop_box(box: st.Box, image: Image.Image) -> Image.Image: + return image.crop(box.as_tuple) + +# %% helpers.ipynb 33 +PRINT_FORMATS = { + 'Golden Age': (7.75, 10.5), # (1930s-40s) + 'Siver Age': (7, 10.375), # (1950s-60s) + 'Modern Age': (6.625,10.25), # North American comic books + 'Magazine': (8.5, 11), + 'Digest': (5.5, 8.5), + 'Manga': (5.0, 7.5), +} + + +def size(w: int, h: int, unit: str = 'in', dpi: float = 300.) -> tuple: + """ + Calculate the print size of an image in inches or centimeters. + + Args: + w (int): Width of the image in pixels. + h (int): Height of the image in pixels. + unit (str): Unit of measurement ('in' for inches, 'cm' for centimeters). + dpi (float): Dots per inch (resolution). + + Returns: + tuple: Width and height of the image in the specified unit. + """ + if unit == 'cm': + return (w / dpi * 2.54, h / dpi * 2.54) + else: # default to inches + return (w / dpi, h / dpi) + + +def dpi(w: int, h: int, print_format: str = 'Modern Age') -> float: + """ + Calculate the dpi (dots per inch) needed to print an image at a specified format size. + + Args: + w (int): Width of the image in pixels. + h (int): Height of the image in pixels. + print_format (str): Print format as defined in the formats dictionary. + + Returns: + float: Required dpi to achieve the desired print format size. 
+ """ + # Default to 'Modern Age' if format not found + format_size = PRINT_FORMATS.get(print_format, PRINT_FORMATS['Modern Age']) + width_inch, height_inch = format_size + dpi_w = w / width_inch + dpi_h = h / height_inch + return (dpi_w + dpi_h) / 2 # Average dpi for width and height + + +# %% helpers.ipynb 35 +def get_image_html(image: Image.Image | Path | str, max_width: int | None = None): + """ + Converts a PIL image to an HTML image tag containing the image as a base64 blob. + + :param image: A PIL Image object. + :param max_size: A PIL Image object. + :return: A string containing an HTML tag with the image. + """ + style = f' style="max-width: {max_width}px;"' if max_width is not None else '' + if isinstance(image, (Path, str)): + return f'' + else: + buffered = BytesIO() + image.save(buffered, format='PNG') + img_str = base64.b64encode(buffered.getvalue()).decode() + return f'' + + +def get_columns_html( + columns: list[list], max_image_width: int | None = None, headers: list[str] | None = None +): + if not all(len(col) == len(columns[0]) for col in columns): + raise ValueError("All columns must have the same length.") + + # Calculate the maximum width of images in each column + max_widths = [] + for col_index in range(len(columns)): + max_col_width = 0 + for item in columns[col_index]: + if isinstance(item, (Image.Image, Path)): + if isinstance(item, (Path, str)): + item = Image.open(item) + width, _ = item.size + max_col_width = max(max_col_width, width) + if max_col_width > 0: + max_widths.append( + f"{min(max_col_width, max_image_width)}px" + if max_image_width is not None else + f"{max_col_width}px" + ) + else: + max_widths.append('auto') + + html_str = "" + + # Apply calculated column widths using and elements + html_str += "" + for width in max_widths: + html_str += f"" + html_str += "" + + if headers: + if len(headers) != len(columns): + raise ValueError("Headers list must match the number of columns.") + html_str += ( + "" + + "".join( + f"" + for 
header in headers + ) + + "" + ) + + for row_items in zip(*columns): + html_str += "" + for i, item in enumerate(row_items): + if isinstance(item, (Image.Image, Path)): + img_html = get_image_html(item, max_width=max_image_width) + html_str += f"" + else: # Assume the item is a string + style = "font-weight: bold;" if i == 0 else "" + html_str += f"" + html_str += "" + + html_str += "
{header}
{img_html}{item}
" + return html_str + + +def display_columns( + columns: list[list], max_image_width: int | None = None, headers: list[str] | None = None +): + """ + Displays a table with any combination of columns, which can be lists of strings or lists + of PIL Image objects, within a Jupyter notebook cell. + + :param columns: A list of lists, where each sublist represents a column in the table. + Each sublist can contain either strings or PIL Image objects. + :param max_image_width: The maximum size of the images in pixels. This controls the max-height + of the images. + :param headers: A list of header labels for the table. If None, no headers are displayed. + """ + return display(HTML(get_columns_html(columns, max_image_width, headers))) + + +def get_image_grid_html( + images: list[Image.Image | Path | str], + rows: int, + columns: int, + titles: list[str] | None = None, + max_image_width: int | None = None, + caption: str | None = None +): + if titles and len(titles) != len(images): + raise ValueError("Titles list must match the number of images if provided.") + + html_str = "" + + if caption: + html_str += (f"") + + image_index = 0 + for row in range(rows): + html_str += "" + for col in range(columns): + if image_index < len(images): + img_html = get_image_html(images[image_index], max_width=max_image_width) + title_html = ( + f"
{titles[image_index]}
" + if titles + else "" + ) + html_str += f"" + else: + html_str += "" # Empty cell if no more images + image_index += 1 + html_str += "" + + html_str += "
{caption}
{title_html}{img_html}
" + return html_str + + +def display_image_grid( + images: list[Image.Image | Path | str], + rows: int, + columns: int, + titles: list[str] | None = None, + max_image_width: int | None = None, + caption: str | None = None, +): + """ + Displays a grid of images in a HTML table within a Jupyter notebook cell. + + :param images: A list of PIL Image objects to be displayed. + :param rows: The number of rows in the grid. + :param columns: The number of columns in the grid. + :param titles: An optional list of titles for each image. If provided, it must match the length + of the images list. + :param max_image_width: The maximum width of the images in pixels. + """ + display(HTML(get_image_grid_html(images, rows, columns, titles, max_image_width, caption))) + + +def acc_as_html(acc): + return f"
{acc:.2f}
" + + +# %% helpers.ipynb 37 +def strip_uuid(p: Path | str): + _p: Path = p if isinstance(p, Path) else Path(p) + new_stem = re.sub(r'(?i)[a-f0-9]{8}-([a-f0-9]{4}-){3}[a-f0-9]{12}', '', _p.stem).strip('_') + return _p.with_stem(new_stem) + diff --git a/_testbed/ocr_idefics.py b/_testbed/ocr_idefics.py new file mode 100644 index 00000000..67a50c42 --- /dev/null +++ b/_testbed/ocr_idefics.py @@ -0,0 +1,184 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: test_idefics.ipynb. + +# %% test_idefics.ipynb 12 +from __future__ import annotations + +import functools +from pathlib import Path + +import pcleaner.ocr.ocr as ocr +import torch +import transformers +from pcleaner.ocr.ocr_tesseract import TesseractOcr +from PIL import Image +from rich.console import Console +from transformers import AutoProcessor +from transformers import Idefics2ForConditionalGeneration +from transformers import PreTrainedModel + + +# %% auto 0 +__all__ = ['IdeficsOCR', 'IdeficsExperimentContext'] + +# %% test_idefics.ipynb 17 +console = Console(width=104, tab_size=4, force_jupyter=True) +cprint = console.print + + +# %% test_idefics.ipynb 20 +from experiments import * +from helpers import * +from ocr_metric import * + + +# %% test_idefics.ipynb 21 +def load_image(img_or_path) -> Image.Image: + if isinstance(img_or_path, (str, Path)): + return Image.open(img_or_path) + elif isinstance(img_or_path, Image.Image): + return img_or_path + else: + raise ValueError(f"img_or_path must be a path or PIL.Image, got: {type(img_or_path)}") + + +# %% test_idefics.ipynb 36 +processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b") + +# %% test_idefics.ipynb 37 +device = "mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu" + +model = Idefics2ForConditionalGeneration.from_pretrained( + "HuggingFaceM4/idefics2-8b", + torch_dtype=torch.bfloat16, + _attn_implementation="flash_attention_2", + ).to(device) # type: ignore + + +# %% test_idefics.ipynb 39 
+prompt_text_tmpl = ( + "Please perform optical character recognition (OCR) on this image, which displays " + "speech balloons from a comic book. The text is in {}. Extract the text and " + "format it as follows: transcribe in standard sentence case, avoid using all capital " + "letters. Provide the transcribed text clearly and double check the sentence is not all capital letters.") + +# prompt_text_tmpl = ("Please perform optical character recognition (OCR) on this image, which displays " +# f"speech balloons from a manga comic. The text is in {}. Extract the text and " +# "format it without newlines. Provide the transcribed text clearly.") + +# prompt_text_tmpl = ("Please perform optical character recognition (OCR) on this image, which displays " +# "speech balloons from a comic book. The text is in {}. Extract the text and " +# "format it as follows: transcribe in standard sentence case (avoid using all capital " +# "letters) and use asterisks to denote any words that appear in bold within the image. " +# "Provide the transcribed text clearly.") + +# prompt_text_tmpl = ("Please perform optical character recognition (OCR) on this image, which displays " +# "speech balloons from a comic book. The text is in {}. Extract the text and " +# "format it as follows: transcribe in standard sentence case, capitalized. Avoid using " +# "all capital letters. In comics, it is common to use two hyphens '--' to interrupt a sentence. " +# "Retain any hyphens as they appear in the original text. Provide the transcribed text " +# "clearly, ensuring it is capitalized where appropriate, including proper nouns.") + +prompt_text_tmpl = ( + "Please perform optical character recognition (OCR) on this image, which displays " + "speech balloons from a comic book. The text is in {}. Extract the text and " + "format it as follows: transcribe in standard sentence case, capitalized. Avoid using " + "all capital letters, but ensure it is capitalized where appropriate, including proper nouns. 
" + "Provide the transcribed text clearly. Double check the text is not all capital letters.") + +default_prompt_text_tmpl = prompt_text_tmpl + +# %% test_idefics.ipynb 41 +class IdeficsOCR: + prompt_text_tmpl: str = default_prompt_text_tmpl + + def __init__(self, + lang: str | None = None, + prompt_text_tmpl: str|None = None, + device: str | None = None + ): + self.lang = lang + self.prompt_text_tmpl = prompt_text_tmpl or self.prompt_text_tmpl + self.device = (device or + "mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu") + + @staticmethod + def is_idefics_available() -> bool: + return True + + def _generation_args(self, image: Image.Image, resulting_messages: list[dict]): + prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True) + inputs = processor(text=prompt, images=[image], return_tensors="pt") + inputs = {k: v.to(self.device) for k, v in inputs.items()} + + max_new_tokens = 512 + repetition_penalty = 1.2 + decoding_strategy = "Greedy" + temperature = 0.4 + top_p = 0.8 + + generation_args = { + "max_new_tokens": max_new_tokens, + "repetition_penalty": repetition_penalty, + } + + assert decoding_strategy in [ + "Greedy", + "Top P Sampling", + ] + + if decoding_strategy == "Greedy": + generation_args["do_sample"] = False + elif decoding_strategy == "Top P Sampling": + generation_args["temperature"] = temperature + generation_args["do_sample"] = True + generation_args["top_p"] = top_p + + generation_args.update(inputs) + return prompt, generation_args + + def __call__( + self, + img_or_path: Image.Image | Path | str, + prompt_text: str | None = None, + lang: str | None = None, + config: str | None = None, + show_prompt: bool = False, + **kwargs, + ) -> str: + if not self.is_idefics_available(): + raise RuntimeError("Idefics is not installed or not found.") + resulting_messages = [ + { + "role": "user", + "content": [{"type": "image"}] + [ + {"type": "text", "text": prompt_text or 
self.prompt_text_tmpl.format(lang or self.lang)} + ] + } + ] + image = load_image(img_or_path) + prompt, generation_args = self._generation_args(image, resulting_messages) + generated_ids = model.generate(**generation_args) + generated_texts = processor.batch_decode( + generated_ids[:, generation_args["input_ids"].size(1):], skip_special_tokens=True) + if show_prompt: + cprint("INPUT:", prompt, "|OUTPUT:", generated_texts) + return generated_texts[0]#.strip('"') + + def postprocess_ocr(self, text): + return ' '.join(remove_multiple_whitespaces(text).splitlines()) + + +# %% test_idefics.ipynb 43 +class IdeficsExperimentContext(OCRExperimentContext): + @functools.lru_cache() + def mocr(self, ocr_model: str, lang: str): + if ocr_model == 'Idefics': + proc = IdeficsOCR(lang) + else: + engine = self.engines[ocr_model] + ocr_processor = ocr.get_ocr_processor(True, engine) + proc = ocr_processor[lang2pcleaner(lang)] + if isinstance(proc, TesseractOcr): + proc.lang = lang2tesseract(lang) + return proc + diff --git a/_testbed/ocr_metric.ipynb b/_testbed/ocr_metric.ipynb new file mode 100644 index 00000000..41f2fad4 --- /dev/null +++ b/_testbed/ocr_metric.ipynb @@ -0,0 +1,276 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp ocr_metric" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# %reload_ext autoreload\n", + "# %autoreload 0\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# install (Colab)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# try: \n", + "# import fastcore as FC\n", + "# except ImportError: \n", + "# !pip install -q fastcore\n", + "# try:\n", + "# import rich\n", + "# except ImportError:\n", + "# !pip install -q rich\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 
Developing a metric for OCR of Comics/Manga texts\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prologue" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "from __future__ import annotations\n", + "\n", + "import difflib\n", + "import html\n", + "\n", + "from IPython.display import display\n", + "from IPython.display import HTML\n", + "from rich.console import Console\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import fastcore.all as FC\n", + "import fastcore.xtras # patch Path with some utils\n", + "import rich\n", + "from fastcore.test import * # type: ignore\n", + "from loguru import logger\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Helpers" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# pretty print by default\n", + "# %load_ext rich" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "console = Console(width=104, tab_size=4, force_jupyter=True)\n", + "cprint = console.print\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## OCR metric\n", + "> Some basic ways to compare OCR results" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "def get_text_diffs_html(str1, str2, ignore_align: bool = False):\n", + " matcher = difflib.SequenceMatcher(None, str1, str2)\n", + " html_str1, html_str2 = \"\", \"\"\n", + " _ch ='⎕' # ▿\n", + " ch = f'&#x{ord(_ch):x};'\n", + " span1_g = lambda l: f\"{ch*l}\" if l > 0 else \"\"\n", + " span1_r = lambda l: f\"{ch*l}\" if l > 0 else \"\"\n", + " span2 = lambda s: f\"{html.escape(s)}\" if s else \"\"\n", + "\n", + " for opcode in matcher.get_opcodes():\n", + " 
tag, i1, i2, j1, j2 = opcode\n", + " if tag == \"equal\":\n", + " html_str1 += html.escape(str1[i1:i2])\n", + " html_str2 += html.escape(str2[j1:j2])\n", + " elif tag == \"replace\":\n", + " max_span = max(i2 - i1, j2 - j1)\n", + " # str1_segment = str1[i1:i2].ljust(max_span)\n", + " html_str1 += html.escape(str1[i1:i2]) + span1_g(max_span - (i2 - i1))\n", + " html_str2 += span2(str2[j1:j2]) + (span1_r(max_span - (j2 - j1)) if not ignore_align else '')\n", + " elif tag == \"delete\":\n", + " deleted_segment = str1[i1:i2]\n", + " html_str1 += html.escape(deleted_segment)\n", + " if not ignore_align: html_str2 += span1_r(len(deleted_segment))\n", + " elif tag == \"insert\":\n", + " inserted_segment = str2[j1:j2].replace(\" \", _ch)\n", + " html_str1 += span1_g(len(inserted_segment))\n", + " html_str2 += span2(inserted_segment)\n", + " html_str1 = f\"
{html_str1}
\"\n", + " html_str2 = f\"
{html_str2}
\"\n", + " return html_str1, html_str2\n", + "\n", + "def display_text_diffs(str1, str2):\n", + " \"\"\"\n", + " Displays two strings one above the other, with differing characters highlighted in red in the \n", + " second string only, using difflib.SequenceMatcher to align the strings and ensure matching \n", + " sequences are vertically aligned.\n", + "\n", + " :param str1: The first string to compare.\n", + " :param str2: The second string to compare.\n", + " \"\"\"\n", + " html_str1, html_str2 = get_text_diffs_html(str1, str2)\n", + " display(HTML(f\"
{html_str1}
{html_str2}
\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
This is an awesome_⎕⎕⎕ test string.This is an awesome_⎕⎕⎕ test string.This is an awesome_⎕⎕⎕ test string.This is an awesome_⎕⎕⎕ test string.

This was an a⎕⎕⎕⎕mazing test spring.This was an a⎕⎕⎕⎕mazing test spring.This was an a⎕⎕⎕⎕mazing test spring.This was an a⎕⎕⎕⎕mazing test spring.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "str1 = \"This is an awesome_ test string.\"*4\n", + "str2 = \"This was an amazing test▿ spring.\"*4\n", + "display_text_diffs(str1, str2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
I was in a bad mood, and Curt sensed it immediately...

I was in a bad mood, and Curt sensed it immediately...
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "str1 = \"I was in a bad mood, and Curt sensed it immediately...\"\n", + "str2 = \"I was in a bad mood, and Curt sensed it immediately ...\"\n", + "display_text_diffs(str1, str2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Colophon\n", + "----\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import fastcore.all as FC\n", + "from nbdev.export import nb_export\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "if FC.IN_NOTEBOOK:\n", + " nb_export('ocr_metric.ipynb', '.')\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/_testbed/ocr_metric.py b/_testbed/ocr_metric.py new file mode 100644 index 00000000..f81b9a5e --- /dev/null +++ b/_testbed/ocr_metric.py @@ -0,0 +1,64 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ocr_metric.ipynb. 
+ +# %% ocr_metric.ipynb 6 +from __future__ import annotations + +import difflib +import html + +from IPython.display import display +from IPython.display import HTML +from rich.console import Console + + +# %% auto 0 +__all__ = ['get_text_diffs_html', 'display_text_diffs'] + +# %% ocr_metric.ipynb 10 +console = Console(width=104, tab_size=4, force_jupyter=True) +cprint = console.print + + +# %% ocr_metric.ipynb 12 +def get_text_diffs_html(str1, str2, ignore_align: bool = False): + matcher = difflib.SequenceMatcher(None, str1, str2) + html_str1, html_str2 = "", "" + _ch ='⎕' # ▿ + ch = f'&#x{ord(_ch):x};' + span1_g = lambda l: f"{ch*l}" if l > 0 else "" + span1_r = lambda l: f"{ch*l}" if l > 0 else "" + span2 = lambda s: f"{html.escape(s)}" if s else "" + + for opcode in matcher.get_opcodes(): + tag, i1, i2, j1, j2 = opcode + if tag == "equal": + html_str1 += html.escape(str1[i1:i2]) + html_str2 += html.escape(str2[j1:j2]) + elif tag == "replace": + max_span = max(i2 - i1, j2 - j1) + # str1_segment = str1[i1:i2].ljust(max_span) + html_str1 += html.escape(str1[i1:i2]) + span1_g(max_span - (i2 - i1)) + html_str2 += span2(str2[j1:j2]) + (span1_r(max_span - (j2 - j1)) if not ignore_align else '') + elif tag == "delete": + deleted_segment = str1[i1:i2] + html_str1 += html.escape(deleted_segment) + if not ignore_align: html_str2 += span1_r(len(deleted_segment)) + elif tag == "insert": + inserted_segment = str2[j1:j2].replace(" ", _ch) + html_str1 += span1_g(len(inserted_segment)) + html_str2 += span2(inserted_segment) + html_str1 = f"
{html_str1}
" + html_str2 = f"
{html_str2}
" + return html_str1, html_str2 + +def display_text_diffs(str1, str2): + """ + Displays two strings one above the other, with differing characters highlighted in red in the + second string only, using difflib.SequenceMatcher to align the strings and ensure matching + sequences are vertically aligned. + + :param str1: The first string to compare. + :param str2: The second string to compare. + """ + html_str1, html_str2 = get_text_diffs_html(str1, str2) + display(HTML(f"
{html_str1}
{html_str2}
")) diff --git a/_testbed/requirements.txt b/_testbed/requirements.txt new file mode 100644 index 00000000..4e3d1c68 --- /dev/null +++ b/_testbed/requirements.txt @@ -0,0 +1,5 @@ +matplotlib +rich +fastcore +nbdev +ipywidgets diff --git a/_testbed/test_idefics.ipynb b/_testbed/test_idefics.ipynb new file mode 100644 index 00000000..a8978d12 --- /dev/null +++ b/_testbed/test_idefics.ipynb @@ -0,0 +1,1497 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp ocr_idefics" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# %reload_ext autoreload\n", + "# %autoreload 0\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# install (Colab)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# try: \n", + "# import fastcore as FC\n", + "# except ImportError: \n", + "# !pip install -q fastcore\n", + "# try:\n", + "# import rich\n", + "# except ImportError:\n", + "# !pip install -q rich\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install -q git+https://github.com/civvic/PanelCleaner.git@basic-tesseract" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "need version >4.40 of transformers" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# %pip install git+https://github.com/huggingface/transformers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fash attention doesn't support Metal [#412](https://github.com/Dao-AILab/flash-attention/issues/412) (but see [metal-flash-attention](https://github.com/philipturner/metal-flash-attention))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + 
"source": [ + "# %env FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE\n", + "# %pip install flash-attn --no-build-isolation" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "notebookRunGroups": { + "groupValue": "" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fri May 10 18:32:10 2024 \n", + "+---------------------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 535.161.08 Driver Version: 535.161.08 CUDA Version: 12.2 |\n", + "|-----------------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. |\n", + "|=========================================+======================+======================|\n", + "| 0 NVIDIA GeForce RTX 3090 Ti On | 00000000:65:00.0 Off | Off |\n", + "| 0% 50C P8 33W / 480W | 1MiB / 24564MiB | 0% Default |\n", + "| | | N/A |\n", + "+-----------------------------------------+----------------------+----------------------+\n", + " \n", + "+---------------------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=======================================================================================|\n", + "| No running processes found |\n", + "+---------------------------------------------------------------------------------------+\n" + ] + } + ], + "source": [ + "!nvidia-smi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing `Idefics` OCR for Comics\n", + "> Accuracy Enhancements for OCR in `PanelCleaner`\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prologue" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#| 
export\n", + "from __future__ import annotations\n", + "\n", + "import functools\n", + "from pathlib import Path\n", + "\n", + "import pcleaner.ocr.ocr as ocr\n", + "import torch\n", + "import transformers\n", + "from pcleaner.ocr.ocr_tesseract import TesseractOcr\n", + "from PIL import Image\n", + "from rich.console import Console\n", + "from transformers import AutoProcessor\n", + "from transformers import Idefics2ForConditionalGeneration\n", + "from transformers import PreTrainedModel\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "from typing import cast\n", + "\n", + "import fastcore.xtras # patch Path with some utils\n", + "import pcleaner.config as cfg\n", + "from fastcore.test import * # type: ignore\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "notebookRunGroups": { + "groupValue": "" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'4.41.0.dev0'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transformers.__version__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Helpers" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "notebookRunGroups": { + "groupValue": "" + } + }, + "outputs": [], + "source": [ + "# pretty print by default\n", + "# %load_ext rich" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "notebookRunGroups": { + "groupValue": "" + } + }, + "outputs": [], + "source": [ + "#| exporti\n", + "\n", + "console = Console(width=104, tab_size=4, force_jupyter=True)\n", + "cprint = console.print\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Force reload of `experiments` module" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "if 'experiments' in sys.modules:\n", + " import 
importlib; importlib.reload(experiments) # type: ignore\n", + "else:\n", + " import experiments\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from experiments import *\n", + "from helpers import *\n", + "from ocr_metric import *\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "\n", + "def load_image(img_or_path) -> Image.Image:\n", + " if isinstance(img_or_path, (str, Path)):\n", + " return Image.open(img_or_path)\n", + " elif isinstance(img_or_path, Image.Image):\n", + " return img_or_path\n", + " else:\n", + " raise ValueError(f\"img_or_path must be a path or PIL.Image, got: {type(img_or_path)}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "# Idefics basic usage\n", + "\n", + "not working, cuda memory error" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# # Note that passing the image urls (instead of the actual pil images) to the processor is also possible\n", + "# # image1 = load_image(\"https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg\")\n", + "# # image2 = load_image(\"https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg\")\n", + "# # image3 = load_image(\"https://cdn.britannica.com/68/170868-050-8DDE8263/Golden-Gate-Bridge-San-Francisco.jpg\")\n", + "\n", + "# image1 = Image.open(\"media/Statue-of-Liberty-Island-New-York-Bay.webp\")\n", + "# image2 = Image.open(\"media/Skyline-Chicago.webp\")\n", + "# image3 = Image.open(\"media/Golden-Gate-Bridge-San-Francisco.webp\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# processor = AutoProcessor.from_pretrained(\"HuggingFaceM4/idefics2-8b\")" + ] + }, + { + "cell_type": "code", + "execution_count": 
18, + "metadata": {}, + "outputs": [], + "source": [ + "# model = Idefics2ForConditionalGeneration.from_pretrained(\n", + "# \"HuggingFaceM4/idefics2-8b\",\n", + "# torch_dtype=torch.bfloat16,\n", + "# #_attn_implementation=\"flash_attention_2\",\n", + "# )\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "# assert isinstance(model, PreTrainedModel)\n", + "# model.to(DEVICE)\n", + "# type(model), model.device\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create inputs" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "# messages = [\n", + "# {\n", + "# \"role\": \"user\",\n", + "# \"content\": [\n", + "# {\"type\": \"image\"},\n", + "# {\"type\": \"text\", \"text\": \"What do we see in this image?\"},\n", + "# ]\n", + "# },\n", + "# {\n", + "# \"role\": \"assistant\",\n", + "# \"content\": [\n", + "# {\"type\": \"text\", \"text\": \"In this image, we can see the city of New York, and more specifically the Statue of Liberty.\"},\n", + "# ]\n", + "# },\n", + "# {\n", + "# \"role\": \"user\",\n", + "# \"content\": [\n", + "# {\"type\": \"image\"},\n", + "# {\"type\": \"text\", \"text\": \"And how about this image?\"},\n", + "# ]\n", + "# }, \n", + "# ]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# prompt = processor.apply_chat_template(messages, add_generation_prompt=True)\n", + "# inputs = processor(text=prompt, images=[image1, image2], return_tensors=\"pt\")\n", + "# inputs = {k: v.to(DEVICE) for k, v in inputs.items()}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Generate" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# generated_ids = model.generate(**inputs, max_new_tokens=500)\n", + "# generated_texts = 
processor.batch_decode(generated_ids, skip_special_tokens=True)\n", + "\n", + "# print(generated_texts)\n", + "# # ['User: What do we see in this image? \\nAssistant: In this image, we can see the city of New York, and more specifically the Statue of Liberty. \\nUser: And how about this image? \\nAssistant: In this image we can see buildings, trees, lights, water and sky.']\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# [\n", + "# 'User: What do we see in this image? '\n", + "# 'Assistant: In this image, we can see the city of New York, and more specifically the Statue of Liberty. '\n", + "# 'User: And how about this image? '\n", + "# 'Assistant: In this image we can see buildings, trees, lights, water and sky.'\n", + "# ]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "# Idefics experiments\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Idefics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Idefics initialization" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" + ] + } + ], + "source": [ + "#| exporti\n", + "\n", + "processor = AutoProcessor.from_pretrained(\"HuggingFaceM4/idefics2-8b\")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. 
Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5a21b61268674a159f210694841c2149", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/7 [00:00 bool:\n", + " return True\n", + "\n", + " def _generation_args(self, image: Image.Image, resulting_messages: list[dict]):\n", + " prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)\n", + " inputs = processor(text=prompt, images=[image], return_tensors=\"pt\")\n", + " inputs = {k: v.to(self.device) for k, v in inputs.items()}\n", + " \n", + " max_new_tokens = 512\n", + " repetition_penalty = 1.2\n", + " decoding_strategy = \"Greedy\"\n", + " temperature = 0.4\n", + " top_p = 0.8\n", + "\n", + " generation_args = {\n", + " \"max_new_tokens\": max_new_tokens,\n", + " \"repetition_penalty\": repetition_penalty,\n", + " }\n", + "\n", + " assert decoding_strategy in [\n", + " \"Greedy\",\n", + " \"Top P Sampling\",\n", + " ]\n", + "\n", + " if decoding_strategy == \"Greedy\":\n", + " generation_args[\"do_sample\"] = False\n", + " elif decoding_strategy == \"Top P Sampling\":\n", + " generation_args[\"temperature\"] = temperature\n", + " generation_args[\"do_sample\"] = True\n", + " generation_args[\"top_p\"] = top_p\n", + "\n", + " generation_args.update(inputs)\n", + " return prompt, generation_args\n", + "\n", + " def __call__(\n", + " self,\n", + " img_or_path: Image.Image | Path | str,\n", + " prompt_text: str | None = None,\n", + " lang: str | None = None,\n", + " config: str | None = None,\n", + " show_prompt: bool = False,\n", + " **kwargs,\n", + " ) -> str:\n", + " if not self.is_idefics_available():\n", + " raise RuntimeError(\"Idefics is not installed or not found.\")\n", + " resulting_messages = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [{\"type\": \"image\"}] + [\n", + " {\"type\": 
\"text\", \"text\": prompt_text or self.prompt_text_tmpl.format(lang or self.lang)}\n", + " ]\n", + " }\n", + " ]\n", + " image = load_image(img_or_path)\n", + " prompt, generation_args = self._generation_args(image, resulting_messages)\n", + " generated_ids = model.generate(**generation_args)\n", + " generated_texts = processor.batch_decode(\n", + " generated_ids[:, generation_args[\"input_ids\"].size(1):], skip_special_tokens=True)\n", + " if show_prompt:\n", + " cprint(\"INPUT:\", prompt, \"|OUTPUT:\", generated_texts)\n", + " return generated_texts[0]#.strip('\"')\n", + "\n", + " def postprocess_ocr(self, text):\n", + " return ' '.join(remove_multiple_whitespaces(text).splitlines())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## IdeficsExperimentContext" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class IdeficsExperimentContext(OCRExperimentContext):\n", + " @functools.lru_cache()\n", + " def mocr(self, ocr_model: str, lang: str):\n", + " if ocr_model == 'Idefics':\n", + " proc = IdeficsOCR(lang)\n", + " else:\n", + " engine = self.engines[ocr_model]\n", + " ocr_processor = ocr.get_ocr_processor(True, engine)\n", + " proc = ocr_processor[lang2pcleaner(lang)]\n", + " if isinstance(proc, TesseractOcr):\n", + " proc.lang = lang2tesseract(lang)\n", + " return proc\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PanelCleaner Configuration\n", + "> Adapt `PanelCleaner` `Config` current config to this notebook.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "config = cfg.load_config()\n", + "config.cache_dir = Path(\".\")\n", + "\n", + "cache_dir = config.get_cleaner_cache_dir()\n", + "\n", + "profile = config.current_profile\n", + "preprocessor_conf = profile.preprocessor\n", + "# Modify the profile to OCR all boxes.\n", + "# Make sure OCR is 
enabled.\n", + "preprocessor_conf.ocr_enabled = True\n", + "# Make sure the max size is infinite, so no boxes are skipped in the OCR process.\n", + "preprocessor_conf.ocr_max_size = 10**10\n", + "# Make sure the sus box min size is infinite, so all boxes with \"unknown\" language are skipped.\n", + "preprocessor_conf.suspicious_box_min_size = 10**10\n", + "# Set the OCR blacklist pattern to match everything, so all text gets reported in the analytics.\n", + "preprocessor_conf.ocr_blacklist_pattern = \".*\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test images\n", + "> `IMAGE_PATHS` is a list of image file paths that are used as input for testing the OCR methods." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['00: Action_Comics_1960-01-00_(262).JPG',\n", + " '01: Adolf_Cap_01_008.jpg',\n", + " '02: Barnaby_v1-028.png',\n", + " '03: Barnaby_v1-029.png',\n", + " '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n", + " '05: Cannon-292.jpg',\n", + " '06: Contrato_con_Dios_028.jpg',\n", + " '07: Erase_una_vez_en_Francia_02_88.jpg',\n", + " '08: FOX_CHILLINTALES_T17_012.jpg',\n", + " '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n", + " '10: Galactus_12.jpg',\n", + " '11: INOUE_KYOUMEN_002.png',\n", + " '12: MCCALL_ROBINHOOD_T31_010.jpg',\n", + " '13: MCCAY_LITTLENEMO_090.jpg',\n", + " '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n", + " '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n", + " '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n", + " '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n", + " '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n", + " '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n", + " '20: Strange_Tales_172005.jpg',\n", + " '21: Strange_Tales_172021.jpg',\n", + " '22: Tarzan_014-21.JPG',\n", + " '23: 
Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n", + " '24: Transformers_-_Unicron_000-004.jpg',\n", + " '25: Transformers_-_Unicron_000-016.jpg',\n", + " '26: WARE_ACME_024.jpg',\n", + " '27: Yoko_Tsuno_T01_1972-10.jpg',\n", + " '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n", + " '29: manga_0033.jpg',\n", + " '30: ronson-031.jpg',\n", + " '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "media_path = Path(\"media/\")\n", + "\n", + "IMAGE_PATHS = sorted(\n", + " [_ for _ in media_path.glob(\"*\") if _.is_file() and _.suffix.lower() in [\".jpg\", \".png\", \".jpeg\"]])\n", + "\n", + "[f\"{i:02}: {_.name}\" for i,_ in enumerate(IMAGE_PATHS)]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CONTEXT\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current Configuration:\n", + "\n", + "Locale: System default\n", + "Default Profile: Built-in\n", + "Saved Profiles:\n", + "- victess: /home/vic/dev/repo/DL-mac/cleaned/victess.conf\n", + "- victmang: /home/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n", + "\n", + "Profile Editor: System default\n", + "Cache Directory: .\n", + "Default Torch Model Path: /home/vic/.cache/pcleaner/model/comictextdetector.pt\n", + "Default CV2 Model Path: /home/vic/.cache/pcleaner/model/comictextdetector.pt.onnx\n", + "GUI Theme: System default\n", + "\n", + "--------------------\n", + "\n", + "Config file located at: /home/vic/.config/pcleaner/pcleanerrc\n", + "System default cache directory: /home/vic/.cache/pcleaner\n" + ] + }, + { + "data": { + "text/html": [ + "
      cache_dir: Path('cleaner')\n",
+       "     model_path: Path('/home/vic/.cache/pcleaner/model/comictextdetector.pt')\n",
+       "         device: 'cuda'\n",
+       "
\n" + ], + "text/plain": [ + " cache_dir: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'cleaner'\u001b[0m\u001b[1m)\u001b[0m\n", + " model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/vic/.cache/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n", + " device: \u001b[32m'cuda'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "CONTEXT = IdeficsExperimentContext(None, IMAGE_PATHS)\n", + "\n", + "gpu = torch.cuda.is_available() or torch.backends.mps.is_available()\n", + "model_path = CONTEXT.config.get_model_path(gpu)\n", + "DEVICE = (\"mps\" if torch.backends.mps.is_available() else \"cuda\") if model_path.suffix == \".pt\" else \"cpu\"\n", + "\n", + "CONTEXT.config.show()\n", + "cprint(\n", + " f\"{'cache_dir':>15}: {repr(cache_dir)}\\n\"\n", + " f\"{'model_path':>15}: {repr(model_path)}\\n\"\n", + " f\"{'device':>15}: {repr(DEVICE)}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Base image\n", + "> Change `BASE_IMAGE_IDX` to select a different base image to use in the examples below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "BASE_IMAGE_IDX: ImgIdT = cast(ImgIdT, CONTEXT.normalize_idx(\"Strange_Tales_172005.jpg\"))\n", + "assert CONTEXT.path_from_idx(BASE_IMAGE_IDX).exists()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualize images\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "IMAGE_CONTEXT = ImageContext(CONTEXT, BASE_IMAGE_IDX)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9c0379d4776a4e0f9facd7e0092c79f3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output(layout=Layout(height='0px'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5f0ae88943a14ea38efb029c40183ffd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(HBox(children=(Dropdown(index=20, layout=Layout(width='fit-content'), options={'Action_Comics_1…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "26c6ecdbae6f4952aab7ff3106400f04", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "img_visor = ImageContextVisor(CONTEXT, BASE_IMAGE_IDX)\n", + "img_visor\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Box id\n", + "> change `BOX_IDX` to use any box to test crop methods" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "BOX_IDX = 0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Idefics inference" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "page_lang = IMAGE_CONTEXT.page_lang\n", + "\n", + "resulting_messages = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [{\"type\": \"image\"}] + [\n", + " {\"type\": \"text\", \"text\": prompt_text_tmpl.format(page_lang)}\n", + " ]\n", + " }\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "def idefics_generation_args(image: Image.Image, resulting_messages: list[dict]):\n", + " prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)\n", + " inputs = processor(text=prompt, images=[image], return_tensors=\"pt\")\n", + " inputs = {k: v.to(DEVICE) for k, v in inputs.items()}\n", + " \n", + " max_new_tokens = 512\n", + " repetition_penalty = 1.2\n", + " decoding_strategy = \"Greedy\"\n", + " temperature = 0.4\n", + " top_p = 0.8\n", + "\n", + " generation_args = {\n", + " \"max_new_tokens\": max_new_tokens,\n", + " \"repetition_penalty\": repetition_penalty,\n", + " }\n", + "\n", + " assert decoding_strategy in [\n", + " \"Greedy\",\n", + " \"Top P Sampling\",\n", + " ]\n", + "\n", + " if decoding_strategy == \"Greedy\":\n", + " generation_args[\"do_sample\"] = False\n", + " elif decoding_strategy == \"Top P Sampling\":\n", + " generation_args[\"temperature\"] = temperature\n", + " generation_args[\"do_sample\"] = True\n", + " generation_args[\"top_p\"] = top_p\n", + "\n", + " generation_args.update(inputs)\n", + " return prompt, generation_args\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Crop methods" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "# image_experiment = ExperimentOCR(IMAGE_CONTEXT, 'Idefics')\n", + "image_experiment = ExperimentOCR.from_image(CONTEXT, 'Idefics', 
IMAGE_CONTEXT.image_idx) # use cache\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "method = CropMethod.INITIAL_BOX\n", + "\n", + "result = cast(ResultOCR, image_experiment.result(BOX_IDX, method, ocr=False))\n", + "image = cast(Image.Image, result.image)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No chat template is set for this tokenizer, falling back to a default class-level template. This is very error-prone, because models are often trained with templates different from the class default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which point any code depending on them will stop working. We recommend setting a valid chat template before then to ensure that this model continues working without issues.\n", + "The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.\n" + ] + }, + { + "data": { + "text/html": [ + "
INPUT: User:<image>Please perform optical character recognition (OCR) on this image, which displays \n",
+       "speech balloons from a comic book. The text is in English. Extract the text and format it as follows: \n",
+       "transcribe in standard sentence case, capitalized. Avoid using all capital letters, but ensure it is \n",
+       "capitalized where appropriate, including proper nouns. Provide the transcribed text clearly. Double \n",
+       "check the text is not all capital letters.<end_of_utterance>\n",
+       "Assistant: |OUTPUT:\n",
+       "[\n",
+       "    'Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New \n",
+       "Orleans, kept tidy by a white-haired old man known only as Bambu.'\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "INPUT: User:\u001b[1m<\u001b[0m\u001b[1;95mimage\u001b[0m\u001b[39m>Please perform optical character recognition \u001b[0m\u001b[1;39m(\u001b[0m\u001b[39mOCR\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m on this image, which displays \u001b[0m\n", + "\u001b[39mspeech balloons from a comic book. The text is in English. Extract the text and format it as follows: \u001b[0m\n", + "\u001b[39mtranscribe in standard sentence case, capitalized. Avoid using all capital letters, but ensure it is \u001b[0m\n", + "\u001b[39mcapitalized where appropriate, including proper nouns. Provide the transcribed text clearly. Double \u001b[0m\n", + "\u001b[39mcheck the text is not all capital letters.\u001b[0m\n", + "Assistant: |OUTPUT:\n", + "\u001b[1m[\u001b[0m\n", + " \u001b[32m'Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New \u001b[0m\n", + "\u001b[32mOrleans, kept tidy by a white-haired old man known only as Bambu.'\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "prompt, generation_args = idefics_generation_args(image, resulting_messages)\n", + "generated_ids = model.generate(**generation_args)\n", + "\n", + "generated_texts = processor.batch_decode(\n", + " generated_ids[:, generation_args[\"input_ids\"].size(1):], skip_special_tokens=True)\n", + "cprint(\"INPUT:\", prompt, \"|OUTPUT:\", generated_texts)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.
1.00
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "result.ocr = generated_texts[0]\n", + "result\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
INPUT: User:<image>Please perform optical character recognition (OCR) on this image, which displays \n",
+       "speech balloons from a comic book. The text is in English. Extract the text and format it as follows: \n",
+       "transcribe in standard sentence case, capitalized. Avoid using all capital letters, but ensure it is \n",
+       "capitalized where appropriate, including proper nouns. Provide the transcribed text clearly. Double \n",
+       "check the text is not all capital letters.<end_of_utterance>\n",
+       "Assistant: |OUTPUT:\n",
+       "[\n",
+       "    'Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New \n",
+       "Orleans, kept tidy by a white-haired old man known only as Bambu.'\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "INPUT: User:\u001b[1m<\u001b[0m\u001b[1;95mimage\u001b[0m\u001b[39m>Please perform optical character recognition \u001b[0m\u001b[1;39m(\u001b[0m\u001b[39mOCR\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m on this image, which displays \u001b[0m\n", + "\u001b[39mspeech balloons from a comic book. The text is in English. Extract the text and format it as follows: \u001b[0m\n", + "\u001b[39mtranscribe in standard sentence case, capitalized. Avoid using all capital letters, but ensure it is \u001b[0m\n", + "\u001b[39mcapitalized where appropriate, including proper nouns. Provide the transcribed text clearly. Double \u001b[0m\n", + "\u001b[39mcheck the text is not all capital letters.\u001b[0m\n", + "Assistant: |OUTPUT:\n", + "\u001b[1m[\u001b[0m\n", + " \u001b[32m'Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New \u001b[0m\n", + "\u001b[32mOrleans, kept tidy by a white-haired old man known only as Bambu.'\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.
1.00
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "method = CropMethod.INITIAL_BOX\n", + "\n", + "result = cast(ResultOCR, image_experiment.result(BOX_IDX, method, ocr=False))\n", + "image = cast(Image.Image, result.image)\n", + "\n", + "mocr: IdeficsOCR = cast(IdeficsOCR, CONTEXT.mocr('Idefics', page_lang))\n", + "text = mocr(image, show_prompt=True)\n", + "result.ocr = mocr.postprocess_ocr(text)\n", + "result\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tidy by a white-haired old man known only as bambu.
0.98
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tidy by a white-haired old man known only as bambu.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.result(BOX_IDX, CropMethod.PADDED_4)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
0.94
\n", + "
\n", + "
Embow⎕⎕ered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Encountered by great charles cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tidy by a white-haired old man known only as bambu.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_experiment.result(BOX_IDX, CropMethod.PAD_8_FRACT_0_2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "# Visualize results" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "522220949ff540fdbd864dc8d8722cf0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output(layout=Layout(height='0px'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "13a3ec1b0ea04630b339e5026e01eee2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(HBox(children=(Label(value='Box # (of 15):', layout=Layout(padding='0px 0px 0px 10px', width='i…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "989b4acac70848f2a5da0a74f99bc181", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "result_visor = ResultVisor(image_experiment)\n", + "result_visor\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "# Visualize Experiment" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "# p, d = image_experiment.to_json()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bae922f0f53b47b5909334ca7f5d24fc", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(HBox(children=(Dropdown(index=20, layout=Layout(width='fit-content'), 
options={'Action_Comics_1…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9c8a0db76d0a40eb9c3451129e803124", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "exp_visor = ExperimentVisor(image_experiment)\n", + "exp_visor\n" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "# p, d = exp_visor.ctx.to_json()\n", + "# p" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "# EEAaO" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d441a9415ca94481a85523a6d30eca92", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(HBox(children=(HBox(children=(Dropdown(index=1, layout=Layout(width='fit-content'), options={'T…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4f568a40794a4cba951fb6652b5446dd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "idefics_experiment = ExperimentsVisor(CONTEXT, BASE_IMAGE_IDX, \n", + " box_idx=13, method=CropMethod.DEFAULT_GREY_PAD,\n", + " ocr_model=OCRModel.IDEFICS, \n", + " ocr_models={'Tesseract': OCRModel.TESSERACT, 'Idefics': OCRModel.IDEFICS})\n", + "idefics_experiment\n" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "idefics_experiment.update(model=OCRModel.TESSERACT)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Colophon\n", + "----\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "import fastcore.all as FC\n", + "from nbdev.export import nb_export\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "if FC.IN_NOTEBOOK:\n", + " nb_export('test_idefics.ipynb', '.')\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "panel-cleaner", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/_testbed/test_tesseract.ipynb b/_testbed/test_tesseract.ipynb new file mode 100644 index 00000000..3cb1dc79 --- /dev/null +++ b/_testbed/test_tesseract.ipynb @@ -0,0 +1,638 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# install (Colab)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# try: \n", + "# import fastcore as FC\n", + "# except ImportError: \n", + "# !pip install -q fastcore\n", + "# try:\n", + "# import rich\n", + "# except ImportError:\n", + "# !pip install -q rich\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install -q git+https://github.com/civvic/PanelCleaner.git@testbed" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing `Tesseract` OCR for Comics\n", + "> Accuracy Enhancements for OCR in `PanelCleaner`\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prologue" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import annotations\n", + "\n", + "from pathlib import Path\n", + 
"from typing import cast\n", + "\n", + "import pcleaner.config as cfg\n", + "import torch\n", + "from rich.console import Console\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from experiments import *\n", + "from helpers import *\n", + "from ocr_metric import *\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import fastcore.xtras # patch Path with some utils\n", + "from fastcore.test import * # type: ignore\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Helpers" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# pretty print by default\n", + "# %load_ext rich" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "console = Console(width=104, tab_size=4, force_jupyter=True)\n", + "cprint = console.print\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tesseract installation" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['tesseract 5.3.4',\n", + " ' leptonica-1.84.1',\n", + " ' libgif 5.2.1 : libjpeg 8d (libjpeg-turbo 3.0.0) : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : libwebp 1.4.0 : libopenjp2 2.5.2',\n", + " ' Found NEON',\n", + " ' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6',\n", + " ' Found libcurl/8.4.0 SecureTransport (LibreSSL/3.3.6) zlib/1.2.11 nghttp2/1.51.0']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out = !tesseract --version\n", + "out\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install jpn_vert tesserac lang\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "```bash\n", + "cd model\n", + "sudo ln -s jpn_vert_tessdata_best.traineddata /usr/share/tesseract-ocr/5/tessdata/jpn_vert.traineddata\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(Path('/opt/homebrew/share/tessdata'),\n", + " ['afr, amh, ara, asm, aze, aze_cyrl, bel, ben, bod, bos, bre, bul, cat, ceb, ces',\n", + " 'chi_sim, chi_sim_vert, chi_tra, chi_tra_vert, chr, cos, cym, dan, deu, div, dzo, ell, eng, enm, epo',\n", + " 'equ, est, eus, fao, fas, fil, fin, fra, frk, frm, fry, gla, gle, glg, grc',\n", + " 'guj, hat, heb, hin, hrv, hun, hye, iku, ind, isl, ita, ita_old, jav, jpn, jpn_vert',\n", + " 'kan, kat, kat_old, kaz, khm, kir, kmr, kor, kor_vert, lao, lat, lav, lit, ltz, mal',\n", + " 'mar, mkd, mlt, mon, mri, msa, mya, nep, nld, nor, oci, ori, osd, pan, pol',\n", + " 'por, pus, que, ron, rus, san, script/Arabic, script/Armenian, script/Bengali, script/Canadian_Aboriginal, script/Cherokee, script/Cyrillic, script/Devanagari, script/Ethiopic, script/Fraktur',\n", + " 'script/Georgian, script/Greek, script/Gujarati, script/Gurmukhi, script/HanS, script/HanS_vert, script/HanT, script/HanT_vert, script/Hangul, script/Hangul_vert, script/Hebrew, script/Japanese, script/Japanese_vert, script/Kannada, script/Khmer',\n", + " 'script/Lao, script/Latin, script/Malayalam, script/Myanmar, script/Oriya, script/Sinhala, script/Syriac, script/Tamil, script/Telugu, script/Thaana, script/Thai, script/Tibetan, script/Vietnamese, sin, slk',\n", + " 'slv, snd, snum, spa, spa_old, sqi, srp, srp_latn, sun, swa, swe, syr, tam, tat, tel',\n", + " 'tgk, tha, tir, ton, tur, uig, ukr, urd, uzb, uzb_cyrl, vie, yid, yor'])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out = !tesseract --list-langs\n", + "tessdata = Path(out[0].split('\"')[1])\n", + "tessdata, [', '.join(sub) for sub in [out[i:i + 15] for i in 
range(1, len(out), 15)]]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[\n",
+       "    Path('/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/eng_tessdata_best_410.traineddata'),\n",
+       "    Path('/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_vert_tessdata_best.traineddata'),\n",
+       "    Path('/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_tessdata_best.traineddata')\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\n", + " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/eng_tessdata_best_410.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_vert_tessdata_best.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_tessdata_best.traineddata'\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "langs = tessdata.ls()\n", + "cprint([p.resolve() for p in langs if 'eng' in p.name] + [p.resolve() for p in langs if 'jpn' in p.name])\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "# Tesseract experiments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PanelCleaner Configuration\n", + "> Adapt `PanelCleaner` `Config` current config to this notebook.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "config = cfg.load_config()\n", + "config.cache_dir = Path(\".\")\n", + "\n", + "cache_dir = config.get_cleaner_cache_dir()\n", + "\n", + "profile = config.current_profile\n", + "preprocessor_conf = profile.preprocessor\n", + "# Modify the profile to OCR all boxes.\n", + "# Make sure OCR is enabled.\n", + "preprocessor_conf.ocr_enabled = True\n", + "# Make sure the max size is infinite, so no boxes are skipped in the OCR process.\n", + "preprocessor_conf.ocr_max_size = 10**10\n", + "# Make sure the sus box min size is infinite, so all boxes with \"unknown\" language are skipped.\n", + "preprocessor_conf.suspicious_box_min_size = 10**10\n", + "# Set the OCR blacklist pattern to match everything, so all text gets reported in the 
analytics.\n", + "preprocessor_conf.ocr_blacklist_pattern = \".*\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test images\n", + "> `IMAGE_PATHS` is a list of image file paths that are used as input for testing the OCR methods." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['00: Action_Comics_1960-01-00_(262).JPG',\n", + " '01: Adolf_Cap_01_008.jpg',\n", + " '02: Barnaby_v1-028.png',\n", + " '03: Barnaby_v1-029.png',\n", + " '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n", + " '05: Cannon-292.jpg',\n", + " '06: Contrato_con_Dios_028.jpg',\n", + " '07: Erase_una_vez_en_Francia_02_88.jpg',\n", + " '08: FOX_CHILLINTALES_T17_012.jpg',\n", + " '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n", + " '10: Galactus_12.jpg',\n", + " '11: INOUE_KYOUMEN_002.png',\n", + " '12: MCCALL_ROBINHOOD_T31_010.jpg',\n", + " '13: MCCAY_LITTLENEMO_090.jpg',\n", + " '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n", + " '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n", + " '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n", + " '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n", + " '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n", + " '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n", + " '20: Strange_Tales_172005.jpg',\n", + " '21: Strange_Tales_172021.jpg',\n", + " '22: Tarzan_014-21.JPG',\n", + " '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n", + " '24: Transformers_-_Unicron_000-004.jpg',\n", + " '25: Transformers_-_Unicron_000-016.jpg',\n", + " '26: WARE_ACME_024.jpg',\n", + " '27: Yoko_Tsuno_T01_1972-10.jpg',\n", + " '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n", + " '29: manga_0033.jpg',\n", + " '30: ronson-031.jpg',\n", + " '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + 
], + "source": [ + "media_path = Path(\"media/\")\n", + "\n", + "IMAGE_PATHS = sorted(\n", + " [_ for _ in media_path.glob(\"*\") if _.is_file() and _.suffix.lower() in [\".jpg\", \".png\", \".jpeg\"]])\n", + "\n", + "[f\"{i:02}: {_.name}\" for i,_ in enumerate(IMAGE_PATHS)]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CONTEXT\n", + "> `CONTEXT` is an `OCRExperimentContext` object that contains the configuration and the list of image paths.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can get the configuration with `OCRExperimentContext.get_config()`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current Configuration:\n", + "\n", + "Locale: System default\n", + "Default Profile: Built-in\n", + "Saved Profiles:\n", + "- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n", + "- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n", + "\n", + "Profile Editor: cursor\n", + "Cache Directory: .\n", + "Default Torch Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n", + "Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n", + "GUI Theme: System default\n", + "\n", + "--------------------\n", + "\n", + "Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n", + "System default cache directory: /Users/vic/Library/Caches/pcleaner\n" + ] + }, + { + "data": { + "text/html": [ + "
      cache_dir: Path('cleaner')\n",
+       "     model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n",
+       "         device: 'mps'\n",
+       "
\n" + ], + "text/plain": [ + " cache_dir: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'cleaner'\u001b[0m\u001b[1m)\u001b[0m\n", + " model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n", + " device: \u001b[32m'mps'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "CONTEXT = OCRExperimentContext(None, IMAGE_PATHS)\n", + "\n", + "gpu = torch.cuda.is_available() or torch.backends.mps.is_available()\n", + "model_path = CONTEXT.config.get_model_path(gpu)\n", + "DEVICE = (\"mps\" if torch.backends.mps.is_available() else \"cuda\") if model_path.suffix == \".pt\" else \"cpu\"\n", + "\n", + "CONTEXT.config.show()\n", + "cprint(\n", + " f\"{'cache_dir':>15}: {repr(cache_dir)}\\n\"\n", + " f\"{'model_path':>15}: {repr(model_path)}\\n\"\n", + " f\"{'device':>15}: {repr(DEVICE)}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Base image\n", + "> Change `BASE_IMAGE_IDX` to select a different base image to use in the examples below." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "BASE_IMAGE_IDX: ImgIdT = cast(ImgIdT, CONTEXT.normalize_idx(\"Strange_Tales_172005.jpg\"))\n", + "# BASE_IMAGE_IDX = CONTEXT.normalize_idx(\"0033\")\n", + "# BASE_IMAGE_IDX = CONTEXT.normalize_idx(\"INOUE_KYOUMEN_002\")\n", + "# BASE_IMAGE_IDX = CONTEXT.normalize_idx(\"Action_Comics_1960-01-00_(262)\")\n", + "\n", + "assert BASE_IMAGE_IDX is not None\n", + "img_path = Path(CONTEXT.image_paths[BASE_IMAGE_IDX])\n", + "assert img_path.exists()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Empty cache\n", + "> Clear the image cache used profusely throughout the examples below." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "You will be warned before the cache is emptied." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# CONTEXT.empty_cache_warn()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# CONTEXT.empty_cache_warn(BASE_IMAGE_IDX)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Base image\n", + "> Change `BASE_IMAGE_IDX` to select a different base image to use in the examples below.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "BASE_IMAGE_IDX: ImgIdT = cast(ImgIdT, CONTEXT.normalize_idx(\"Strange_Tales_172005.jpg\"))\n", + "assert CONTEXT.path_from_idx(BASE_IMAGE_IDX).exists()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualize images\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3de3bd90585a452ab7bd9f5dce716e4e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output(layout=Layout(height='0px'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6ab61afc65d84115b81c248ed1d0ab03", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(HBox(children=(Dropdown(index=20, layout=Layout(width='fit-content'), options={'Action_Comics_1…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8c79a1393b5a4feaaa8c6d7cf2b458bc", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "img_visor = ImageContextVisor(CONTEXT, BASE_IMAGE_IDX)\n", + "img_visor\n" + ] + }, + { + "cell_type": "markdown", + "metadata": 
{}, + "source": [ + "# Tesseract experiments\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d65bd435e6774637ac667defde594c4d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(HBox(children=(HBox(children=(Dropdown(layout=Layout(width='fit-content'), options={'Tesseract'…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f2ca36a4d81144d59ba835c40b990d34", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# tesseract_experiment = ExperimentsVisor(CONTEXT)\n", + "tesseract_experiment = ExperimentsVisor(CONTEXT, BASE_IMAGE_IDX)\n", + "\n", + "test_eq(tesseract_experiment.all_values, {\n", + " 'image_selector': {'image_idx': 20},\n", + " 'content_selector': {'display_option': DisplayOptions.RESULTS},\n", + " 'result_visor': {\n", + " 'all_boxes': False,\n", + " 'box_idx': 0,\n", + " 'all_methods': False,\n", + " 'method': CropMethod.INITIAL_BOX,\n", + " },\n", + " 'model_selector': {'model': OCRModel.TESSERACT},\n", + " 'self': {}\n", + "})\n", + "\n", + "tesseract_experiment\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 3ab93d9b65c8efd750d6b6ef232599afa3cf0eef Mon Sep 17 00:00:00 2001 From: Spikey 
Date: Fri, 10 May 2024 20:50:13 +0200 Subject: [PATCH 02/27] Add .gitkeep files --- _testbed/cleaner/.gitkeep | 0 _testbed/media/.gitkeep | 0 _testbed/model/.gitkeep | 0 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 _testbed/cleaner/.gitkeep create mode 100644 _testbed/media/.gitkeep create mode 100644 _testbed/model/.gitkeep diff --git a/_testbed/cleaner/.gitkeep b/_testbed/cleaner/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/_testbed/media/.gitkeep b/_testbed/media/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/_testbed/model/.gitkeep b/_testbed/model/.gitkeep new file mode 100644 index 00000000..e69de29b From fba9f88c25e895f2f8b36be9177f63d750c821fa Mon Sep 17 00:00:00 2001 From: Spikey Date: Fri, 10 May 2024 21:21:00 +0200 Subject: [PATCH 03/27] fix GDrive links --- .gitignore | 3 +++ _testbed/README.md | 2 +- _testbed/experiments.ipynb | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 8fc7b143..6bf84825 100644 --- a/.gitignore +++ b/.gitignore @@ -169,6 +169,9 @@ cython_debug/ *.sh .DS_Store + _testbed/media/** _testbed/cleaner/** _testbed/model/** + +!.gitkeep \ No newline at end of file diff --git a/_testbed/README.md b/_testbed/README.md index 702df030..5c782408 100644 --- a/_testbed/README.md +++ b/_testbed/README.md @@ -24,7 +24,7 @@ Instructions to use Google Colab are included in the notebooks (TBD). ## Install Test Images The test images are not included in the repository but can be downloaded from the following link: -- [Test images](https://drive.google.com/drive/folders/101_1_20240229) +- [Test images](https://drive.google.com/drive/folders/0BzW8DiWVVMiMN2JaZkg4cHN0NFU?resourcekey=0-ddnFRuwaLHH4FH5bPVa1SA&usp=drive_link) After downloading, place the test images in the [media](media) directory. 
If you want to use your own, each image should have a corresponding text file with the same name, but with the extension `.txt`, which contains the ground truth data, one line per box (as calculated by PanelCleaner). Optionally, you can also include a `.json` file with the same name, specifying the language of the page: ```json diff --git a/_testbed/experiments.ipynb b/_testbed/experiments.ipynb index 6c8d3c81..15cb4f85 100644 --- a/_testbed/experiments.ipynb +++ b/_testbed/experiments.ipynb @@ -227,7 +227,7 @@ "\n", "Note: I've not play much with this one, `managa-ocr` is surely a much better fit, but it can be educational to compare.\n", "\n", - "I have copied models in my GDrive, and installed (in my Ubuntu, similar in Mac):\n", + "I have copied models in my [drive](https://drive.google.com/drive/folders/0BzW8DiWVVMiMN2JaZkg4cHN0NFU?resourcekey=0-ddnFRuwaLHH4FH5bPVa1SA&usp=drive_link), and installed (in my Ubuntu, similar in Mac):\n", "```bash\n", "cd model\n", "ln -s jpn_vert_tessdata_best.traineddata /usr/share/tesseract-ocr/5/tessdata/jpn_vert.traineddata\n", From 88fd30b6009dedab8b46155dc2b6f5dd42014a01 Mon Sep 17 00:00:00 2001 From: Spikey Date: Fri, 24 May 2024 19:24:07 +0200 Subject: [PATCH 04/27] Moved to pclaner/ --- .gitignore | 7 +- _testbed/README.md | 55 - _testbed/cleaner/.gitkeep | 0 _testbed/experiments.ipynb | 6810 --------------------------------- _testbed/experiments.py | 1995 ---------- _testbed/helpers.ipynb | 835 ---- _testbed/helpers.py | 372 -- _testbed/media/.gitkeep | 0 _testbed/model/.gitkeep | 0 _testbed/ocr_idefics.py | 184 - _testbed/ocr_metric.ipynb | 276 -- _testbed/ocr_metric.py | 64 - _testbed/requirements.txt | 5 - _testbed/test_idefics.ipynb | 1497 -------- _testbed/test_tesseract.ipynb | 638 --- 15 files changed, 3 insertions(+), 12735 deletions(-) delete mode 100644 _testbed/README.md delete mode 100644 _testbed/cleaner/.gitkeep delete mode 100644 _testbed/experiments.ipynb delete mode 100644 _testbed/experiments.py delete 
mode 100644 _testbed/helpers.ipynb delete mode 100644 _testbed/helpers.py delete mode 100644 _testbed/media/.gitkeep delete mode 100644 _testbed/model/.gitkeep delete mode 100644 _testbed/ocr_idefics.py delete mode 100644 _testbed/ocr_metric.ipynb delete mode 100644 _testbed/ocr_metric.py delete mode 100644 _testbed/requirements.txt delete mode 100644 _testbed/test_idefics.ipynb delete mode 100644 _testbed/test_tesseract.ipynb diff --git a/.gitignore b/.gitignore index 6bf84825..ce5d862b 100644 --- a/.gitignore +++ b/.gitignore @@ -170,8 +170,7 @@ cython_debug/ .DS_Store -_testbed/media/** -_testbed/cleaner/** -_testbed/model/** +*/_testbed/experiment/cache/** +*/_testbed/experiment/source/** -!.gitkeep \ No newline at end of file +!.gitkeep diff --git a/_testbed/README.md b/_testbed/README.md deleted file mode 100644 index 5c782408..00000000 --- a/_testbed/README.md +++ /dev/null @@ -1,55 +0,0 @@ -# PanelCleaner Testbed - -## Overview -The **PanelCleaner** testbed serves as a dedicated area for experimenting and testing new ideas with *PanelCleaner* using Jupyter Notebooks. Currently, it focuses on **OCR** technologies, primarily using **Tesseract** and **IDefics** models. The testbed also begins the development of an evaluation framework to support future experiments. This project utilizes the `nbdev` literate programming environment. - -## Installation -To get started with the notebooks, you'll need Jupyter Lab/Notebook or any Python IDE that supports Jupyter notebooks like *VSCode* or *Google Colab*. -The setup mostly shares the same requirements as PanelCleaner and its CLI, with a few additional dependencies. -Here’s how to set up your environment: -1. Activate a virtual environment. -2. Navigate to the `_testbed` directory: - ```bash - cd _testbed - ``` -3. Install the required dependencies: - ```bash - pip install -r requirements.txt - ``` -Note: Each notebook may require the installation of additional dependencies. 
- -## Google Colab Support -The notebooks are ready to use on Google Colab, allowing you to run them directly on the platform without any extra setup or local GPU rigs. -Instructions to use Google Colab are included in the notebooks (TBD). - -## Install Test Images -The test images are not included in the repository but can be downloaded from the following link: -- [Test images](https://drive.google.com/drive/folders/0BzW8DiWVVMiMN2JaZkg4cHN0NFU?resourcekey=0-ddnFRuwaLHH4FH5bPVa1SA&usp=drive_link) - -After downloading, place the test images in the [media](media) directory. If you want to use your own, each image should have a corresponding text file with the same name, but with the extension `.txt`, which contains the ground truth data, one line per box (as calculated by PanelCleaner). Optionally, you can also include a `.json` file with the same name, specifying the language of the page: -```json -{ - "lang": "Spanish" -} -``` -If no language file is found, English will be used by default. In the near future, language detection will be automated. - -## Introduction to nbdev -[nbdev](https://nbdev.fast.ai/) is a **literate programming** environment that allows you to develop a Python library in Jupyter Notebooks, integrating exploratory programming, code, tests, and documentation into a single cohesive workflow. Inspired by **Donald Knuth**'s concept of literate programming, this approach not only makes the development process more intuitive but also eases the maintenance and understanding of the codebase. - -## Notebooks (WIP) - -#### [helpers.ipynb](helpers.ipynb) -This notebook includes utility functions and helpers that support the experiments in other notebooks, streamlining repetitive tasks and data manipulation. - -#### [ocr_metric.ipynb](ocr_metric.ipynb) -This notebook focuses on defining and implementing metrics to evaluate the performance and accuracy of OCR engines, crucial for assessing the effectiveness of OCR technologies in various scenarios. 
It currently develops a basic metric for evaluating OCR models. In the near future, additional metrics will be added, such as precision and recall using Levenshtein distance (edit distance). More importantly, it will introduce a metric tailored to the unique characteristics of Comics/Manga OCR, a topic currently unexplored in technical literature. - -#### [experiments.ipynb](experiments.ipynb) -This notebook details the development of the evaluation framework used in other notebooks, with Tesseract as a case study to illustrate the evaluation process. It's a work in progress, and will be updated continuously. If you're only interested in visualizing the results of the experiments, go directly to `Test_tesseract.ipynb` or `Test_idefics.ipynb`, which are much shorter and more to the point. - -#### [test_tesseract.ipynb](test_tesseract.ipynb) -This notebook is dedicated to testing the Tesseract OCR engine, offering insights into its capabilities and limitations through hands-on experiments. - -#### [test_idefics.ipynb](test_idefics.ipynb) -Similar to `test_tesseract.ipynb`, this notebook focuses on the IDefics LVM model, evaluating its performance and accuracy under different conditions. Here you can compare the results of the Tesseract OCR engine with the IDefics LVM model to see how the two compare in terms of accuracy and performance. 
diff --git a/_testbed/cleaner/.gitkeep b/_testbed/cleaner/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/_testbed/experiments.ipynb b/_testbed/experiments.ipynb deleted file mode 100644 index 15cb4f85..00000000 --- a/_testbed/experiments.ipynb +++ /dev/null @@ -1,6810 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp experiments" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "# %reload_ext autoreload\n", - "# %autoreload 0\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# install (Colab)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# try: \n", - "# import fastcore as FC\n", - "# except ImportError: \n", - "# !pip install -q fastcore\n", - "# try:\n", - "# import rich\n", - "# except ImportError:\n", - "# !pip install -q rich\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note: we're using the `testbed` branch of PanelCleaner.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# !pip install -q git+https://github.com/civvic/PanelCleaner.git@testbed" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Testing `Tesseract` OCR for Comics\n", - "> Accuracy Enhancements for OCR in `PanelCleaner`\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Prologue" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "from __future__ import annotations\n", - "\n", - "import dataclasses\n", - "import difflib\n", - "import functools\n", - "import json\n", - "import shutil\n", - "from collections import defaultdict\n", - "from enum import Enum\n", - "from pathlib import 
Path\n", - "from typing import Any\n", - "from typing import Callable\n", - "from typing import cast\n", - "from typing import Mapping\n", - "from typing import Self\n", - "from typing import TypeAlias\n", - "\n", - "import fastcore.all as FC\n", - "import ipywidgets as W\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import pandas as pd\n", - "import pcleaner.config as cfg\n", - "import pcleaner.ctd_interface as ctm\n", - "import pcleaner.image_ops as ops\n", - "import pcleaner.ocr.ocr as ocr\n", - "import pcleaner.structures as st\n", - "import torch\n", - "from IPython.display import clear_output\n", - "from IPython.display import display\n", - "from IPython.display import HTML\n", - "from ipywidgets.widgets.interaction import show_inline_matplotlib_plots\n", - "from loguru import logger\n", - "from pcleaner.ocr.ocr_tesseract import TesseractOcr\n", - "from PIL import Image\n", - "from PIL import ImageFilter\n", - "from rich.console import Console\n", - "from tqdm.notebook import tqdm\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "from helpers import *\n", - "from ocr_metric import *\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "import copy\n", - "\n", - "import fastcore.xtras # patch Path with some utils\n", - "import pcleaner.cli_utils as cli\n", - "import pcleaner.preprocessor as pp\n", - "import rich\n", - "from fastcore.test import * # type: ignore\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Helpers" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# pretty print by default\n", - "# %load_ext rich" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "console = Console(width=104, tab_size=4, 
force_jupyter=True)\n", - "cprint = console.print\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Tesseract installation" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['tesseract 5.3.4',\n", - " ' leptonica-1.84.1',\n", - " ' libgif 5.2.1 : libjpeg 8d (libjpeg-turbo 3.0.0) : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : libwebp 1.4.0 : libopenjp2 2.5.2',\n", - " ' Found NEON',\n", - " ' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6',\n", - " ' Found libcurl/8.4.0 SecureTransport (LibreSSL/3.3.6) zlib/1.2.11 nghttp2/1.51.0']" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "out = !tesseract --version\n", - "out\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Install jpn_vert tesserac lang\n", - "> It has much better results than the default `jpn` language model.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "Download from [tessdata_best](https://github.com/tesseract-ocr/tessdata_best), or from [here](https://groups.google.com/g/tesseract-ocr/c/FwjSZzoVgeg/m/u-zyFYQiBgAJ) trained for vertical Japanese text as found in manga.\n", - "\n", - "Note: I've not play much with this one, `managa-ocr` is surely a much better fit, but it can be educational to compare.\n", - "\n", - "I have copied models in my [drive](https://drive.google.com/drive/folders/0BzW8DiWVVMiMN2JaZkg4cHN0NFU?resourcekey=0-ddnFRuwaLHH4FH5bPVa1SA&usp=drive_link), and installed (in my Ubuntu, similar in Mac):\n", - "```bash\n", - "cd model\n", - "ln -s jpn_vert_tessdata_best.traineddata /usr/share/tesseract-ocr/5/tessdata/jpn_vert.traineddata\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - 
"(Path('/opt/homebrew/share/tessdata'),\n", - " ['afr, amh, ara, asm, aze, aze_cyrl, bel, ben, bod, bos, bre, bul, cat, ceb, ces',\n", - " 'chi_sim, chi_sim_vert, chi_tra, chi_tra_vert, chr, cos, cym, dan, deu, div, dzo, ell, eng, enm, epo',\n", - " 'equ, est, eus, fao, fas, fil, fin, fra, frk, frm, fry, gla, gle, glg, grc',\n", - " 'guj, hat, heb, hin, hrv, hun, hye, iku, ind, isl, ita, ita_old, jav, jpn, jpn_vert',\n", - " 'kan, kat, kat_old, kaz, khm, kir, kmr, kor, kor_vert, lao, lat, lav, lit, ltz, mal',\n", - " 'mar, mkd, mlt, mon, mri, msa, mya, nep, nld, nor, oci, ori, osd, pan, pol',\n", - " 'por, pus, que, ron, rus, san, script/Arabic, script/Armenian, script/Bengali, script/Canadian_Aboriginal, script/Cherokee, script/Cyrillic, script/Devanagari, script/Ethiopic, script/Fraktur',\n", - " 'script/Georgian, script/Greek, script/Gujarati, script/Gurmukhi, script/HanS, script/HanS_vert, script/HanT, script/HanT_vert, script/Hangul, script/Hangul_vert, script/Hebrew, script/Japanese, script/Japanese_vert, script/Kannada, script/Khmer',\n", - " 'script/Lao, script/Latin, script/Malayalam, script/Myanmar, script/Oriya, script/Sinhala, script/Syriac, script/Tamil, script/Telugu, script/Thaana, script/Thai, script/Tibetan, script/Vietnamese, sin, slk',\n", - " 'slv, snd, snum, spa, spa_old, sqi, srp, srp_latn, sun, swa, swe, syr, tam, tat, tel',\n", - " 'tgk, tha, tir, ton, tur, uig, ukr, urd, uzb, uzb_cyrl, vie, yid, yor'])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "out = !tesseract --list-langs\n", - "tessdata = Path(out[0].split('\"')[1])\n", - "tessdata, [', '.join(sub) for sub in [out[i:i + 15] for i in range(1, len(out), 15)]]\n" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[\n",
-       "    Path('/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/eng_tessdata_best_410.traineddata'),\n",
-       "    Path('/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_vert_tessdata_best.traineddata'),\n",
-       "    Path('/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_tessdata_best.traineddata')\n",
-       "]\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\n", - " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/eng_tessdata_best_410.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_vert_tessdata_best.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_tessdata_best.traineddata'\u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "langs = tessdata.ls()\n", - "cprint([p.resolve() for p in langs if 'eng' in p.name] + [p.resolve() for p in langs if 'jpn' in p.name])\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## OCR results clean-up" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def remove_multiple_whitespaces(text):\n", - " return ' '.join(text.split())\n", - "\n", - " \n", - "def postprocess_ocr(text):\n", - " \"Basic postprocessing for English Tesseract OCR results.\"\n", - " return ' '.join(remove_multiple_whitespaces(text).splitlines()).capitalize()\n", - "\n", - "def accuracy_ocr_naive(text, ground_truth):\n", - " return sum(1 for a, b in zip(text, ground_truth) if a == b) / len(text)\n", - "\n", - "\n", - "def accuracy_ocr_difflib(text, ground_truth):\n", - " \"\"\"\n", - " Calculates the OCR accuracy based on the similarity between the OCR text and the ground truth text,\n", - " using difflib's SequenceMatcher to account for differences in a manner similar to git diffs.\n", - "\n", - " :param text: The OCR-generated text.\n", - " :param ground_truth: The ground truth text.\n", - " :return: A float representing the similarity ratio between the OCR text and the 
ground truth, \n", - " where 1.0 is identical.\n", - " \"\"\"\n", - " # Initialize the SequenceMatcher with the OCR text and the ground truth\n", - " matcher = difflib.SequenceMatcher(None, text, ground_truth)\n", - " \n", - " # Get the similarity ratio\n", - " similarity_ratio = matcher.ratio()\n", - " \n", - " return similarity_ratio" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Ground truth" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def ground_truth_path(page_data: st.PageData):\n", - " path = Path(page_data.original_path)\n", - " return path.with_stem(path.stem + '_gt').with_suffix('.txt')\n", - "\n", - "\n", - "def read_ground_truth(page_data: st.PageData):\n", - " gts_path = ground_truth_path(page_data)\n", - " if gts_path.exists():\n", - " gts = gts_path.read_text(encoding=\"utf-8\").splitlines()\n", - " else:\n", - " gts = [\"\" for _ in range(len(page_data.boxes))]\n", - " return gts\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Cropping" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def dilate_by_fractional_pixel(image, dilation_fraction, filter_base_size=3):\n", - " \"\"\"\n", - " Dilates an image by a specified fractional pixel amount. 
The function calculates \n", - " the necessary scaling factor and filter size based on the desired dilation fraction.\n", - "\n", - " :param image: A PIL Image object (1-bit mode).\n", - " :param dilation_fraction: The desired fractional pixel amount for dilation (e.g., 0.2).\n", - " :param filter_base_size: The base size of the dilation filter to apply on the scaled image.\n", - " This size is adjusted based on the scaling factor to achieve the\n", - " desired dilation effect.\n", - " :return: A PIL Image object after dilation, converted back to grayscale.\n", - " \"\"\"\n", - " # Calculate the scale factor based on the desired dilation fraction\n", - " scale_factor = int(1 / dilation_fraction)\n", - " \n", - " # Adjust the filter size based on the scale factor\n", - " # This ensures the dilation effect is proportional to the desired fraction\n", - " filter_size = max(1, filter_base_size * scale_factor // 5)\n", - "\n", - " # Convert the image to grayscale for more nuanced intermediate values\n", - " image_gray = image.convert(\"L\")\n", - "\n", - " # Resize the image to a larger size using bicubic interpolation\n", - " larger_size = (int(image.width * scale_factor), int(image.height * scale_factor))\n", - " image_resized = image_gray.resize(larger_size, Image.BICUBIC)\n", - "\n", - " # Apply the dilation filter to the resized image\n", - " dilated_image = image_resized.filter(ImageFilter.MaxFilter(filter_size))\n", - "\n", - " # Resize the image back to its original size using bicubic interpolation\n", - " image_dilated_fractional_pixel = dilated_image.resize(image.size, Image.BICUBIC)\n", - "\n", - " return image_dilated_fractional_pixel\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def extract_text(image, text_mask, box):\n", - " cropped_image = crop_box(box, image)\n", - " cropped_mask = crop_box(box, text_mask)\n", - " extracted = ops.extract_text(cropped_image, 
cropped_mask)\n", - " return cropped_image, cropped_mask, extracted\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Lang\n", - "> language name to a language code \n", - "> every one has language codes: tesseract, comic-text-detector, earthlings...\n" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "_lang2pcleaner = {'English': st.DetectedLang.ENG, 'Japanese': st.DetectedLang.JA, 'Spanish': st.DetectedLang.ENG,\n", - " 'French':st.DetectedLang.ENG}\n", - "# _lang2tesseract = {'English': 'eng', 'Japanese': 'jpn'}\n", - "_lang2tesseract = {'English': 'eng', 'Japanese': 'jpn_vert', 'Spanish': 'spa', 'French': 'fra'}\n" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def lang2pcleaner(lang: str):\n", - " return _lang2pcleaner[lang]\n", - "\n", - "def lang2tesseract(lang: str):\n", - " return _lang2tesseract[lang]\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "----\n", - "# Tesseract experiments" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# PanelCleaner Configuration\n", - "> Adapt `PanelCleaner` `Config` current config to this notebook.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "config = cfg.load_config()\n", - "config.cache_dir = Path(\".\")\n", - "\n", - "cache_dir = config.get_cleaner_cache_dir()\n", - "\n", - "profile = config.current_profile\n", - "preprocessor_conf = profile.preprocessor\n", - "# Modify the profile to OCR all boxes.\n", - "# Make sure OCR is enabled.\n", - "preprocessor_conf.ocr_enabled = True\n", - "# Make sure the max size is infinite, so no boxes are skipped in the OCR process.\n", - "preprocessor_conf.ocr_max_size = 10**10\n", - "# Make sure the sus box min size is infinite, so all boxes with \"unknown\" language 
are skipped.\n", - "preprocessor_conf.suspicious_box_min_size = 10**10\n", - "# Set the OCR blacklist pattern to match everything, so all text gets reported in the analytics.\n", - "preprocessor_conf.ocr_blacklist_pattern = \".*\"\n", - "\n", - "gpu = torch.cuda.is_available() or torch.backends.mps.is_available()\n", - "model_path = config.get_model_path(gpu)\n", - "device = (\"mps\" if torch.backends.mps.is_available() else \"cuda\") if model_path.suffix == \".pt\" else \"cpu\"\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test images\n", - "> `IMAGE_PATHS` is a list of image file paths that are used as input for testing the OCR methods." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['00: Action_Comics_1960-01-00_(262).JPG',\n", - " '01: Adolf_Cap_01_008.jpg',\n", - " '02: Barnaby_v1-028.png',\n", - " '03: Barnaby_v1-029.png',\n", - " '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n", - " '05: Cannon-292.jpg',\n", - " '06: Contrato_con_Dios_028.jpg',\n", - " '07: Erase_una_vez_en_Francia_02_88.jpg',\n", - " '08: FOX_CHILLINTALES_T17_012.jpg',\n", - " '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n", - " '10: Galactus_12.jpg',\n", - " '11: INOUE_KYOUMEN_002.png',\n", - " '12: MCCALL_ROBINHOOD_T31_010.jpg',\n", - " '13: MCCAY_LITTLENEMO_090.jpg',\n", - " '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n", - " '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n", - " '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n", - " '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n", - " '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n", - " '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n", - " '20: Strange_Tales_172005.jpg',\n", - " '21: Strange_Tales_172021.jpg',\n", - " '22: Tarzan_014-21.JPG',\n", - " '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n", - " '24: 
Transformers_-_Unicron_000-004.jpg',\n", - " '25: Transformers_-_Unicron_000-016.jpg',\n", - " '26: WARE_ACME_024.jpg',\n", - " '27: Yoko_Tsuno_T01_1972-10.jpg',\n", - " '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n", - " '29: manga_0033.jpg',\n", - " '30: ronson-031.jpg',\n", - " '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "media_path = Path(\"media/\")\n", - "\n", - "IMAGE_PATHS = sorted(\n", - " [_ for _ in media_path.glob(\"*\") if _.is_file() and _.suffix.lower() in [\".jpg\", \".png\", \".jpeg\"]])\n", - "\n", - "[f\"{i:02}: {_.name}\" for i,_ in enumerate(IMAGE_PATHS)]\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Results helper\n", - "> Dataclass helper to store and display results\n" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "@dataclasses.dataclass\n", - "class ResultOCR:\n", - " block_idx: int\n", - " image: Image.Image | None\n", - " ocr: str\n", - " page_data: st.PageData\n", - " gts: list[str]\n", - " description: str = dataclasses.field(default='', kw_only=True)\n", - "\n", - " def __post_init__(self): \n", - " if self.image is None:\n", - " cache_path = self.cache_path()\n", - " if cache_path.exists():\n", - " self.image = Image.open(cache_path)\n", - "\n", - " @property\n", - " def acc(self):\n", - " self._acc = accuracy_ocr_difflib(self.ocr, self.gts[self.block_idx])\n", - " return self._acc\n", - " @property\n", - " def suffix(self): return f\"{self.block_idx}_{self.description}\"\n", - "\n", - " def diff_tagged(self):\n", - " _, html2 = get_text_diffs_html(self.gts[self.block_idx], self.ocr, False)\n", - " return f\"{html2}\"\n", - " \n", - " def cache_path(self, suffix: str | None = None):\n", - " suffix = self.suffix + (('_'+suffix) if suffix else '')\n", - " parent = 
Path(self.page_data.image_path).parent\n", - " img_name = Path(self.page_data.original_path).stem\n", - " box_image_path = parent / f\"{img_name}_{suffix}.png\"\n", - " return box_image_path\n", - " \n", - " def cache_image(self, image: Image.Image | None = None, suffix: str | None = None):\n", - " image = image or (self.image if not suffix else None)\n", - " box_image_path = self.cache_path(suffix)\n", - " if image and not box_image_path.exists():\n", - " image.save(box_image_path)\n", - " return box_image_path\n", - "\n", - "\n", - " def as_html(self):\n", - " acc_html = f\"
{self.acc:.2f}\"\n", - " box_image_path = self.cache_image()\n", - " html1 = get_columns_html([[box_image_path], [self.ocr + acc_html]])\n", - " html_str1, html_str2 = get_text_diffs_html(self.gts[self.block_idx], self.ocr)\n", - " html2 = f\"
{html_str1}
{html_str2}
\"\n", - " return html1 + '\\n
\\n' + html2\n", - "\n", - " def __repr__(self): \n", - " return f\"{type(self).__name__}#block {self.block_idx:02}: {self.acc:.2f}||{self.ocr}\"\n", - " \n", - " def display(self): display(HTML(self.as_html()))\n", - " \n", - " def _ipython_display_(self): self.display()\n", - "\n", - " def to_dict(self):\n", - " d = dataclasses.asdict(self)\n", - " d['image'] = d['page_data'] = d['gts'] = None\n", - " return d\n", - "\n", - " # @classmethod\n", - " # def from_dict(cls, d: dict, page_data: st.PageData, gts: list[str]):\n", - " # return cls(**(d | {'page_data':page_data, 'gts':gts}))\n", - "\n", - "\n", - "@dataclasses.dataclass\n", - "class ResultOCRExtracted(ResultOCR):\n", - "\n", - " def __repr__(self): return super().__repr__()\n", - " def as_html(self):\n", - " html_str1, html_str2 = get_text_diffs_html(self.gts[self.block_idx], self.ocr)\n", - " diff_html = f\"
{html_str1}
{html_str2}
\"\n", - " cropped_image_path = self.cache_image(None, \"cropped\")\n", - " cropped_mask_path = self.cache_image(None, \"mask\")\n", - " result_path = self.cache_image()\n", - " return '\\n
\\n'.join([\n", - " get_image_grid_html([cropped_image_path, cropped_mask_path, result_path], 1, 3), \n", - " acc_as_html(self.acc), \n", - " diff_html\n", - " ])\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# CropMethod\n", - "> Box cropping methods.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "class CropMethod(Enum):\n", - " INITIAL_BOX = 'Initial box'\n", - " DEFAULT = 'Default'\n", - " DEFAULT_GREY_PAD = 'Default, grey pad'\n", - " PADDED_4 = 'Padded 4px'\n", - " PADDED_8 = 'Padded 8px'\n", - " EXTRACTED_INIT_BOX = 'Extracted, init box'\n", - " PADDED_4_EXTRACTED = 'Padded 4, extracted'\n", - " PADDED_8_EXTRACTED = 'Padded 8, extracted'\n", - " PADDED_8_DILATION_1 = 'Padded 8, dilation 1'\n", - " PAD_8_FRACT_0_5 = 'Pad 8, fract. 0.5'\n", - " PAD_8_FRACT_0_2 = 'Pad 8, fract. 0.2'\n", - "\n", - " @classmethod\n", - " def __display_names__(cls):\n", - " return dict(\n", - " zip([_.value for _ in cls], \n", - " cls))\n", - "\n", - "\n", - "CM = CropMethod\n", - "\n", - "_IMAGE_METHODS = [CM.INITIAL_BOX, CM.DEFAULT, CM.DEFAULT_GREY_PAD, \n", - " CM.PADDED_4, CM.PADDED_8]\n", - "_EXTRACTED_METHODS = [CM.EXTRACTED_INIT_BOX, CM.PADDED_4_EXTRACTED, \n", - " CM.PADDED_8_EXTRACTED, CM.PADDED_8_DILATION_1, \n", - " CM.PAD_8_FRACT_0_5, CM.PAD_8_FRACT_0_2]\n", - "\n", - "\n", - "def crop_by_image(method: CM, \n", - " box: st.Box, \n", - " base: Image.Image, \n", - " preproc: cfg.PreprocessorConfig,\n", - " ):\n", - " image = None\n", - " match method:\n", - " case CM.INITIAL_BOX :\n", - " image = crop_box(box, base)\n", - " case CM.DEFAULT:\n", - " padded2_4 = (\n", - " box.pad(preproc.box_padding_initial, base.size).right_pad(\n", - " preproc.box_right_padding_initial, base.size))\n", - " image = crop_box(padded2_4, base)\n", - " case CM.DEFAULT_GREY_PAD:\n", - " image = crop_box(box, base)\n", - " image = ops.pad_image(image, 8, fill_color=(128, 
128, 128))\n", - " case CM.PADDED_4:\n", - " padded4 = box.pad(4, base.size)\n", - " image = crop_box(padded4, base)\n", - " case CM.PADDED_8:\n", - " padded4 = box.pad(8, base.size)\n", - " image = crop_box(padded4, base)\n", - " case _: pass\n", - " return image\n", - "\n", - "\n", - "def crop_by_extracted(method: CM, \n", - " box: st.Box, \n", - " base: Image.Image, \n", - " mask: Image.Image,\n", - " cropped_image_path: Path,\n", - " cropped_mask_path: Path,\n", - " dilated: dict[float, Image.Image]\n", - " ):\n", - " cropped_image, cropped_mask, image = None, None, None\n", - " if method in _EXTRACTED_METHODS:\n", - " if not cropped_image_path.exists() or not cropped_mask_path.exists():\n", - " match method:\n", - " case CM.EXTRACTED_INIT_BOX:\n", - " cropped_image, cropped_mask, image = extract_text(base, mask, box)\n", - " case CM.PADDED_4_EXTRACTED:\n", - " padded4 = box.pad(4, base.size)\n", - " cropped_image, cropped_mask, image = extract_text(base, mask, padded4)\n", - " case CM.PADDED_8_EXTRACTED:\n", - " padded8 = box.pad(8, base.size)\n", - " cropped_image, cropped_mask, image = extract_text(base, mask, padded8)\n", - " case CM.PADDED_8_DILATION_1:\n", - " padded8 = box.pad(8, base.size)\n", - " cropped_image, cropped_mask, image = extract_text(\n", - " base, dilated[1], padded8)\n", - " case CM.PAD_8_FRACT_0_5:\n", - " padded8 = box.pad(8, base.size)\n", - " cropped_image, cropped_mask, image = extract_text(\n", - " base, dilated[0.5], padded8)\n", - " case CM.PAD_8_FRACT_0_2:\n", - " padded8 = box.pad(8, base.size)\n", - " cropped_image, cropped_mask, image = extract_text(\n", - " base, dilated[0.2], padded8)\n", - " case _: pass\n", - "\n", - " return image, cropped_image, cropped_mask\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# ResultSet\n", - "> tagged nested dict to store image results keyed by box, and crop method\n" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - 
"outputs": [], - "source": [ - "#| export\n", - "\n", - "SubjIdT: TypeAlias = int\n", - "ImgIdT = SubjIdT\n", - "BoxIdT: TypeAlias = int\n", - "\n", - "class ResultSet(dict[BoxIdT, dict[CropMethod, ResultOCR]]): ...\n", - "\n", - "class ResultSetDefault(defaultdict[BoxIdT, dict[CropMethod, ResultOCR]]): ...\n", - "\n", - "def results_to_dict(results: ResultSet) -> dict[BoxIdT, dict[str, str]]:\n", - " d = {}\n", - " for box, box_methods in results.items():\n", - " for method, result in box_methods.items():\n", - " if box not in d:\n", - " d[box] = {}\n", - " d[box][method.name] = result.ocr\n", - " return d\n", - "\n", - "def dict_to_results(\n", - " image_idx: ImgIdT, \n", - " results_dict: dict[BoxIdT, dict[str, str]],\n", - " result_factory: Callable\n", - " ) -> ResultSetDefault:\n", - " results = ResultSetDefault(dict[CropMethod, ResultOCR])\n", - " for box_idx, box_methods in results_dict.items():\n", - " box_idx = int(box_idx)\n", - " for method, ocr in box_methods.items():\n", - " m = CM[method]\n", - " results[box_idx][m] = result_factory(image_idx, box_idx, m, ocr)\n", - " return results\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# ExperimentContext\n", - "> Utility class to maintain shared state across all experiments.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "# class ExperimentSubject(Protocol):\n", - "# @property\n", - "# def exp(self) -> 'ExperimentContext': ...\n", - "# @property\n", - "# def idx(self) -> SubjIdT: ...\n", - "# def setup(self,\n", - "# exp: 'ExperimentContext',\n", - "# idx: Any,\n", - "# *args, **kwargs\n", - "# ): ...\n", - "\n", - "\n", - "# class ExperimentContext(Protocol):\n", - "# def subject_factory(self) -> Callable[..., ExperimentSubject]: ...\n", - "# def normalize_idx(self, idx: Any) -> SubjIdT: ...\n", - "# def experiment_subject(self, idx: Any, /, \n", - "# create: bool = False, 
*args, **kwargs) -> ExperimentSubject | None: \n", - "# \"\"\"Get or create an `ExperimentSubject` for the given identifier. \n", - "# Returns `None` if `idx` is out of domain range.\n", - "# \"\"\"\n", - "# ...\n" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "class ExperimentSubject:\n", - " exp: ExperimentContext\n", - " idx: SubjIdT\n", - "\n", - " def setup(self, exp: ExperimentContext, idx: Any, *args, **kwargs): \n", - " self.exp = exp\n", - " self.idx = cast(SubjIdT, exp.normalize_idx(idx))\n", - " return self\n", - "\n", - " def __new__(cls,\n", - " exp: ExperimentContext,\n", - " idx: Any,\n", - " *args, **kwargs):\n", - " self = exp.experiment_subject(idx)\n", - " if self is None:\n", - " self = super().__new__(cls)\n", - " self = exp.experiment_subject(idx, new_subject=self, *args, **kwargs)\n", - " if self is None:\n", - " raise ValueError(f\"Can't create new subject with idx: {idx}: out of range\")\n", - " return self\n", - "\n", - "\n", - "class ExperimentContext:\n", - " \"Class to maintain shared state across all file-based experiments within the experiment domain.\"\n", - "\n", - " subject_cls: Callable[..., ExperimentSubject]\n", - " def subject_factory(self) -> Callable[..., ExperimentSubject]: return type(self).subject_cls\n", - "\n", - " def normalize_idx(self, idx: int | str | Path) -> SubjIdT | None:\n", - " nidx = None\n", - " if isinstance(idx, int) and idx < len(self._paths):\n", - " nidx = idx\n", - " elif isinstance(idx, str):\n", - " try:\n", - " nidx = [_.name for _ in self._paths].index(idx)\n", - " except Exception:\n", - " pass\n", - " elif isinstance(idx, Path):\n", - " idx = idx.resolve()\n", - " if idx in self._paths:\n", - " nidx = self._paths.index(idx)\n", - " return nidx\n", - " \n", - " def path_from_idx(self, idx: int | str | Path):\n", - " _idx = self.normalize_idx(idx)\n", - " if _idx is None:\n", - " raise ValueError(f\"{_idx} not 
found in context.\")\n", - " path = Path(self._paths[_idx])\n", - " if not path.exists():\n", - " raise ValueError(f\"{path} not found in context.\")\n", - " return path\n", - " \n", - " @property\n", - " def count(self): return len(self._paths)\n", - " @property\n", - " def cache_dir(self): return Path(\".cache/\")\n", - " @functools.lru_cache()\n", - " def _cache_dir(self, idx: SubjIdT):\n", - " # create one folder for each image to cache and save results\n", - " path = self.path_from_idx(idx)\n", - " cache_dir = self.cache_dir / path.stem\n", - " cache_dir.mkdir(parents=True, exist_ok=True)\n", - " return cache_dir\n", - " def subject_cache_dir(self, idx: int | str | Path):\n", - " return self._cache_dir(idx)\n", - "\n", - " def empty_cache(self, idx: SubjIdT | None = None):\n", - " cache_dir = self.cache_dir\n", - " if idx is None:\n", - " shutil.rmtree(cache_dir, ignore_errors=True)\n", - " cache_dir.mkdir(parents=True, exist_ok=True)\n", - " else:\n", - " path = Path(self._paths[idx])\n", - " cache_dir = cache_dir / path.stem\n", - " for p in cache_dir.glob(\"*\"):\n", - " p.unlink(missing_ok=True)\n", - " if not any(cache_dir.iterdir()):\n", - " cache_dir.rmdir()\n", - "\n", - " def empty_cache_warn(self, idx: SubjIdT | None=None, *, warn: bool=True, out: W.Output | None=None):\n", - " def on_confirm_clicked(b):\n", - " try:\n", - " self.empty_cache(idx)\n", - " print(\"Cache cleared successfully.\")\n", - " except Exception as e:\n", - " print(f\"Failed to clear cache: {e}\")\n", - " finally:\n", - " for widget in confirmation_box.children:\n", - " widget.close()\n", - "\n", - " def on_cancel_clicked(b):\n", - " print(\"Cache clear cancelled.\")\n", - " for widget in confirmation_box.children:\n", - " widget.close()\n", - "\n", - " if out is None:\n", - " out = W.Output()\n", - " with out:\n", - " if FC.IN_NOTEBOOK:\n", - " confirm_button = W.Button(description=\"Confirm\")\n", - " cancel_button = W.Button(description=\"Cancel\")\n", - " 
confirm_button.on_click(on_confirm_clicked)\n", - " cancel_button.on_click(on_cancel_clicked)\n", - " label = W.Label('Are you sure you want to clear the cache? This action cannot be undone.')\n", - " confirmation_box = W.VBox([label, W.HBox([confirm_button, cancel_button])])\n", - " display(confirmation_box)\n", - " else:\n", - " on_confirm_clicked(None)\n", - "\n", - " def experiment_subject(self, idx: SubjIdT | str | Path, /, \n", - " new_subject: ExperimentSubject | None = None, *args, **kwargs) -> ExperimentSubject | None:\n", - " \"Cached subject. If provided, `new_subject` replaces value at the index.\"\n", - " if (nidx := self.normalize_idx(idx)) is None:\n", - " return None\n", - " if new_subject is None:\n", - " subject = self._subjects.get(nidx)\n", - " else:\n", - " new_subject.setup(self, nidx, *args, **kwargs)\n", - " self._subjects[nidx] = subject = new_subject\n", - " return subject\n", - "\n", - " def reset(self):\n", - " self._subjects.clear()\n", - " self._cache_dir.cache_clear()\n", - " \n", - " def __init__(self, paths: list[Path], root: Path | None = None):\n", - " self._root = (root or Path('.')).resolve()\n", - " self._paths = [p.resolve().relative_to(self._root) for p in paths]\n", - " self._subjects: dict[SubjIdT, ExperimentSubject] = {}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`ExperimentSubject`s are singletons" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "exp = ExperimentContext([Path('a'), Path('b')])\n", - "subj = exp.experiment_subject(5)\n", - "test_eq(subj, None)\n", - "\n", - "_ = exp.experiment_subject(1)\n", - "test_is(_, None)\n", - "\n", - "subj1 = ExperimentSubject(exp, 1)\n", - "_ = exp.experiment_subject(1)\n", - "test_eq(_ is not None, True)\n", - "test_is(_, subj1)\n", - "test_is(subj1, ExperimentSubject(exp, 1))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can't create 
`ExperimentSubject`s beyond `ExperimentContext` domain." - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "test_fail(lambda:ExperimentSubject(exp, 2), 'out of range')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# ImageContext\n", - "> A utility class to maintain image state for an `OCRExperimentContext`.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "ImgSpecT: TypeAlias = ImgIdT | str | Path\n", - "\n", - "class ImageContext(ExperimentSubject):\n", - " \"\"\"\n", - " A utility class to maintain image state for an ExperimentContext.\n", - " This class encapsulates state necessary for conducting OCR experiments.\n", - "\n", - " Attributes:\n", - " json_data (dict): JSON data loaded from cached files.\n", - " page_data (st.PageData): PanelCleaner page data.\n", - " base_image (Image.Image): The base image loaded from the page data.\n", - " mask (Image.Image): The mask image used for text detection.\n", - " gts (list[str]): Ground truth data for the text in the images.\n", - " ocr_model (str): Name or identifier of the OCR model used.\n", - " mocr (ocr.OCRModel): OCR model configured for the experiment.\n", - " mask_dilated1 (Image.Image): Image mask dilated by 1 pixel.\n", - " mask_dilated05 (Image.Image): Image mask dilated by 0.5 pixels.\n", - " mask_dilated02 (Image.Image): Image mask dilated by 0.2 pixels.\n", - "\n", - " Methods:\n", - " init(config: cfg.Config, img_path: Path, cache_dir: Path, ocr_model: str):\n", - " Initializes the experiment context. 
It also handles the generation of text boxes \n", - " if they are not already present.\n", - "\n", - " setup_ground_truth():\n", - " Loads or initializes ground truth data for the experiment based on the page data.\n", - "\n", - " setup_crop_masks():\n", - " Prepares various dilated versions of the mask image to be used in different cropping \n", - " strategies during the experiments.\n", - " \"\"\"\n", - " exp: ExperimentContext\n", - " idx: ImgIdT\n", - " base_image: Image.Image\n", - " mask: Image.Image\n", - " json_data: dict | None\n", - " page_data: st.PageData\n", - " # ocr_model: str\n", - " # mocr: ocr.OCRModel\n", - " # postprocess_ocr: Callable[..., str]\n", - " _page_lang: str\n", - " _gts: list[str]\n", - " _mask_dilated1: Image.Image | None\n", - " _mask_dilated05: Image.Image | None\n", - " _mask_dilated02: Image.Image | None\n", - " \n", - "\n", - " # # this methods will be set downstream, declared here to make the type checker happy\n", - " # def result(self: Self, \n", - " # box_idx: int, method: CropMethod, ocr: bool = True, reset: bool=False) -> ResultOCR: ...\n", - " # def summary_box(self: Self, box_idx: int): ...\n", - "\n", - " def to_dict(self):\n", - " return {\n", - " 'image_idx': self.idx,\n", - " 'page_lang': self.page_lang,\n", - " }\n", - " \n", - " @property\n", - " def image_idx(self): return self.idx\n", - " @property\n", - " def cache_dir(self): \n", - " return self.exp.subject_cache_dir(self.idx)\n", - " cache_dir_image = cache_dir\n", - " \n", - " @property\n", - " def image_info(self): \n", - " img = self.base_image\n", - " w, h = img.size\n", - " print_size_in = size(w, h, 'in', 300)\n", - " print_size_cm = size(w, h, 'cm', 300)\n", - " required_dpi = dpi(w, h, 'Modern Age')\n", - " return (w, h), print_size_in, print_size_cm, required_dpi\n", - "\n", - " @property\n", - " def original_image_path(self): return Path(self.page_data.original_path)\n", - " @property\n", - " def image_path(self): return 
Path(self.page_data.image_path)\n", - " @property\n", - " def image_name(self): return self.original_image_path.name\n", - " @property\n", - " def image_size(self): return self.base_image.size\n", - " @property\n", - " def image_dim(self):return size(*self.image_size)\n", - " @property\n", - " def image_dpi(self): return dpi(*self.image_size)\n", - " @property\n", - " def image_print(self):\n", - " return self.image_size, self.image_dim, self.image_dpi\n", - " @property\n", - " def image_name_rich(self):\n", - " siz, dim, res = self.image_print\n", - " return f\"{self.image_name} - {siz[0]}x{siz[1]} px: {dim[0]:.2f}x{dim[1]:.2f}\\\" @ {res:.2f} dpi\"\n", - " \n", - " def setup_page_lang(self, page_lang: str | None = None):\n", - " path = Path(self.page_data.original_path).with_suffix('.json')\n", - " metadata = json.load(open(path)) if path.exists() else {}\n", - " if 'lang' in metadata and (page_lang == metadata['lang'] or page_lang is None):\n", - " self._page_lang = metadata['lang']\n", - " return\n", - " self._page_lang = metadata['lang'] = page_lang or 'English'\n", - " json.dump(metadata, open(path, 'w'), indent=2)\n", - " @property\n", - " def page_lang(self):\n", - " if self._page_lang == None:\n", - " self.setup_page_lang()\n", - " return self._page_lang\n", - " \n", - " @property\n", - " def boxes(self): return self.page_data.boxes\n", - " \n", - " def setup_ground_truth(self):\n", - " self._gts = read_ground_truth(self.page_data)\n", - " @property\n", - " def gts(self): \n", - " if self._gts is None:\n", - " self.setup_ground_truth()\n", - " return self._gts\n", - " \n", - " @functools.lru_cache(typed=True)\n", - " def dilated_mask(self, fraction: float):\n", - " return dilate_by_fractional_pixel(self.mask, fraction)\n", - " \n", - " def mask_dilated1(self): \n", - " if self._mask_dilated1 is None:\n", - " self._mask_dilated1 = self.mask.filter(ImageFilter.MaxFilter(3))\n", - " return self._mask_dilated1\n", - " \n", - " def mask_dilated05(self): \n", - 
" if self._mask_dilated05 is None:\n", - " self._mask_dilated05 = self.dilated_mask(0.5)\n", - " return self._mask_dilated05\n", - " \n", - " def mask_dilated02(self): \n", - " if self._mask_dilated02 is None:\n", - " self._mask_dilated02 = self.dilated_mask(0.2)\n", - " return self._mask_dilated02\n", - " \n", - " def dilated(self):\n", - " return {1: self.mask_dilated1(),\n", - " 0.5: self.mask_dilated05(),\n", - " 0.2: self.mask_dilated02(),}\n", - "\n", - " def __new__(cls,\n", - " exp: ExperimentContext,\n", - " idx: ImgSpecT,\n", - " *args, **kwargs) -> Self:\n", - " return super().__new__(cls, exp, idx, *args, **kwargs) # type: ignore\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# OCRExperimentContext" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "class OCRExperimentContext(ExperimentContext):\n", - " \"\"\"\n", - " A utility class to maintain shared state across all experiments within OCR domain.\n", - " This class encapsulates state necessary for conducting PanelCleaner OCR experiments.\n", - " \"\"\"\n", - "\n", - " config: cfg.Config\n", - " image_paths: list[Path]\n", - " # OCR engine -> Image index -> Box index -> Crop method -> Result\n", - " _results: dict[str, dict[ImgIdT, ResultSet]]\n", - "\n", - " \n", - " engines = {\n", - " 'Tesseract': cfg.OCREngine.TESSERACT, \n", - " 'Idefics': None, \n", - " 'manga-ocr': cfg.OCREngine.MANGAOCR}\n", - "\n", - " # subject_cls: ImageContext\n", - " # def subject_factory(self) -> Callable[..., ExperimentSubject]: return type(self).subject_cls\n", - "\n", - " @classmethod\n", - " def get_config(cls, cache_dir: Path | None = None) -> cfg.Config:\n", - " config = cfg.load_config()\n", - " config.cache_dir = cache_dir or Path(\".\")\n", - " profile = config.current_profile\n", - " preprocessor_conf = profile.preprocessor\n", - " # Modify the profile to OCR all boxes.\n", - " # Make sure OCR 
is enabled.\n", - " preprocessor_conf.ocr_enabled = True\n", - " # Make sure the max size is infinite, so no boxes are skipped in the OCR process.\n", - " preprocessor_conf.ocr_max_size = 10**10\n", - " # Make sure the sus box min size is infinite, so all boxes with \"unknown\" language are skipped.\n", - " preprocessor_conf.suspicious_box_min_size = 10**10\n", - " # Set the OCR blacklist pattern to match everything, so all text gets reported in the analytics.\n", - " preprocessor_conf.ocr_blacklist_pattern = \".*\"\n", - " return config\n", - "\n", - " def to_dict(self):\n", - " return {\n", - " 'image_paths': list(map(str, self.image_paths)),\n", - " 'cache_dir': str(self.config.cache_dir)\n", - " }\n", - " def to_json(self):\n", - " return json.dumps(self.to_dict(), indent=2)\n", - " @classmethod\n", - " def from_json_data(cls, d: dict):\n", - " return cls(cls.get_config(Path(d['cache_dir'])), d['image_paths'])\n", - " @classmethod\n", - " def from_json_path(cls, path: Path):\n", - " return cls.from_json_data(json.loads(path.read_text()))\n", - "\n", - " \n", - " @functools.lru_cache()\n", - " def mocr(self, ocr_model: str, lang: str):\n", - " engine = self.engines[ocr_model]\n", - " ocr_processor = ocr.get_ocr_processor(True, engine)\n", - " proc = ocr_processor[lang2pcleaner(lang)]\n", - " if isinstance(proc, TesseractOcr):\n", - " proc.lang = lang2tesseract(lang)\n", - " return proc\n", - "\n", - " def ocr_box(self, result: ResultOCR, ocr_model: str, lang: str): \n", - " assert result.image is not None\n", - " text = self.mocr(ocr_model, lang)(result.image)\n", - " result.ocr = postprocess_ocr(text)\n", - " return result\n", - "\n", - " @property\n", - " def cache_dir(self): return self.config.get_cleaner_cache_dir()\n", - " image_cache_dir = ExperimentContext.subject_cache_dir\n", - "\n", - " @functools.lru_cache()\n", - " def _load_page_data(self, image_idx: int):\n", - " config = self.config\n", - " cache_dir = self.image_cache_dir(image_idx)\n", - " 
img_path = self.path_from_idx(image_idx)\n", - " image_name = img_path.stem\n", - " # read cached json\n", - " jsons = [_ for _ in cache_dir.glob(\"*#raw.json\") if image_name in _.stem]\n", - " assert len(jsons) <= 1\n", - " # generate text boxes if needed\n", - " if not jsons:\n", - " pfl = config.current_profile\n", - " gpu = torch.cuda.is_available() or torch.backends.mps.is_available()\n", - " model_path = config.get_model_path(gpu)\n", - " ctm.model2annotations(pfl.general, pfl.text_detector, model_path, [img_path], cache_dir)\n", - " # we don't need unique names for this tests, strip uuids\n", - " for p in cache_dir.glob(f\"*{image_name}*\"):\n", - " p.rename(strip_uuid(p))\n", - " jsons = [_ for _ in cache_dir.glob(\"*#raw.json\") if image_name in _.stem]\n", - "\n", - " # adapt paths to be relative to this notebook\n", - " this_path = self._root\n", - " json_file_path = jsons[0]\n", - " json_data = json.loads(json_file_path.read_text(encoding=\"utf-8\"))\n", - " json_data[\"image_path\"] = str(strip_uuid(json_data[\"image_path\"]).relative_to(this_path))\n", - " json_data[\"mask_path\"] = str(strip_uuid(json_data[\"mask_path\"]).relative_to(this_path))\n", - " json.dump(json_data, open(json_file_path, \"w\"), indent=2)\n", - " else:\n", - " json_file_path = jsons[0]\n", - " json_data = json.loads(json_file_path.read_text(encoding=\"utf-8\"))\n", - "\n", - " page_data = st.PageData(\n", - " json_data[\"image_path\"], json_data[\"mask_path\"], \n", - " json_data[\"original_path\"], json_data[\"scale\"], \n", - " [st.Box(*data[\"xyxy\"]) for data in json_data[\"blk_list\"]], \n", - " [], [], [])\n", - " # Merge boxes that have mutually overlapping centers.\n", - " page_data.resolve_total_overlaps()\n", - " return json_data, page_data\n", - "\n", - " def page_data(self, image_idx: int):\n", - " _, page_data = self._load_page_data(image_idx)\n", - " return page_data\n", - " def json_data(self, image_idx: int):\n", - " json_data, _ = 
self._load_page_data(image_idx)\n", - " return json_data\n", - "\n", - " def experiment_image(self, image_idx: ImgIdT | str | Path) -> ImageContext | None:\n", - " \"Cached image context.\"\n", - " return cast(ImageContext, self.experiment_subject(image_idx))\n", - "\n", - " def update_results(self, ocr_model: str, img_idx: ImgIdT, results: ResultSetDefault):\n", - " self._results[ocr_model][img_idx] = cast(ResultSet, results)\n", - " \n", - " \n", - " def _result_from(self, image_idx: ImgIdT, box_idx: BoxIdT, method: CropMethod, ocr: str | None = None):\n", - " img_ctx = ImageContext(self, image_idx)\n", - " extracted = method in _EXTRACTED_METHODS\n", - " result_cls = ResultOCRExtracted if extracted else ResultOCR\n", - " result = result_cls(int(box_idx), None, '', img_ctx.page_data, \n", - " img_ctx.gts, description=f\"{method.value}\")\n", - " if ocr is not None:\n", - " result.ocr = ocr\n", - " return result\n", - " \n", - " def result(self, \n", - " ocr_model: str,\n", - " image_idx: ImgIdT, box_idx: BoxIdT, method: CropMethod, \n", - " ocr: bool=True, \n", - " rebuild: bool=False) -> ResultOCR | None:\n", - " img_ctx = ImageContext(self, image_idx)\n", - " result = self._results[ocr_model][image_idx][box_idx].get(method)\n", - " if not rebuild and result is not None:\n", - " return result\n", - " \n", - " result = self._result_from(image_idx, box_idx, method)\n", - " image, cropped_image, cropped_mask = result.image, None, None\n", - " base_image = img_ctx.base_image\n", - " box = img_ctx.boxes[box_idx]\n", - " if image is None and method in _IMAGE_METHODS:\n", - " image = crop_by_image(\n", - " method, box, base_image, self.config.current_profile.preprocessor)\n", - "\n", - " if image is None and method in _EXTRACTED_METHODS:\n", - " mask = img_ctx.mask\n", - " cropped_image_path = result.cache_image(cropped_image, \"cropped\")\n", - " cropped_mask_path = result.cache_image(cropped_mask, \"mask\")\n", - " if not cropped_image_path.exists() or not 
cropped_mask_path.exists():\n", - " image, cropped_image, cropped_mask = crop_by_extracted(\n", - " method, box, base_image, mask, \n", - " cropped_image_path, cropped_mask_path, img_ctx.dilated())\n", - " \n", - " assert image is not None\n", - " if result.image is None:\n", - " result.image = image\n", - " result.cache_image()\n", - " if cropped_image is not None:\n", - " result.cache_image(cropped_image, \"cropped\")\n", - " if cropped_mask is not None:\n", - " result.cache_image(cropped_mask, \"mask\")\n", - " \n", - " if ocr:\n", - " result = self.ocr_box(result, ocr_model, img_ctx.page_lang)\n", - " self._results[ocr_model][image_idx][box_idx][method] = result\n", - " return result\n", - "\n", - " def results(self, ocr_model: str | None = None, img_idx: ImgIdT | None = None):\n", - " if ocr_model is None: return self._results\n", - " if img_idx is None: return self._results[ocr_model]\n", - " return self._results[ocr_model][img_idx]\n", - " def model_results(self, ocr_model: str):\n", - " return cast(dict[ImgIdT, ResultSet], self.results(ocr_model))\n", - " def image_results(self, ocr_model: str, img_idx: ImgIdT):\n", - " return cast(ResultSet, self.results(ocr_model, img_idx))\n", - " def box_results(self, ocr_model: str, img_idx: ImgIdT, box_idx: BoxIdT):\n", - " return cast(ResultSet, self.results(ocr_model, img_idx))[box_idx]\n", - " def method_results(self, ocr_model: str, img_idx: ImgIdT, method: CropMethod):\n", - " image_results = self.image_results(ocr_model, img_idx)\n", - " return {i: box_results.get(method) for i,box_results in image_results.items()}\n", - "\n", - " def _reset_results(self):\n", - " results = defaultdict(lambda: defaultdict(lambda: ResultSetDefault(dict)))\n", - " self._results = cast(dict[str, dict[ImgIdT, ResultSet]], results)\n", - " def reset_results(self, \n", - " ocr_model: str | None = None, \n", - " image_idx: int | None = None, \n", - " box_idx: int | None = None, \n", - " method: CropMethod | None = None):\n", - " if 
ocr_model is None and image_idx is None and box_idx is None and method is None:\n", - " self._reset_results()\n", - " return\n", - " results = self._results\n", - " models = tuple(results.keys()) if ocr_model is None else [ocr_model] if ocr_model in results else []\n", - " for ocr_model in models:\n", - " img_nodes = results[ocr_model]\n", - " imgs = tuple(img_nodes.keys()) if image_idx is None else [image_idx] if image_idx in img_nodes else []\n", - " for img_idx in imgs:\n", - " box_nodes = img_nodes[img_idx]\n", - " boxes = tuple(box_nodes.keys()) if box_idx is None else [box_idx] if box_idx in box_nodes else []\n", - " for box_idx in boxes:\n", - " if method is None:\n", - " del box_nodes[box_idx]\n", - " else:\n", - " methods = box_nodes[box_idx]\n", - " if method in methods:\n", - " del methods[method]\n", - " if not box_nodes[box_idx]:\n", - " del box_nodes[box_idx]\n", - " if not img_nodes[img_idx]:\n", - " del img_nodes[img_idx]\n", - " if not results[ocr_model]:\n", - " del results[ocr_model]\n", - " def reset(self):\n", - " super().reset()\n", - " self.reset_results()\n", - " self._load_page_data.cache_clear()\n", - " self.mocr.cache_clear()\n", - "\n", - " def __init__(self, \n", - " config: cfg.Config | None, \n", - " image_paths: list[Path]\n", - " ):\n", - " super().__init__(list(map(lambda p: p.resolve(), image_paths)))\n", - " self.config = config or type(self).get_config()\n", - " self.image_paths = self._paths\n", - " self._reset_results()\n", - " self._images = self._subjects\n" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "\n", - "@FC.patch_to(ImageContext)\n", - "def setup(self, exp: OCRExperimentContext, image_idx: ImgSpecT, page_lang: str | None = None):\n", - " super(type(self), self).setup(exp, image_idx)\n", - " self._mask_dilated1 = self._mask_dilated05 = self._mask_dilated02 = None\n", - " # if ocr_model not in exp.engines:\n", - " # raise 
ValueError(f\"OCR model {ocr_model} not supported.\")\n", - " # self.ocr_model = ocr_model\n", - " # self.idx = exp.normalize_idx(image_idx)\n", - " self.json_data, self.page_data = exp._load_page_data(self.idx)\n", - " self.setup_page_lang(page_lang)\n", - " self.mask = Image.open(self.page_data.mask_path)\n", - " self.base_image = Image.open(self.page_data.image_path)\n", - " self.setup_ground_truth()\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "tirar = OCRExperimentContext(None, [])\n", - "test_eq(rr := tirar._results, {})\n", - "test_eq(rr['Tesseract'][0][0], {})\n", - "test_eq(rr, {'Tesseract': {0: {0: {}}}})\n", - "test_eq(rr['Tesseract'][0][0].get(CM.INITIAL_BOX), None)\n", - "rr['Tesseract'][0][0][CM.INITIAL_BOX] = 'a' # type: ignore\n", - "test_eq(rr, {'Tesseract': {0: {0: {CM.INITIAL_BOX: 'a'}}}})\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# ContextVisor\n" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "class ContextVisor:\n", - " ctx: Any\n", - " # control_names: list[str]\n", - " values: dict[str, Any]\n", - "\n", - " _css = ''\n", - "\n", - " _ctxs: dict[str, ContextVisor]\n", - " _hdlrs: dict[str, ContextVisor]\n", - "\n", - " @property\n", - " def w(self) -> W.DOMWidget:\n", - " if getattr(self, '_w', None) is None:\n", - " self._w = self.setup_ui()\n", - " return self._w\n", - " @property\n", - " def out(self) -> W.Output:\n", - " if getattr(self, '_out', None) is None:\n", - " self._out = W.Output()\n", - " self._out.clear_output(wait=True)\n", - " return self._out # type: ignore\n", - " @property\n", - " def controls(self) -> dict[str, W.ValueWidget | W.fixed]:\n", - " if getattr(self, '_controls', None) is None:\n", - " self._controls = self.setup_controls()\n", - " return self._controls\n", - " @property\n", - " def all_controls(self) -> 
dict[str, W.ValueWidget | W.fixed]:\n", - " if getattr(self, '_all_controls', None) is None:\n", - " controls = {}\n", - " for visor in self._ctxs.values():\n", - " controls.update(visor.all_controls)\n", - " controls.update(self.controls)\n", - " self._all_controls = controls\n", - " return self._all_controls\n", - " \n", - " @property\n", - " def all_values(self):\n", - " return {**{k:v.values for k,v in (self._ctxs | {'self': self}).items()}, **self.values}\n", - " \n", - " @property\n", - " def comps(self): return self._ctxs\n", - " def comp(self, k: str) -> ContextVisor | None:\n", - " return self._ctxs.get(k)\n", - " def handler(self, k: str) -> ContextVisor | None:\n", - " return self._hdlrs.get(k)\n", - " \n", - " @property\n", - " def styler(self) -> W.Output | None:\n", - " if (stl := self.setup_style()) is None: \n", - " return None\n", - " if getattr(self, '_style', None) is None:\n", - " self._style = W.Output(layout={'height': '0px'})\n", - " with self._style:\n", - " display(stl)\n", - " return self._style\n", - " def setup_style(self):\n", - " return HTML(f\"\") if self._css else None\n", - " \n", - " def update_output(self, **kwargs): \n", - " cprint(kwargs)\n", - " \n", - " def setup_controls(self) -> dict[str, W.ValueWidget | W.fixed]:\n", - " return {k: W.Label(value=k) for k,v in self.values.items()}\n", - " \n", - " def hide(self):\n", - " self.w.layout.visibility = 'hidden'\n", - " def show(self):\n", - " self.w.layout.visibility = 'visible'\n", - "\n", - " def setup_ui(self):\n", - " comps = []\n", - " for visor in self._ctxs.values():\n", - " comps.append(visor.w)\n", - " return W.HBox([*comps, *self.controls.values()])\n", - "\n", - " def setup_display(self): \n", - " if getattr(self, '_w', None) is None:\n", - " self._w = self.setup_ui()\n", - " \n", - "\n", - " def _output(self, **kwargs):\n", - " collator = defaultdict(dict)\n", - " show_inline_matplotlib_plots()\n", - " with self.out:\n", - " clear_output(wait=True)\n", - " for k,v in 
kwargs.items():\n", - " if (comp := self.handler(k)) is not None:\n", - " collator[comp][k] = v\n", - " else:\n", - " assert 0\n", - " # self.update_output(**{k: v})\n", - " for comp, kw in collator.items():\n", - " comp.update_output(**kw)\n", - " show_inline_matplotlib_plots()\n", - " def interactive_output(self):\n", - " controls = self.all_controls\n", - " controls2names = {v:k for k,v in controls.items()}\n", - " def observer(change):\n", - " control_name = controls2names[change['owner']]\n", - " kwargs = {control_name: change['new']}\n", - " updated = self._update(**kwargs)\n", - " self._output(**updated)\n", - " for w in controls.values():\n", - " w.observe(observer, 'value')\n", - " def display(self, **kwargs): \n", - " if getattr(self, '_w', None) is None:\n", - " self.setup_display()\n", - " self.interactive_output()\n", - " self._update(**(self.values | kwargs))\n", - " all_values= {}\n", - " for comp in list(self.comps.values()) + [self]: all_values.update(comp.values)\n", - " self._hdlrs = {k:self._hdlrs.get(k, self) for k in all_values}\n", - " self._output(**all_values)\n", - " display(self.styler, self.w, self.out) if self.styler else display(self.w, self.out)\n", - " else:\n", - " self.update(**kwargs)\n", - " def _ipython_display_(self): self.display()\n", - "\n", - " def _update(self, update_value: bool=True, **kwargs):\n", - " updated = {}\n", - " for visor in self.comps.values():\n", - " updated.update(visor._update(update_value=update_value, **kwargs))\n", - " values = self.values\n", - " my_vals = _pops_(kwargs, self.values.keys())\n", - " for k,v in my_vals.items():\n", - " if v is not None and v != values[k]:\n", - " if update_value: values[k] = v\n", - " updated[k] = v\n", - " return updated\n", - " def update(self, **kwargs):\n", - " updated = self._update(update_value=False, **kwargs)\n", - " controls = self.all_controls\n", - " for k in updated:\n", - " controls[k].value = updated[k]\n", - " # self._output(**updated)\n", - " \n", - " 
def __init__(self, \n", - " ctx: Any, \n", - " values: dict[str, Any], \n", - " out: W.Output | None = None,\n", - " ctxs: dict[str, ContextVisor] | None = None,\n", - " hdlrs: dict[str, ContextVisor] | None = None,\n", - " ):\n", - " self._ctxs = ctxs or {}\n", - " self._hdlrs = hdlrs or {}\n", - " self.ctx = ctx\n", - " self._out = out\n", - " self.values = values\n", - " \n" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "82c802235bf84f6eb36d87cd72607440", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(Label(value='a'),))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4f0809d6276b4bea9f0f992b337817a8", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "cleanupwidgets('test_visor')\n", - "\n", - "test_visor = ContextVisor(None, {'a': 1})\n", - "test_visor\n" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "test_eq(test_visor.values, {'a': 1})\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# CONTEXT\n", - "> `CONTEXT` is an `OCRExperimentContext` object that contains the configuration and the list of image paths.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can get the configuration with `OCRExperimentContext.get_config()`.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current Configuration:\n", - "\n", - "Locale: System default\n", - "Default Profile: Built-in\n", - "Saved Profiles:\n", - "- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n", - 
"- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n", - "\n", - "Profile Editor: cursor\n", - "Cache Directory: .\n", - "Default Torch Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n", - "Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n", - "GUI Theme: System default\n", - "\n", - "--------------------\n", - "\n", - "Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n", - "System default cache directory: /Users/vic/Library/Caches/pcleaner\n" - ] - }, - { - "data": { - "text/html": [ - "
      cache_dir: Path('cleaner')\n",
-       "     model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n",
-       "         device: 'mps'\n",
-       "
\n" - ], - "text/plain": [ - " cache_dir: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'cleaner'\u001b[0m\u001b[1m)\u001b[0m\n", - " model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n", - " device: \u001b[32m'mps'\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "CONFIG = OCRExperimentContext.get_config()\n", - "\n", - "gpu = torch.cuda.is_available() or torch.backends.mps.is_available()\n", - "model_path = CONFIG.get_model_path(gpu)\n", - "device = (\"mps\" if torch.backends.mps.is_available() else \"cuda\") if model_path.suffix == \".pt\" else \"cpu\"\n", - "\n", - "CONFIG.show()\n", - "cprint(\n", - " f\"{'cache_dir':>15}: {repr(cache_dir)}\\n\"\n", - " f\"{'model_path':>15}: {repr(model_path)}\\n\"\n", - " f\"{'device':>15}: {repr(device)}\")\n", - "\n", - "test_eq(CONFIG.cache_dir, Path(\".\"))\n", - "test_eq(CONFIG.current_profile.preprocessor.ocr_enabled, True)\n", - "test_eq(CONFIG.current_profile.preprocessor.ocr_max_size, 10**10)\n", - "test_eq(CONFIG.current_profile.preprocessor.suspicious_box_min_size, 10**10)\n", - "test_eq(CONFIG.current_profile.preprocessor.ocr_blacklist_pattern, \".*\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "CONTEXT = OCRExperimentContext(CONFIG, IMAGE_PATHS)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# ImageSelector" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "class ImageSelector(ContextVisor):\n", - " ctx: OCRExperimentContext\n", - "\n", - " @property\n", - " def image_ctx(self):\n", - " return ImageContext(self.ctx, self.values['image_idx'])\n", - " \n", - " def setup_controls(self):\n", - " paths = self.ctx.image_paths\n", - " w = W.Dropdown(\n", - " 
options={_.stem:i for i,_ in enumerate(paths)}, \n", - " value=self.values['image_idx'],\n", - " layout={'width': 'fit-content'},\n", - " style={'description_width': 'initial'})\n", - " return {'image_idx': w}\n", - "\n", - " def update(self, image_idx: ImgSpecT | None = None, **kwargs):\n", - " if image_idx is None: return\n", - " idx = self.ctx.normalize_idx(image_idx)\n", - " if idx is None: return\n", - " super().update(image_idx=idx, **kwargs)\n", - "\n", - "\n", - " def __init__(self, \n", - " ctx: OCRExperimentContext, /, \n", - " image_idx: ImgSpecT = 0, *, \n", - " out: W.Output | None=None):\n", - " idx = ctx.normalize_idx(image_idx)\n", - " assert idx is not None, f\"Image {image_idx} not found in experiment context\"\n", - " super().__init__(ctx, {'image_idx': idx}, out)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c033b2c76629478f8d7702e1f3f8666a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(Dropdown(index=2, layout=Layout(width='fit-content'), options={'Action_Comics_1960-01-00_(262)'…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d78cb5cf72c34bcab9274cfd77f585a5", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "cleanupwidgets('image_selector')\n", - "\n", - "image_selector = ImageSelector(CONTEXT, 2)\n", - "image_selector\n" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "image_selector.update(13)\n", - "test_eq(image_selector.values['image_idx'], 13)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# OCRContextVisor" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - 
"metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "class OCRContextVisor(ContextVisor):\n", - " ctx: OCRExperimentContext\n", - " \n", - " def update_output(self, /, image_idx: ImgIdT, **kwargs):\n", - " img_path = self.ctx.path_from_idx(image_idx)\n", - " display_image_grid([img_path], 1, 1)\n", - "\n", - " def update(self, image_idx: ImgSpecT | None = None, **kwargs):\n", - " if image_idx is None: return\n", - " idx = self.ctx.normalize_idx(image_idx)\n", - " if idx is None: return\n", - " super().update(image_idx=idx, **kwargs)\n", - " \n", - " def __init__(self, \n", - " ctx: OCRExperimentContext, /, \n", - " image_idx: ImgSpecT = 0, *, \n", - " out: W.Output | None=None):\n", - " super().__init__(ctx, {}, out, \n", - " ctxs={'image_idx': ImageSelector(ctx, image_idx, out=self.out)})\n" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3b40f82e57564fbcae7913d7a76fbc32", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HBox(children=(Dropdown(index=2, layout=Layout(width='fit-content'), options={'Action_Comics_19…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "174fbbc4b6ff42ac8013984946e787e4", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "cleanupwidgets('ctx_visor')\n", - "\n", - "# ContextVisor(CONTEXT)\n", - "# ContextVisor(CONTEXT).display(3)\n", - "ctx_visor = OCRContextVisor(CONTEXT, 2)\n", - "ctx_visor\n" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [], - "source": [ - "ctx_visor.update('Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Base image\n", - "> 
Change `BASE_IMAGE_IDX` to select a different base image to use in the examples below." - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [], - "source": [ - "BASE_IMAGE_IDX: ImgIdT = cast(ImgIdT, CONTEXT.normalize_idx(\"Strange_Tales_172005.jpg\"))\n", - "# BASE_IMAGE_IDX = CONTEXT.normalize_idx(\"0033\")\n", - "# BASE_IMAGE_IDX = CONTEXT.normalize_idx(\"INOUE_KYOUMEN_002\")\n", - "# BASE_IMAGE_IDX = CONTEXT.normalize_idx(\"Action_Comics_1960-01-00_(262)\")\n", - "\n", - "assert BASE_IMAGE_IDX is not None\n", - "img_path = Path(CONTEXT.image_paths[BASE_IMAGE_IDX])\n", - "assert img_path.exists()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Empty cache\n", - "> Clear the image cache used profusely throughout the examples below." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "You will be warned before the cache is emptied." - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [], - "source": [ - "# CONTEXT.empty_cache_warn()" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [], - "source": [ - "# CONTEXT.empty_cache_warn(BASE_IMAGE_IDX)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# ImageContext of base image\n", - "> Creates the `ImageContext` for the base image.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If `PanelCleaner` page data is already cached, it is loaded from the cache.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [], - "source": [ - "CONTEXT.reset()" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "page_lang = 'English'\n", - "# page_lang = 'Japanese'\n", - "# page_lang = 'Spanish'\n", - "# page_lang = 'French'\n", - "\n", - "IMAGE_CONTEXT = ImageContext(CONTEXT, BASE_IMAGE_IDX, page_lang=page_lang)\n", - "test_eq(IMAGE_CONTEXT.page_data is not None, True)\n", - "# cprint(IMAGE_CONTEXT.page_data.boxes)\n", - "RenderJSON(IMAGE_CONTEXT.json_data, 360, 2)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [], - "source": [ - "test_is(IMAGE_CONTEXT, ImageContext(CONTEXT, BASE_IMAGE_IDX))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Visualize image" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Strange_Tales_172005.jpg - 1275x1888 px: 4.25x6.29\" @ 188.32 dpi
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "page_data = IMAGE_CONTEXT.page_data\n", - "display_image_grid([page_data.image_path, page_data.mask_path], 1, 2, caption=IMAGE_CONTEXT.image_name_rich)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "_, out_path = page_boxes(page_data)\n", - "display_image_grid([out_path], 1, 1)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# ModelSelector\n" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "class OCRModel(Enum):\n", - " TESSERACT = 0\n", - " IDEFICS = 1\n", - " @staticmethod\n", - " def __display_names__() -> dict[str, OCRModel]:\n", - " return dict(\n", - " zip(\"Tesseract, Idefics\".split(', '), \n", - " OCRModel))\n", - "\n", - "\n", - "class ModelSelector(ContextVisor):\n", - " ctx: OCRExperimentContext\n", - " \n", - " def setup_controls(self):\n", - " options = self.models\n", - " w = W.Dropdown(\n", - " options=options, \n", - " value=self.values['model'],\n", - " layout={'width': 'fit-content'},\n", - " style={'description_width': 'initial'})\n", - " return {'model': w}\n", - "\n", - " def setup_ui(self):\n", - " ctls = self.controls\n", - " model_grp = W.HBox([ctls['model']])\n", - " model_grp.add_class('model_grp')\n", - " comps = []\n", - " for visor in self.comps.values():\n", - " comps.append(visor.setup_ui())\n", - " ui = W.HBox([*comps, model_grp])\n", - " return ui\n", - "\n", - " def __init__(self, \n", - " exp_ctx: OCRExperimentContext,\n", - " ocr_model: OCRModel | None=OCRModel.TESSERACT,\n", - " ocr_models: dict[str, OCRModel] | None = None,\n", - " out: W.Output | None = None\n", - " ):\n", - " self.models: dict[str, OCRModel] = ocr_models or OCRModel.__display_names__()\n", - " super().__init__(exp_ctx, \n", - " {'model': ocr_model or OCRModel.TESSERACT}, \n", - " out=out or self.out)#, ctxs=[exp_visor])\n" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": 
"413f66f1d43a4d849e43a79ca9b16502", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HBox(children=(Dropdown(layout=Layout(width='fit-content'), options={'Tesseract': 30}: {w} x {h} pixels\\n\"\n", - " f\"{'PIL Info DPI':>30}: {repr(img.info.get('dpi', None))}\\n\"\n", - " f\"{'Print Size 300 DPI':>30}: {print_size_in[0]:.3f} x {print_size_in[1]:.3f} in\"\n", - " f\" / {print_size_cm[0]:.3f} x {print_size_cm[1]:.3f} cm\\n\"\n", - " f\"Required DPI Modern Age format: {required_dpi:.3f} dpi \"\n", - " f\"({format[0]:.3f} x {format[1]:.3f} in)\")\n", - "\n", - "\n", - " def display_content(self, image_ctx: ImageContext, display_option: DisplayOptions):\n", - " page_data = image_ctx.page_data\n", - " if display_option in (DisplayOptions.ALL, DisplayOptions.PAGE_DATA):\n", - " self.image_info(image_ctx)\n", - " RenderJSON(image_ctx.json_data, 350, 2).display()\n", - " if display_option in (DisplayOptions.ALL, DisplayOptions.GROUND_TRUTH):\n", - " cprint(image_ctx.gts)\n", - " if display_option == DisplayOptions.IMAGE:\n", - " display_image_grid([page_data.image_path], 1, 1)\n", - " if display_option == DisplayOptions.MASK:\n", - " display_image_grid([page_data.mask_path], 1, 1)\n", - " if display_option in (DisplayOptions.ALL, DisplayOptions.IMAGE_MASK):\n", - " display_image_grid([page_data.image_path, page_data.mask_path], 1, 2)\n", - " if display_option in (DisplayOptions.ALL, DisplayOptions.BOXES):\n", - " _, out_path = page_boxes(page_data)\n", - " display_image_grid([out_path], 1, 1)\n", - "\n", - "\n", - " def setup_controls(self):\n", - " options = self.display_options or {**DisplayOptions.__display_names__()}\n", - " display_option_wdgt = W.Dropdown(\n", - " options=options, \n", - " value=self.values['display_option'],\n", - " layout={'width': '120px'},\n", - " style={'description_width': 'initial'})\n", - " return {'display_option': display_option_wdgt}\n", - "\n", - "\n", - " def setup_ui(self):\n", - " ctls = 
self.controls\n", - " display_option_grp = W.HBox([ctls['display_option']])\n", - " display_option_grp.add_class('display_option_grp')\n", - " comps = []\n", - " for visor in self.comps.values():\n", - " comps.append(visor.setup_ui())\n", - " ui = W.HBox([*comps, display_option_grp])\n", - " return ui\n", - "\n", - "\n", - " def __init__(self, \n", - " exp_ctx: OCRExperimentContext,\n", - " display_option: DisplayOptions | None=DisplayOptions.BOXES,\n", - " display_options: Mapping[str, DisplayOptions] | None = None,\n", - " out: W.Output | None = None\n", - " ):\n", - " self.display_options = display_options\n", - " super().__init__(exp_ctx, \n", - " {'display_option': display_option or DisplayOptions.BOXES}, \n", - " out=out or self.out)#, ctxs=[exp_visor])\n" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "51b272e199e94fe69465b30306499d8a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HBox(children=(Dropdown(index=2, layout=Layout(width='120px'), options={'Boxes': Width x Height: 1275 x 1888 pixels\n", - " PIL Info DPI: None\n", - " Print Size 300 DPI: 4.250 x 6.293 in / 10.795 x 15.985 cm\n", - "Required DPI Modern Age format: 188.324 dpi (6.625 x 10.250 in)\n", - "\n" - ], - "text/plain": [ - " Width x Height: \u001b[1;36m1275\u001b[0m x \u001b[1;36m1888\u001b[0m pixels\n", - " PIL Info DPI: \u001b[3;35mNone\u001b[0m\n", - " Print Size \u001b[1;36m300\u001b[0m DPI: \u001b[1;36m4.250\u001b[0m x \u001b[1;36m6.293\u001b[0m in \u001b[35m/\u001b[0m \u001b[1;36m10.795\u001b[0m x \u001b[1;36m15.985\u001b[0m cm\n", - "Required DPI Modern Age format: \u001b[1;36m188.324\u001b[0m dpi \u001b[1m(\u001b[0m\u001b[1;36m6.625\u001b[0m x \u001b[1;36m10.250\u001b[0m in\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "img = IMAGE_CONTEXT.base_image\n", - "w, h 
= img.size\n", - "\n", - "print_size_in = size(w, h, 'in', 300)\n", - "print_size_cm = size(w, h, 'cm', 300)\n", - "required_dpi = dpi(w, h, 'Modern Age')\n", - "format = PRINT_FORMATS['Modern Age']\n", - "cprint( f\"{'Width x Height':>30}: {w} x {h} pixels\\n\"\n", - " f\"{'PIL Info DPI':>30}: {repr(img.info.get('dpi', None))}\\n\"\n", - " f\"{'Print Size 300 DPI':>30}: {print_size_in[0]:.3f} x {print_size_in[1]:.3f} in\"\n", - " f\" / {print_size_cm[0]:.3f} x {print_size_cm[1]:.3f} cm\\n\"\n", - " f\"Required DPI Modern Age format: {required_dpi:.3f} dpi ({format[0]:.3f} x {format[1]:.3f} in)\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "((1275, 1888),\n", - " (4.25, 6.293333333333333),\n", - " (10.795, 15.985066666666667),\n", - " 188.32397606994937)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                Width x Height: 804 x 1241 pixels\n",
-       "                  PIL Info DPI: None\n",
-       "            Print Size 300 DPI: 2.680 x 4.137 in / 6.807 x 10.507 cm\n",
-       "Required DPI Modern Age format: 121.216 dpi (6.625 x 10.250 in)\n",
-       "
\n" - ], - "text/plain": [ - " Width x Height: \u001b[1;36m804\u001b[0m x \u001b[1;36m1241\u001b[0m pixels\n", - " PIL Info DPI: \u001b[3;35mNone\u001b[0m\n", - " Print Size \u001b[1;36m300\u001b[0m DPI: \u001b[1;36m2.680\u001b[0m x \u001b[1;36m4.137\u001b[0m in \u001b[35m/\u001b[0m \u001b[1;36m6.807\u001b[0m x \u001b[1;36m10.507\u001b[0m cm\n", - "Required DPI Modern Age format: \u001b[1;36m121.216\u001b[0m dpi \u001b[1m(\u001b[0m\u001b[1;36m6.625\u001b[0m x \u001b[1;36m10.250\u001b[0m in\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "display(IMAGE_CONTEXT.image_info)\n", - "img_visor.image_info()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Balloons and Captions Ground Truth\n", - "> The ground truth for the balloons and captions is read from a `.txt` file.\n", - "\n", - "The file is named `.gt.txt` and contains one entry per line, corresponding to each balloon or caption in the order found in PanelClenaer page data.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.',\n", - " 'The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their master comes home--',\n", - " 'And one in need of some help, it would appear.',\n", - " 'Bambu-- we have a guest.',\n", - " '--and tonight, he comes most urgently, slamming open the oaken front doors!',\n", - " 'Tell me, master-- how may Bambu serve?',\n", - " 'Some blankets to keep her warm, Bambu-- and perhaps some dry clothes',\n", - " \"The echo of the old man's footsteps fades down the hall as...\",\n", - " 'How curious the whims of fate. 
Had I not chanced to stroll along the river tonight--',\n", - " 'As quickly as I can, master',\n", - " '--the girl would most surely be dead by now.',\n", - " 'Ghede has been generous. the Death God has given the girl a second chance at--',\n", - " \"Easy, girl-- there's nothing to scream about anymore.\",\n", - " \"You're among friends now, you're safe!\",\n", - " 'Continued after next page']" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "IMAGE_CONTEXT.gts\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Experiment\n", - "> Use `ExperimentOCR` to perform OCR on the page balloons given a `CropMethod` and a model (i.e., `'Tesseract'` or `'Idefics'`)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "\n", - "def trimmed_mean(data, trim_percent):\n", - " sorted_data = np.sort(data)\n", - " n = len(data)\n", - " trim_count = int(trim_percent * n)\n", - " trimmed_data = sorted_data[trim_count:-trim_count]\n", - " return np.mean(trimmed_data)\n", - "\n", - "def mad_based_outlier(points, threshold=3.5):\n", - " median = np.median(points)\n", - " diff = np.abs(points - median)\n", - " mad = np.median(diff)\n", - " modified_z_score = 0.6745 * diff / mad\n", - " return points[modified_z_score < threshold]\n", - "\n", - "def iqr_outlier_removal(data):\n", - " q1 = np.percentile(data, 25)\n", - " q3 = np.percentile(data, 75)\n", - " iqr = q3 - q1\n", - " lower_bound = q1 - 1.5 * iqr\n", - " upper_bound = q3 + 1.5 * iqr\n", - " return data[(data >= lower_bound) & (data <= upper_bound)]\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "@dataclasses.dataclass\n", - "class Experiment:\n", - " ctx: ExperimentContext\n", - "\n", - "\n", - "@dataclasses.dataclass\n", - "class ExperimentOCR(Experiment):\n", - " 
ctx: ImageContext\n", - " ocr_model: str\n", - "\n", - " @property\n", - " def img_ctx(self): return self.ctx\n", - " @property\n", - " def ctxs(self):\n", - " img_ctx = self.img_ctx\n", - " return cast(OCRExperimentContext, img_ctx.exp), img_ctx\n", - "\n", - " @classmethod\n", - " def file_path_of(cls, page_data: st.PageData, ocr_model: str):\n", - " return f\"{Path(page_data.original_path).stem}_{ocr_model}.json\"\n", - " \n", - " def file_path(self):\n", - " img_ctx = self.img_ctx\n", - " return type(self).file_path_of(img_ctx.page_data, self.ocr_model)\n", - " \n", - " def to_dict(self):\n", - " \"JSON serializable dict of the experiment\"\n", - " img_ctx = self.img_ctx\n", - " img_idx = img_ctx.image_idx\n", - " results = results_to_dict(self.results())\n", - " return {\n", - " 'image_name': img_ctx.image_name,\n", - " 'ocr_model': self.ocr_model, \n", - " 'results': results,\n", - " }\n", - "\n", - " def to_json(self, out_dir: Path | None = None):\n", - " img_ctx = self.img_ctx\n", - " fp = (out_dir or img_ctx.cache_dir_image) / self.file_path()\n", - " data = self.to_dict()\n", - " with open(fp, 'w') as f:\n", - " json.dump(data, f, indent=2)\n", - " return fp, data\n", - "\n", - " @classmethod\n", - " def from_json(cls, experiment: OCRExperimentContext, json_path: Path) -> Self:\n", - " try:\n", - " with open(json_path, 'r') as f:\n", - " data = json.load(f)\n", - " except Exception as e:\n", - " logger.error(f\"Error loading {json_path}: {e}\")\n", - " raise e\n", - " ocr_model = data['ocr_model']\n", - " img_ctx = ImageContext(experiment, data['image_name'])\n", - " results: ResultSetDefault = dict_to_results(\n", - " img_ctx.image_idx, \n", - " data['results'], \n", - " result_factory=experiment._result_from)\n", - " experiment.update_results(ocr_model, img_ctx.image_idx, results)\n", - " return cls(img_ctx, ocr_model)\n", - "\n", - " @classmethod\n", - " def from_image(cls, \n", - " ctx: OCRExperimentContext, \n", - " ocr_model: str, \n", - " 
image_idx: ImgSpecT):\n", - " idx = cast(ImgIdT, ctx.normalize_idx(image_idx))\n", - " img_ctx = ImageContext(ctx, idx)\n", - " if img_ctx is None:\n", - " raise ValueError(f\"Image {image_idx} not found in experiment context\")\n", - " fp = img_ctx.cache_dir / cls.file_path_of(img_ctx.page_data, ocr_model)\n", - " if fp.exists(): \n", - " return cast(Self, cls.from_json(cast(OCRExperimentContext, img_ctx.exp), fp))\n", - " return cls(img_ctx, ocr_model)\n", - "\n", - " @classmethod\n", - " def from_method(cls, \n", - " ctx: OCRExperimentContext, \n", - " ocr_model: str, \n", - " image_idx: ImgIdT | str | Path, \n", - " method: CropMethod):\n", - " experiment = cls.from_image(ctx, ocr_model, image_idx)\n", - " if experiment is None:\n", - " return None\n", - " return experiment.method_experiment(method)\n", - "\n", - " @classmethod\n", - " def saved_experiment(cls, \n", - " ctx: OCRExperimentContext, ocr_model: str, image_idx: ImgIdT | str | Path):\n", - " idx = ctx.normalize_idx(image_idx)\n", - " if idx is None: \n", - " logger.warning(f\"Image {image_idx} not found in experiment context\")\n", - " return None\n", - " return cls.from_image(ctx, ocr_model, idx)\n", - "\n", - " @classmethod\n", - " def saved_experiments(cls, ctx: OCRExperimentContext, ocr_model: str) -> list[Self]:\n", - " return [exp for i in range(len(ctx.image_paths))\n", - " if (exp := cls.from_image(ctx, ocr_model, i)) is not None]\n", - " \n", - "\n", - " def result(self, box_idx: BoxIdT, method: CropMethod, ocr: bool=True, rebuild: bool=False):\n", - " ctx, img_ctx = self.ctxs\n", - " return ctx.result(self.ocr_model, img_ctx.image_idx, box_idx, method, ocr, rebuild)\n", - "\n", - " def results(self):\n", - " ctx, img_ctx = self.ctxs\n", - " return cast(ResultSet, ctx.results(self.ocr_model, img_ctx.image_idx))\n", - "\n", - " def has_run(self):\n", - " \"at least one method has run\"\n", - " img_ctx = self.img_ctx\n", - " return len(self.results()) == len(img_ctx.page_data.boxes)\n", - " 
\n", - " def best_results(self):\n", - " img_ctx = self.img_ctx\n", - " results = self.results()\n", - " if len(results) < len(img_ctx.page_data.boxes): # at least one method has run\n", - " return None\n", - " best = []\n", - " for box_idx in results:\n", - " methods = results[box_idx]\n", - " best_method = max(methods, key=lambda m: methods[m].acc) # type: ignore\n", - " best.append((best_method, methods[best_method]))\n", - " return best\n", - "\n", - " def save_results_as_ground_truth(self, overwrite=False):\n", - " img_ctx = self.img_ctx\n", - " gts_path = ground_truth_path(img_ctx.page_data)\n", - " if overwrite or not gts_path.exists():\n", - " best_results = self.best_results()\n", - " if best_results:\n", - " tt = [r.ocr for m,r in best_results]\n", - " gts_path.write_text('\\n'.join(tt), encoding=\"utf-8\")\n", - " img_ctx.setup_ground_truth()\n", - " logger.info(f\"Ground truth data saved successfully to {gts_path}\")\n", - " return True\n", - " else:\n", - " logger.info(\"No best results available to save.\")\n", - " return False\n", - " else:\n", - " return False\n", - "\n", - " @property\n", - " def experiments(self):\n", - " if not hasattr(self, '_experiments'):\n", - " self._experiments = {}\n", - " return self._experiments\n", - " def method_experiment(self, method: CropMethod) -> ExperimentOCRMethod:\n", - " if method not in self.experiments:\n", - " self.experiments[method] = ExperimentOCRMethod(self, method)\n", - " return self.experiments[method]\n", - " \n", - "\n", - " def to_dataframe(self):\n", - " \"Dataframe with crop methods as columns and box ids as rows\"\n", - " methods = list(CropMethod.__members__.values())\n", - " experiments = [self.method_experiment(m) for m in methods]\n", - " accuracies = [[result.acc for result in exp.results()] for exp in experiments]\n", - " # transpose accuracies\n", - " accuracies = list(zip(*accuracies))\n", - " return pd.DataFrame(accuracies, columns=CropMethod.__display_names__())\n", - "\n", - " def 
plot_accuracies(self, \n", - " methods: list[CropMethod] | None = None, \n", - " ):\n", - " \"Plots a horizontal bar chart of the accuracies for a list of method experiments.\"\n", - " methods = methods or list(CropMethod.__members__.values())\n", - " experiments = [self.method_experiment(m) for m in methods]\n", - " if not experiments: return\n", - "\n", - " ctx, img_ctx = self.ctxs\n", - " page_data = img_ctx.page_data\n", - " model = self.ocr_model\n", - " accuracies = [[result.acc for result in exp.results()] for exp in experiments]\n", - " accuracies = [np.mean(a) for a in accuracies]\n", - " # accuracies = [np.mean([result.acc for result in exp.results()]) for exp in experiments]\n", - "\n", - " _, ax = plt.subplots(figsize=(10, 5))\n", - " \n", - " # Normalize the accuracies for color mapping\n", - " norm = plt.Normalize(min(accuracies), max(accuracies))\n", - " # Color map from red to green\n", - " cmap = plt.get_cmap('RdYlGn')\n", - " colors = cmap(norm(accuracies))\n", - "\n", - " ax.barh([m.value for m in methods], accuracies, color=colors)\n", - "\n", - " ax.set_xscale('log') # Set the x-axis to a logarithmic scale\n", - " ax.set_xlabel('Average Accuracy (log scale)', fontsize=12, fontweight='bold')\n", - "\n", - " ax.set_ylabel('Method', fontsize=12, fontweight='bold')\n", - " ax.set_yticks(range(len(methods)))\n", - " ax.set_yticklabels([f'{method.value} ({acc:.2f})' \n", - " for method, acc in zip(methods, accuracies)], fontsize=12)\n", - " max_acc_index = np.argmax(accuracies)\n", - " ax.get_yticklabels()[max_acc_index].set(color='blue', fontweight='bold')\n", - "\n", - " title_text = (f\"{page_data.original_path} - OCR model: {model}\")\n", - " ax.set_title(title_text, fontsize=12, fontweight='bold')\n", - "\n", - " plt.tight_layout()\n", - " plt.show()\n", - "\n", - "\n", - " def summary_box(self, box_idx: int):\n", - " results: list[tuple[CropMethod, ResultOCR]] = []\n", - " pb = tqdm(CropMethod.__members__.values(), leave=False, desc=f\"Box 
#{box_idx+1}\")\n", - " for m in pb:\n", - " r = cast(ResultOCR, self.result(box_idx, m))\n", - " results.append((m, r))\n", - " methods, images, ocrs, accs = zip(\n", - " *map(\n", - " lambda t: (t[0].value, t[1].cache_image(), t[1].diff_tagged(), acc_as_html(t[1].acc)), \n", - " results))\n", - " display_columns([methods, images, accs, ocrs], \n", - " headers=[\"Method\", f\"Box #{box_idx+1}\", \"Accuracy\", \"OCR\"])\n", - "\n", - "\n", - " def summary_method(self, method: CropMethod):\n", - " results = self.method_experiment(method).results()\n", - " methods, images, ocrs, accs = zip(\n", - " *map(\n", - " lambda r: (r.block_idx+1, r.cache_image(), r.diff_tagged(), acc_as_html(r.acc)), \n", - " results))\n", - " display_columns([methods, images, accs, ocrs], \n", - " headers=[\"Box #\", \"Box\", \"Accuracy\", f\"{method.value} OCR\"])\n", - "\n", - "\n", - " def display(self):\n", - " out = []\n", - " for method in CropMethod:\n", - " out.append(f\"---------- {method.value} ----------\")\n", - " results = self.method_experiment(method).results()\n", - " out.extend(results)\n", - " out.append('\\n')\n", - " cprint(*out, soft_wrap=True)\n", - "\n", - "\n", - " def reset(self, box_idx: int | None = None, method: CropMethod | None = None):\n", - " ctx, img_ctx = self.ctxs\n", - " ctx.reset_results(None, img_ctx.image_idx, box_idx, method)\n", - "\n", - " def perform_methods(self, \n", - " methods: CropMethod | list[CropMethod] | None = None, \n", - " box_idxs: BoxIdT | list[BoxIdT] | None = None,\n", - " rebuild: bool = False,\n", - " plot_acc: bool = False\n", - " ):\n", - " if methods is None:\n", - " methods = [*CropMethod.__members__.values()]\n", - " elif isinstance(methods, CropMethod):\n", - " methods = [methods]\n", - " if rebuild:\n", - " _methods = tqdm(methods, desc=\"Methods\")\n", - " else:\n", - " _methods = methods\n", - " for method in _methods:\n", - " method_exp = self.method_experiment(method)\n", - " if method_exp: \n", - " if rebuild:\n", - " 
method_exp(box_idxs, rebuild=rebuild)\n", - " if plot_acc:\n", - " self.plot_accuracies()\n", - "\n", - " def __call__(self, \n", - " box_idxs: BoxIdT | list[BoxIdT] | None = None,\n", - " methods: CropMethod | list[CropMethod] | None = None, \n", - " save: bool = True,\n", - " display=False, \n", - " rebuild: bool=False, \n", - " save_as_ground_truth=False):\n", - " self.perform_methods(methods, box_idxs, rebuild=rebuild)\n", - " if save_as_ground_truth:\n", - " self.save_results_as_ground_truth(overwrite=True)\n", - " if save:\n", - " self.to_json()\n", - " if display:\n", - " self.display()\n", - " \n", - "\n", - "@dataclasses.dataclass\n", - "class ExperimentOCRMethod:\n", - " ctx: ExperimentOCR\n", - " method: CropMethod\n", - "\n", - " @property\n", - " def exp_ctx(self): return self.ctx\n", - " @property\n", - " def img_ctx(self): return self.ctx.ctx\n", - " @property\n", - " def ctxs(self):\n", - " img_ctx = self.img_ctx\n", - " return cast(OCRExperimentContext, img_ctx.exp), img_ctx, self.ctx\n", - " \n", - " def result(self, box_idx: BoxIdT, ocr: bool=True, rebuild: bool=False) -> ResultOCR | None:\n", - " ctx, img_ctx, exp_ctx = self.ctxs\n", - " return ctx.result(exp_ctx.ocr_model, img_ctx.image_idx, box_idx, self.method, ocr, rebuild)\n", - "\n", - " def results(self, \n", - " box_idxs: BoxIdT | list[BoxIdT] | None = None, \n", - " ocr: bool=True, rebuild: bool=False) -> list[ResultOCR]:\n", - " ctx, img_ctx, exp_ctx = self.ctxs\n", - " if box_idxs is None:\n", - " box_idxs = list(range(len(img_ctx.boxes)))\n", - " elif isinstance(box_idxs, int):\n", - " box_idxs = [box_idxs]\n", - " model = exp_ctx.ocr_model\n", - " results = ctx.method_results(model, img_ctx.image_idx, self.method)\n", - " results = {i:results[i] if i in results else None for i in box_idxs}\n", - " pb = rebuild or not results or any(r is None for r in results.values())\n", - " if pb and len(results) > 2:\n", - " progress_bar = tqdm(list(results.keys()), desc=f\"{self.method.value} - 
{model}\")\n", - " else:\n", - " progress_bar = list(results.keys())\n", - " results = []\n", - " for i in progress_bar:\n", - " results.append(self.result(i, ocr, rebuild=rebuild))\n", - " return results\n", - "\n", - "\n", - " def get_results_html(self, \n", - " box_idxs: BoxIdT | list[BoxIdT] | None = None,\n", - " max_image_width: int | None = None): \n", - " _, img_ctx, exp_ctx = self.ctxs\n", - " results: list[ResultOCR] = self.results(box_idxs)\n", - " accs = np.array([r.acc for r in results])\n", - " mean_accuracy = np.mean(accs)\n", - " mean_trimmed = trimmed_mean(accs, 0.1)\n", - " # filtered_data = mad_based_outlier(accs)\n", - " # mean_mad = np.mean(filtered_data)\n", - " # filtered_data = iqr_outlier_removal(accs)\n", - " # mean_iqr = np.mean(filtered_data)\n", - " \n", - " descriptions, images, ocrs, accs = zip(*map(\n", - " lambda r: (\n", - " r.block_idx+1, \n", - " r.cache_image(), \n", - " r.diff_tagged(), \n", - " acc_as_html(r.acc)\n", - " ), results))\n", - " non_breakin_space = u'\\u00A0'\n", - " tmpl = \"{}\"\n", - " padded_s = lambda s,n: tmpl.format(s.rjust(n))\n", - " acc_fmt = f\"{mean_accuracy:.2f}/{mean_trimmed:.2f}\"\n", - " w, h = img_ctx.base_image.size\n", - " dim, _dpi = size(w, h), dpi(w, h)\n", - " dim_fmt = f\"{w}x{h} px: {dim[0]:.2f} x {dim[1]:.2f} in @ {_dpi:.2f} dpi\"\n", - " return '\\n
\\n'.join([\n", - " (\"
\"\n", - " f\"{padded_s('Page', 24)}: {img_ctx.page_data.original_path}
\"\n", - " f\"{padded_s('Size', 24)}: {dim_fmt}
\"\n", - " f\"{padded_s('Model', 24)}: {exp_ctx.ocr_model}
\"\n", - " f\"{padded_s('Crop Method', 24)}: {self.method.value}
\"\n", - " f\"{padded_s('Accuracy Mean/Trimmed', 24)}: {acc_fmt}\"\n", - " \"
\"), \n", - " get_columns_html(\n", - " [descriptions, images, accs, ocrs], \n", - " max_image_width, \n", - " headers=[\"Box #\", \"Image\", \"Accuracy\", \"OCR\"]),\n", - " ])\n", - "\n", - " def display(self, \n", - " box_idxs: BoxIdT | list[BoxIdT] | None = None, max_image_width: int | None = None):\n", - " display(HTML(self.get_results_html(box_idxs, max_image_width)))\n", - "\n", - "\n", - " def summary(self):\n", - " results = self.results()\n", - " methods, images, ocrs, accs = zip(\n", - " *map(\n", - " lambda r: (r.block_idx+1, r.cache_image(), r.diff_tagged(), acc_as_html(r.acc)), \n", - " results))\n", - " display_columns([methods, images, accs, ocrs], \n", - " headers=[\"Box #\", \"Box\", \"Accuracy\", f\"{self.method.value} OCR\"])\n", - "\n", - "\n", - " def reset(self):\n", - " _, _, exp_ctx = self.ctxs\n", - " exp_ctx.reset(method=self.method)\n", - " \n", - " def __call__(self, box_idxs: BoxIdT | list[BoxIdT] | None = None, display=False, rebuild=False):\n", - " if isinstance(box_idxs, int):\n", - " result = self.result(cast(BoxIdT, box_idxs), rebuild=rebuild)\n", - " if result is not None and display:\n", - " result.display()\n", - " else:\n", - " results = self.results(box_idxs, rebuild=rebuild)\n", - " if results and display:\n", - " self.display(box_idxs)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Box id\n", - "> change `BOX_IDX` to use any box to test crop methods" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [], - "source": [ - "BOX_IDX = 0" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Crop methods testing" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [], - "source": [ - "CONTEXT.reset()\n", - "test_eq(CONTEXT.results(), {})\n", - "\n", - "image_experiment = ExperimentOCR(IMAGE_CONTEXT, 'Tesseract')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 
Single box results\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### comics_text_detector initial boxes + padding" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Initial box" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-05-10 20:25:26.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpcleaner.ocr.ocr_mangaocr\u001b[0m:\u001b[36m__new__\u001b[0m:\u001b[36m15\u001b[0m - \u001b[1mCreating the MangaOcr instance\u001b[0m\n" - ] - }, - { - "data": { - "text/html": [ - "
Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
0.90
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.⎕⎕

Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "method = CropMethod.INITIAL_BOX\n", - "\n", - "result = image_experiment.result(BOX_IDX, method, ocr=False)\n", - "assert result is not None\n", - "\n", - "image = result.image\n", - "assert image is not None\n", - "text = CONTEXT.mocr('Tesseract', page_lang)(image)\n", - "result.ocr = postprocess_ocr(text)\n", - "result\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### PanelCleaner default pad" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Eneowered by great gnarled cypress jrfes, the ancient manor ! alone on the eit of mew rce: eans, kept tipy by a white-haired ao han known only as
0.85
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orl⎕⎕⎕eans, kept tidy by a white-haired old man known only as Bambu.

Eneowered by great gnarled cypress jrfes, the ancient manor !⎕⎕⎕⎕⎕ alone on the eit⎕⎕⎕⎕⎕⎕ of mew rce: eans, kept tipy by a white-haired ao⎕⎕ han known only as⎕⎕⎕⎕⎕⎕⎕
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "method = CropMethod.DEFAULT\n", - "\n", - "result = image_experiment.result(BOX_IDX, method, ocr=False)\n", - "assert result is not None\n", - "\n", - "CONTEXT.ocr_box(result, 'Tesseract', page_lang)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### PanelCleaner default pad, grey pad" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as
0.95
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as⎕⎕⎕⎕⎕⎕⎕
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "method = CropMethod.DEFAULT_GREY_PAD\n", - "result = image_experiment.result(BOX_IDX, method)\n", - "result\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### padded, 4px" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as
0.88
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orl⎕⎕⎕eans, kept tidy by a white-haired old man known only as Bambu.

Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit⎕⎕⎕⎕⎕⎕ of mew rce: eans, kept tipy by a white-haired aolo man known only as⎕⎕⎕⎕⎕⎕⎕
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.result(BOX_IDX, CropMethod.PADDED_4)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### padded, 8px" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Enbonered by great gnarled cypress trees, the ancient manor stands alone on the pag hile of new orleans, kept tipy by a white-haired ao lo man known omy as
0.88
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Enbonered by great gnarled cypress trees, the ancient manor stands alone on the pag hile of new orleans, kept tipy by a white-haired aolo man known omy as⎕⎕⎕⎕⎕⎕⎕
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.result(BOX_IDX, CropMethod.PADDED_8)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Extracted text" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Extracted text, initial box\n", - "\n", - "Unfortunately, the `comic-text_detector` does not remove letter holes from the text mask, despite using OpenCV. This oversight likely impacts the accuracy of the OCR results." - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "
\n", - "
0.92
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu⎕⎕.

Fhbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans. kept tipy by a white-haire old man known only as bambi] .
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "method = CropMethod.EXTRACTED_INIT_BOX\n", - "# results[method] = IMAGE_CONTEXT.result(BOX_IDX, method)\n", - "# image = results[method].image\n", - "# assert image is not None\n", - "# results[method].ocr = postprocess_ocr(IMAGE_CONTEXT.mocr(image))\n", - "# display_extracted_result(None, None, results[method], IMAGE_CONTEXT.gts[BOX_IDX])\n", - "image_experiment.result(BOX_IDX, method)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### padded 4, extracted" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "
\n", - "
0.91
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Enbonsred by great shale cypress trees, the anci⎕⎕⎕ manor stands alone on the [tskirts of new orleans, kept tidy by a white-haired old man known only as b8ambl .
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.result(BOX_IDX, CropMethod.PADDED_4_EXTRACTED)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### padded 8, extracted" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "
\n", - "
0.94
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu⎕⎕.

Enbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as 8ambli .
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.result(BOX_IDX, CropMethod.PADDED_8_EXTRACTED)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### padded 8, dilation 1, extracted" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "
\n", - "
0.61
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

O⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕utskirts of new orleans, kept tipy by a white-haired old man known only as sams .
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.result(BOX_IDX, CropMethod.PADDED_8_DILATION_1)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### padded 8, dilation 0.5, extracted" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "
\n", - "
0.94
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as b8ambl .
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.result(BOX_IDX, CropMethod.PAD_8_FRACT_0_5)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### padded 8, dilation 0.2, extracted" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "
\n", - "
0.94
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as b8ambl .
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.result(BOX_IDX, CropMethod.PAD_8_FRACT_0_2)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Summary\n", - "> Use `ImageContext.summary_box` to display the results of the crop methods for OCR of a given box index.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2b5b75333766431bb85d2ae72ee47b50", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Box #1: 0%| | 0/11 [00:00MethodBox #1AccuracyOCRInitial box
0.90
Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
Default
0.85
Eneowered by great gnarled cypress jrfes, the ancient manor !⎕⎕⎕⎕⎕ alone on the eit⎕⎕⎕⎕⎕⎕ of mew rce: eans, kept tipy by a white-haired ao⎕⎕ han known only as⎕⎕⎕⎕⎕⎕⎕
Default, grey pad
0.95
Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as⎕⎕⎕⎕⎕⎕⎕
Padded 4px
0.88
Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit⎕⎕⎕⎕⎕⎕ of mew rce: eans, kept tipy by a white-haired aolo man known only as⎕⎕⎕⎕⎕⎕⎕
Padded 8px
0.88
Enbonered by great gnarled cypress trees, the ancient manor stands alone on the pag hile of new orleans, kept tipy by a white-haired aolo man known omy as⎕⎕⎕⎕⎕⎕⎕
Extracted, init box
0.92
Fhbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans. kept tipy by a white-haire old man known only as bambi] .
Padded 4, extracted
0.91
Enbonsred by great shale cypress trees, the anci⎕⎕⎕ manor stands alone on the [tskirts of new orleans, kept tidy by a white-haired old man known only as b8ambl .
Padded 8, extracted
0.94
Enbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as 8ambli .
Padded 8, dilation 1
0.61
O⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕utskirts of new orleans, kept tipy by a white-haired old man known only as sams .
Pad 8, fract. 0.5
0.94
Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as b8ambl .
Pad 8, fract. 0.2
0.94
Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as b8ambl .
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# methods, images, ocrs, accs = zip(\n", - "# *map(lambda t: (t[0].value, t[1].cache_image(), t[1].diff_tagged(), acc_as_html(t[1].acc)), \n", - "# IMAGE_CONTEXT.results[BOX_IDX].items()))\n", - "# display_columns([methods, images, accs, ocrs], headers=[\"Method\", \"Box\", \"Accuracy\", \"OCR\"])\n", - "\n", - "image_experiment.summary_box(BOX_IDX)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Show result for any box # and any method" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Tonight, he comes host slamming open
0.61
\n", - "
\n", - "
--and tonight, he comes most urgently, slamming open the oaken front doors!

T⎕⎕⎕⎕⎕⎕onight, he comes host⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ slamming open⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.result(4, CropMethod.PADDED_8)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "
\n", - "
0.67
\n", - "
\n", - "
Bambu-- we have a guest.

⎕⎕⎕⎕⎕⎕ we have a i=s7t.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.result(3, CropMethod.EXTRACTED_INIT_BOX)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# ResultVisor" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "class ResultVisor(ContextVisor):\n", - " ctx: ExperimentOCR\n", - " control_names: list[str] = ['all_boxes', 'box_idx', 'all_methods', 'method']\n", - "\n", - " _css = \"\"\"\n", - " .box_grp {\n", - " background-color: aliceblue;\n", - " }\n", - " .method_grp {\n", - " background-color: #ededed;\n", - " }\n", - " \"\"\"\n", - " \n", - " def best_results(self): \n", - " ll = self.ctx.best_results()\n", - " if ll:\n", - " cprint([(m.value, f\"{r.acc:.3f}\", r.ocr) for m,r in ll])\n", - "\n", - " def pd_to_html(self):\n", - " df = self.ctx.to_dataframe()\n", - " # set float precision\n", - " df = df.round(3)\n", - " # display floats with 3 decimal digits\n", - " df = df.applymap(lambda x: f\"{x:.3f}\")\n", - " # highlight max value in each row\n", - " stl = df.style.highlight_max(axis=0)\n", - " display(HTML(stl.to_html()))\n", - "\n", - " def update_output(self, **kwargs):\n", - " all_boxes: bool = self.values['all_boxes']\n", - " box_idx: int = self.values['box_idx']\n", - " all_methods: bool = self.values['all_methods']\n", - " method: CropMethod = self.values['method']\n", - "\n", - " # cprint(f\"all_boxes: {all_boxes}, box_idx: {box_idx}, all_methods: {all_methods}, method: {method}\")\n", - "\n", - " if all_boxes and all_methods:\n", - " self.ctx.plot_accuracies()\n", - " elif all_boxes:\n", - " self.ctx.summary_method(method)\n", - " elif all_methods:\n", - " self.ctx.summary_box(box_idx)\n", - " else:\n", - " result = self.ctx.result(box_idx, method)\n", - " if result is not None:\n", - " result.display()\n", - "\n", - " def setup_controls(self):\n", - " _, 
img_ctx = self.ctx.ctxs\n", - " values = self.values\n", - " box_wdgt = W.BoundedIntText(\n", - " value=values['box_idx'], min=0, max=len(img_ctx.boxes)-1, step=1,\n", - " disabled=values['all_boxes'],\n", - " layout={'width': '50px'},\n", - " style={'description_width': 'initial'})\n", - " methods_wdgt = W.Dropdown(\n", - " options=CropMethod.__display_names__(), \n", - " value=values['method'],\n", - " layout={'width': '150px'},\n", - " style={'description_width': 'initial'})\n", - " all_boxes_wdgt = W.Checkbox(label='All', value=values['all_boxes'], \n", - " description=\"all\", \n", - " layout={'width': 'initial'},\n", - " style={'description_width': '0px'})\n", - " all_methods_wdgt = W.Checkbox(label='All', value=values['all_methods'], \n", - " description=\"all\", \n", - " layout={'width': 'initial'},\n", - " style={'description_width': '0px'})\n", - " return {'all_boxes': all_boxes_wdgt, 'box_idx': box_wdgt, \n", - " 'all_methods': all_methods_wdgt, 'method': methods_wdgt}\n", - " \n", - " def setup_ui(self):\n", - " ctls = self.controls\n", - " _, img_ctx = self.ctx.ctxs\n", - " box_label = W.Label(\n", - " value=f\"Box # (of {len(img_ctx.boxes)}):\", \n", - " layout={'width': 'initial', 'padding': '0px 0px 0px 10px'})\n", - " method_label = W.Label(value='Method:', layout={'width': 'initial', 'padding': '0px 0px 0px 10px'})\n", - "\n", - " box_grp = W.HBox([box_label, ctls['all_boxes'], ctls['box_idx']])\n", - " box_grp.add_class('box_grp')\n", - " method_grp = W.HBox([method_label, ctls['all_methods'], ctls['method']])\n", - " method_grp.add_class('method_grp')\n", - " \n", - " return W.HBox([box_grp, method_grp])\n", - "\n", - " def __init__(self, \n", - " ctx: OCRExperimentContext | ExperimentOCR,\n", - " img_idx: int | str | Path | None = None,\n", - " all_boxes: bool = False,\n", - " box_idx: int = 0,\n", - " all_methods: bool = False,\n", - " method: CropMethod=CropMethod.INITIAL_BOX,\n", - " out: W.Output | None = None,\n", - " ):\n", - " if 
isinstance(ctx, OCRExperimentContext):\n", - " assert img_idx is not None, \"img_idx must be provided if ctx is an ExperimentContext\"\n", - " exp = ExperimentOCR.from_image(ctx, 'Tesseract', img_idx)\n", - " if not exp:\n", - " raise ValueError(f\"Image {img_idx} not found in experiment context\")\n", - " ctx = exp\n", - " else:\n", - " if not isinstance(ctx, ExperimentOCR):\n", - " raise ValueError(\"ctx must be an ExperimentOCR or OCRExperimentContext\")\n", - " \n", - " super().__init__(ctx, {'all_boxes': all_boxes, 'box_idx': box_idx, \n", - " 'all_methods': all_methods, 'method': method}, out=out or self.out)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "400781b58e3e43cb868c0c278fd3ecd2", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output(layout=Layout(height='0px'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2f2917080eb943acb8b665e1fa607c13", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HBox(children=(Label(value='Box # (of 15):', layout=Layout(padding='0px 0px 0px 10px', width='i…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "81027eadfa3d40b6a81efcd991f2379a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "cleanupwidgets('result_visor')\n", - "\n", - "result_visor = ResultVisor(image_experiment)\n", - "result_visor\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# ExperimentVisor\n" - ] - }, - { - "cell_type": "code", - "execution_count": 84, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "class 
ExperimentVisor(ContextVisor):\n", - " ctx: ExperimentOCR\n", - "\n", - " def update_output(self, \n", - " image_idx: int | None = None,\n", - " **kwargs):\n", - " exp_ctx, img_ctx = self.ctx.ctxs\n", - " if image_idx is not None and image_idx != img_ctx.image_idx:\n", - " ctx = ImageContext(exp_ctx, image_idx)\n", - " assert ctx is not None\n", - " self.ctx.ctx = ctx\n", - " result_visor = self.comp('result_visor')\n", - " if result_visor is not None:\n", - " result_visor.update_output(**kwargs)\n", - "\n", - " def __init__(self, \n", - " ctx: OCRExperimentContext | ExperimentOCR,\n", - " img_idx: int | str | Path | None = None,\n", - " all_boxes: bool = False,\n", - " box_idx: int = 0,\n", - " all_methods: bool = False,\n", - " method: CropMethod=CropMethod.INITIAL_BOX,\n", - " out: W.Output | None = None,\n", - " ):\n", - " if isinstance(ctx, OCRExperimentContext):\n", - " assert img_idx is not None, \"img_idx must be provided if ctx is an ExperimentContext\"\n", - " exp = ExperimentOCR.from_image(ctx, 'Tesseract', img_idx)\n", - " if not exp:\n", - " raise ValueError(f\"Image {img_idx} not found in experiment context\")\n", - " ctx = exp\n", - " else:\n", - " if not issubclass(type(ctx), ExperimentOCR):\n", - " raise ValueError(\"ctx must be an ExperimentOCR or OCRExperimentContext\")\n", - " \n", - " exp_ctx, img_ctx = ctx.ctxs\n", - " out = out or self.out\n", - " image_selector = ImageSelector(exp_ctx, image_idx=img_ctx.image_idx, out=out)\n", - " result_visor = ResultVisor(ctx, out=out,\n", - " all_boxes=all_boxes, box_idx=box_idx, all_methods=all_methods, method=method)\n", - "\n", - " super().__init__(ctx, {}, out=out, \n", - " ctxs={'image_selector': image_selector, 'result_visor': result_visor},\n", - " hdlrs={'display_option': result_visor}\n", - " )\n" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": 
"1e40a9f23a674b48abd50720e6523c2c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HBox(children=(Dropdown(index=20, layout=Layout(width='fit-content'), options={'Action_Comics_1…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5af1d96d5d9942fd8e895787ff50bbe5", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "cleanupwidgets('exp_visor')\n", - "\n", - "exp_visor = ExperimentVisor(image_experiment)\n", - "exp_visor\n" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": {}, - "outputs": [], - "source": [ - "exp_visor.update(box_idx=1)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "metadata": {}, - "outputs": [], - "source": [ - "exp_visor.update(image_idx=0)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Method experiment\n", - "> perform method on one or more boxes" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "metadata": {}, - "outputs": [], - "source": [ - "CONTEXT.reset()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "metadata": {}, - "outputs": [], - "source": [ - "image_experiment = ExperimentOCR(IMAGE_CONTEXT, 'Tesseract')\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Visualize summary of crop methods on a given box\n" - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "151bc83d198e447d8932fe48aa7ef2e6", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Box #2: 0%| | 0/11 [00:00MethodBox #2AccuracyOCRInitial box
0.93
The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~
Default
0.96
The house and the old man are alike in many ways; tall, proud, patient, contentel always to wait until their. master cones home ~~
Default, grey pad
0.96
The house and the old man are alike in many ways; tall, prolid, patient, contented always to wait until their.⎕* master cones home⎕~-
Padded 4px
0.93
The house and the oldman are alike in many ways; tall, proud, patient, contented a ways 0 wait until their. aster comes home ~~ | }
Padded 8px
0.99
The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their. master comes home⎕⎕
Extracted, init box
0.93
Ee house and the old man por alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home ~~
Padded 4, extracted
0.98
The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home--
Padded 8, extracted
0.98
The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home--
Padded 8, dilation 1
0.88
The house and the old man are alike in many ways, tall, proud, ⎕⎕⎕⎕⎕nt, contented live⎕⎕⎕⎕ walt gtie their, master comes home-=
Pad 8, fract. 0.5
0.97
The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until. their. master comes home ~~
Pad 8, fract. 0.2
0.97
The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home ~~
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.summary_box(1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Results for any crop method" - ] - }, - { - "cell_type": "code", - "execution_count": 92, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Box #BoxAccuracyPadded 4px OCR
1
0.88
Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit⎕⎕⎕⎕⎕⎕ of mew rce: eans, kept tipy by a white-haired aolo man known only as⎕⎕⎕⎕⎕⎕⎕
2
0.93
The house and the oldman are alike in many ways; tall, proud, patient, contented a ways 0 wait until their. aster comes home ~~ | }
3
0.69
F and one in ee⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ would appear.
4
0.77
" bambli- we have a gliest.
5
0.55
P⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ comes⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ slamming open the caken⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕
6
0.57
Tel oe ⎕⎕⎕⎕er-- 5 ow a =⎕⎕⎕⎕ 7⎕⎕⎕⎕⎕
7
0.38
W⎕⎕e⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ and perhaps c oe /⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕
8
0.75
The⎕⎕⎕⎕⎕⎕⎕⎕ the old man⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕s fades down the hall sra⎕⎕⎕⎕
9
0.92
How curious the a⎕whims of fate. - had i not chanced to stroll along the river yl⎕tonight~-
10
0.79
A⎕⎕⎕ulckly as t can, masrer.
11
0.92
<the girl wolld⎕- most surely be dead by now.
12
0.88
Ghede has been generous. the oeath gop has given -⎕the girl. a second chance ye,⎕alem
13
0.67
Soe⎕⎕ ⎕⎕⎕⎕⎕⎕⎕⎕⎕ereke othing to scream ay⎕⎕⎕ anymore.
14
0.94
"you're among friends now. you're safe!
15
1.00
Continued after next page
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.summary_method(CropMethod.PADDED_4)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can use method experiment directly" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
0.90
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.⎕⎕

Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "method_experiment = cast(ExperimentOCRMethod, \n", - " ExperimentOCR.from_method(CONTEXT, 'Tesseract', IMAGE_CONTEXT.image_idx, CropMethod.INITIAL_BOX))\n", - "method_experiment(BOX_IDX, display=True)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "for all boxes" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Page: media/Strange_Tales_172005.jpg
Size: 1275x1888 px: 4.25 x 6.29 in @ 188.32 dpi
Model: Tesseract
Crop Method: Initial box
Accuracy Mean/Trimmed: 0.79/0.80
\n", - "
\n", - "
Box #ImageAccuracyOCR
1
0.90
Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
2
0.93
The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~
3
0.70
“and one in ee⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ would appear.
4
0.62
Re bambli-~ we have a⎕⎕⎕⎕⎕⎕⎕
5
0.70
T⎕⎕⎕⎕⎕⎕onight, he comes noost⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ slamming open the caken⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕
6
0.82
Tell me naster. how may bambli serve 7
7
0.56
£7⎕⎕ »⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ and perhaps some dry clothes...⎕7⎕/
8
0.81
The⎕⎕⎕⎕⎕⎕⎕⎕ the old man'⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕s fades down the hall as...⎕7
9
0.85
How curious the 4⎕fate.⎕whims of h⎕⎕⎕⎕⎕⎕ad t not chanced to stroll along the river yl⎕tonight ==
10
0.80
Fas oulckly as t ca, master.
11
0.91
<the girl would -⎕most slirely be dead by now.
12
0.47
A⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕th⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ the girl. a second chance ge⎕a ee yg adil
13
0.84
Ah⎕⎕⎕ girl--there's othing to scream nt⎕⎕t anymore.
14
0.93
You're among friends now. you're sale
15
1.00
Continued after next page
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "method_experiment(display=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "or selected boxes" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": {}, - "outputs": [], - "source": [ - "CONTEXT.reset()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Page: media/Strange_Tales_172005.jpg
Size: 1275x1888 px: 4.25 x 6.29 in @ 188.32 dpi
Model: Tesseract
Crop Method: Initial box
Accuracy Mean/Trimmed: 0.80/nan
\n", - "
\n", - "
Box #ImageAccuracyOCR
1
0.90
Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
2
0.93
The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~
7
0.56
£7⎕⎕ »⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ and perhaps some dry clothes...⎕7⎕/
10
0.80
Fas oulckly as t ca, master.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "method_experiment = cast(ExperimentOCRMethod, \n", - " ExperimentOCR.from_method(CONTEXT, 'Tesseract', IMAGE_CONTEXT.image_idx, CropMethod.INITIAL_BOX))\n", - "method_experiment([0, 1, 6, 9], display=True)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Full page results\n", - "> all methods on all boxes" - ] - }, - { - "cell_type": "code", - "execution_count": 97, - "metadata": {}, - "outputs": [], - "source": [ - "CONTEXT.reset()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "metadata": {}, - "outputs": [], - "source": [ - "image_experiment = ExperimentOCR(IMAGE_CONTEXT, 'Tesseract')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Page: media/Strange_Tales_172005.jpg
Size: 1275x1888 px: 4.25 x 6.29 in @ 188.32 dpi
Model: Tesseract
Crop Method: Initial box
Accuracy Mean/Trimmed: 0.79/0.80
\n", - "
\n", - "
Box #ImageAccuracyOCR
1
0.90
Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3
2
0.93
The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~
3
0.70
“and one in ee⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ would appear.
4
0.62
Re bambli-~ we have a⎕⎕⎕⎕⎕⎕⎕
5
0.70
T⎕⎕⎕⎕⎕⎕onight, he comes noost⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ slamming open the caken⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕
6
0.82
Tell me naster. how may bambli serve 7
7
0.56
£7⎕⎕ »⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ and perhaps some dry clothes...⎕7⎕/
8
0.81
The⎕⎕⎕⎕⎕⎕⎕⎕ the old man'⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕s fades down the hall as...⎕7
9
0.85
How curious the 4⎕fate.⎕whims of h⎕⎕⎕⎕⎕⎕ad t not chanced to stroll along the river yl⎕tonight ==
10
0.80
Fas oulckly as t ca, master.
11
0.91
<the girl would -⎕most slirely be dead by now.
12
0.47
A⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕th⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕ the girl. a second chance ge⎕a ee yg adil
13
0.84
Ah⎕⎕⎕ girl--there's othing to scream nt⎕⎕t anymore.
14
0.93
You're among friends now. you're sale
15
1.00
Continued after next page
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "method = CropMethod.INITIAL_BOX\n", - "# method = CropMethod.DEFAULT\n", - "# method = CropMethod.PADDED_4\n", - "# method = CropMethod.PADDED_8\n", - "# method = CropMethod.EXTRACTED_INIT_BOX\n", - "# method = CropMethod.PAD_8_FRACT_0_5\n", - "# method = CropMethod.PAD_8_FRACT_0_2\n", - "\n", - "# image_experiment.method_experiment(CropMethod.INITIAL_BOX).results()\n", - "initial_box_exp = image_experiment.method_experiment(CropMethod.INITIAL_BOX)\n", - "initial_box_exp(display=True)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Other method" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[ResultOCR#block 00: 0.85||Eneowered by great gnarled cypress jrfes, the ancient manor ! alone on the eit of mew rce: eans, kept tipy by a white-haired ao han known only as,\n", - " ResultOCR#block 01: 0.96||The house and the old man are alike in many ways; tall, proud, patient, conten tel always to wait until their. master cones home ~~,\n", - " ResultOCR#block 02: 0.74||And one in ee would appear.,\n", - " ResultOCR#block 03: 0.41||Rir guest.,\n", - " ResultOCR#block 04: 0.59||=~and tonight, he comes host sane oo,\n", - " ResultOCR#block 05: 0.78||Tell me masts - how may bambli . serve 7 _,\n", - " ResultOCR#block 06: 0.48||R warm, bambli-~ and perhaps,\n", - " ResultOCR#block 07: 0.76||The the old mans fades down the hall s.00,\n", - " ResultOCR#block 08: 0.92||How curious the a whims of fate . 
had t not chanced to stroll along the river tonight~~ >,\n", - " ResultOCR#block 09: 0.50||Aulckly “master as t can,,\n", - " ResultOCR#block 10: 0.94||" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.perform_methods(plot_acc=True)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 101, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
---------- Initial box ----------\n",
-       "ResultOCR#block 00: 0.90||Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3\n",
-       "ResultOCR#block 01: 0.93||The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~\n",
-       "ResultOCR#block 02: 0.70||“and one in ee would appear.\n",
-       "ResultOCR#block 03: 0.62||Re bambli-~ we have a\n",
-       "ResultOCR#block 04: 0.70||Tonight, he comes noost slamming open the caken\n",
-       "ResultOCR#block 05: 0.82||Tell me naster. how may bambli serve 7\n",
-       "ResultOCR#block 06: 0.56||£7 » and perhaps some dry clothes... 7 /\n",
-       "ResultOCR#block 07: 0.81||The the old man's fades down the hall as... 7\n",
-       "ResultOCR#block 08: 0.85||How curious the 4 fate. whims of had t not chanced to stroll along the river yl tonight ==\n",
-       "ResultOCR#block 09: 0.80||Fas oulckly as t ca, master.\n",
-       "ResultOCR#block 10: 0.91||<the girl would - most slirely be dead by now.\n",
-       "ResultOCR#block 11: 0.47||Ath the girl. a second chance ge a ee yg adil\n",
-       "ResultOCR#block 12: 0.84||Ah girl--there's othing to scream ntt anymore .\n",
-       "ResultOCR#block 13: 0.93||You're among friends now. you're sale\n",
-       "ResultOCR#block 14: 1.00||Continued after next page\n",
-       "\n",
-       " ---------- Default ----------\n",
-       "ResultOCR#block 00: 0.85||Eneowered by great gnarled cypress jrfes, the ancient manor ! alone on the eit of mew rce: eans, kept tipy by a white-haired ao han known only as\n",
-       "ResultOCR#block 01: 0.96||The house and the old man are alike in many ways; tall, proud, patient, conten tel always to wait until their. master cones home ~~\n",
-       "ResultOCR#block 02: 0.74||And one in ee would appear.\n",
-       "ResultOCR#block 03: 0.41||Rir guest.\n",
-       "ResultOCR#block 04: 0.59||=~and tonight, he comes host sane oo\n",
-       "ResultOCR#block 05: 0.78||Tell me masts - how may bambli . serve 7 _\n",
-       "ResultOCR#block 06: 0.48||R warm, bambli-~ and perhaps\n",
-       "ResultOCR#block 07: 0.76||The the old mans fades down the hall s.00\n",
-       "ResultOCR#block 08: 0.92||How curious the a whims of fate . had t not chanced to stroll along the river tonight~~ >\n",
-       "ResultOCR#block 09: 0.50||Aulckly “master as t can,\n",
-       "ResultOCR#block 10: 0.94||<the girl would - most surely be dead by now.\n",
-       "ResultOCR#block 11: 0.50||Ath - the girl. a second chance ee oo tr tt\n",
-       "ResultOCR#block 12: 0.84||Oe girl--there's othing to scream nt anymore. 4\n",
-       "ResultOCR#block 13: 0.96||You're among friends now. youre safe!\n",
-       "ResultOCR#block 14: 1.00||Continued after next page\n",
-       "\n",
-       " ---------- Default, grey pad ----------\n",
-       "ResultOCR#block 00: 0.95||Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as\n",
-       "ResultOCR#block 01: 0.96||The house and the old man are alike in many ways; tall, prolid, patient, contented always to wait until their. * master cones home ~-\n",
-       "ResultOCR#block 02: 0.94||“and one in > need of some help, it would appear .\n",
-       "ResultOCR#block 03: 0.88||\" bambl-- we have a guest.\n",
-       "ResultOCR#block 04: 0.72||~~and tonight, he comes urgently, slanming open\n",
-       "ResultOCR#block 05: 0.86||Tell me, master: how may bambli serve 7\n",
-       "ResultOCR#block 06: 0.90||Some blankets to keep her. warm, bambli-- and perhaps some dry \\ clothes--7 /.\n",
-       "ResultOCR#block 07: 0.06||As.\n",
-       "ResultOCR#block 08: 0.91||How curious the d 7e . whims of fa; had i not chanced to stroll along the river tonight--\n",
-       "ResultOCR#block 09: 0.55||Ickl as t can\n",
-       "ResultOCR#block 10: 0.00||\n",
-       "ResultOCR#block 11: 0.85||Ghede has been generous. the death god has gen + the girl. a second chance te oe ato\" pd ate\n",
-       "ResultOCR#block 12: 0.95||Easy, girl--there's | nothing to scream about anyaore.\n",
-       "ResultOCR#block 13: 0.97||You're among friends now. you're safe!\n",
-       "ResultOCR#block 14: 0.54||“continued a\n",
-       "\n",
-       " ---------- Padded 4px ----------\n",
-       "ResultOCR#block 00: 0.88||Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as\n",
-       "ResultOCR#block 01: 0.93||The house and the oldman are alike in many ways; tall, proud, patient, contented a ways 0 wait until their. aster comes home ~~ | }\n",
-       "ResultOCR#block 02: 0.69||F and one in ee would appear.\n",
-       "ResultOCR#block 03: 0.77||\" bambli-— we have a gliest.\n",
-       "ResultOCR#block 04: 0.55||P comes slamming open the caken\n",
-       "ResultOCR#block 05: 0.57||Tel oe er-- 5 ow a = 7\n",
-       "ResultOCR#block 06: 0.38||We and perhaps c oe /\n",
-       "ResultOCR#block 07: 0.75||The the old mans fades down the hall sra\n",
-       "ResultOCR#block 08: 0.92||How curious the a whims of fate . - had i not chanced to stroll along the river yl tonight~-\n",
-       "ResultOCR#block 09: 0.79||Aulckly as t can, ‘masrer.\n",
-       "ResultOCR#block 10: 0.92||<the girl wolld - most surely be dead by now.\n",
-       "ResultOCR#block 11: 0.88||Ghede has been generous. the oeath gop has given - the girl. a second chance ye, alem\n",
-       "ResultOCR#block 12: 0.67||Soe er eke othing to scream ay anymore.\n",
-       "ResultOCR#block 13: 0.94||\"you're among friends now. you're safe!\n",
-       "ResultOCR#block 14: 1.00||Continued after next page\n",
-       "\n",
-       " ---------- Padded 8px ----------\n",
-       "ResultOCR#block 00: 0.88||Enbonered by great gnarled cypress trees, the ancient manor stands alone on the pag hile of new orleans, kept tipy by a white-haired ao lo man known omy as\n",
-       "ResultOCR#block 01: 0.99||The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their. master comes home\n",
-       "ResultOCR#block 02: 0.67||7 and one in ee would appear,\n",
-       "ResultOCR#block 03: 0.76||Zf mbl == we have a guest.\n",
-       "ResultOCR#block 04: 0.61||Tonight, he comes host slamming open\n",
-       "ResultOCR#block 05: 0.75||Yy i tell me master - how may bambli serve 7 _,\n",
-       "ResultOCR#block 06: 0.89||Some. blankets to keep he arm , bambli-= and perhaps some dry clothes. 2s\n",
-       "ResultOCR#block 07: 0.72||The the old mans fades down the hal. srl see\n",
-       "ResultOCR#block 08: 0.88||* how curious the p whims of fate . - had i not chanced . to stroll along _ the river 3 tonight-~\n",
-       "ResultOCR#block 09: 0.65||Tiie as t can, \\ master ,\n",
-       "ResultOCR#block 10: 0.86||The girl wolld - most slirely be - dead by now.\n",
-       "ResultOCR#block 11: 0.62||Ghede has been generous. : the crn son ue;\n",
-       "ResultOCR#block 12: 0.62||Soe er eke othing to scream hbolt anhore hr\n",
-       "ResultOCR#block 13: 0.92||” you're among friends now. you're safe!\n",
-       "ResultOCR#block 14: 0.94||“continued after next page\n",
-       "\n",
-       " ---------- Extracted, init box ----------\n",
-       "ResultOCRExtracted#block 00: 0.92||Fhbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans. kept tipy by a whi te-haire old man known only as bambi] .\n",
-       "ResultOCRExtracted#block 01: 0.93||Ee house and the old man por alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home ~~\n",
-       "ResultOCRExtracted#block 02: 0.73||And one in fee would appear.\n",
-       "ResultOCRExtracted#block 03: 0.67||— we have a i=s7t.\n",
-       "ResultOCRExtracted#block 04: 0.74||~and tonight, he comes urgently, slamming open\n",
-       "ResultOCRExtracted#block 05: 0.85||Tell me master how may bambli serve 7\n",
-       "ResultOCRExtracted#block 06: 0.93||Some blankets to keep her warm, banbli-- and perhaps. some dry clothes\n",
-       "ResultOCRExtracted#block 07: 0.77||The the old man's fades down the hall s.,00\n",
-       "ResultOCRExtracted#block 08: 0.77||Hin ef fare” had i not chanced to stroll along the river. tonigmt=~\n",
-       "ResultOCRExtracted#block 09: 0.85||Aulckly as t can, master,\n",
-       "ResultOCRExtracted#block 10: 1.00||--the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.51||Ath the girl a second chance ro\n",
-       "ResultOCRExtracted#block 12: 0.56||Cas ee, othing to scream pls aa .\n",
-       "ResultOCRExtracted#block 13: 0.95||You're among friends now. you're sale!\n",
-       "ResultOCRExtracted#block 14: 0.91||Continued af ext page\n",
-       "\n",
-       " ---------- Padded 4, extracted ----------\n",
-       "ResultOCRExtracted#block 00: 0.91||Enbonsred by great shale cypress trees, the anci manor stands alone on the [tskirts of new orleans, kept tidy by a whi te- haired old man known only as b8ambl .\n",
-       "ResultOCRExtracted#block 01: 0.98||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home --\n",
-       "ResultOCRExtracted#block 02: 0.73||And one in fee would appear.\n",
-       "ResultOCRExtracted#block 03: 0.83||Bambli we have a gliest.\n",
-       "ResultOCRExtracted#block 04: 0.74||=~and tonight, he comes urgently, slamming open\n",
-       "ResultOCRExtracted#block 05: 0.84||Tell me master. how may bambli serve 7\n",
-       "ResultOCRExtracted#block 06: 0.53||Warm, bambli-- and perhaps. som\n",
-       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sra\n",
-       "ResultOCRExtracted#block 08: 0.76||We sea had i not chanced to stroll along the river. tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.76||Alley as t can, master,\n",
-       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.92||Chepe has been generous. the peath god has given the girl a second chance amr\n",
-       "ResultOCRExtracted#block 12: 0.73||Cas gr theres othing to scream pissy tore .\n",
-       "ResultOCRExtracted#block 13: 0.95||You're among eriends now. you're safe!\n",
-       "ResultOCRExtracted#block 14: 0.91||Continued af ext page\n",
-       "\n",
-       " ---------- Padded 8, extracted ----------\n",
-       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as 8ambli .\n",
-       "ResultOCRExtracted#block 01: 0.98||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home --\n",
-       "ResultOCRExtracted#block 02: 0.70||And one in fee wolld appear.\n",
-       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
-       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
-       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve't\n",
-       "ResultOCRExtracted#block 06: 0.91||Some blankets to keep her warm, banbli-~ and perhaps. some dry clothes\n",
-       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sire\n",
-       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.77||Allckry as t can, master.\n",
-       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.94||Ghede has been generous. the peath god has given the girl a second chance po\n",
-       "ResultOCRExtracted#block 12: 0.74||Cas gr theres othing to scream pps hore .\n",
-       "ResultOCRExtracted#block 13: 0.42||You're safe § r\n",
-       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
-       "\n",
-       " ---------- Padded 8, dilation 1 ----------\n",
-       "ResultOCRExtracted#block 00: 0.61||Outskirts of new orleans, kept tipy by a white-haired old man known only as sams .\n",
-       "ResultOCRExtracted#block 01: 0.88||The house and the old man are alike in many ways, tall, proud, nt, contented live walt gtie their, master comes home -=\n",
-       "ResultOCRExtracted#block 02: 0.97||And one in need of some help, it wolld appear .\n",
-       "ResultOCRExtracted#block 03: 0.78||Bambli ~~ we have a gliest.\n",
-       "ResultOCRExtracted#block 04: 0.79||=and tonight, he comes most slamming open the front\n",
-       "ResultOCRExtracted#block 05: 0.86||Tell me, master: how may bambli serve 7\n",
-       "ResultOCRExtracted#block 06: 0.85||Gone blankets to keep her. warm, bambli-~ and perhaps some dry\n",
-       "ResultOCRExtracted#block 07: 0.73||The old man's footsteps the hall as.re\n",
-       "ResultOCRExtracted#block 08: 0.94||How curious the whims of fate . had i not chanced to stroll along the r/| ver tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.68||Aulckly as t can,\n",
-       "ResultOCRExtracted#block 10: 0.95||~<the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.75||Ee lh boe ene. the death gop has the girl. a second chance\n",
-       "ResultOCRExtracted#block 12: 0.92||Easy, girl--there's nothing to scream abolit anymo!\n",
-       "ResultOCRExtracted#block 13: 0.97||You're among friends now. you're safe!\n",
-       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
-       "\n",
-       " ---------- Pad 8, fract. 0.5 ----------\n",
-       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as b8ambl .\n",
-       "ResultOCRExtracted#block 01: 0.97||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until. their. master comes home ~~\n",
-       "ResultOCRExtracted#block 02: 0.78||And one in eee pe would appear.\n",
-       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
-       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
-       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve'7\n",
-       "ResultOCRExtracted#block 06: 0.94||Some blankets to keep her. warm, bambli-~ and perhaps. some dry clothes\n",
-       "ResultOCRExtracted#block 07: 0.73||The the old mans fades donn the hall sire\n",
-       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.81||Aulckry as t can, master.\n",
-       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.92||Ghede hag been generous. the peath god has given the girl a second chance po\n",
-       "ResultOCRExtracted#block 12: 0.76||Cas srl theres othing to scream seoit hore .\n",
-       "ResultOCRExtracted#block 13: 0.42||You're safe 4 ’\n",
-       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
-       "\n",
-       " ---------- Pad 8, fract. 0.2 ----------\n",
-       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as b8ambl .\n",
-       "ResultOCRExtracted#block 01: 0.97||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home ~~\n",
-       "ResultOCRExtracted#block 02: 0.77||And one in eet sve would appear.\n",
-       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
-       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
-       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve'7\n",
-       "ResultOCRExtracted#block 06: 0.94||Some blankets to keep her, warm, bambli-~ and perhaps. some dry clothes\n",
-       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sere\n",
-       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.81||Aulckry as t can, master.\n",
-       "ResultOCRExtracted#block 10: 0.95||~<the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.94||Ghede has been generous. the ceath god has given the girl a second chance po\n",
-       "ResultOCRExtracted#block 12: 0.67||Yi renee othing to scream seat anhore .\n",
-       "ResultOCRExtracted#block 13: 0.93||Youre among eriends now. you're safe!\n",
-       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
-       "\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "---------- Initial box ----------\n", - "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.90\u001b[0m||Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski\u001b[1m)\u001b[0m \u001b[1;36m2\u001b[0m of mew ce eans, kept tidy by a white-haired old man known only as bambs, \u001b[1;36m3\u001b[0m\n", - "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.93\u001b[0m||The house and the old man are alike in many ways; tall, prolid, patient, contented always \u001b[1;36m0\u001b[0m wait until. their. master cones mome ~~\n", - "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.70\u001b[0m||“and one in ee would appear.\n", - "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.62\u001b[0m||Re bambli-~ we have a\n", - "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.70\u001b[0m||Tonight, he comes noost slamming open the caken\n", - "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.82\u001b[0m||Tell me naster. how may bambli serve \u001b[1;36m7\u001b[0m\n", - "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.56\u001b[0m||£\u001b[1;36m7\u001b[0m » and perhaps some dry clothes\u001b[33m...\u001b[0m \u001b[1;36m7\u001b[0m \u001b[35m/\u001b[0m\n", - "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.81\u001b[0m||The the old man's fades down the hall as\u001b[33m...\u001b[0m \u001b[1;36m7\u001b[0m\n", - "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.85\u001b[0m||How curious the \u001b[1;36m4\u001b[0m fate. 
whims of had t not chanced to stroll along the river yl tonight ==\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.80\u001b[0m||Fas oulckly as t ca, master.\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.91\u001b[0m||\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.50\u001b[0m||Aulckly “master as t can,\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.94\u001b[0m|| need of some help, it would appear .\n", - "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.88\u001b[0m||\" bambl-- we have a guest.\n", - "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.72\u001b[0m||~~and tonight, he comes urgently, slanming open\n", - "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.86\u001b[0m||Tell me, master: how may bambli serve \u001b[1;36m7\u001b[0m\n", - "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.90\u001b[0m||Some blankets to keep her. warm, bambli-- and perhaps some dry \\ clothes-\u001b[1;36m-7\u001b[0m \u001b[35m/\u001b[0m\u001b[95m.\u001b[0m\n", - "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.06\u001b[0m||As.\n", - "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.91\u001b[0m||How curious the d 7e . whims of fa; had i not chanced to stroll along the river tonight--\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.55\u001b[0m||Ickl as t can\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", - "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.85\u001b[0m||Ghede has been generous. the death god has gen + the girl. a second chance te oe ato\" pd ate\n", - "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.95\u001b[0m||Easy, girl--there's | nothing to scream about anyaore.\n", - "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.97\u001b[0m||You're among friends now. 
you're safe!\n", - "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.54\u001b[0m||“continued a\n", - "\n", - " ---------- Padded 4px ----------\n", - "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.88\u001b[0m||Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as\n", - "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.93\u001b[0m||The house and the oldman are alike in many ways; tall, proud, patient, contented a ways \u001b[1;36m0\u001b[0m wait until their. aster comes home ~~ | \u001b[1m}\u001b[0m\n", - "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.69\u001b[0m||F and one in ee would appear.\n", - "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.77\u001b[0m||\" bambli-— we have a gliest.\n", - "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.55\u001b[0m||P comes slamming open the caken\n", - "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.57\u001b[0m||Tel oe er-- \u001b[1;36m5\u001b[0m ow a = \u001b[1;36m7\u001b[0m\n", - "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.38\u001b[0m||We and perhaps c oe \u001b[35m/\u001b[0m\n", - "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.75\u001b[0m||The the old mans fades down the hall sra\n", - "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.92\u001b[0m||How curious the a whims of fate . - had i not chanced to stroll along the river yl tonight~-\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.79\u001b[0m||Aulckly as t can, ‘masrer.\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.92\u001b[0m||" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.plot_accuracies()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 103, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "RenderJSON(image_experiment.to_dict(), 400, 2)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "best results" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[\n",
-       "    (\n",
-       "        'Default, grey pad',\n",
-       "        '0.953',\n",
-       "        'Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of \n",
-       "new orleans, kept tipy by a white-haired old man known only as'\n",
-       "    ),\n",
-       "    (\n",
-       "        'Padded 8px',\n",
-       "        '0.988',\n",
-       "        'The house and the old man are alike in many ways; tall, proud, patient, contented always to \n",
-       "wait until their. master comes home'\n",
-       "    ),\n",
-       "    ('Padded 8, dilation 1', '0.968', 'And one in need of some help, it wolld appear .'),\n",
-       "    ('Default, grey pad', '0.880', '\" bambl-- we have a guest.'),\n",
-       "    ('Padded 8, dilation 1', '0.794', '=and tonight, he comes most slamming open the front'),\n",
-       "    ('Default, grey pad', '0.857', 'Tell me, master: how may bambli serve 7'),\n",
-       "    (\n",
-       "        'Pad 8, fract. 0.5',\n",
-       "        '0.935',\n",
-       "        'Some blankets to keep her. warm, bambli-~ and perhaps. some dry clothes'\n",
-       "    ),\n",
-       "    ('Initial box', '0.811', \"The the old man's fades down the hall as... 7\"),\n",
-       "    (\n",
-       "        'Padded 8, extracted',\n",
-       "        '0.959',\n",
-       "        'How curious the whims of fate . had i not chanced to stroll along the river tonight-~'\n",
-       "    ),\n",
-       "    ('Extracted, init box', '0.846', 'Aulckly as t can, master,'),\n",
-       "    ('Extracted, init box', '1.000', '--the girl would most surely be dead by now.'),\n",
-       "    (\n",
-       "        'Padded 8, extracted',\n",
-       "        '0.935',\n",
-       "        'Ghede has been generous. the peath god has given the girl a second chance po'\n",
-       "    ),\n",
-       "    ('Default, grey pad', '0.953', \"Easy, girl--there's | nothing to scream about anyaore.\"),\n",
-       "    ('Default, grey pad', '0.974', \"You're among friends now. you're safe!\"),\n",
-       "    ('Initial box', '1.000', 'Continued after next page')\n",
-       "]\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\n", - " \u001b[1m(\u001b[0m\n", - " \u001b[32m'Default, grey pad'\u001b[0m,\n", - " \u001b[32m'0.953'\u001b[0m,\n", - " \u001b[32m'Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of \u001b[0m\n", - "\u001b[32mnew orleans, kept tipy by a white-haired old man known only as'\u001b[0m\n", - " \u001b[1m)\u001b[0m,\n", - " \u001b[1m(\u001b[0m\n", - " \u001b[32m'Padded 8px'\u001b[0m,\n", - " \u001b[32m'0.988'\u001b[0m,\n", - " \u001b[32m'The house and the old man are alike in many ways; tall, proud, patient, contented always to \u001b[0m\n", - "\u001b[32mwait until their. master comes home'\u001b[0m\n", - " \u001b[1m)\u001b[0m,\n", - " \u001b[1m(\u001b[0m\u001b[32m'Padded 8, dilation 1'\u001b[0m, \u001b[32m'0.968'\u001b[0m, \u001b[32m'And one in need of some help, it wolld appear .'\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1m(\u001b[0m\u001b[32m'Default, grey pad'\u001b[0m, \u001b[32m'0.880'\u001b[0m, \u001b[32m'\" bambl-- we have a guest.'\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1m(\u001b[0m\u001b[32m'Padded 8, dilation 1'\u001b[0m, \u001b[32m'0.794'\u001b[0m, \u001b[32m'=and tonight, he comes most slamming open the front'\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1m(\u001b[0m\u001b[32m'Default, grey pad'\u001b[0m, \u001b[32m'0.857'\u001b[0m, \u001b[32m'Tell me, master: how may bambli serve 7'\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1m(\u001b[0m\n", - " \u001b[32m'Pad 8, fract. 0.5'\u001b[0m,\n", - " \u001b[32m'0.935'\u001b[0m,\n", - " \u001b[32m'Some blankets to keep her. warm, bambli-~ and perhaps. some dry clothes'\u001b[0m\n", - " \u001b[1m)\u001b[0m,\n", - " \u001b[1m(\u001b[0m\u001b[32m'Initial box'\u001b[0m, \u001b[32m'0.811'\u001b[0m, \u001b[32m\"The the old man's fades down the hall as... 
7\"\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1m(\u001b[0m\n", - " \u001b[32m'Padded 8, extracted'\u001b[0m,\n", - " \u001b[32m'0.959'\u001b[0m,\n", - " \u001b[32m'How curious the whims of fate . had i not chanced to stroll along the river tonight-~'\u001b[0m\n", - " \u001b[1m)\u001b[0m,\n", - " \u001b[1m(\u001b[0m\u001b[32m'Extracted, init box'\u001b[0m, \u001b[32m'0.846'\u001b[0m, \u001b[32m'Aulckly as t can, master,'\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1m(\u001b[0m\u001b[32m'Extracted, init box'\u001b[0m, \u001b[32m'1.000'\u001b[0m, \u001b[32m'--the girl would most surely be dead by now.'\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1m(\u001b[0m\n", - " \u001b[32m'Padded 8, extracted'\u001b[0m,\n", - " \u001b[32m'0.935'\u001b[0m,\n", - " \u001b[32m'Ghede has been generous. the peath god has given the girl a second chance po'\u001b[0m\n", - " \u001b[1m)\u001b[0m,\n", - " \u001b[1m(\u001b[0m\u001b[32m'Default, grey pad'\u001b[0m, \u001b[32m'0.953'\u001b[0m, \u001b[32m\"Easy, girl--there's | nothing to scream about anyaore.\"\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1m(\u001b[0m\u001b[32m'Default, grey pad'\u001b[0m, \u001b[32m'0.974'\u001b[0m, \u001b[32m\"You're among friends now. 
you're safe!\"\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1m(\u001b[0m\u001b[32m'Initial box'\u001b[0m, \u001b[32m'1.000'\u001b[0m, \u001b[32m'Continued after next page'\u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ll = image_experiment.best_results()\n", - "if ll:\n", - " cprint([(m.value, f\"{r.acc:.3f}\", r.ocr) for m,r in ll])\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Perfom experiments given a list of `CropMethod`s\n" - ] - }, - { - "cell_type": "code", - "execution_count": 105, - "metadata": {}, - "outputs": [], - "source": [ - "# methods = [*CropMethod.__members__.values()]\n", - "methods = [CropMethod.INITIAL_BOX, CropMethod.DEFAULT]\n", - "image_experiment.perform_methods(methods)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Plot the perfomance of the experiments" - ] - }, - { - "cell_type": "code", - "execution_count": 106, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# image_experiment.plot_accuracies(exps, IMAGE_CONTEXT)\n", - "image_experiment.plot_accuracies(methods)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save the results to a file\n" - ] - }, - { - "cell_type": "code", - "execution_count": 107, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
cleaner/Strange_Tales_172005/Strange_Tales_172005_Tesseract.json\n",
-       "
\n" - ], - "text/plain": [ - "cleaner/Strange_Tales_172005/Strange_Tales_172005_Tesseract.json\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fp, json_results = image_experiment.to_json()\n", - "cprint(fp)\n", - "RenderJSON(json_results, 300, 2)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Load the results from a file\n" - ] - }, - { - "cell_type": "code", - "execution_count": 108, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
---------- Initial box ----------\n",
-       "ResultOCR#block 00: 0.90||Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3\n",
-       "ResultOCR#block 01: 0.93||The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~\n",
-       "ResultOCR#block 02: 0.70||“and one in ee would appear.\n",
-       "ResultOCR#block 03: 0.62||Re bambli-~ we have a\n",
-       "ResultOCR#block 04: 0.70||Tonight, he comes noost slamming open the caken\n",
-       "ResultOCR#block 05: 0.82||Tell me naster. how may bambli serve 7\n",
-       "ResultOCR#block 06: 0.56||£7 » and perhaps some dry clothes... 7 /\n",
-       "ResultOCR#block 07: 0.81||The the old man's fades down the hall as... 7\n",
-       "ResultOCR#block 08: 0.85||How curious the 4 fate. whims of had t not chanced to stroll along the river yl tonight ==\n",
-       "ResultOCR#block 09: 0.80||Fas oulckly as t ca, master.\n",
-       "ResultOCR#block 10: 0.91||<the girl would - most slirely be dead by now.\n",
-       "ResultOCR#block 11: 0.47||Ath the girl. a second chance ge a ee yg adil\n",
-       "ResultOCR#block 12: 0.84||Ah girl--there's othing to scream ntt anymore .\n",
-       "ResultOCR#block 13: 0.93||You're among friends now. you're sale\n",
-       "ResultOCR#block 14: 1.00||Continued after next page\n",
-       "\n",
-       " ---------- Default ----------\n",
-       "ResultOCR#block 00: 0.85||Eneowered by great gnarled cypress jrfes, the ancient manor ! alone on the eit of mew rce: eans, kept tipy by a white-haired ao han known only as\n",
-       "ResultOCR#block 01: 0.96||The house and the old man are alike in many ways; tall, proud, patient, conten tel always to wait until their. master cones home ~~\n",
-       "ResultOCR#block 02: 0.74||And one in ee would appear.\n",
-       "ResultOCR#block 03: 0.41||Rir guest.\n",
-       "ResultOCR#block 04: 0.59||=~and tonight, he comes host sane oo\n",
-       "ResultOCR#block 05: 0.78||Tell me masts - how may bambli . serve 7 _\n",
-       "ResultOCR#block 06: 0.48||R warm, bambli-~ and perhaps\n",
-       "ResultOCR#block 07: 0.76||The the old mans fades down the hall s.00\n",
-       "ResultOCR#block 08: 0.92||How curious the a whims of fate . had t not chanced to stroll along the river tonight~~ >\n",
-       "ResultOCR#block 09: 0.50||Aulckly “master as t can,\n",
-       "ResultOCR#block 10: 0.94||<the girl would - most surely be dead by now.\n",
-       "ResultOCR#block 11: 0.50||Ath - the girl. a second chance ee oo tr tt\n",
-       "ResultOCR#block 12: 0.84||Oe girl--there's othing to scream nt anymore. 4\n",
-       "ResultOCR#block 13: 0.96||You're among friends now. youre safe!\n",
-       "ResultOCR#block 14: 1.00||Continued after next page\n",
-       "\n",
-       " ---------- Default, grey pad ----------\n",
-       "ResultOCR#block 00: 0.95||Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as\n",
-       "ResultOCR#block 01: 0.96||The house and the old man are alike in many ways; tall, prolid, patient, contented always to wait until their. * master cones home ~-\n",
-       "ResultOCR#block 02: 0.94||“and one in > need of some help, it would appear .\n",
-       "ResultOCR#block 03: 0.88||\" bambl-- we have a guest.\n",
-       "ResultOCR#block 04: 0.72||~~and tonight, he comes urgently, slanming open\n",
-       "ResultOCR#block 05: 0.86||Tell me, master: how may bambli serve 7\n",
-       "ResultOCR#block 06: 0.90||Some blankets to keep her. warm, bambli-- and perhaps some dry \\ clothes--7 /.\n",
-       "ResultOCR#block 07: 0.06||As.\n",
-       "ResultOCR#block 08: 0.91||How curious the d 7e . whims of fa; had i not chanced to stroll along the river tonight--\n",
-       "ResultOCR#block 09: 0.55||Ickl as t can\n",
-       "ResultOCR#block 10: 0.00||\n",
-       "ResultOCR#block 11: 0.85||Ghede has been generous. the death god has gen + the girl. a second chance te oe ato\" pd ate\n",
-       "ResultOCR#block 12: 0.95||Easy, girl--there's | nothing to scream about anyaore.\n",
-       "ResultOCR#block 13: 0.97||You're among friends now. you're safe!\n",
-       "ResultOCR#block 14: 0.54||“continued a\n",
-       "\n",
-       " ---------- Padded 4px ----------\n",
-       "ResultOCR#block 00: 0.88||Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as\n",
-       "ResultOCR#block 01: 0.93||The house and the oldman are alike in many ways; tall, proud, patient, contented a ways 0 wait until their. aster comes home ~~ | }\n",
-       "ResultOCR#block 02: 0.69||F and one in ee would appear.\n",
-       "ResultOCR#block 03: 0.77||\" bambli-— we have a gliest.\n",
-       "ResultOCR#block 04: 0.55||P comes slamming open the caken\n",
-       "ResultOCR#block 05: 0.57||Tel oe er-- 5 ow a = 7\n",
-       "ResultOCR#block 06: 0.38||We and perhaps c oe /\n",
-       "ResultOCR#block 07: 0.75||The the old mans fades down the hall sra\n",
-       "ResultOCR#block 08: 0.92||How curious the a whims of fate . - had i not chanced to stroll along the river yl tonight~-\n",
-       "ResultOCR#block 09: 0.79||Aulckly as t can, ‘masrer.\n",
-       "ResultOCR#block 10: 0.92||<the girl wolld - most surely be dead by now.\n",
-       "ResultOCR#block 11: 0.88||Ghede has been generous. the oeath gop has given - the girl. a second chance ye, alem\n",
-       "ResultOCR#block 12: 0.67||Soe er eke othing to scream ay anymore.\n",
-       "ResultOCR#block 13: 0.94||\"you're among friends now. you're safe!\n",
-       "ResultOCR#block 14: 1.00||Continued after next page\n",
-       "\n",
-       " ---------- Padded 8px ----------\n",
-       "ResultOCR#block 00: 0.88||Enbonered by great gnarled cypress trees, the ancient manor stands alone on the pag hile of new orleans, kept tipy by a white-haired ao lo man known omy as\n",
-       "ResultOCR#block 01: 0.99||The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their. master comes home\n",
-       "ResultOCR#block 02: 0.67||7 and one in ee would appear,\n",
-       "ResultOCR#block 03: 0.76||Zf mbl == we have a guest.\n",
-       "ResultOCR#block 04: 0.61||Tonight, he comes host slamming open\n",
-       "ResultOCR#block 05: 0.75||Yy i tell me master - how may bambli serve 7 _,\n",
-       "ResultOCR#block 06: 0.89||Some. blankets to keep he arm , bambli-= and perhaps some dry clothes. 2s\n",
-       "ResultOCR#block 07: 0.72||The the old mans fades down the hal. srl see\n",
-       "ResultOCR#block 08: 0.88||* how curious the p whims of fate . - had i not chanced . to stroll along _ the river 3 tonight-~\n",
-       "ResultOCR#block 09: 0.65||Tiie as t can, \\ master ,\n",
-       "ResultOCR#block 10: 0.86||The girl wolld - most slirely be - dead by now.\n",
-       "ResultOCR#block 11: 0.62||Ghede has been generous. : the crn son ue;\n",
-       "ResultOCR#block 12: 0.62||Soe er eke othing to scream hbolt anhore hr\n",
-       "ResultOCR#block 13: 0.92||” you're among friends now. you're safe!\n",
-       "ResultOCR#block 14: 0.94||“continued after next page\n",
-       "\n",
-       " ---------- Extracted, init box ----------\n",
-       "ResultOCRExtracted#block 00: 0.92||Fhbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans. kept tipy by a whi te-haire old man known only as bambi] .\n",
-       "ResultOCRExtracted#block 01: 0.93||Ee house and the old man por alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home ~~\n",
-       "ResultOCRExtracted#block 02: 0.73||And one in fee would appear.\n",
-       "ResultOCRExtracted#block 03: 0.67||— we have a i=s7t.\n",
-       "ResultOCRExtracted#block 04: 0.74||~and tonight, he comes urgently, slamming open\n",
-       "ResultOCRExtracted#block 05: 0.85||Tell me master how may bambli serve 7\n",
-       "ResultOCRExtracted#block 06: 0.93||Some blankets to keep her warm, banbli-- and perhaps. some dry clothes\n",
-       "ResultOCRExtracted#block 07: 0.77||The the old man's fades down the hall s.,00\n",
-       "ResultOCRExtracted#block 08: 0.77||Hin ef fare” had i not chanced to stroll along the river. tonigmt=~\n",
-       "ResultOCRExtracted#block 09: 0.85||Aulckly as t can, master,\n",
-       "ResultOCRExtracted#block 10: 1.00||--the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.51||Ath the girl a second chance ro\n",
-       "ResultOCRExtracted#block 12: 0.56||Cas ee, othing to scream pls aa .\n",
-       "ResultOCRExtracted#block 13: 0.95||You're among friends now. you're sale!\n",
-       "ResultOCRExtracted#block 14: 0.91||Continued af ext page\n",
-       "\n",
-       " ---------- Padded 4, extracted ----------\n",
-       "ResultOCRExtracted#block 00: 0.91||Enbonsred by great shale cypress trees, the anci manor stands alone on the [tskirts of new orleans, kept tidy by a whi te- haired old man known only as b8ambl .\n",
-       "ResultOCRExtracted#block 01: 0.98||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home --\n",
-       "ResultOCRExtracted#block 02: 0.73||And one in fee would appear.\n",
-       "ResultOCRExtracted#block 03: 0.83||Bambli we have a gliest.\n",
-       "ResultOCRExtracted#block 04: 0.74||=~and tonight, he comes urgently, slamming open\n",
-       "ResultOCRExtracted#block 05: 0.84||Tell me master. how may bambli serve 7\n",
-       "ResultOCRExtracted#block 06: 0.53||Warm, bambli-- and perhaps. som\n",
-       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sra\n",
-       "ResultOCRExtracted#block 08: 0.76||We sea had i not chanced to stroll along the river. tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.76||Alley as t can, master,\n",
-       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.92||Chepe has been generous. the peath god has given the girl a second chance amr\n",
-       "ResultOCRExtracted#block 12: 0.73||Cas gr theres othing to scream pissy tore .\n",
-       "ResultOCRExtracted#block 13: 0.95||You're among eriends now. you're safe!\n",
-       "ResultOCRExtracted#block 14: 0.91||Continued af ext page\n",
-       "\n",
-       " ---------- Padded 8, extracted ----------\n",
-       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as 8ambli .\n",
-       "ResultOCRExtracted#block 01: 0.98||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home --\n",
-       "ResultOCRExtracted#block 02: 0.70||And one in fee wolld appear.\n",
-       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
-       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
-       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve't\n",
-       "ResultOCRExtracted#block 06: 0.91||Some blankets to keep her warm, banbli-~ and perhaps. some dry clothes\n",
-       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sire\n",
-       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.77||Allckry as t can, master.\n",
-       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.94||Ghede has been generous. the peath god has given the girl a second chance po\n",
-       "ResultOCRExtracted#block 12: 0.74||Cas gr theres othing to scream pps hore .\n",
-       "ResultOCRExtracted#block 13: 0.42||You're safe § r\n",
-       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
-       "\n",
-       " ---------- Padded 8, dilation 1 ----------\n",
-       "ResultOCRExtracted#block 00: 0.61||Outskirts of new orleans, kept tipy by a white-haired old man known only as sams .\n",
-       "ResultOCRExtracted#block 01: 0.88||The house and the old man are alike in many ways, tall, proud, nt, contented live walt gtie their, master comes home -=\n",
-       "ResultOCRExtracted#block 02: 0.97||And one in need of some help, it wolld appear .\n",
-       "ResultOCRExtracted#block 03: 0.78||Bambli ~~ we have a gliest.\n",
-       "ResultOCRExtracted#block 04: 0.79||=and tonight, he comes most slamming open the front\n",
-       "ResultOCRExtracted#block 05: 0.86||Tell me, master: how may bambli serve 7\n",
-       "ResultOCRExtracted#block 06: 0.85||Gone blankets to keep her. warm, bambli-~ and perhaps some dry\n",
-       "ResultOCRExtracted#block 07: 0.73||The old man's footsteps the hall as.re\n",
-       "ResultOCRExtracted#block 08: 0.94||How curious the whims of fate . had i not chanced to stroll along the r/| ver tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.68||Aulckly as t can,\n",
-       "ResultOCRExtracted#block 10: 0.95||~<the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.75||Ee lh boe ene. the death gop has the girl. a second chance\n",
-       "ResultOCRExtracted#block 12: 0.92||Easy, girl--there's nothing to scream abolit anymo!\n",
-       "ResultOCRExtracted#block 13: 0.97||You're among friends now. you're safe!\n",
-       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
-       "\n",
-       " ---------- Pad 8, fract. 0.5 ----------\n",
-       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as b8ambl .\n",
-       "ResultOCRExtracted#block 01: 0.97||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until. their. master comes home ~~\n",
-       "ResultOCRExtracted#block 02: 0.78||And one in eee pe would appear.\n",
-       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
-       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
-       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve'7\n",
-       "ResultOCRExtracted#block 06: 0.94||Some blankets to keep her. warm, bambli-~ and perhaps. some dry clothes\n",
-       "ResultOCRExtracted#block 07: 0.73||The the old mans fades donn the hall sire\n",
-       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.81||Aulckry as t can, master.\n",
-       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.92||Ghede hag been generous. the peath god has given the girl a second chance po\n",
-       "ResultOCRExtracted#block 12: 0.76||Cas srl theres othing to scream seoit hore .\n",
-       "ResultOCRExtracted#block 13: 0.42||You're safe 4 ’\n",
-       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
-       "\n",
-       " ---------- Pad 8, fract. 0.2 ----------\n",
-       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as b8ambl .\n",
-       "ResultOCRExtracted#block 01: 0.97||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home ~~\n",
-       "ResultOCRExtracted#block 02: 0.77||And one in eet sve would appear.\n",
-       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
-       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
-       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve'7\n",
-       "ResultOCRExtracted#block 06: 0.94||Some blankets to keep her, warm, bambli-~ and perhaps. some dry clothes\n",
-       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sere\n",
-       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.81||Aulckry as t can, master.\n",
-       "ResultOCRExtracted#block 10: 0.95||~<the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.94||Ghede has been generous. the ceath god has given the girl a second chance po\n",
-       "ResultOCRExtracted#block 12: 0.67||Yi renee othing to scream seat anhore .\n",
-       "ResultOCRExtracted#block 13: 0.93||Youre among eriends now. you're safe!\n",
-       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
-       "\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "---------- Initial box ----------\n", - "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.90\u001b[0m||Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski\u001b[1m)\u001b[0m \u001b[1;36m2\u001b[0m of mew ce eans, kept tidy by a white-haired old man known only as bambs, \u001b[1;36m3\u001b[0m\n", - "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.93\u001b[0m||The house and the old man are alike in many ways; tall, prolid, patient, contented always \u001b[1;36m0\u001b[0m wait until. their. master cones mome ~~\n", - "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.70\u001b[0m||“and one in ee would appear.\n", - "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.62\u001b[0m||Re bambli-~ we have a\n", - "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.70\u001b[0m||Tonight, he comes noost slamming open the caken\n", - "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.82\u001b[0m||Tell me naster. how may bambli serve \u001b[1;36m7\u001b[0m\n", - "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.56\u001b[0m||£\u001b[1;36m7\u001b[0m » and perhaps some dry clothes\u001b[33m...\u001b[0m \u001b[1;36m7\u001b[0m \u001b[35m/\u001b[0m\n", - "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.81\u001b[0m||The the old man's fades down the hall as\u001b[33m...\u001b[0m \u001b[1;36m7\u001b[0m\n", - "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.85\u001b[0m||How curious the \u001b[1;36m4\u001b[0m fate. 
whims of had t not chanced to stroll along the river yl tonight ==\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.80\u001b[0m||Fas oulckly as t ca, master.\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.91\u001b[0m||\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.50\u001b[0m||Aulckly “master as t can,\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.94\u001b[0m|| need of some help, it would appear .\n", - "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.88\u001b[0m||\" bambl-- we have a guest.\n", - "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.72\u001b[0m||~~and tonight, he comes urgently, slanming open\n", - "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.86\u001b[0m||Tell me, master: how may bambli serve \u001b[1;36m7\u001b[0m\n", - "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.90\u001b[0m||Some blankets to keep her. warm, bambli-- and perhaps some dry \\ clothes-\u001b[1;36m-7\u001b[0m \u001b[35m/\u001b[0m\u001b[95m.\u001b[0m\n", - "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.06\u001b[0m||As.\n", - "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.91\u001b[0m||How curious the d 7e . whims of fa; had i not chanced to stroll along the river tonight--\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.55\u001b[0m||Ickl as t can\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", - "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.85\u001b[0m||Ghede has been generous. the death god has gen + the girl. a second chance te oe ato\" pd ate\n", - "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.95\u001b[0m||Easy, girl--there's | nothing to scream about anyaore.\n", - "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.97\u001b[0m||You're among friends now. 
you're safe!\n", - "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.54\u001b[0m||“continued a\n", - "\n", - " ---------- Padded 4px ----------\n", - "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.88\u001b[0m||Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as\n", - "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.93\u001b[0m||The house and the oldman are alike in many ways; tall, proud, patient, contented a ways \u001b[1;36m0\u001b[0m wait until their. aster comes home ~~ | \u001b[1m}\u001b[0m\n", - "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.69\u001b[0m||F and one in ee would appear.\n", - "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.77\u001b[0m||\" bambli-— we have a gliest.\n", - "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.55\u001b[0m||P comes slamming open the caken\n", - "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.57\u001b[0m||Tel oe er-- \u001b[1;36m5\u001b[0m ow a = \u001b[1;36m7\u001b[0m\n", - "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.38\u001b[0m||We and perhaps c oe \u001b[35m/\u001b[0m\n", - "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.75\u001b[0m||The the old mans fades down the hall sra\n", - "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.92\u001b[0m||How curious the a whims of fate . - had i not chanced to stroll along the river yl tonight~-\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.79\u001b[0m||Aulckly as t can, ‘masrer.\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.92\u001b[0m|| Page: media/Strange_Tales_172005.jpg
Size: 1275x1888 px: 4.25 x 6.29 in @ 188.32 dpi
Model: Tesseract
Crop Method: Pad 8, fract. 0.2
Accuracy Mean/Trimmed: 0.85/0.86
\n", - "
\n", - "
Box #ImageAccuracyOCR
1
0.94
Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as b8ambl .
2
0.97
The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home ~~
3
0.77
And one in eet⎕⎕⎕ sve⎕⎕⎕⎕⎕⎕⎕⎕⎕ would appear.
4
0.86
Bambl ~~ we have a guest.
5
0.64
=~and tonight, he comes ⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕slamming open urg⎕⎕⎕⎕en⎕⎕⎕⎕⎕tly,⎕⎕⎕⎕
6
0.82
Tell me master... how may bambli serve'7
7
0.94
Some blankets to keep her, warm, bambli-~ and perhaps. some dry clothes
8
0.75
The⎕⎕⎕⎕⎕⎕⎕⎕ the old man⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕s fades down the hall sere
9
0.96
How curious the whims of fate. had i not chanced to stroll along the river tonight-~
10
0.81
A⎕⎕⎕ulckry as t can, master.
11
0.95
~<the girl would most surely be dead by now.
12
0.94
Ghede has been generous. the ceath god has given the girl a second chance po⎕⎕
13
0.67
Yi⎕⎕⎕ ⎕⎕⎕⎕⎕⎕⎕⎕⎕⎕renee othing to scream sea⎕⎕⎕t anhore.
14
0.93
Youre among eriends now. you're safe!
15
0.83
| continued af⎕⎕⎕ ext page
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "saved_exp = ExperimentOCR.saved_experiment(CONTEXT, 'Tesseract', IMAGE_CONTEXT.image_idx)\n", - "# saved_exp = ExperimentOCR.saved_experiment(IMAGE_CONTEXT, 'Tesseract', 'Action_Comics_1960-01-00_(262).JPG')\n", - "if saved_exp:\n", - " saved_exp.method_experiment(CropMethod.PAD_8_FRACT_0_2).display()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Perform experiments for selected boxes y methods" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
---------- Initial box ----------\n",
-       "ResultOCR#block 00: 0.90||Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3\n",
-       "ResultOCR#block 01: 0.93||The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~\n",
-       "ResultOCR#block 02: 0.70||“and one in ee would appear.\n",
-       "ResultOCR#block 03: 0.62||Re bambli-~ we have a\n",
-       "ResultOCR#block 04: 0.70||Tonight, he comes noost slamming open the caken\n",
-       "ResultOCR#block 05: 0.82||Tell me naster. how may bambli serve 7\n",
-       "ResultOCR#block 06: 0.56||£7 » and perhaps some dry clothes... 7 /\n",
-       "ResultOCR#block 07: 0.81||The the old man's fades down the hall as... 7\n",
-       "ResultOCR#block 08: 0.85||How curious the 4 fate. whims of had t not chanced to stroll along the river yl tonight ==\n",
-       "ResultOCR#block 09: 0.80||Fas oulckly as t ca, master.\n",
-       "ResultOCR#block 10: 0.91||<the girl would - most slirely be dead by now.\n",
-       "ResultOCR#block 11: 0.47||Ath the girl. a second chance ge a ee yg adil\n",
-       "ResultOCR#block 12: 0.84||Ah girl--there's othing to scream ntt anymore .\n",
-       "ResultOCR#block 13: 0.93||You're among friends now. you're sale\n",
-       "ResultOCR#block 14: 1.00||Continued after next page\n",
-       "\n",
-       " ---------- Default ----------\n",
-       "ResultOCR#block 00: 0.85||Eneowered by great gnarled cypress jrfes, the ancient manor ! alone on the eit of mew rce: eans, kept tipy by a white-haired ao han known only as\n",
-       "ResultOCR#block 01: 0.96||The house and the old man are alike in many ways; tall, proud, patient, conten tel always to wait until their. master cones home ~~\n",
-       "ResultOCR#block 02: 0.74||And one in ee would appear.\n",
-       "ResultOCR#block 03: 0.41||Rir guest.\n",
-       "ResultOCR#block 04: 0.59||=~and tonight, he comes host sane oo\n",
-       "ResultOCR#block 05: 0.78||Tell me masts - how may bambli . serve 7 _\n",
-       "ResultOCR#block 06: 0.48||R warm, bambli-~ and perhaps\n",
-       "ResultOCR#block 07: 0.76||The the old mans fades down the hall s.00\n",
-       "ResultOCR#block 08: 0.92||How curious the a whims of fate . had t not chanced to stroll along the river tonight~~ >\n",
-       "ResultOCR#block 09: 0.50||Aulckly “master as t can,\n",
-       "ResultOCR#block 10: 0.94||<the girl would - most surely be dead by now.\n",
-       "ResultOCR#block 11: 0.50||Ath - the girl. a second chance ee oo tr tt\n",
-       "ResultOCR#block 12: 0.84||Oe girl--there's othing to scream nt anymore. 4\n",
-       "ResultOCR#block 13: 0.96||You're among friends now. youre safe!\n",
-       "ResultOCR#block 14: 1.00||Continued after next page\n",
-       "\n",
-       " ---------- Default, grey pad ----------\n",
-       "ResultOCR#block 00: 0.95||Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as\n",
-       "ResultOCR#block 01: 0.96||The house and the old man are alike in many ways; tall, prolid, patient, contented always to wait until their. * master cones home ~-\n",
-       "ResultOCR#block 02: 0.94||“and one in > need of some help, it would appear .\n",
-       "ResultOCR#block 03: 0.88||\" bambl-- we have a guest.\n",
-       "ResultOCR#block 04: 0.72||~~and tonight, he comes urgently, slanming open\n",
-       "ResultOCR#block 05: 0.86||Tell me, master: how may bambli serve 7\n",
-       "ResultOCR#block 06: 0.90||Some blankets to keep her. warm, bambli-- and perhaps some dry \\ clothes--7 /.\n",
-       "ResultOCR#block 07: 0.06||As.\n",
-       "ResultOCR#block 08: 0.91||How curious the d 7e . whims of fa; had i not chanced to stroll along the river tonight--\n",
-       "ResultOCR#block 09: 0.55||Ickl as t can\n",
-       "ResultOCR#block 10: 0.00||\n",
-       "ResultOCR#block 11: 0.85||Ghede has been generous. the death god has gen + the girl. a second chance te oe ato\" pd ate\n",
-       "ResultOCR#block 12: 0.95||Easy, girl--there's | nothing to scream about anyaore.\n",
-       "ResultOCR#block 13: 0.97||You're among friends now. you're safe!\n",
-       "ResultOCR#block 14: 0.54||“continued a\n",
-       "\n",
-       " ---------- Padded 4px ----------\n",
-       "ResultOCR#block 00: 0.88||Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as\n",
-       "ResultOCR#block 01: 0.93||The house and the oldman are alike in many ways; tall, proud, patient, contented a ways 0 wait until their. aster comes home ~~ | }\n",
-       "ResultOCR#block 02: 0.69||F and one in ee would appear.\n",
-       "ResultOCR#block 03: 0.77||\" bambli-— we have a gliest.\n",
-       "ResultOCR#block 04: 0.55||P comes slamming open the caken\n",
-       "ResultOCR#block 05: 0.57||Tel oe er-- 5 ow a = 7\n",
-       "ResultOCR#block 06: 0.38||We and perhaps c oe /\n",
-       "ResultOCR#block 07: 0.75||The the old mans fades down the hall sra\n",
-       "ResultOCR#block 08: 0.92||How curious the a whims of fate . - had i not chanced to stroll along the river yl tonight~-\n",
-       "ResultOCR#block 09: 0.79||Aulckly as t can, ‘masrer.\n",
-       "ResultOCR#block 10: 0.92||<the girl wolld - most surely be dead by now.\n",
-       "ResultOCR#block 11: 0.88||Ghede has been generous. the oeath gop has given - the girl. a second chance ye, alem\n",
-       "ResultOCR#block 12: 0.67||Soe er eke othing to scream ay anymore.\n",
-       "ResultOCR#block 13: 0.94||\"you're among friends now. you're safe!\n",
-       "ResultOCR#block 14: 1.00||Continued after next page\n",
-       "\n",
-       " ---------- Padded 8px ----------\n",
-       "ResultOCR#block 00: 0.88||Enbonered by great gnarled cypress trees, the ancient manor stands alone on the pag hile of new orleans, kept tipy by a white-haired ao lo man known omy as\n",
-       "ResultOCR#block 01: 0.99||The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their. master comes home\n",
-       "ResultOCR#block 02: 0.67||7 and one in ee would appear,\n",
-       "ResultOCR#block 03: 0.76||Zf mbl == we have a guest.\n",
-       "ResultOCR#block 04: 0.61||Tonight, he comes host slamming open\n",
-       "ResultOCR#block 05: 0.75||Yy i tell me master - how may bambli serve 7 _,\n",
-       "ResultOCR#block 06: 0.89||Some. blankets to keep he arm , bambli-= and perhaps some dry clothes. 2s\n",
-       "ResultOCR#block 07: 0.72||The the old mans fades down the hal. srl see\n",
-       "ResultOCR#block 08: 0.88||* how curious the p whims of fate . - had i not chanced . to stroll along _ the river 3 tonight-~\n",
-       "ResultOCR#block 09: 0.65||Tiie as t can, \\ master ,\n",
-       "ResultOCR#block 10: 0.86||The girl wolld - most slirely be - dead by now.\n",
-       "ResultOCR#block 11: 0.62||Ghede has been generous. : the crn son ue;\n",
-       "ResultOCR#block 12: 0.62||Soe er eke othing to scream hbolt anhore hr\n",
-       "ResultOCR#block 13: 0.92||” you're among friends now. you're safe!\n",
-       "ResultOCR#block 14: 0.94||“continued after next page\n",
-       "\n",
-       " ---------- Extracted, init box ----------\n",
-       "ResultOCRExtracted#block 00: 0.92||Fhbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans. kept tipy by a whi te-haire old man known only as bambi] .\n",
-       "ResultOCRExtracted#block 01: 0.93||Ee house and the old man por alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home ~~\n",
-       "ResultOCRExtracted#block 02: 0.73||And one in fee would appear.\n",
-       "ResultOCRExtracted#block 03: 0.67||— we have a i=s7t.\n",
-       "ResultOCRExtracted#block 04: 0.74||~and tonight, he comes urgently, slamming open\n",
-       "ResultOCRExtracted#block 05: 0.85||Tell me master how may bambli serve 7\n",
-       "ResultOCRExtracted#block 06: 0.93||Some blankets to keep her warm, banbli-- and perhaps. some dry clothes\n",
-       "ResultOCRExtracted#block 07: 0.77||The the old man's fades down the hall s.,00\n",
-       "ResultOCRExtracted#block 08: 0.77||Hin ef fare” had i not chanced to stroll along the river. tonigmt=~\n",
-       "ResultOCRExtracted#block 09: 0.85||Aulckly as t can, master,\n",
-       "ResultOCRExtracted#block 10: 1.00||--the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.51||Ath the girl a second chance ro\n",
-       "ResultOCRExtracted#block 12: 0.56||Cas ee, othing to scream pls aa .\n",
-       "ResultOCRExtracted#block 13: 0.95||You're among friends now. you're sale!\n",
-       "ResultOCRExtracted#block 14: 0.91||Continued af ext page\n",
-       "\n",
-       " ---------- Padded 4, extracted ----------\n",
-       "ResultOCRExtracted#block 00: 0.91||Enbonsred by great shale cypress trees, the anci manor stands alone on the [tskirts of new orleans, kept tidy by a whi te- haired old man known only as b8ambl .\n",
-       "ResultOCRExtracted#block 01: 0.98||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master cones home --\n",
-       "ResultOCRExtracted#block 02: 0.73||And one in fee would appear.\n",
-       "ResultOCRExtracted#block 03: 0.83||Bambli we have a gliest.\n",
-       "ResultOCRExtracted#block 04: 0.74||=~and tonight, he comes urgently, slamming open\n",
-       "ResultOCRExtracted#block 05: 0.84||Tell me master. how may bambli serve 7\n",
-       "ResultOCRExtracted#block 06: 0.53||Warm, bambli-- and perhaps. som\n",
-       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sra\n",
-       "ResultOCRExtracted#block 08: 0.76||We sea had i not chanced to stroll along the river. tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.76||Alley as t can, master,\n",
-       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.92||Chepe has been generous. the peath god has given the girl a second chance amr\n",
-       "ResultOCRExtracted#block 12: 0.73||Cas gr theres othing to scream pissy tore .\n",
-       "ResultOCRExtracted#block 13: 0.95||You're among eriends now. you're safe!\n",
-       "ResultOCRExtracted#block 14: 0.91||Continued af ext page\n",
-       "\n",
-       " ---------- Padded 8, extracted ----------\n",
-       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as 8ambli .\n",
-       "ResultOCRExtracted#block 01: 0.98||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home --\n",
-       "ResultOCRExtracted#block 02: 0.70||And one in fee wolld appear.\n",
-       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
-       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
-       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve't\n",
-       "ResultOCRExtracted#block 06: 0.91||Some blankets to keep her warm, banbli-~ and perhaps. some dry clothes\n",
-       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sire\n",
-       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.77||Allckry as t can, master.\n",
-       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.94||Ghede has been generous. the peath god has given the girl a second chance po\n",
-       "ResultOCRExtracted#block 12: 0.74||Cas gr theres othing to scream pps hore .\n",
-       "ResultOCRExtracted#block 13: 0.42||You're safe § r\n",
-       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
-       "\n",
-       " ---------- Padded 8, dilation 1 ----------\n",
-       "ResultOCRExtracted#block 00: 0.61||Outskirts of new orleans, kept tipy by a white-haired old man known only as sams .\n",
-       "ResultOCRExtracted#block 01: 0.88||The house and the old man are alike in many ways, tall, proud, nt, contented live walt gtie their, master comes home -=\n",
-       "ResultOCRExtracted#block 02: 0.97||And one in need of some help, it wolld appear .\n",
-       "ResultOCRExtracted#block 03: 0.78||Bambli ~~ we have a gliest.\n",
-       "ResultOCRExtracted#block 04: 0.79||=and tonight, he comes most slamming open the front\n",
-       "ResultOCRExtracted#block 05: 0.86||Tell me, master: how may bambli serve 7\n",
-       "ResultOCRExtracted#block 06: 0.85||Gone blankets to keep her. warm, bambli-~ and perhaps some dry\n",
-       "ResultOCRExtracted#block 07: 0.73||The old man's footsteps the hall as.re\n",
-       "ResultOCRExtracted#block 08: 0.94||How curious the whims of fate . had i not chanced to stroll along the r/| ver tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.68||Aulckly as t can,\n",
-       "ResultOCRExtracted#block 10: 0.95||~<the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.75||Ee lh boe ene. the death gop has the girl. a second chance\n",
-       "ResultOCRExtracted#block 12: 0.92||Easy, girl--there's nothing to scream abolit anymo!\n",
-       "ResultOCRExtracted#block 13: 0.97||You're among friends now. you're safe!\n",
-       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
-       "\n",
-       " ---------- Pad 8, fract. 0.5 ----------\n",
-       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as b8ambl .\n",
-       "ResultOCRExtracted#block 01: 0.97||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until. their. master comes home ~~\n",
-       "ResultOCRExtracted#block 02: 0.78||And one in eee pe would appear.\n",
-       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
-       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
-       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve'7\n",
-       "ResultOCRExtracted#block 06: 0.94||Some blankets to keep her. warm, bambli-~ and perhaps. some dry clothes\n",
-       "ResultOCRExtracted#block 07: 0.73||The the old mans fades donn the hall sire\n",
-       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.81||Aulckry as t can, master.\n",
-       "ResultOCRExtracted#block 10: 0.98||~-the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.92||Ghede hag been generous. the peath god has given the girl a second chance po\n",
-       "ResultOCRExtracted#block 12: 0.76||Cas srl theres othing to scream seoit hore .\n",
-       "ResultOCRExtracted#block 13: 0.42||You're safe 4 ’\n",
-       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
-       "\n",
-       " ---------- Pad 8, fract. 0.2 ----------\n",
-       "ResultOCRExtracted#block 00: 0.94||Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as b8ambl .\n",
-       "ResultOCRExtracted#block 01: 0.97||The house and the old man are alike in many ways; tall, proud, patient, contented always t° wait until their. master comes home ~~\n",
-       "ResultOCRExtracted#block 02: 0.77||And one in eet sve would appear.\n",
-       "ResultOCRExtracted#block 03: 0.86||Bambl ~~ we have a guest.\n",
-       "ResultOCRExtracted#block 04: 0.64||=~and tonight, he comes slamming open urgently,\n",
-       "ResultOCRExtracted#block 05: 0.82||Tell me master... how may bambli serve'7\n",
-       "ResultOCRExtracted#block 06: 0.94||Some blankets to keep her, warm, bambli-~ and perhaps. some dry clothes\n",
-       "ResultOCRExtracted#block 07: 0.75||The the old mans fades down the hall sere\n",
-       "ResultOCRExtracted#block 08: 0.96||How curious the whims of fate . had i not chanced to stroll along the river tonight-~\n",
-       "ResultOCRExtracted#block 09: 0.81||Aulckry as t can, master.\n",
-       "ResultOCRExtracted#block 10: 0.95||~<the girl would most surely be dead by now.\n",
-       "ResultOCRExtracted#block 11: 0.94||Ghede has been generous. the ceath god has given the girl a second chance po\n",
-       "ResultOCRExtracted#block 12: 0.67||Yi renee othing to scream seat anhore .\n",
-       "ResultOCRExtracted#block 13: 0.93||Youre among eriends now. you're safe!\n",
-       "ResultOCRExtracted#block 14: 0.83||| continued af ext page\n",
-       "\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "---------- Initial box ----------\n", - "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.90\u001b[0m||Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski\u001b[1m)\u001b[0m \u001b[1;36m2\u001b[0m of mew ce eans, kept tidy by a white-haired old man known only as bambs, \u001b[1;36m3\u001b[0m\n", - "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.93\u001b[0m||The house and the old man are alike in many ways; tall, prolid, patient, contented always \u001b[1;36m0\u001b[0m wait until. their. master cones mome ~~\n", - "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.70\u001b[0m||“and one in ee would appear.\n", - "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.62\u001b[0m||Re bambli-~ we have a\n", - "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.70\u001b[0m||Tonight, he comes noost slamming open the caken\n", - "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.82\u001b[0m||Tell me naster. how may bambli serve \u001b[1;36m7\u001b[0m\n", - "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.56\u001b[0m||£\u001b[1;36m7\u001b[0m » and perhaps some dry clothes\u001b[33m...\u001b[0m \u001b[1;36m7\u001b[0m \u001b[35m/\u001b[0m\n", - "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.81\u001b[0m||The the old man's fades down the hall as\u001b[33m...\u001b[0m \u001b[1;36m7\u001b[0m\n", - "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.85\u001b[0m||How curious the \u001b[1;36m4\u001b[0m fate. 
whims of had t not chanced to stroll along the river yl tonight ==\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.80\u001b[0m||Fas oulckly as t ca, master.\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.91\u001b[0m||\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.50\u001b[0m||Aulckly “master as t can,\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.94\u001b[0m|| need of some help, it would appear .\n", - "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.88\u001b[0m||\" bambl-- we have a guest.\n", - "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.72\u001b[0m||~~and tonight, he comes urgently, slanming open\n", - "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.86\u001b[0m||Tell me, master: how may bambli serve \u001b[1;36m7\u001b[0m\n", - "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.90\u001b[0m||Some blankets to keep her. warm, bambli-- and perhaps some dry \\ clothes-\u001b[1;36m-7\u001b[0m \u001b[35m/\u001b[0m\u001b[95m.\u001b[0m\n", - "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.06\u001b[0m||As.\n", - "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.91\u001b[0m||How curious the d 7e . whims of fa; had i not chanced to stroll along the river tonight--\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.55\u001b[0m||Ickl as t can\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", - "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.85\u001b[0m||Ghede has been generous. the death god has gen + the girl. a second chance te oe ato\" pd ate\n", - "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.95\u001b[0m||Easy, girl--there's | nothing to scream about anyaore.\n", - "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.97\u001b[0m||You're among friends now. 
you're safe!\n", - "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.54\u001b[0m||“continued a\n", - "\n", - " ---------- Padded 4px ----------\n", - "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.88\u001b[0m||Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as\n", - "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.93\u001b[0m||The house and the oldman are alike in many ways; tall, proud, patient, contented a ways \u001b[1;36m0\u001b[0m wait until their. aster comes home ~~ | \u001b[1m}\u001b[0m\n", - "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.69\u001b[0m||F and one in ee would appear.\n", - "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.77\u001b[0m||\" bambli-— we have a gliest.\n", - "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.55\u001b[0m||P comes slamming open the caken\n", - "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.57\u001b[0m||Tel oe er-- \u001b[1;36m5\u001b[0m ow a = \u001b[1;36m7\u001b[0m\n", - "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.38\u001b[0m||We and perhaps c oe \u001b[35m/\u001b[0m\n", - "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.75\u001b[0m||The the old mans fades down the hall sra\n", - "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.92\u001b[0m||How curious the a whims of fate . - had i not chanced to stroll along the river yl tonight~-\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.79\u001b[0m||Aulckly as t can, ‘masrer.\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.92\u001b[0m||---------- Initial box ----------\n", - "ResultOCR#block 00: 0.90||Suddenly.\n", - "ResultOCR#block 01: 0.81||>gasp!z everything's w- whirling around me!t can't stand sr rea\n", - "ResultOCR#block 02: 0.81||Clark!i'm falling' “help! help! 
—\n", - "ResultOCR#block 03: 0.67||I-i'm bs \"passing ohhh\n", - "ResultOCR#block 04: 0.92||Action comics\n", - "ResultOCR#block 05: 1.00||Then, seconds later...\n", - "ResultOCR#block 06: 0.89||Great caesars ghost! { this /s black magic! we've been transportel to the weirdest world, tit ever saw!\n", - "ResultOCR#block 07: 0.91||...|t certainly isn't our earth, perry. look at the size of those bees.\n", - "ResultOCR#block 08: 0.88||Watch out, clark!\n", - "ResultOCR#block 09: 0.73||Owwww.\n", - "ResultOCR#block 10: 0.85||Yet the bee's stinger went ws. right through my uniform and penetrated my skin! that means. the fabric of pe ay costume has become dj} satie fued 1,\n", - "ResultOCR#block 11: 0.87||Hurry. let's beat it before we get stung, foo = aii\n", - "ResultOCR#block 12: 0.86||Ggreat guns!...2g, .pa/n i feel n pain! as superman, i should be > invulnerable! 1 have unbreakabl ‘skin! under my clark kent clothes, im wearing an woestructisle superman uniform ! =\n", - "ResultOCR#block 13: 1.00||Abruptly...\n", - "ResultOCR#block 14: 0.77||Great caesar's ghost. he's spinning a web of g/ant, sr strands --\n", - "ResultOCR#block 15: 0.89||I-i feel the heat of the sun...the pain of the bee-sting ... the heavy weight of my pack! every human discomfort... good grief! i've lost all my super-powers! i've become an ordinary mortal in this world. /\n", - "ResultOCR#block 16: 0.84||Enormous spider- like creature is going berserk, as if the sight of us excited him (into mad spinning get back! that 2\n", - "\n", - " ---------- Default ----------\n", - "ResultOCR#block 00: 1.00||Suddenly...\n", - "ResultOCR#block 01: 0.80||Gasp! everything's w- whi rns atound me!i can't stand we a\n", - "ResultOCR#block 02: 0.87||Clark!i'm falling! help! help\n", - "ResultOCR#block 03: 0.45||I-i'm se ou\n", - "ResultOCR#block 04: 0.92||Action comics\n", - "ResultOCR#block 05: 0.95||Then, seconds later.\n", - "ResultOCR#block 06: 0.92||Great caesar's ghost! this /s black magic! 
we've been transported, to the weirdest world, i ever saw!\n", - "ResultOCR#block 07: 0.93||...|t certainly isn't our earth, perry look at the size of those bees!\n", - "ResultOCR#block 08: 0.88||Watch out, clark)\n", - "ResultOCR#block 09: 0.67||Owwww.,\n", - "ResultOCR#block 10: 0.97||Yet the bee's stinger went = right through my uniform ando penetrated my skin! that mean the fabric of my superman costume has become ordinary, cloth! £\n", - "ResultOCR#block 11: 0.89||Hurry. let's beat it before we get stung, tod yi\n", - "ResultOCR#block 12: 0.88||Ggreat guns!...2gasp/z...pain i feel n fan! as superman, i should be invulnerable! 1 have unbreakable skin! under my clark kent clothes, i'm wearing an /noestruct/ele superman uniform !\n", - "ResultOCR#block 13: 1.00||Abruptly...\n", - "ResultOCR#block 14: 0.76||Great caesars ° ghost! he's spinning & web of g/a, silk strands\n", - "ResultOCR#block 15: 0.89||I-i feel the heat of the sum...the pain of the bee-sting... the heavy weight of my { pack! every human discomfort... good grief! i've lost all my super -powersx ve become an ordinary mortal in this world! j\n", - "ResultOCR#block 16: 0.70||Like creature is going berserk, as if the sight of us excited him into mad spinning, get back! that \\ enormous spider-\n", - "\n", - " ---------- Default, grey pad ----------\n", - "ResultOCR#block 00: 0.78||Sudden.\n", - "ResultOCR#block 01: 0.82||5gasp!z everything's | w- whirling around] me!t can't stand ue a\n", - "ResultOCR#block 02: 0.57||Ark!i'm falling\n", - "ResultOCR#block 03: 0.85||I-t'm eq \"passing out... ohhh.\n", - "ResultOCR#block 04: 0.92||Action comics\n", - "ResultOCR#block 05: 0.00||\n", - "ResultOCR#block 06: 0.88||‘great caesar's ghost! || this /§ black magic! 
we've been transportel to the weirdest world, i ever saw.\n", - "ResultOCR#block 07: 0.87||\"...it certainly isn't our | earth, perry look at the| \\s|ze of those bees.\n", - "ResultOCR#block 08: 0.88||Watch out, clark!\n", - "ResultOCR#block 09: 0.73||Owwww.\n", - "ResultOCR#block 10: 0.90||Yet the bee's stinger went ~ right through my uniform and penetrated my skin! that means. the fabric of sera ry costume vis become din a s clots! a\n", - "ResultOCR#block 11: 0.80||Hurry. let's ) beat it before] we get stung, k 00! __j\n", - "ResultOCR#block 12: 0.86||Ggreat guns!...3gasp/=... rain t feel fan! as superman, i should be invulnerable® 1 have unbreakabli skin! under my clark kent clothes, ia wearing an /noestryct/iele supe iperman uniform !\n", - "ResultOCR#block 13: 0.90||Abruptly.\n", - "ResultOCR#block 14: 0.92||Great caesar's | ghost! he's spinning k web of giant, silk strands -- as tough as steel! ne]\n", - "ResultOCR#block 15: 0.89||\\i-t feel the heat of the sun...the pain of the bee-sting... the heavy weight of my & pack! every human discomfort... good grief! i've lost all my super-powers i've become an ordinary mortal in this world. /.\n", - "ResultOCR#block 16: 0.94||(get back! that enormous spider- like creature 1 § going berserk, as if the sight of us excited him into mad spinning]\n", - "\n", - " ---------- Padded 4px ----------\n", - "ResultOCR#block 00: 0.90||Suddenly.\n", - "ResultOCR#block 01: 0.83||Fgasp/= everything's whirling around me!i can't stand ea\n", - "ResultOCR#block 02: 0.68||Ie lottie eto clark!i'm falling help! help!\n", - "ResultOCR#block 03: 0.55||I-i'm yr a ohh hh.\n", - "ResultOCR#block 04: 0.92||Action comics\n", - "ResultOCR#block 05: 0.74||Then, seconds\n", - "ResultOCR#block 06: 0.62||Great caesar's ghost! \\ this /§ black magic! i ever saw!\n", - "ResultOCR#block 07: 0.87||T certainly isn't our earth, perry! 
look at the \\s|ze of those bees.\n", - "ResultOCR#block 08: 0.76||#3 watch out, clark!\n", - "ResultOCR#block 09: 0.73||Owwww.\n", - "ResultOCR#block 10: 0.95||\"yet the bee's stinger went ~~ right through my uniform and enetrated my skin! that means. the fabric of my superman costume has become ordinary, clot! &\n", - "ResultOCR#block 11: 0.86||Hurry! let's beat it b=fore we get stung, ¢ tool: puma\n", - "ResultOCR#block 12: 0.86||‘ggreat guns!...3gasp/5... pain i feel pain? as superman, i should be invilnerable | t ne unbreakasle gkin! under my clark kent clothes, tih wearing an indestructible superman uniform\n", - "ResultOCR#block 13: 1.00||Abruptly...\n", - "ResultOCR#block 14: 0.91||Great caesars ghost. he's spinning a web of g/ant, | silk strands as tough as steel!\n", - "ResultOCR#block 15: 0.89||I-i feel the heat of the sun...the pain of the bee-sting... the heavy weight of my { pack! every human discomfort... good grief! i've lost all \\my super-powersx ve become an ordinary mortal in this world.\n", - "ResultOCR#block 16: 0.81||(get back! that enormous spider- as if the sight of us excited him [into mad spinning\n", - "\n", - " ---------- Padded 8px ----------\n", - "ResultOCR#block 00: 0.00||\n", - "ResultOCR#block 01: 0.76||Ega5p/% everything s\\ w- whirling around me! t can't stand ue... slee\n", - "ResultOCR#block 02: 0.41||\\ help! help’\n", - "ResultOCR#block 03: 0.86||-i'm passing qut... ohh hh.\n", - "ResultOCR#block 04: 0.92||Action comics\n", - "ResultOCR#block 05: 0.00||\n", - "ResultOCR#block 06: 0.18||I ever saw/\n", - "ResultOCR#block 07: 0.86||It certainly isn't our earth, perry! poor a the size of thos!\n", - "ResultOCR#block 08: 0.62||“watch out” ial ae clark!\n", - "ResultOCR#block 09: 0.73||Owwww.\n", - "ResultOCR#block 10: 0.98||Yet the bee's stinger went right through my uniform and \\penetrated my skin! that means. the fabric of my superman costume has become ordinar! cloth!\n", - "ResultOCR#block 11: 0.93||Hurry. 
let's beat it before we get stung, too!\n", - "ResultOCR#block 12: 0.88||Ggreat guns!...2gasp/z...pain i feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! under my clark kent clothes, i'm wearing an /noestruct/ele $ superman uniform ! &\n", - "ResultOCR#block 13: 0.00||\n", - "ResultOCR#block 14: 0.91||Great caesai ghost. he's spinning a web of g/ant, silk strands -- as tough as steel!\n", - "ResultOCR#block 15: 0.88||I-i feel the heat of the sun...the pain of the bee-sting... the heavy weight of my ! every human discomfort... good grief! i've lost all \\my super-powers! ‘ve become an ordinary mortal in this world. x\n", - "ResultOCR#block 16: 0.60||As if the sight of us excited him {into mad spinning\n", - "\n", - " ---------- Extracted, init box ----------\n", - "ResultOCRExtracted#block 00: 0.90||Suddenly.\n", - "ResultOCRExtracted#block 01: 0.83||Fgasp!z everything § w- whirling around me!i can't stand ue.\n", - "ResultOCRExtracted#block 02: 0.89||Clark!i'm falling! help! help!\n", - "ResultOCRExtracted#block 03: 0.71||I-i'm passing ohhh?\n", - "ResultOCRExtracted#block 04: 0.92||Action comics\n", - "ResultOCRExtracted#block 05: 0.98||Then, seconds later..\n", - "ResultOCRExtracted#block 06: 0.91||Great caesar's ghost! this /s black magic! we've been transportel to the weirdest world tit ever saw.\n", - "ResultOCRExtracted#block 07: 0.90||...it certainly isnt our earth, perry. look at the size of those bees.\n", - "ResultOCRExtracted#block 08: 0.88||Watch out, clark!\n", - "ResultOCRExtracted#block 09: 0.62||Oowwwww.\n", - "ResultOCRExtracted#block 10: 0.99||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", - "ResultOCRExtracted#block 11: 0.93||Hurry. let's beat it before we get stung, too!\n", - "ResultOCRExtracted#block 12: 0.88||Ggreat guns!...2g/ pain t feel pain? as superman, i should be invulnerable! 1 have unbreakabl skin! 
under my clark kent clothes, i'm wearing an /noestruct/ble superman uniform *\n", - "ResultOCRExtracted#block 13: 0.96||Abruptly. ..\n", - "ResultOCRExtracted#block 14: 0.80||Great caesar's ghost. he's spinning a web of g/ant, silk strands --\n", - "ResultOCRExtracted#block 15: 0.89||I-i feel the heat of the sun. the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! t've lost all my super-powers. ve become an ordinary mortal im this world.\n", - "ResultOCRExtracted#block 16: 0.97||Get back. that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning\n", - "\n", - " ---------- Padded 4, extracted ----------\n", - "ResultOCRExtracted#block 00: 1.00||Suddenly...\n", - "ResultOCRExtracted#block 01: 0.83||Sgasp/z everything s w- whirling around me!i can't stand ue.\n", - "ResultOCRExtracted#block 02: 0.87||Clark! i'm falling’ help! help!\n", - "ResultOCRExtracted#block 03: 0.78||I-i'm passing ohhhh.\n", - "ResultOCRExtracted#block 04: 0.92||Action comics\n", - "ResultOCRExtracted#block 05: 0.98||Then, seconds later..\n", - "ResultOCRExtracted#block 06: 0.93||Great caesar's ghost! this as black magic! we've been transported to the weirdest world i ever saw!\n", - "ResultOCRExtracted#block 07: 0.90||...it certainly isnt our earth, perry. look at the size of those bees.\n", - "ResultOCRExtracted#block 08: 0.88||Watch out, clark!\n", - "ResultOCRExtracted#block 09: 0.67||Owwww.,\n", - "ResultOCRExtracted#block 10: 0.99||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", - "ResultOCRExtracted#block 11: 0.96||Hurry! let's beat it before we get stung, too!\n", - "ResultOCRExtracted#block 12: 0.89||Ggreat guns!...2gasp/:... pain t feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! 
under my clark kent clothes, i'm wearing an /noestruct/ble superman uniform !\n", - "ResultOCRExtracted#block 13: 1.00||Abruptly...\n", - "ResultOCRExtracted#block 14: 0.94||Great caesar's ghost! he's spinning a web of g/ant, \"silk strands -- as tough as steel!\n", - "ResultOCRExtracted#block 15: 0.89||I-i feel the heat of the sun., the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! t've lost all my super-powers. ve become an ordinary mortal in this world.\n", - "ResultOCRExtracted#block 16: 0.97||Get back! that enormous spider- like creature is going berserk, 25 if the sight of us excited him into mad spinning\n", - "\n", - " ---------- Padded 8, extracted ----------\n", - "ResultOCRExtracted#block 00: 0.91||Suddemly...\n", - "ResultOCRExtracted#block 01: 0.85||Sgasp/z everything s w- whirling around me!i can't stand up.\n", - "ResultOCRExtracted#block 02: 0.84||Clark! i'm falling’ _ help! help!\n", - "ResultOCRExtracted#block 03: 0.73||I-i'm passing ohhha.\n", - "ResultOCRExtracted#block 04: 0.92||Action comics\n", - "ResultOCRExtracted#block 05: 1.00||Then, seconds later...\n", - "ResultOCRExtracted#block 06: 0.90||Great caesar's ghost! f this /§ black magic! : we've been transported to the weirdest world i ever saw.\n", - "ResultOCRExtracted#block 07: 0.90||...it certainly isnt our earth, perry. look at the size of those bees.\n", - "ResultOCRExtracted#block 08: 0.88||Watch out, clark!\n", - "ResultOCRExtracted#block 09: 0.67||Owwww.,\n", - "ResultOCRExtracted#block 10: 0.99||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", - "ResultOCRExtracted#block 11: 0.96||Hurry! let's beat it before we get stung, too!\n", - "ResultOCRExtracted#block 12: 0.89||Ggreat guns!...2gasp/:... pain t feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! 
under my clark kent clothes, i'm wearing an /noestruct/ble superman uniform !\n", - "ResultOCRExtracted#block 13: 0.96||Abruptly. ..\n", - "ResultOCRExtracted#block 14: 0.93||Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!\n", - "ResultOCRExtracted#block 15: 0.89||I-t feel the heat of the sun., the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! t've lost all my super-powers. ve become an ordinary mortal in this world.\n", - "ResultOCRExtracted#block 16: 0.95||Get back! that enormous spider- like creature is going berserk, 25 if the sight of us excited him into mad spinning g [\n", - "\n", - " ---------- Padded 8, dilation 1 ----------\n", - "ResultOCRExtracted#block 00: 1.00||Suddenly...\n", - "ResultOCRExtracted#block 01: 0.83||Gasp! everything s w-whirling around wel] i can't stand\n", - "ResultOCRExtracted#block 02: 0.86||Clark!i'm falling! . help! help!\n", - "ResultOCRExtracted#block 03: 0.73||I-i'm passing ohhha.\n", - "ResultOCRExtracted#block 04: 0.92||Action comics\n", - "ResultOCRExtracted#block 05: 0.95||Then, seconds later.\n", - "ResultOCRExtracted#block 06: 0.91||Great caesar's ghost! e this /5 black magic! we've been transported to the weirdest world i ever saw/\n", - "ResultOCRExtracted#block 07: 0.91||...|it certainly isnt our earth, perry” look at the size of those bees!\n", - "ResultOCRExtracted#block 08: 0.88||Watch out, clark!\n", - "ResultOCRExtracted#block 09: 0.73||Owwww.\n", - "ResultOCRExtracted#block 10: 0.94||Yet the bee's stinger we right through my tform and penetrated my skin! that means. the fabric of my superman othe has become ordinary clot?\n", - "ResultOCRExtracted#block 11: 0.96||Hurry! let's beat it before we get stung, too!\n", - "ResultOCRExtracted#block 12: 0.82||Fain! as superman, i should be invulnerable! 1 have unbreakabl skin! 
under my clark kent clothes, i'm wearing an indestructible superman uniform !\n", - "ResultOCRExtracted#block 13: 0.00||\n", - "ResultOCRExtracted#block 14: 0.67||Great caesars ghost! he's spinning _ a web of g/ant,\n", - "ResultOCRExtracted#block 15: 0.86||I-i feel the heat of the sun. the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! i've lost all my super-powers. ve ie an ordinary mortal in this world.\n", - "ResultOCRExtracted#block 16: 0.96||Get back! that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning’ g [\n", - "\n", - " ---------- Pad 8, fract. 0.5 ----------\n", - "ResultOCRExtracted#block 00: 0.91||Suddemly...\n", - "ResultOCRExtracted#block 01: 0.87||Gasp/z everything s w- whirling around me!i can't stand up.\n", - "ResultOCRExtracted#block 02: 0.84||Clark! i'm falling’ _ help! help!\n", - "ResultOCRExtracted#block 03: 0.78||I-i'm passing ohhhh.\n", - "ResultOCRExtracted#block 04: 0.92||Action comics\n", - "ResultOCRExtracted#block 05: 1.00||Then, seconds later...\n", - "ResultOCRExtracted#block 06: 0.90||Great caesar's ghost! f this /§ black magic! : we've been transported to the weirdest world i ever saw.\n", - "ResultOCRExtracted#block 07: 0.92||...it certainly isnt our earth, perry! look at the size of those bees.\n", - "ResultOCRExtracted#block 08: 0.88||Watch out, clark!\n", - "ResultOCRExtracted#block 09: 0.67||Owwww.,\n", - "ResultOCRExtracted#block 10: 0.99||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", - "ResultOCRExtracted#block 11: 0.96||Hurry! let's beat it before we get stung, too!\n", - "ResultOCRExtracted#block 12: 0.90||Ggreat guns!...2gasp/:...pain i feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! 
under my clark kent clothes, i'm wearing an /nodestruct/ible superman uniform !\n", - "ResultOCRExtracted#block 13: 0.96||Abruptly. ..\n", - "ResultOCRExtracted#block 14: 0.93||Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!\n", - "ResultOCRExtracted#block 15: 0.89||I-i feel the heat of the sun. the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! i've lost all my super-powers. ve become an ordinary mortal in this world.\n", - "ResultOCRExtracted#block 16: 0.95||Get back! that enormous spider- like creature is going berserk, 25 if the sight of us excited him into mad spinning g [\n", - "\n", - " ---------- Pad 8, fract. 0.2 ----------\n", - "ResultOCRExtracted#block 00: 1.00||Suddenly...\n", - "ResultOCRExtracted#block 01: 0.87||Gasp/z everything s w- whirling around me!i can't stand up.\n", - "ResultOCRExtracted#block 02: 0.84||Clark! i'm falling’ _ help! help!\n", - "ResultOCRExtracted#block 03: 0.73||I-i'm passing ohhha.\n", - "ResultOCRExtracted#block 04: 0.92||Action comics\n", - "ResultOCRExtracted#block 05: 1.00||Then, seconds later...\n", - "ResultOCRExtracted#block 06: 0.90||Great caesar's ghost! f this /§ black magic! : we've been transported to the weirdest world i ever saw!\n", - "ResultOCRExtracted#block 07: 0.92||...it certainly isnt our earth, perry! look at the size of those bees.\n", - "ResultOCRExtracted#block 08: 0.88||Watch out, clark!\n", - "ResultOCRExtracted#block 09: 0.67||Owwww.,\n", - "ResultOCRExtracted#block 10: 0.99||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", - "ResultOCRExtracted#block 11: 0.96||Hurry! let's beat it before we get stung, too!\n", - "ResultOCRExtracted#block 12: 0.91||Ggreat guns!...2gasp/:... pain i feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! 
under my clark kent clothes, i'm wearing an indestructible superman uniform !\n", - "ResultOCRExtracted#block 13: 0.96||Abruptly....\n", - "ResultOCRExtracted#block 14: 0.93||Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!\n", - "ResultOCRExtracted#block 15: 0.89||I-i feel the heat of the sun., the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! i've lost all my super-powers. ve become an ordinary mortal in this world.\n", - "ResultOCRExtracted#block 16: 0.96||Get back! that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning gs [\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "---------- Initial box ----------\n", - "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.90\u001b[0m||Suddenly.\n", - "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.81\u001b[0m||>gasp!z everything's w- whirling around me!t can't stand sr rea\n", - "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.81\u001b[0m||Clark!i'm falling' “help! help! —\n", - "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.67\u001b[0m||I-i'm bs \"passing ohhh\n", - "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", - "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m1.00\u001b[0m||Then, seconds later\u001b[33m...\u001b[0m\n", - "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.89\u001b[0m||Great caesars ghost! \u001b[1m{\u001b[0m this \u001b[35m/\u001b[0m\u001b[95ms\u001b[0m black magic! we've been transportel to the weirdest world, tit ever saw!\n", - "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.91\u001b[0m||\u001b[33m...\u001b[0m|t certainly isn't our earth, perry. 
look at the size of those bees.\n", - "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.73\u001b[0m||Owwww.\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.85\u001b[0m||Yet the bee's stinger went ws. right through my uniform and penetrated my skin! that means. the fabric of pe ay costume has become dj\u001b[1m}\u001b[0m satie fued \u001b[1;36m1\u001b[0m,\n", - "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.87\u001b[0m||Hurry. let's beat it before we get stung, foo = aii\n", - "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.86\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2g, .pa/n i feel n pain! as superman, i should be > invulnerable! \u001b[1;36m1\u001b[0m have unbreakabl ‘skin! under my clark kent clothes, im wearing an woestructisle superman uniform ! =\n", - "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m1.00\u001b[0m||Abruptly\u001b[33m...\u001b[0m\n", - "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.77\u001b[0m||Great caesar's ghost. he's spinning a web of g/ant, sr strands --\n", - "ResultOCR#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-i feel the heat of the sun\u001b[33m...\u001b[0mthe pain of the bee-sting \u001b[33m...\u001b[0m the heavy weight of my pack! every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all my super-powers! i've become an ordinary mortal in this world. \u001b[35m/\u001b[0m\n", - "ResultOCR#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.84\u001b[0m||Enormous spider- like creature is going berserk, as if the sight of us excited him \u001b[1m(\u001b[0minto mad spinning get back! that \u001b[1;36m2\u001b[0m\n", - "\n", - " ---------- Default ----------\n", - "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m1.00\u001b[0m||Suddenly\u001b[33m...\u001b[0m\n", - "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.80\u001b[0m||Gasp! 
everything's w- whi rns atound me!i can't stand we a\n", - "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.87\u001b[0m||Clark!i'm falling! help! help\n", - "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.45\u001b[0m||I-i'm se ou\n", - "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", - "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.95\u001b[0m||Then, seconds later.\n", - "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.92\u001b[0m||Great caesar's ghost! this \u001b[35m/\u001b[0m\u001b[95ms\u001b[0m black magic! we've been transported, to the weirdest world, i ever saw!\n", - "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.93\u001b[0m||\u001b[33m...\u001b[0m|t certainly isn't our earth, perry look at the size of those bees!\n", - "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark\u001b[1m)\u001b[0m\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.67\u001b[0m||Owwww.,\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.97\u001b[0m||Yet the bee's stinger went = right through my uniform ando penetrated my skin! that mean the fabric of my superman costume has become ordinary, cloth! £\n", - "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.89\u001b[0m||Hurry. let's beat it before we get stung, tod yi\n", - "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.88\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2gasp/z\u001b[33m...\u001b[0mpain i feel n fan! as superman, i should be invulnerable! \u001b[1;36m1\u001b[0m have unbreakable skin! under my clark kent clothes, i'm wearing an \u001b[35m/noestruct/\u001b[0m\u001b[95mele\u001b[0m superman uniform !\n", - "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m1.00\u001b[0m||Abruptly\u001b[33m...\u001b[0m\n", - "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.76\u001b[0m||Great caesars ° ghost! 
he's spinning & web of g/a, silk strands\n", - "ResultOCR#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-i feel the heat of the sum\u001b[33m...\u001b[0mthe pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my \u001b[1m{\u001b[0m pack! every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all my super -powersx ve become an ordinary mortal in this world! j\n", - "ResultOCR#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.70\u001b[0m||Like creature is going berserk, as if the sight of us excited him into mad spinning, get back! that \\ enormous spider-\n", - "\n", - " ---------- Default, grey pad ----------\n", - "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.78\u001b[0m||Sudden.\n", - "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.82\u001b[0m||5gasp!z everything's | w- whirling around\u001b[1m]\u001b[0m me!t can't stand ue a\n", - "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.57\u001b[0m||Ark!i'm falling\n", - "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.85\u001b[0m||I-t'm eq \"passing out\u001b[33m...\u001b[0m ohhh.\n", - "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", - "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", - "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.88\u001b[0m||‘great caesar's ghost! || this \u001b[35m/\u001b[0m§ black magic! we've been transportel to the weirdest world, i ever saw.\n", - "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.87\u001b[0m||\"\u001b[33m...\u001b[0mit certainly isn't our | earth, perry look at the| \\s|ze of those bees.\n", - "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.73\u001b[0m||Owwww.\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.90\u001b[0m||Yet the bee's stinger went ~ right through my uniform and penetrated my skin! that means. 
the fabric of sera ry costume vis become din a s clots! a\n", - "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.80\u001b[0m||Hurry. let's \u001b[1m)\u001b[0m beat it before\u001b[1m]\u001b[0m we get stung, k \u001b[1;36m00\u001b[0m! __j\n", - "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.86\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m3gasp/=\u001b[33m...\u001b[0m rain t feel fan! as superman, i should be invulnerable® \u001b[1;36m1\u001b[0m have unbreakabli skin! under my clark kent clothes, ia wearing an \u001b[35m/noestryct/\u001b[0m\u001b[95miele\u001b[0m supe iperman uniform !\n", - "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.90\u001b[0m||Abruptly.\n", - "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.92\u001b[0m||Great caesar's | ghost! he's spinning k web of giant, silk strands -- as tough as steel! ne\u001b[1m]\u001b[0m\n", - "ResultOCR#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||\\i-t feel the heat of the sun\u001b[33m...\u001b[0mthe pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my & pack! every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all my super-powers i've become an ordinary mortal in this world. \u001b[35m/\u001b[0m\u001b[95m.\u001b[0m\n", - "ResultOCR#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.94\u001b[0m||\u001b[1m(\u001b[0mget back! that enormous spider- like creature \u001b[1;36m1\u001b[0m § going berserk, as if the sight of us excited him into mad spinning\u001b[1m]\u001b[0m\n", - "\n", - " ---------- Padded 4px ----------\n", - "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.90\u001b[0m||Suddenly.\n", - "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.83\u001b[0m||Fgasp/= everything's whirling around me!i can't stand ea\n", - "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.68\u001b[0m||Ie lottie eto clark!i'm falling help! 
help!\n", - "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.55\u001b[0m||I-i'm yr a ohh hh.\n", - "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", - "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.74\u001b[0m||Then, seconds\n", - "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.62\u001b[0m||Great caesar's ghost! \\ this \u001b[35m/\u001b[0m§ black magic! i ever saw!\n", - "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.87\u001b[0m||T certainly isn't our earth, perry! look at the \\s|ze of those bees.\n", - "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.76\u001b[0m||#\u001b[1;36m3\u001b[0m watch out, clark!\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.73\u001b[0m||Owwww.\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.95\u001b[0m||\"yet the bee's stinger went ~~ right through my uniform and enetrated my skin! that means. the fabric of my superman costume has become ordinary, clot! &\n", - "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.86\u001b[0m||Hurry! let's beat it \u001b[33mb\u001b[0m=\u001b[35mfore\u001b[0m we get stung, ¢ tool: puma\n", - "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.86\u001b[0m||‘ggreat guns!\u001b[33m...\u001b[0m3gasp/\u001b[1;36m5\u001b[0m\u001b[33m...\u001b[0m pain i feel pain? as superman, i should be invilnerable | t ne unbreakasle gkin! under my clark kent clothes, tih wearing an indestructible superman uniform\n", - "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m1.00\u001b[0m||Abruptly\u001b[33m...\u001b[0m\n", - "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.91\u001b[0m||Great caesars ghost. he's spinning a web of g/ant, | silk strands as tough as steel!\n", - "ResultOCR#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-i feel the heat of the sun\u001b[33m...\u001b[0mthe pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my \u001b[1m{\u001b[0m pack! 
every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all \\my super-powersx ve become an ordinary mortal in this world.\n", - "ResultOCR#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.81\u001b[0m||\u001b[1m(\u001b[0mget back! that enormous spider- as if the sight of us excited him \u001b[1m[\u001b[0minto mad spinning\n", - "\n", - " ---------- Padded 8px ----------\n", - "ResultOCR#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", - "ResultOCR#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.76\u001b[0m||Ega5p/% everything s\\ w- whirling around me! t can't stand ue\u001b[33m...\u001b[0m slee\n", - "ResultOCR#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.41\u001b[0m||\\ help! help’\n", - "ResultOCR#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.86\u001b[0m||-i'm passing qut\u001b[33m...\u001b[0m ohh hh.\n", - "ResultOCR#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", - "ResultOCR#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", - "ResultOCR#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.18\u001b[0m||I ever saw/\n", - "ResultOCR#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.86\u001b[0m||It certainly isn't our earth, perry! poor a the size of thos!\n", - "ResultOCR#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.62\u001b[0m||“watch out” ial ae clark!\n", - "ResultOCR#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.73\u001b[0m||Owwww.\n", - "ResultOCR#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.98\u001b[0m||Yet the bee's stinger went right through my uniform and \\penetrated my skin! that means. the fabric of my superman costume has become ordinar! cloth!\n", - "ResultOCR#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.93\u001b[0m||Hurry. let's beat it before we get stung, too!\n", - "ResultOCR#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.88\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2gasp/z\u001b[33m...\u001b[0mpain i feel fan! as superman, i should be invulnerable! \u001b[1;36m1\u001b[0m have unbreakabl skin! 
under my clark kent clothes, i'm wearing an \u001b[35m/noestruct/\u001b[0m\u001b[95mele\u001b[0m $ superman uniform ! &\n", - "ResultOCR#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", - "ResultOCR#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.91\u001b[0m||Great caesai ghost. he's spinning a web of g/ant, silk strands -- as tough as steel!\n", - "ResultOCR#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.88\u001b[0m||I-i feel the heat of the sun\u001b[33m...\u001b[0mthe pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my ! every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all \\my super-powers! ‘ve become an ordinary mortal in this world. x\n", - "ResultOCR#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.60\u001b[0m||As if the sight of us excited him \u001b[1m{\u001b[0minto mad spinning\n", - "\n", - " ---------- Extracted, init box ----------\n", - "ResultOCRExtracted#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.90\u001b[0m||Suddenly.\n", - "ResultOCRExtracted#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.83\u001b[0m||Fgasp!z everything § w- whirling around me!i can't stand ue.\n", - "ResultOCRExtracted#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.89\u001b[0m||Clark!i'm falling! help! help!\n", - "ResultOCRExtracted#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.71\u001b[0m||I-i'm passing ohhh?\n", - "ResultOCRExtracted#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", - "ResultOCRExtracted#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.98\u001b[0m||Then, seconds later..\n", - "ResultOCRExtracted#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.91\u001b[0m||Great caesar's ghost! this \u001b[35m/\u001b[0m\u001b[95ms\u001b[0m black magic! we've been transportel to the weirdest world tit ever saw.\n", - "ResultOCRExtracted#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.90\u001b[0m||\u001b[33m...\u001b[0mit certainly isnt our earth, perry. 
look at the size of those bees.\n", - "ResultOCRExtracted#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", - "ResultOCRExtracted#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.62\u001b[0m||Oowwwww.\n", - "ResultOCRExtracted#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.99\u001b[0m||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", - "ResultOCRExtracted#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.93\u001b[0m||Hurry. let's beat it before we get stung, too!\n", - "ResultOCRExtracted#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.88\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2g/ pain t feel pain? as superman, i should be invulnerable! \u001b[1;36m1\u001b[0m have unbreakabl skin! under my clark kent clothes, i'm wearing an \u001b[35m/noestruct/\u001b[0m\u001b[95mble\u001b[0m superman uniform *\n", - "ResultOCRExtracted#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.96\u001b[0m||Abruptly. ..\n", - "ResultOCRExtracted#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.80\u001b[0m||Great caesar's ghost. he's spinning a web of g/ant, silk strands --\n", - "ResultOCRExtracted#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-i feel the heat of the sun. the pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my pack! every human discomfort\u001b[33m...\u001b[0m good grief! t've lost all my super-powers. ve become an ordinary mortal im this world.\n", - "ResultOCRExtracted#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.97\u001b[0m||Get back. 
that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning\n", - "\n", - " ---------- Padded \u001b[1;36m4\u001b[0m, extracted ----------\n", - "ResultOCRExtracted#block \u001b[1;36m00\u001b[0m: \u001b[1;36m1.00\u001b[0m||Suddenly\u001b[33m...\u001b[0m\n", - "ResultOCRExtracted#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.83\u001b[0m||Sgasp/z everything s w- whirling around me!i can't stand ue.\n", - "ResultOCRExtracted#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.87\u001b[0m||Clark! i'm falling’ help! help!\n", - "ResultOCRExtracted#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.78\u001b[0m||I-i'm passing ohhhh.\n", - "ResultOCRExtracted#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", - "ResultOCRExtracted#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.98\u001b[0m||Then, seconds later..\n", - "ResultOCRExtracted#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.93\u001b[0m||Great caesar's ghost! this as black magic! we've been transported to the weirdest world i ever saw!\n", - "ResultOCRExtracted#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.90\u001b[0m||\u001b[33m...\u001b[0mit certainly isnt our earth, perry. look at the size of those bees.\n", - "ResultOCRExtracted#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", - "ResultOCRExtracted#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.67\u001b[0m||Owwww.,\n", - "ResultOCRExtracted#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.99\u001b[0m||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", - "ResultOCRExtracted#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.96\u001b[0m||Hurry! let's beat it before we get stung, too!\n", - "ResultOCRExtracted#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.89\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2gasp/:\u001b[33m...\u001b[0m pain t feel fan! as superman, i should be invulnerable! 
\u001b[1;36m1\u001b[0m have unbreakabl skin! under my clark kent clothes, i'm wearing an \u001b[35m/noestruct/\u001b[0m\u001b[95mble\u001b[0m superman uniform !\n", - "ResultOCRExtracted#block \u001b[1;36m13\u001b[0m: \u001b[1;36m1.00\u001b[0m||Abruptly\u001b[33m...\u001b[0m\n", - "ResultOCRExtracted#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.94\u001b[0m||Great caesar's ghost! he's spinning a web of g/ant, \"silk strands -- as tough as steel!\n", - "ResultOCRExtracted#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-i feel the heat of the sun., the pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my pack! every human discomfort\u001b[33m...\u001b[0m good grief! t've lost all my super-powers. ve become an ordinary mortal in this world.\n", - "ResultOCRExtracted#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.97\u001b[0m||Get back! that enormous spider- like creature is going berserk, \u001b[1;36m25\u001b[0m if the sight of us excited him into mad spinning\n", - "\n", - " ---------- Padded \u001b[1;36m8\u001b[0m, extracted ----------\n", - "ResultOCRExtracted#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.91\u001b[0m||Suddemly\u001b[33m...\u001b[0m\n", - "ResultOCRExtracted#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.85\u001b[0m||Sgasp/z everything s w- whirling around me!i can't stand up.\n", - "ResultOCRExtracted#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.84\u001b[0m||Clark! i'm falling’ _ help! help!\n", - "ResultOCRExtracted#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.73\u001b[0m||I-i'm passing ohhha.\n", - "ResultOCRExtracted#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", - "ResultOCRExtracted#block \u001b[1;36m05\u001b[0m: \u001b[1;36m1.00\u001b[0m||Then, seconds later\u001b[33m...\u001b[0m\n", - "ResultOCRExtracted#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.90\u001b[0m||Great caesar's ghost! f this \u001b[35m/\u001b[0m§ black magic! 
: we've been transported to the weirdest world i ever saw.\n", - "ResultOCRExtracted#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.90\u001b[0m||\u001b[33m...\u001b[0mit certainly isnt our earth, perry. look at the size of those bees.\n", - "ResultOCRExtracted#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", - "ResultOCRExtracted#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.67\u001b[0m||Owwww.,\n", - "ResultOCRExtracted#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.99\u001b[0m||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", - "ResultOCRExtracted#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.96\u001b[0m||Hurry! let's beat it before we get stung, too!\n", - "ResultOCRExtracted#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.89\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2gasp/:\u001b[33m...\u001b[0m pain t feel fan! as superman, i should be invulnerable! \u001b[1;36m1\u001b[0m have unbreakabl skin! under my clark kent clothes, i'm wearing an \u001b[35m/noestruct/\u001b[0m\u001b[95mble\u001b[0m superman uniform !\n", - "ResultOCRExtracted#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.96\u001b[0m||Abruptly. ..\n", - "ResultOCRExtracted#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.93\u001b[0m||Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!\n", - "ResultOCRExtracted#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-t feel the heat of the sun., the pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my pack! every human discomfort\u001b[33m...\u001b[0m good grief! t've lost all my super-powers. ve become an ordinary mortal in this world.\n", - "ResultOCRExtracted#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.95\u001b[0m||Get back! 
that enormous spider- like creature is going berserk, \u001b[1;36m25\u001b[0m if the sight of us excited him into mad spinning g \u001b[1m[\u001b[0m\n", - "\n", - " ---------- Padded \u001b[1;36m8\u001b[0m, dilation \u001b[1;36m1\u001b[0m ----------\n", - "ResultOCRExtracted#block \u001b[1;36m00\u001b[0m: \u001b[1;36m1.00\u001b[0m||Suddenly\u001b[33m...\u001b[0m\n", - "ResultOCRExtracted#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.83\u001b[0m||Gasp! everything s w-whirling around wel\u001b[1m]\u001b[0m i can't stand\n", - "ResultOCRExtracted#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.86\u001b[0m||Clark!i'm falling! . help! help!\n", - "ResultOCRExtracted#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.73\u001b[0m||I-i'm passing ohhha.\n", - "ResultOCRExtracted#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", - "ResultOCRExtracted#block \u001b[1;36m05\u001b[0m: \u001b[1;36m0.95\u001b[0m||Then, seconds later.\n", - "ResultOCRExtracted#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.91\u001b[0m||Great caesar's ghost! e this \u001b[35m/\u001b[0m\u001b[95m5\u001b[0m black magic! we've been transported to the weirdest world i ever saw/\n", - "ResultOCRExtracted#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.91\u001b[0m||\u001b[33m...\u001b[0m|it certainly isnt our earth, perry” look at the size of those bees!\n", - "ResultOCRExtracted#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", - "ResultOCRExtracted#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.73\u001b[0m||Owwww.\n", - "ResultOCRExtracted#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.94\u001b[0m||Yet the bee's stinger we right through my tform and penetrated my skin! that means. the fabric of my superman othe has become ordinary clot?\n", - "ResultOCRExtracted#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.96\u001b[0m||Hurry! let's beat it before we get stung, too!\n", - "ResultOCRExtracted#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.82\u001b[0m||Fain! 
as superman, i should be invulnerable! \u001b[1;36m1\u001b[0m have unbreakabl skin! under my clark kent clothes, i'm wearing an indestructible superman uniform !\n", - "ResultOCRExtracted#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.00\u001b[0m||\n", - "ResultOCRExtracted#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.67\u001b[0m||Great caesars ghost! he's spinning _ a web of g/ant,\n", - "ResultOCRExtracted#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.86\u001b[0m||I-i feel the heat of the sun. the pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my pack! every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all my super-powers. ve ie an ordinary mortal in this world.\n", - "ResultOCRExtracted#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.96\u001b[0m||Get back! that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning’ g \u001b[1m[\u001b[0m\n", - "\n", - " ---------- Pad \u001b[1;36m8\u001b[0m, fract. \u001b[1;36m0.5\u001b[0m ----------\n", - "ResultOCRExtracted#block \u001b[1;36m00\u001b[0m: \u001b[1;36m0.91\u001b[0m||Suddemly\u001b[33m...\u001b[0m\n", - "ResultOCRExtracted#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.87\u001b[0m||Gasp/z everything s w- whirling around me!i can't stand up.\n", - "ResultOCRExtracted#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.84\u001b[0m||Clark! i'm falling’ _ help! help!\n", - "ResultOCRExtracted#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.78\u001b[0m||I-i'm passing ohhhh.\n", - "ResultOCRExtracted#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", - "ResultOCRExtracted#block \u001b[1;36m05\u001b[0m: \u001b[1;36m1.00\u001b[0m||Then, seconds later\u001b[33m...\u001b[0m\n", - "ResultOCRExtracted#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.90\u001b[0m||Great caesar's ghost! f this \u001b[35m/\u001b[0m§ black magic! 
: we've been transported to the weirdest world i ever saw.\n", - "ResultOCRExtracted#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.92\u001b[0m||\u001b[33m...\u001b[0mit certainly isnt our earth, perry! look at the size of those bees.\n", - "ResultOCRExtracted#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", - "ResultOCRExtracted#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.67\u001b[0m||Owwww.,\n", - "ResultOCRExtracted#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.99\u001b[0m||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", - "ResultOCRExtracted#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.96\u001b[0m||Hurry! let's beat it before we get stung, too!\n", - "ResultOCRExtracted#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.90\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2gasp/:\u001b[33m...\u001b[0mpain i feel fan! as superman, i should be invulnerable! \u001b[1;36m1\u001b[0m have unbreakabl skin! under my clark kent clothes, i'm wearing an \u001b[35m/nodestruct/\u001b[0m\u001b[95mible\u001b[0m superman uniform !\n", - "ResultOCRExtracted#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.96\u001b[0m||Abruptly. ..\n", - "ResultOCRExtracted#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.93\u001b[0m||Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!\n", - "ResultOCRExtracted#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-i feel the heat of the sun. the pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my pack! every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all my super-powers. ve become an ordinary mortal in this world.\n", - "ResultOCRExtracted#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.95\u001b[0m||Get back! 
that enormous spider- like creature is going berserk, \u001b[1;36m25\u001b[0m if the sight of us excited him into mad spinning g \u001b[1m[\u001b[0m\n", - "\n", - " ---------- Pad \u001b[1;36m8\u001b[0m, fract. \u001b[1;36m0.2\u001b[0m ----------\n", - "ResultOCRExtracted#block \u001b[1;36m00\u001b[0m: \u001b[1;36m1.00\u001b[0m||Suddenly\u001b[33m...\u001b[0m\n", - "ResultOCRExtracted#block \u001b[1;36m01\u001b[0m: \u001b[1;36m0.87\u001b[0m||Gasp/z everything s w- whirling around me!i can't stand up.\n", - "ResultOCRExtracted#block \u001b[1;36m02\u001b[0m: \u001b[1;36m0.84\u001b[0m||Clark! i'm falling’ _ help! help!\n", - "ResultOCRExtracted#block \u001b[1;36m03\u001b[0m: \u001b[1;36m0.73\u001b[0m||I-i'm passing ohhha.\n", - "ResultOCRExtracted#block \u001b[1;36m04\u001b[0m: \u001b[1;36m0.92\u001b[0m||Action comics\n", - "ResultOCRExtracted#block \u001b[1;36m05\u001b[0m: \u001b[1;36m1.00\u001b[0m||Then, seconds later\u001b[33m...\u001b[0m\n", - "ResultOCRExtracted#block \u001b[1;36m06\u001b[0m: \u001b[1;36m0.90\u001b[0m||Great caesar's ghost! f this \u001b[35m/\u001b[0m§ black magic! : we've been transported to the weirdest world i ever saw!\n", - "ResultOCRExtracted#block \u001b[1;36m07\u001b[0m: \u001b[1;36m0.92\u001b[0m||\u001b[33m...\u001b[0mit certainly isnt our earth, perry! look at the size of those bees.\n", - "ResultOCRExtracted#block \u001b[1;36m08\u001b[0m: \u001b[1;36m0.88\u001b[0m||Watch out, clark!\n", - "ResultOCRExtracted#block \u001b[1;36m09\u001b[0m: \u001b[1;36m0.67\u001b[0m||Owwww.,\n", - "ResultOCRExtracted#block \u001b[1;36m10\u001b[0m: \u001b[1;36m0.99\u001b[0m||Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!\n", - "ResultOCRExtracted#block \u001b[1;36m11\u001b[0m: \u001b[1;36m0.96\u001b[0m||Hurry! 
let's beat it before we get stung, too!\n", - "ResultOCRExtracted#block \u001b[1;36m12\u001b[0m: \u001b[1;36m0.91\u001b[0m||Ggreat guns!\u001b[33m...\u001b[0m2gasp/:\u001b[33m...\u001b[0m pain i feel fan! as superman, i should be invulnerable! \u001b[1;36m1\u001b[0m have unbreakabl skin! under my clark kent clothes, i'm wearing an indestructible superman uniform !\n", - "ResultOCRExtracted#block \u001b[1;36m13\u001b[0m: \u001b[1;36m0.96\u001b[0m||Abruptly\u001b[33m...\u001b[0m.\n", - "ResultOCRExtracted#block \u001b[1;36m14\u001b[0m: \u001b[1;36m0.93\u001b[0m||Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!\n", - "ResultOCRExtracted#block \u001b[1;36m15\u001b[0m: \u001b[1;36m0.89\u001b[0m||I-i feel the heat of the sun., the pain of the bee-sting\u001b[33m...\u001b[0m the heavy weight of my pack! every human discomfort\u001b[33m...\u001b[0m good grief! i've lost all my super-powers. ve become an ordinary mortal in this world.\n", - "ResultOCRExtracted#block \u001b[1;36m16\u001b[0m: \u001b[1;36m0.96\u001b[0m||Get back! that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning gs \u001b[1m[\u001b[0m\n", - "\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_action_experiment: ExperimentOCR = cast(ExperimentOCR, ExperimentOCR.from_image(\n", - " CONTEXT, 'Tesseract', 'Action_Comics_1960-01-00_(262).JPG'))\n", - "image_action_experiment.display()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 114, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_action_experiment.plot_accuracies()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 115, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Great caesars ghost. he's spinning a web of g/ant, | silk strands as tough as steel!
0.91
\n", - "
\n", - "
Great Caesar's Ghost! He's spinning a web of giant,⎕⎕ silk strands-- as tough as steel!

Great caesars ghost. he's spinning a web of g/ant,⎕| silk strands⎕⎕ as tough as steel!
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_action_experiment.result(14, CropMethod.PADDED_4)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 116, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_action_experiment.perform_methods(plot_acc=True)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Everything Everywhere All at Once\n" - ] - }, - { - "cell_type": "code", - "execution_count": 117, - "metadata": {}, - "outputs": [], - "source": [ - "CONTEXT.reset()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## ExperimentsVisor" - ] - }, - { - "cell_type": "code", - "execution_count": 121, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "class ExperimentsVisor(ContextVisor):\n", - " ctx: OCRExperimentContext\n", - "\n", - " def update_output(self, \n", - " model: OCRModel | None = None,\n", - " image_idx: ImgIdT | None = None,\n", - " display_option: DisplayOptions | None = None, \n", - " **kwargs):\n", - " model_selector, image_selector, content_selector, result_visor = self._comps()\n", - " if model is not None:\n", - " exp_ctx = result_visor.ctx\n", - " exp_ctx.ocr_model = list(model_selector.models.keys())[model.value]\n", - " result_visor.ctx = exp_ctx\n", - " if image_idx is not None:\n", - " img_ctx = ImageContext(self.ctx, image_idx)\n", - " result_visor.ctx.ctx = img_ctx\n", - " display_option = content_selector.values['display_option']\n", - " if display_option is not None and display_option != DisplayOptions.RESULTS:\n", - " result_visor.hide()\n", - " if display_option == DisplayOptions.BEST_RESULTS:\n", - " result_visor.best_results()\n", - " elif display_option == DisplayOptions.DATAFRAME:\n", - " result_visor.pd_to_html()\n", - " else:\n", - " content_selector.display_content(image_selector.image_ctx, display_option)\n", - " else:\n", - " result_visor.show()\n", - " result_visor.update_output(**kwargs)\n", - "\n", - " def _comps(self):\n", - " cc = self.comps\n", - " msel: ModelSelector = cc['model_selector'] # type: ignore\n", - " isel: ImageSelector = 
cc['image_selector'] # type: ignore\n", - " cs: ContentSelector = cc['content_selector'] # type: ignore\n", - " rv: ResultVisor = cc['result_visor'] # type: ignore\n", - " return msel, isel, cs, rv\n", - "\n", - " def setup_ui(self):\n", - " ctls = self.controls.values()\n", - " msw, isw, csw, rvw = [_.w for _ in self._comps()]\n", - " return W.VBox([W.HBox([msw, isw, csw, *ctls]), rvw,])\n", - "\n", - " def __init__(self, \n", - " ctx: OCRExperimentContext,\n", - " image_idx: ImgIdT | str | Path = 0,\n", - " ocr_model: OCRModel = OCRModel.TESSERACT,\n", - " display_option: DisplayOptions = DisplayOptions.RESULTS,\n", - " all_boxes: bool = False,\n", - " box_idx: int = 0,\n", - " all_methods: bool = False,\n", - " method: CropMethod=CropMethod.INITIAL_BOX,\n", - " ocr_models: dict[str, OCRModel] = {'Tesseract': OCRModel.TESSERACT},\n", - " out: W.Output | None = None,\n", - " ):\n", - " if not isinstance(ctx, OCRExperimentContext):\n", - " raise ValueError(\"ctx must be an OCRExperimentContext\")\n", - " exp = ExperimentOCR.from_image(ctx, 'Tesseract', image_idx)\n", - " if not exp:\n", - " raise ValueError(f\"Image {image_idx} not found in experiment context\")\n", - " \n", - " out = out or self.out\n", - " model_selector = ModelSelector(ctx, ocr_model=ocr_model, \n", - " ocr_models=ocr_models, out=out)\n", - " image_selector = ImageSelector(ctx, image_idx=image_idx, out=out)\n", - " content_selector = ContentSelector(ctx, display_option=display_option, out=out)\n", - " result_visor = ResultVisor(exp, out=out,\n", - " all_boxes=all_boxes, box_idx=box_idx, all_methods=all_methods, method=method)\n", - "\n", - " super().__init__(ctx, {}, out=out, \n", - " ctxs={'model_selector': model_selector, 'image_selector': image_selector, 'content_selector': content_selector, \n", - " 'result_visor': result_visor}\n", - " )\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualize all" - ] - }, - { - "cell_type": "code", - "execution_count": 
122, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c722001cdbcf444da9b56a366c6723e1", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(HBox(children=(HBox(children=(HBox(children=(Dropdown(layout=Layout(width='fit-content'), optio…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "8a95135fcb03495f94fb726416f3df54", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# tesseract_experiment = ExperimentsVisor(CONTEXT)\n", - "tesseract_experiment = ExperimentsVisor(CONTEXT, BASE_IMAGE_IDX)\n", - "\n", - "test_eq(tesseract_experiment.all_values, {\n", - " 'image_selector': {'image_idx': 20},\n", - " 'content_selector': {'display_option': DisplayOptions.RESULTS},\n", - " 'result_visor': {\n", - " 'all_boxes': False,\n", - " 'box_idx': 0,\n", - " 'all_methods': False,\n", - " 'method': CropMethod.INITIAL_BOX,\n", - " },\n", - " 'model_selector': {'model': OCRModel.TESSERACT},\n", - " 'self': {}\n", - "})\n", - "\n", - "tesseract_experiment\n" - ] - }, - { - "cell_type": "code", - "execution_count": 121, - "metadata": {}, - "outputs": [], - "source": [ - "tesseract_experiment.update(display_option=DisplayOptions.BOXES)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Colophon\n", - "----\n" - ] - }, - { - "cell_type": "code", - "execution_count": 122, - "metadata": {}, - "outputs": [], - "source": [ - "import fastcore.all as FC\n", - "from nbdev.export import nb_export\n" - ] - }, - { - "cell_type": "code", - "execution_count": 124, - "metadata": {}, - "outputs": [], - "source": [ - "if FC.IN_NOTEBOOK:\n", - " nb_export('experiments.ipynb', '.')\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - 
"language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/_testbed/experiments.py b/_testbed/experiments.py deleted file mode 100644 index 27d56ba6..00000000 --- a/_testbed/experiments.py +++ /dev/null @@ -1,1995 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: experiments.ipynb. - -# %% experiments.ipynb 7 -from __future__ import annotations - -import dataclasses -import difflib -import functools -import json -import shutil -from collections import defaultdict -from enum import Enum -from pathlib import Path -from typing import Any -from typing import Callable -from typing import cast -from typing import Mapping -from typing import Self -from typing import TypeAlias - -import fastcore.all as FC -import ipywidgets as W -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import pcleaner.config as cfg -import pcleaner.ctd_interface as ctm -import pcleaner.image_ops as ops -import pcleaner.ocr.ocr as ocr -import pcleaner.structures as st -import torch -from IPython.display import clear_output -from IPython.display import display -from IPython.display import HTML -from ipywidgets.widgets.interaction import show_inline_matplotlib_plots -from loguru import logger -from pcleaner.ocr.ocr_tesseract import TesseractOcr -from PIL import Image -from PIL import ImageFilter -from rich.console import Console -from tqdm.notebook import tqdm - - -# %% auto 0 -__all__ = ['CM', 'SubjIdT', 'ImgIdT', 'BoxIdT', 'ImgSpecT', 'remove_multiple_whitespaces', 'postprocess_ocr', - 'accuracy_ocr_naive', 'accuracy_ocr_difflib', 'ground_truth_path', 'read_ground_truth', - 'dilate_by_fractional_pixel', 'extract_text', 'lang2pcleaner', 'lang2tesseract', 'ResultOCR', - 
'ResultOCRExtracted', 'CropMethod', 'crop_by_image', 'crop_by_extracted', 'ResultSet', 'ResultSetDefault', - 'results_to_dict', 'dict_to_results', 'ExperimentSubject', 'ExperimentContext', 'ImageContext', - 'OCRExperimentContext', 'ContextVisor', 'ImageSelector', 'OCRContextVisor', 'OCRModel', 'ModelSelector', - 'DisplayOptions', 'ContentSelector', 'ImageContextVisor', 'Experiment', 'ExperimentOCR', - 'ExperimentOCRMethod', 'ResultVisor', 'ExperimentVisor', 'ExperimentsVisor'] - -# %% experiments.ipynb 8 -from helpers import * -from ocr_metric import * - - -# %% experiments.ipynb 12 -console = Console(width=104, tab_size=4, force_jupyter=True) -cprint = console.print - - -# %% experiments.ipynb 20 -def remove_multiple_whitespaces(text): - return ' '.join(text.split()) - - -def postprocess_ocr(text): - "Basic postprocessing for English Tesseract OCR results." - return ' '.join(remove_multiple_whitespaces(text).splitlines()).capitalize() - -def accuracy_ocr_naive(text, ground_truth): - return sum(1 for a, b in zip(text, ground_truth) if a == b) / len(text) - - -def accuracy_ocr_difflib(text, ground_truth): - """ - Calculates the OCR accuracy based on the similarity between the OCR text and the ground truth text, - using difflib's SequenceMatcher to account for differences in a manner similar to git diffs. - - :param text: The OCR-generated text. - :param ground_truth: The ground truth text. - :return: A float representing the similarity ratio between the OCR text and the ground truth, - where 1.0 is identical. 
- """ - # Initialize the SequenceMatcher with the OCR text and the ground truth - matcher = difflib.SequenceMatcher(None, text, ground_truth) - - # Get the similarity ratio - similarity_ratio = matcher.ratio() - - return similarity_ratio - -# %% experiments.ipynb 22 -def ground_truth_path(page_data: st.PageData): - path = Path(page_data.original_path) - return path.with_stem(path.stem + '_gt').with_suffix('.txt') - - -def read_ground_truth(page_data: st.PageData): - gts_path = ground_truth_path(page_data) - if gts_path.exists(): - gts = gts_path.read_text(encoding="utf-8").splitlines() - else: - gts = ["" for _ in range(len(page_data.boxes))] - return gts - - -# %% experiments.ipynb 24 -def dilate_by_fractional_pixel(image, dilation_fraction, filter_base_size=3): - """ - Dilates an image by a specified fractional pixel amount. The function calculates - the necessary scaling factor and filter size based on the desired dilation fraction. - - :param image: A PIL Image object (1-bit mode). - :param dilation_fraction: The desired fractional pixel amount for dilation (e.g., 0.2). - :param filter_base_size: The base size of the dilation filter to apply on the scaled image. - This size is adjusted based on the scaling factor to achieve the - desired dilation effect. - :return: A PIL Image object after dilation, converted back to grayscale. 
- """ - # Calculate the scale factor based on the desired dilation fraction - scale_factor = int(1 / dilation_fraction) - - # Adjust the filter size based on the scale factor - # This ensures the dilation effect is proportional to the desired fraction - filter_size = max(1, filter_base_size * scale_factor // 5) - - # Convert the image to grayscale for more nuanced intermediate values - image_gray = image.convert("L") - - # Resize the image to a larger size using bicubic interpolation - larger_size = (int(image.width * scale_factor), int(image.height * scale_factor)) - image_resized = image_gray.resize(larger_size, Image.BICUBIC) - - # Apply the dilation filter to the resized image - dilated_image = image_resized.filter(ImageFilter.MaxFilter(filter_size)) - - # Resize the image back to its original size using bicubic interpolation - image_dilated_fractional_pixel = dilated_image.resize(image.size, Image.BICUBIC) - - return image_dilated_fractional_pixel - - -# %% experiments.ipynb 25 -def extract_text(image, text_mask, box): - cropped_image = crop_box(box, image) - cropped_mask = crop_box(box, text_mask) - extracted = ops.extract_text(cropped_image, cropped_mask) - return cropped_image, cropped_mask, extracted - - -# %% experiments.ipynb 27 -_lang2pcleaner = {'English': st.DetectedLang.ENG, 'Japanese': st.DetectedLang.JA, 'Spanish': st.DetectedLang.ENG, - 'French':st.DetectedLang.ENG} -# _lang2tesseract = {'English': 'eng', 'Japanese': 'jpn'} -_lang2tesseract = {'English': 'eng', 'Japanese': 'jpn_vert', 'Spanish': 'spa', 'French': 'fra'} - - -# %% experiments.ipynb 28 -def lang2pcleaner(lang: str): - return _lang2pcleaner[lang] - -def lang2tesseract(lang: str): - return _lang2tesseract[lang] - - -# %% experiments.ipynb 35 -@dataclasses.dataclass -class ResultOCR: - block_idx: int - image: Image.Image | None - ocr: str - page_data: st.PageData - gts: list[str] - description: str = dataclasses.field(default='', kw_only=True) - - def __post_init__(self): - if 
self.image is None: - cache_path = self.cache_path() - if cache_path.exists(): - self.image = Image.open(cache_path) - - @property - def acc(self): - self._acc = accuracy_ocr_difflib(self.ocr, self.gts[self.block_idx]) - return self._acc - @property - def suffix(self): return f"{self.block_idx}_{self.description}" - - def diff_tagged(self): - _, html2 = get_text_diffs_html(self.gts[self.block_idx], self.ocr, False) - return f"{html2}" - - def cache_path(self, suffix: str | None = None): - suffix = self.suffix + (('_'+suffix) if suffix else '') - parent = Path(self.page_data.image_path).parent - img_name = Path(self.page_data.original_path).stem - box_image_path = parent / f"{img_name}_{suffix}.png" - return box_image_path - - def cache_image(self, image: Image.Image | None = None, suffix: str | None = None): - image = image or (self.image if not suffix else None) - box_image_path = self.cache_path(suffix) - if image and not box_image_path.exists(): - image.save(box_image_path) - return box_image_path - - - def as_html(self): - acc_html = f"
{self.acc:.2f}" - box_image_path = self.cache_image() - html1 = get_columns_html([[box_image_path], [self.ocr + acc_html]]) - html_str1, html_str2 = get_text_diffs_html(self.gts[self.block_idx], self.ocr) - html2 = f"
{html_str1}
{html_str2}
" - return html1 + '\n
\n' + html2 - - def __repr__(self): - return f"{type(self).__name__}#block {self.block_idx:02}: {self.acc:.2f}||{self.ocr}" - - def display(self): display(HTML(self.as_html())) - - def _ipython_display_(self): self.display() - - def to_dict(self): - d = dataclasses.asdict(self) - d['image'] = d['page_data'] = d['gts'] = None - return d - - # @classmethod - # def from_dict(cls, d: dict, page_data: st.PageData, gts: list[str]): - # return cls(**(d | {'page_data':page_data, 'gts':gts})) - - -@dataclasses.dataclass -class ResultOCRExtracted(ResultOCR): - - def __repr__(self): return super().__repr__() - def as_html(self): - html_str1, html_str2 = get_text_diffs_html(self.gts[self.block_idx], self.ocr) - diff_html = f"
{html_str1}
{html_str2}
" - cropped_image_path = self.cache_image(None, "cropped") - cropped_mask_path = self.cache_image(None, "mask") - result_path = self.cache_image() - return '\n
\n'.join([ - get_image_grid_html([cropped_image_path, cropped_mask_path, result_path], 1, 3), - acc_as_html(self.acc), - diff_html - ]) - - -# %% experiments.ipynb 37 -class CropMethod(Enum): - INITIAL_BOX = 'Initial box' - DEFAULT = 'Default' - DEFAULT_GREY_PAD = 'Default, grey pad' - PADDED_4 = 'Padded 4px' - PADDED_8 = 'Padded 8px' - EXTRACTED_INIT_BOX = 'Extracted, init box' - PADDED_4_EXTRACTED = 'Padded 4, extracted' - PADDED_8_EXTRACTED = 'Padded 8, extracted' - PADDED_8_DILATION_1 = 'Padded 8, dilation 1' - PAD_8_FRACT_0_5 = 'Pad 8, fract. 0.5' - PAD_8_FRACT_0_2 = 'Pad 8, fract. 0.2' - - @classmethod - def __display_names__(cls): - return dict( - zip([_.value for _ in cls], - cls)) - - -CM = CropMethod - -_IMAGE_METHODS = [CM.INITIAL_BOX, CM.DEFAULT, CM.DEFAULT_GREY_PAD, - CM.PADDED_4, CM.PADDED_8] -_EXTRACTED_METHODS = [CM.EXTRACTED_INIT_BOX, CM.PADDED_4_EXTRACTED, - CM.PADDED_8_EXTRACTED, CM.PADDED_8_DILATION_1, - CM.PAD_8_FRACT_0_5, CM.PAD_8_FRACT_0_2] - - -def crop_by_image(method: CM, - box: st.Box, - base: Image.Image, - preproc: cfg.PreprocessorConfig, - ): - image = None - match method: - case CM.INITIAL_BOX : - image = crop_box(box, base) - case CM.DEFAULT: - padded2_4 = ( - box.pad(preproc.box_padding_initial, base.size).right_pad( - preproc.box_right_padding_initial, base.size)) - image = crop_box(padded2_4, base) - case CM.DEFAULT_GREY_PAD: - image = crop_box(box, base) - image = ops.pad_image(image, 8, fill_color=(128, 128, 128)) - case CM.PADDED_4: - padded4 = box.pad(4, base.size) - image = crop_box(padded4, base) - case CM.PADDED_8: - padded4 = box.pad(8, base.size) - image = crop_box(padded4, base) - case _: pass - return image - - -def crop_by_extracted(method: CM, - box: st.Box, - base: Image.Image, - mask: Image.Image, - cropped_image_path: Path, - cropped_mask_path: Path, - dilated: dict[float, Image.Image] - ): - cropped_image, cropped_mask, image = None, None, None - if method in _EXTRACTED_METHODS: - if not 
cropped_image_path.exists() or not cropped_mask_path.exists(): - match method: - case CM.EXTRACTED_INIT_BOX: - cropped_image, cropped_mask, image = extract_text(base, mask, box) - case CM.PADDED_4_EXTRACTED: - padded4 = box.pad(4, base.size) - cropped_image, cropped_mask, image = extract_text(base, mask, padded4) - case CM.PADDED_8_EXTRACTED: - padded8 = box.pad(8, base.size) - cropped_image, cropped_mask, image = extract_text(base, mask, padded8) - case CM.PADDED_8_DILATION_1: - padded8 = box.pad(8, base.size) - cropped_image, cropped_mask, image = extract_text( - base, dilated[1], padded8) - case CM.PAD_8_FRACT_0_5: - padded8 = box.pad(8, base.size) - cropped_image, cropped_mask, image = extract_text( - base, dilated[0.5], padded8) - case CM.PAD_8_FRACT_0_2: - padded8 = box.pad(8, base.size) - cropped_image, cropped_mask, image = extract_text( - base, dilated[0.2], padded8) - case _: pass - - return image, cropped_image, cropped_mask - - - -# %% experiments.ipynb 39 -SubjIdT: TypeAlias = int -ImgIdT = SubjIdT -BoxIdT: TypeAlias = int - -class ResultSet(dict[BoxIdT, dict[CropMethod, ResultOCR]]): ... - -class ResultSetDefault(defaultdict[BoxIdT, dict[CropMethod, ResultOCR]]): ... 
- -def results_to_dict(results: ResultSet) -> dict[BoxIdT, dict[str, str]]: - d = {} - for box, box_methods in results.items(): - for method, result in box_methods.items(): - if box not in d: - d[box] = {} - d[box][method.name] = result.ocr - return d - -def dict_to_results( - image_idx: ImgIdT, - results_dict: dict[BoxIdT, dict[str, str]], - result_factory: Callable - ) -> ResultSetDefault: - results = ResultSetDefault(dict[CropMethod, ResultOCR]) - for box_idx, box_methods in results_dict.items(): - box_idx = int(box_idx) - for method, ocr in box_methods.items(): - m = CM[method] - results[box_idx][m] = result_factory(image_idx, box_idx, m, ocr) - return results - - - -# %% experiments.ipynb 41 -# class ExperimentSubject(Protocol): -# @property -# def exp(self) -> 'ExperimentContext': ... -# @property -# def idx(self) -> SubjIdT: ... -# def setup(self, -# exp: 'ExperimentContext', -# idx: Any, -# *args, **kwargs -# ): ... - - -# class ExperimentContext(Protocol): -# def subject_factory(self) -> Callable[..., ExperimentSubject]: ... -# def normalize_idx(self, idx: Any) -> SubjIdT: ... -# def experiment_subject(self, idx: Any, /, -# create: bool = False, *args, **kwargs) -> ExperimentSubject | None: -# """Get or create an `ExperimentSubject` for the given identifier. -# Returns `None` if `idx` is out of domain range. -# """ -# ... 
- - -# %% experiments.ipynb 42 -class ExperimentSubject: - exp: ExperimentContext - idx: SubjIdT - - def setup(self, exp: ExperimentContext, idx: Any, *args, **kwargs): - self.exp = exp - self.idx = cast(SubjIdT, exp.normalize_idx(idx)) - return self - - def __new__(cls, - exp: ExperimentContext, - idx: Any, - *args, **kwargs): - self = exp.experiment_subject(idx) - if self is None: - self = super().__new__(cls) - self = exp.experiment_subject(idx, new_subject=self, *args, **kwargs) - if self is None: - raise ValueError(f"Can't create new subject with idx: {idx}: out of range") - return self - - -class ExperimentContext: - "Class to maintain shared state across all file-based experiments within the experiment domain." - - subject_cls: Callable[..., ExperimentSubject] - def subject_factory(self) -> Callable[..., ExperimentSubject]: return type(self).subject_cls - - def normalize_idx(self, idx: int | str | Path) -> SubjIdT | None: - nidx = None - if isinstance(idx, int) and idx < len(self._paths): - nidx = idx - elif isinstance(idx, str): - try: - nidx = [_.name for _ in self._paths].index(idx) - except Exception: - pass - elif isinstance(idx, Path): - idx = idx.resolve() - if idx in self._paths: - nidx = self._paths.index(idx) - return nidx - - def path_from_idx(self, idx: int | str | Path): - _idx = self.normalize_idx(idx) - if _idx is None: - raise ValueError(f"{_idx} not found in context.") - path = Path(self._paths[_idx]) - if not path.exists(): - raise ValueError(f"{path} not found in context.") - return path - - @property - def count(self): return len(self._paths) - @property - def cache_dir(self): return Path(".cache/") - @functools.lru_cache() - def _cache_dir(self, idx: SubjIdT): - # create one folder for each image to cache and save results - path = self.path_from_idx(idx) - cache_dir = self.cache_dir / path.stem - cache_dir.mkdir(parents=True, exist_ok=True) - return cache_dir - def subject_cache_dir(self, idx: int | str | Path): - return 
self._cache_dir(idx) - - def empty_cache(self, idx: SubjIdT | None = None): - cache_dir = self.cache_dir - if idx is None: - shutil.rmtree(cache_dir, ignore_errors=True) - cache_dir.mkdir(parents=True, exist_ok=True) - else: - path = Path(self._paths[idx]) - cache_dir = cache_dir / path.stem - for p in cache_dir.glob("*"): - p.unlink(missing_ok=True) - if not any(cache_dir.iterdir()): - cache_dir.rmdir() - - def empty_cache_warn(self, idx: SubjIdT | None=None, *, warn: bool=True, out: W.Output | None=None): - def on_confirm_clicked(b): - try: - self.empty_cache(idx) - print("Cache cleared successfully.") - except Exception as e: - print(f"Failed to clear cache: {e}") - finally: - for widget in confirmation_box.children: - widget.close() - - def on_cancel_clicked(b): - print("Cache clear cancelled.") - for widget in confirmation_box.children: - widget.close() - - if out is None: - out = W.Output() - with out: - if FC.IN_NOTEBOOK: - confirm_button = W.Button(description="Confirm") - cancel_button = W.Button(description="Cancel") - confirm_button.on_click(on_confirm_clicked) - cancel_button.on_click(on_cancel_clicked) - label = W.Label('Are you sure you want to clear the cache? This action cannot be undone.') - confirmation_box = W.VBox([label, W.HBox([confirm_button, cancel_button])]) - display(confirmation_box) - else: - on_confirm_clicked(None) - - def experiment_subject(self, idx: SubjIdT | str | Path, /, - new_subject: ExperimentSubject | None = None, *args, **kwargs) -> ExperimentSubject | None: - "Cached subject. If provided, `new_subject` replaces value at the index." 
- if (nidx := self.normalize_idx(idx)) is None: - return None - if new_subject is None: - subject = self._subjects.get(nidx) - else: - new_subject.setup(self, nidx, *args, **kwargs) - self._subjects[nidx] = subject = new_subject - return subject - - def reset(self): - self._subjects.clear() - self._cache_dir.cache_clear() - - def __init__(self, paths: list[Path], root: Path | None = None): - self._root = (root or Path('.')).resolve() - self._paths = [p.resolve().relative_to(self._root) for p in paths] - self._subjects: dict[SubjIdT, ExperimentSubject] = {} - - -# %% experiments.ipynb 48 -ImgSpecT: TypeAlias = ImgIdT | str | Path - -class ImageContext(ExperimentSubject): - """ - A utility class to maintain image state for a ExperimentContext. - This class encapsulates state necessary for conducting OCR experiments. - - Attributes: - json_data (dict): JSON data loaded from cached files. - page_data (st.PageData): PanelClaner page data. - base_image (Image.Image): The base image loaded from the page data. - mask (Image.Image): The mask image used for text detection. - gts (list[str]): Ground truth data for the text in the images. - ocr_model (str): Name or identifier of the OCR model used. - mocr (ocr.OCRModel): OCR model configured for the experiment. - mask_dilated1 (Image.Image): Image mask dilated by 1 pixel. - mask_dilated05 (Image.Image): Image mask dilated by 0.5 pixels. - mask_dilated02 (Image.Image): Image mask dilated by 0.2 pixels. - - Methods: - init(config: cfg.Config, img_path: Path, cache_dir: Path, ocr_model: str): - Initializes the experiment context. It also handles the generation of text boxes - if they are not already present. - - setup_ground_truth(): - Loads or initializes ground truth data for the experiment based on the page data. - - setup_crop_masks(): - Prepares various dilated versions of the mask image to be used in different cropping - strategies during the experiments. 
- """ - exp: ExperimentContext - idx: ImgIdT - base_image: Image.Image - mask: Image.Image - json_data: dict | None - page_data: st.PageData - # ocr_model: str - # mocr: ocr.OCRModel - # postprocess_ocr: Callable[..., str] - _page_lang: str - _gts: list[str] - _mask_dilated1: Image.Image | None - _mask_dilated05: Image.Image | None - _mask_dilated02: Image.Image | None - - - # # this methods will be set downstream, declared here to make the type checker happy - # def result(self: Self, - # box_idx: int, method: CropMethod, ocr: bool = True, reset: bool=False) -> ResultOCR: ... - # def summary_box(self: Self, box_idx: int): ... - - def to_dict(self): - return { - 'image_idx': self.idx, - 'page_lang': self.page_lang, - } - - @property - def image_idx(self): return self.idx - @property - def cache_dir(self): - return self.exp.subject_cache_dir(self.idx) - cache_dir_image = cache_dir - - @property - def image_info(self): - img = self.base_image - w, h = img.size - print_size_in = size(w, h, 'in', 300) - print_size_cm = size(w, h, 'cm', 300) - required_dpi = dpi(w, h, 'Modern Age') - return (w, h), print_size_in, print_size_cm, required_dpi - - @property - def original_image_path(self): return Path(self.page_data.original_path) - @property - def image_path(self): return Path(self.page_data.image_path) - @property - def image_name(self): return self.original_image_path.name - @property - def image_size(self): return self.base_image.size - @property - def image_dim(self):return size(*self.image_size) - @property - def image_dpi(self): return dpi(*self.image_size) - @property - def image_print(self): - return self.image_size, self.image_dim, self.image_dpi - @property - def image_name_rich(self): - siz, dim, res = self.image_print - return f"{self.image_name} - {siz[0]}x{siz[1]} px: {dim[0]:.2f}x{dim[1]:.2f}\" @ {res:.2f} dpi" - - def setup_page_lang(self, page_lang: str | None = None): - path = Path(self.page_data.original_path).with_suffix('.json') - metadata = 
json.load(open(path)) if path.exists() else {} - if 'lang' in metadata and (page_lang == metadata['lang'] or page_lang is None): - self._page_lang = metadata['lang'] - return - self._page_lang = metadata['lang'] = page_lang or 'English' - json.dump(metadata, open(path, 'w'), indent=2) - @property - def page_lang(self): - if self._page_lang == None: - self.setup_page_lang() - return self._page_lang - - @property - def boxes(self): return self.page_data.boxes - - def setup_ground_truth(self): - self._gts = read_ground_truth(self.page_data) - @property - def gts(self): - if self._gts is None: - self.setup_ground_truth() - return self._gts - - @functools.lru_cache(typed=True) - def dilated_mask(self, fraction: float): - return dilate_by_fractional_pixel(self.mask, fraction) - - def mask_dilated1(self): - if self._mask_dilated1 is None: - self._mask_dilated1 = self.mask.filter(ImageFilter.MaxFilter(3)) - return self._mask_dilated1 - - def mask_dilated05(self): - if self._mask_dilated05 is None: - self._mask_dilated05 = self.dilated_mask(0.5) - return self._mask_dilated05 - - def mask_dilated02(self): - if self._mask_dilated02 is None: - self._mask_dilated02 = self.dilated_mask(0.2) - return self._mask_dilated02 - - def dilated(self): - return {1: self.mask_dilated1(), - 0.5: self.mask_dilated05(), - 0.2: self.mask_dilated02(),} - - def __new__(cls, - exp: ExperimentContext, - idx: ImgSpecT, - *args, **kwargs) -> Self: - return super().__new__(cls, exp, idx, *args, **kwargs) # type: ignore - - -# %% experiments.ipynb 50 -class OCRExperimentContext(ExperimentContext): - """ - A utility class to maintain shared state across all experiments within OCR domain. - This class encapsulates state necessary for conducting PanelCleaner OCR experiments. 
- """ - - config: cfg.Config - image_paths: list[Path] - # OCR engine -> Image index -> Box index -> Crop method -> Result - _results: dict[str, dict[ImgIdT, ResultSet]] - - - engines = { - 'Tesseract': cfg.OCREngine.TESSERACT, - 'Idefics': None, - 'manga-ocr': cfg.OCREngine.MANGAOCR} - - # subject_cls: ImageContext - # def subject_factory(self) -> Callable[..., ExperimentSubject]: return type(self).subject_cls - - @classmethod - def get_config(cls, cache_dir: Path | None = None) -> cfg.Config: - config = cfg.load_config() - config.cache_dir = cache_dir or Path(".") - profile = config.current_profile - preprocessor_conf = profile.preprocessor - # Modify the profile to OCR all boxes. - # Make sure OCR is enabled. - preprocessor_conf.ocr_enabled = True - # Make sure the max size is infinite, so no boxes are skipped in the OCR process. - preprocessor_conf.ocr_max_size = 10**10 - # Make sure the sus box min size is infinite, so all boxes with "unknown" language are skipped. - preprocessor_conf.suspicious_box_min_size = 10**10 - # Set the OCR blacklist pattern to match everything, so all text gets reported in the analytics. 
- preprocessor_conf.ocr_blacklist_pattern = ".*" - return config - - def to_dict(self): - return { - 'image_paths': list(map(str, self.image_paths)), - 'cache_dir': str(self.config.cache_dir) - } - def to_json(self): - return json.dumps(self.to_dict(), indent=2) - @classmethod - def from_json_data(cls, d: dict): - return cls(cls.get_config(Path(d['cache_dir'])), d['image_paths']) - @classmethod - def from_json_path(cls, path: Path): - return cls.from_json_data(json.loads(path.read_text())) - - - @functools.lru_cache() - def mocr(self, ocr_model: str, lang: str): - engine = self.engines[ocr_model] - ocr_processor = ocr.get_ocr_processor(True, engine) - proc = ocr_processor[lang2pcleaner(lang)] - if isinstance(proc, TesseractOcr): - proc.lang = lang2tesseract(lang) - return proc - - def ocr_box(self, result: ResultOCR, ocr_model: str, lang: str): - assert result.image is not None - text = self.mocr(ocr_model, lang)(result.image) - result.ocr = postprocess_ocr(text) - return result - - @property - def cache_dir(self): return self.config.get_cleaner_cache_dir() - image_cache_dir = ExperimentContext.subject_cache_dir - - @functools.lru_cache() - def _load_page_data(self, image_idx: int): - config = self.config - cache_dir = self.image_cache_dir(image_idx) - img_path = self.path_from_idx(image_idx) - image_name = img_path.stem - # read cached json - jsons = [_ for _ in cache_dir.glob("*#raw.json") if image_name in _.stem] - assert len(jsons) <= 1 - # generate text boxes if needed - if not jsons: - pfl = config.current_profile - gpu = torch.cuda.is_available() or torch.backends.mps.is_available() - model_path = config.get_model_path(gpu) - ctm.model2annotations(pfl.general, pfl.text_detector, model_path, [img_path], cache_dir) - # we don't need unique names for this tests, strip uuids - for p in cache_dir.glob(f"*{image_name}*"): - p.rename(strip_uuid(p)) - jsons = [_ for _ in cache_dir.glob("*#raw.json") if image_name in _.stem] - - # adapt paths to be relative to this 
notebook - this_path = self._root - json_file_path = jsons[0] - json_data = json.loads(json_file_path.read_text(encoding="utf-8")) - json_data["image_path"] = str(strip_uuid(json_data["image_path"]).relative_to(this_path)) - json_data["mask_path"] = str(strip_uuid(json_data["mask_path"]).relative_to(this_path)) - json.dump(json_data, open(json_file_path, "w"), indent=2) - else: - json_file_path = jsons[0] - json_data = json.loads(json_file_path.read_text(encoding="utf-8")) - - page_data = st.PageData( - json_data["image_path"], json_data["mask_path"], - json_data["original_path"], json_data["scale"], - [st.Box(*data["xyxy"]) for data in json_data["blk_list"]], - [], [], []) - # Merge boxes that have mutually overlapping centers. - page_data.resolve_total_overlaps() - return json_data, page_data - - def page_data(self, image_idx: int): - _, page_data = self._load_page_data(image_idx) - return page_data - def json_data(self, image_idx: int): - json_data, _ = self._load_page_data(image_idx) - return json_data - - def experiment_image(self, image_idx: ImgIdT | str | Path) -> ImageContext | None: - "Cached image context." 
- return cast(ImageContext, self.experiment_subject(image_idx)) - - def update_results(self, ocr_model: str, img_idx: ImgIdT, results: ResultSetDefault): - self._results[ocr_model][img_idx] = cast(ResultSet, results) - - - def _result_from(self, image_idx: ImgIdT, box_idx: BoxIdT, method: CropMethod, ocr: str | None = None): - img_ctx = ImageContext(self, image_idx) - extracted = method in _EXTRACTED_METHODS - result_cls = ResultOCRExtracted if extracted else ResultOCR - result = result_cls(int(box_idx), None, '', img_ctx.page_data, - img_ctx.gts, description=f"{method.value}") - if ocr is not None: - result.ocr = ocr - return result - - def result(self, - ocr_model: str, - image_idx: ImgIdT, box_idx: BoxIdT, method: CropMethod, - ocr: bool=True, - rebuild: bool=False) -> ResultOCR | None: - img_ctx = ImageContext(self, image_idx) - result = self._results[ocr_model][image_idx][box_idx].get(method) - if not rebuild and result is not None: - return result - - result = self._result_from(image_idx, box_idx, method) - image, cropped_image, cropped_mask = result.image, None, None - base_image = img_ctx.base_image - box = img_ctx.boxes[box_idx] - if image is None and method in _IMAGE_METHODS: - image = crop_by_image( - method, box, base_image, self.config.current_profile.preprocessor) - - if image is None and method in _EXTRACTED_METHODS: - mask = img_ctx.mask - cropped_image_path = result.cache_image(cropped_image, "cropped") - cropped_mask_path = result.cache_image(cropped_mask, "mask") - if not cropped_image_path.exists() or not cropped_mask_path.exists(): - image, cropped_image, cropped_mask = crop_by_extracted( - method, box, base_image, mask, - cropped_image_path, cropped_mask_path, img_ctx.dilated()) - - assert image is not None - if result.image is None: - result.image = image - result.cache_image() - if cropped_image is not None: - result.cache_image(cropped_image, "cropped") - if cropped_mask is not None: - result.cache_image(cropped_mask, "mask") - - if ocr: - 
result = self.ocr_box(result, ocr_model, img_ctx.page_lang) - self._results[ocr_model][image_idx][box_idx][method] = result - return result - - def results(self, ocr_model: str | None = None, img_idx: ImgIdT | None = None): - if ocr_model is None: return self._results - if img_idx is None: return self._results[ocr_model] - return self._results[ocr_model][img_idx] - def model_results(self, ocr_model: str): - return cast(dict[ImgIdT, ResultSet], self.results(ocr_model)) - def image_results(self, ocr_model: str, img_idx: ImgIdT): - return cast(ResultSet, self.results(ocr_model, img_idx)) - def box_results(self, ocr_model: str, img_idx: ImgIdT, box_idx: BoxIdT): - return cast(ResultSet, self.results(ocr_model, img_idx))[box_idx] - def method_results(self, ocr_model: str, img_idx: ImgIdT, method: CropMethod): - image_results = self.image_results(ocr_model, img_idx) - return {i: box_results.get(method) for i,box_results in image_results.items()} - - def _reset_results(self): - results = defaultdict(lambda: defaultdict(lambda: ResultSetDefault(dict))) - self._results = cast(dict[str, dict[ImgIdT, ResultSet]], results) - def reset_results(self, - ocr_model: str | None = None, - image_idx: int | None = None, - box_idx: int | None = None, - method: CropMethod | None = None): - if ocr_model is None and image_idx is None and box_idx is None and method is None: - self._reset_results() - return - results = self._results - models = tuple(results.keys()) if ocr_model is None else [ocr_model] if ocr_model in results else [] - for ocr_model in models: - img_nodes = results[ocr_model] - imgs = tuple(img_nodes.keys()) if image_idx is None else [image_idx] if image_idx in img_nodes else [] - for img_idx in imgs: - box_nodes = img_nodes[img_idx] - boxes = tuple(box_nodes.keys()) if box_idx is None else [box_idx] if box_idx in box_nodes else [] - for box_idx in boxes: - if method is None: - del box_nodes[box_idx] - else: - methods = box_nodes[box_idx] - if method in methods: - del 
methods[method] - if not box_nodes[box_idx]: - del box_nodes[box_idx] - if not img_nodes[img_idx]: - del img_nodes[img_idx] - if not results[ocr_model]: - del results[ocr_model] - def reset(self): - super().reset() - self.reset_results() - self._load_page_data.cache_clear() - self.mocr.cache_clear() - - def __init__(self, - config: cfg.Config | None, - image_paths: list[Path] - ): - super().__init__(list(map(lambda p: p.resolve(), image_paths))) - self.config = config or type(self).get_config() - self.image_paths = self._paths - self._reset_results() - self._images = self._subjects - - -# %% experiments.ipynb 51 -@FC.patch_to(ImageContext) -def setup(self, exp: OCRExperimentContext, image_idx: ImgSpecT, page_lang: str | None = None): - super(type(self), self).setup(exp, image_idx) - self._mask_dilated1 = self._mask_dilated05 = self._mask_dilated02 = None - # if ocr_model not in exp.engines: - # raise ValueError(f"OCR model {ocr_model} not supported.") - # self.ocr_model = ocr_model - # self.idx = exp.normalize_idx(image_idx) - self.json_data, self.page_data = exp._load_page_data(self.idx) - self.setup_page_lang(page_lang) - self.mask = Image.open(self.page_data.mask_path) - self.base_image = Image.open(self.page_data.image_path) - self.setup_ground_truth() - - - -# %% experiments.ipynb 54 -class ContextVisor: - ctx: Any - # control_names: list[str] - values: dict[str, Any] - - _css = '' - - _ctxs: dict[str, ContextVisor] - _hdlrs: dict[str, ContextVisor] - - @property - def w(self) -> W.DOMWidget: - if getattr(self, '_w', None) is None: - self._w = self.setup_ui() - return self._w - @property - def out(self) -> W.Output: - if getattr(self, '_out', None) is None: - self._out = W.Output() - self._out.clear_output(wait=True) - return self._out # type: ignore - @property - def controls(self) -> dict[str, W.ValueWidget | W.fixed]: - if getattr(self, '_controls', None) is None: - self._controls = self.setup_controls() - return self._controls - @property - def 
all_controls(self) -> dict[str, W.ValueWidget | W.fixed]: - if getattr(self, '_all_controls', None) is None: - controls = {} - for visor in self._ctxs.values(): - controls.update(visor.all_controls) - controls.update(self.controls) - self._all_controls = controls - return self._all_controls - - @property - def all_values(self): - return {**{k:v.values for k,v in (self._ctxs | {'self': self}).items()}, **self.values} - - @property - def comps(self): return self._ctxs - def comp(self, k: str) -> ContextVisor | None: - return self._ctxs.get(k) - def handler(self, k: str) -> ContextVisor | None: - return self._hdlrs.get(k) - - @property - def styler(self) -> W.Output | None: - if (stl := self.setup_style()) is None: - return None - if getattr(self, '_style', None) is None: - self._style = W.Output(layout={'height': '0px'}) - with self._style: - display(stl) - return self._style - def setup_style(self): - return HTML(f"") if self._css else None - - def update_output(self, **kwargs): - cprint(kwargs) - - def setup_controls(self) -> dict[str, W.ValueWidget | W.fixed]: - return {k: W.Label(value=k) for k,v in self.values.items()} - - def hide(self): - self.w.layout.visibility = 'hidden' - def show(self): - self.w.layout.visibility = 'visible' - - def setup_ui(self): - comps = [] - for visor in self._ctxs.values(): - comps.append(visor.w) - return W.HBox([*comps, *self.controls.values()]) - - def setup_display(self): - if getattr(self, '_w', None) is None: - self._w = self.setup_ui() - - - def _output(self, **kwargs): - collator = defaultdict(dict) - show_inline_matplotlib_plots() - with self.out: - clear_output(wait=True) - for k,v in kwargs.items(): - if (comp := self.handler(k)) is not None: - collator[comp][k] = v - else: - assert 0 - # self.update_output(**{k: v}) - for comp, kw in collator.items(): - comp.update_output(**kw) - show_inline_matplotlib_plots() - def interactive_output(self): - controls = self.all_controls - controls2names = {v:k for k,v in 
controls.items()} - def observer(change): - control_name = controls2names[change['owner']] - kwargs = {control_name: change['new']} - updated = self._update(**kwargs) - self._output(**updated) - for w in controls.values(): - w.observe(observer, 'value') - def display(self, **kwargs): - if getattr(self, '_w', None) is None: - self.setup_display() - self.interactive_output() - self._update(**(self.values | kwargs)) - all_values= {} - for comp in list(self.comps.values()) + [self]: all_values.update(comp.values) - self._hdlrs = {k:self._hdlrs.get(k, self) for k in all_values} - self._output(**all_values) - display(self.styler, self.w, self.out) if self.styler else display(self.w, self.out) - else: - self.update(**kwargs) - def _ipython_display_(self): self.display() - - def _update(self, update_value: bool=True, **kwargs): - updated = {} - for visor in self.comps.values(): - updated.update(visor._update(update_value=update_value, **kwargs)) - values = self.values - my_vals = _pops_(kwargs, self.values.keys()) - for k,v in my_vals.items(): - if v is not None and v != values[k]: - if update_value: values[k] = v - updated[k] = v - return updated - def update(self, **kwargs): - updated = self._update(update_value=False, **kwargs) - controls = self.all_controls - for k in updated: - controls[k].value = updated[k] - # self._output(**updated) - - def __init__(self, - ctx: Any, - values: dict[str, Any], - out: W.Output | None = None, - ctxs: dict[str, ContextVisor] | None = None, - hdlrs: dict[str, ContextVisor] | None = None, - ): - self._ctxs = ctxs or {} - self._hdlrs = hdlrs or {} - self.ctx = ctx - self._out = out - self.values = values - - - -# %% experiments.ipynb 62 -class ImageSelector(ContextVisor): - ctx: OCRExperimentContext - - @property - def image_ctx(self): - return ImageContext(self.ctx, self.values['image_idx']) - - def setup_controls(self): - paths = self.ctx.image_paths - w = W.Dropdown( - options={_.stem:i for i,_ in enumerate(paths)}, - 
value=self.values['image_idx'], - layout={'width': 'fit-content'}, - style={'description_width': 'initial'}) - return {'image_idx': w} - - def update(self, image_idx: ImgSpecT | None = None, **kwargs): - if image_idx is None: return - idx = self.ctx.normalize_idx(image_idx) - if idx is None: return - super().update(image_idx=idx, **kwargs) - - - def __init__(self, - ctx: OCRExperimentContext, /, - image_idx: ImgSpecT = 0, *, - out: W.Output | None=None): - idx = ctx.normalize_idx(image_idx) - assert idx is not None, f"Image {image_idx} not found in experiment context" - super().__init__(ctx, {'image_idx': idx}, out) - - -# %% experiments.ipynb 66 -class OCRContextVisor(ContextVisor): - ctx: OCRExperimentContext - - def update_output(self, /, image_idx: ImgIdT, **kwargs): - img_path = self.ctx.path_from_idx(image_idx) - display_image_grid([img_path], 1, 1) - - def update(self, image_idx: ImgSpecT | None = None, **kwargs): - if image_idx is None: return - idx = self.ctx.normalize_idx(image_idx) - if idx is None: return - super().update(image_idx=idx, **kwargs) - - def __init__(self, - ctx: OCRExperimentContext, /, - image_idx: ImgSpecT = 0, *, - out: W.Output | None=None): - super().__init__(ctx, {}, out, - ctxs={'image_idx': ImageSelector(ctx, image_idx, out=self.out)}) - - -# %% experiments.ipynb 84 -class OCRModel(Enum): - TESSERACT = 0 - IDEFICS = 1 - @staticmethod - def __display_names__() -> dict[str, OCRModel]: - return dict( - zip("Tesseract, Idefics".split(', '), - OCRModel)) - - -class ModelSelector(ContextVisor): - ctx: OCRExperimentContext - - def setup_controls(self): - options = self.models - w = W.Dropdown( - options=options, - value=self.values['model'], - layout={'width': 'fit-content'}, - style={'description_width': 'initial'}) - return {'model': w} - - def setup_ui(self): - ctls = self.controls - model_grp = W.HBox([ctls['model']]) - model_grp.add_class('model_grp') - comps = [] - for visor in self.comps.values(): - comps.append(visor.setup_ui()) - 
ui = W.HBox([*comps, model_grp]) - return ui - - def __init__(self, - exp_ctx: OCRExperimentContext, - ocr_model: OCRModel | None=OCRModel.TESSERACT, - ocr_models: dict[str, OCRModel] | None = None, - out: W.Output | None = None - ): - self.models: dict[str, OCRModel] = ocr_models or OCRModel.__display_names__() - super().__init__(exp_ctx, - {'model': ocr_model or OCRModel.TESSERACT}, - out=out or self.out)#, ctxs=[exp_visor]) - - -# %% experiments.ipynb 87 -class DisplayOptions(Enum): - BOXES = 0 - IMAGE = 1 - MASK = 2 - IMAGE_MASK = 3 - PAGE_DATA = 4 - GROUND_TRUTH = 5 - ALL = 6 - RESULTS = 7 - BEST_RESULTS = 8 - DATAFRAME = 9 - - @staticmethod - def __display_names__(): - return dict( - zip("Boxes, Image, Mask, Image & Mask, Page data, Ground truth, All, Results, " - "Best results, Dataframe".split(', '), - DisplayOptions)) - - -class ContentSelector(ContextVisor): - ctx: OCRExperimentContext - - def image_info(self, image_ctx: ImageContext): - img = image_ctx.base_image - (w, h), print_size_in, print_size_cm, required_dpi = image_ctx.image_info - format = PRINT_FORMATS['Modern Age'] - cprint( f"{'Width x Height':>30}: {w} x {h} pixels\n" - f"{'PIL Info DPI':>30}: {repr(img.info.get('dpi', None))}\n" - f"{'Print Size 300 DPI':>30}: {print_size_in[0]:.3f} x {print_size_in[1]:.3f} in" - f" / {print_size_cm[0]:.3f} x {print_size_cm[1]:.3f} cm\n" - f"Required DPI Modern Age format: {required_dpi:.3f} dpi " - f"({format[0]:.3f} x {format[1]:.3f} in)") - - - def display_content(self, image_ctx: ImageContext, display_option: DisplayOptions): - page_data = image_ctx.page_data - if display_option in (DisplayOptions.ALL, DisplayOptions.PAGE_DATA): - self.image_info(image_ctx) - RenderJSON(image_ctx.json_data, 350, 2).display() - if display_option in (DisplayOptions.ALL, DisplayOptions.GROUND_TRUTH): - cprint(image_ctx.gts) - if display_option == DisplayOptions.IMAGE: - display_image_grid([page_data.image_path], 1, 1) - if display_option == DisplayOptions.MASK: - 
display_image_grid([page_data.mask_path], 1, 1) - if display_option in (DisplayOptions.ALL, DisplayOptions.IMAGE_MASK): - display_image_grid([page_data.image_path, page_data.mask_path], 1, 2) - if display_option in (DisplayOptions.ALL, DisplayOptions.BOXES): - _, out_path = page_boxes(page_data) - display_image_grid([out_path], 1, 1) - - - def setup_controls(self): - options = self.display_options or {**DisplayOptions.__display_names__()} - display_option_wdgt = W.Dropdown( - options=options, - value=self.values['display_option'], - layout={'width': '120px'}, - style={'description_width': 'initial'}) - return {'display_option': display_option_wdgt} - - - def setup_ui(self): - ctls = self.controls - display_option_grp = W.HBox([ctls['display_option']]) - display_option_grp.add_class('display_option_grp') - comps = [] - for visor in self.comps.values(): - comps.append(visor.setup_ui()) - ui = W.HBox([*comps, display_option_grp]) - return ui - - - def __init__(self, - exp_ctx: OCRExperimentContext, - display_option: DisplayOptions | None=DisplayOptions.BOXES, - display_options: Mapping[str, DisplayOptions] | None = None, - out: W.Output | None = None - ): - self.display_options = display_options - super().__init__(exp_ctx, - {'display_option': display_option or DisplayOptions.BOXES}, - out=out or self.out)#, ctxs=[exp_visor]) - - -# %% experiments.ipynb 91 -class ImageContextVisor(ContextVisor): - ctx: ImageContext - # control_names: list[str] = ['display_option'] - - _css = """ - .display_option_grp { - background-color: lightblue; - } - """ - - def image_info(self): - content_selector = cast(ContentSelector, self.comp('display_option')) - content_selector.image_info(self.ctx) - - def update_output(self, - display_option: DisplayOptions | None = None, - image_idx: ImgIdT | None = None, - **kwargs): - content_selector = cast(ContentSelector, self.comp('display_option')) - if image_idx is not None and image_idx != self.ctx.image_idx: - ctx = ImageContext(self.ctx.exp, 
image_idx) - assert ctx is not None - self.ctx = ctx - display_option = content_selector.values['display_option'] - if display_option is None: - return - content_selector.display_content(self.ctx, display_option) - - def update(self, - display_option: DisplayOptions | None=None, - image_idx: ImgSpecT | None=None, - **kwargs): - if image_idx is not None: - if (idx := self.ctx.exp.normalize_idx(image_idx)) is not None: - kwargs['image_idx'] = idx - super().update(display_option=display_option, **kwargs) - - def __init__(self, - exp_ctx: OCRExperimentContext, - img_idx: ImgIdT | str | Path | ImageContext, - display_option: DisplayOptions=DisplayOptions.BOXES, - display_options: Mapping[str, DisplayOptions] | None = None, - out: W.Output | None = None - ): - if isinstance(img_idx, ImageContext): - ctx = img_idx - else: - assert exp_ctx is not None, "exp_ctx must be provided if img_idx is not an ImageContext" - ctx = ImageContext(exp_ctx, img_idx) - assert ctx is not None, f"Image {img_idx} not found in experiment context" - if display_options is None: - display_options = {**DisplayOptions.__display_names__()} - del display_options['Results'] - out = out or self.out - content_selector = ContentSelector(exp_ctx, - display_option=display_option, display_options=display_options, out=out) - image_selector = ImageSelector(exp_ctx, ctx.image_idx, out=out) - super().__init__(ctx, {}, out=out, - ctxs={'image_idx': image_selector, 'display_option': content_selector}) - - -# %% experiments.ipynb 104 -def trimmed_mean(data, trim_percent): - sorted_data = np.sort(data) - n = len(data) - trim_count = int(trim_percent * n) - trimmed_data = sorted_data[trim_count:-trim_count] - return np.mean(trimmed_data) - -def mad_based_outlier(points, threshold=3.5): - median = np.median(points) - diff = np.abs(points - median) - mad = np.median(diff) - modified_z_score = 0.6745 * diff / mad - return points[modified_z_score < threshold] - -def iqr_outlier_removal(data): - q1 = np.percentile(data, 
25) - q3 = np.percentile(data, 75) - iqr = q3 - q1 - lower_bound = q1 - 1.5 * iqr - upper_bound = q3 + 1.5 * iqr - return data[(data >= lower_bound) & (data <= upper_bound)] - - -# %% experiments.ipynb 105 -@dataclasses.dataclass -class Experiment: - ctx: ExperimentContext - - -@dataclasses.dataclass -class ExperimentOCR(Experiment): - ctx: ImageContext - ocr_model: str - - @property - def img_ctx(self): return self.ctx - @property - def ctxs(self): - img_ctx = self.img_ctx - return cast(OCRExperimentContext, img_ctx.exp), img_ctx - - @classmethod - def file_path_of(cls, page_data: st.PageData, ocr_model: str): - return f"{Path(page_data.original_path).stem}_{ocr_model}.json" - - def file_path(self): - img_ctx = self.img_ctx - return type(self).file_path_of(img_ctx.page_data, self.ocr_model) - - def to_dict(self): - "JSON serializable dict of the experiment" - img_ctx = self.img_ctx - img_idx = img_ctx.image_idx - results = results_to_dict(self.results()) - return { - 'image_name': img_ctx.image_name, - 'ocr_model': self.ocr_model, - 'results': results, - } - - def to_json(self, out_dir: Path | None = None): - img_ctx = self.img_ctx - fp = (out_dir or img_ctx.cache_dir_image) / self.file_path() - data = self.to_dict() - with open(fp, 'w') as f: - json.dump(data, f, indent=2) - return fp, data - - @classmethod - def from_json(cls, experiment: OCRExperimentContext, json_path: Path) -> Self: - try: - with open(json_path, 'r') as f: - data = json.load(f) - except Exception as e: - logger.error(f"Error loading {json_path}: {e}") - raise e - ocr_model = data['ocr_model'] - img_ctx = ImageContext(experiment, data['image_name']) - results: ResultSetDefault = dict_to_results( - img_ctx.image_idx, - data['results'], - result_factory=experiment._result_from) - experiment.update_results(ocr_model, img_ctx.image_idx, results) - return cls(img_ctx, ocr_model) - - @classmethod - def from_image(cls, - ctx: OCRExperimentContext, - ocr_model: str, - image_idx: ImgSpecT): - idx = 
cast(ImgIdT, ctx.normalize_idx(image_idx)) - img_ctx = ImageContext(ctx, idx) - if img_ctx is None: - raise ValueError(f"Image {image_idx} not found in experiment context") - fp = img_ctx.cache_dir / cls.file_path_of(img_ctx.page_data, ocr_model) - if fp.exists(): - return cast(Self, cls.from_json(cast(OCRExperimentContext, img_ctx.exp), fp)) - return cls(img_ctx, ocr_model) - - @classmethod - def from_method(cls, - ctx: OCRExperimentContext, - ocr_model: str, - image_idx: ImgIdT | str | Path, - method: CropMethod): - experiment = cls.from_image(ctx, ocr_model, image_idx) - if experiment is None: - return None - return experiment.method_experiment(method) - - @classmethod - def saved_experiment(cls, - ctx: OCRExperimentContext, ocr_model: str, image_idx: ImgIdT | str | Path): - idx = ctx.normalize_idx(image_idx) - if idx is None: - logger.warning(f"Image {image_idx} not found in experiment context") - return None - return cls.from_image(ctx, ocr_model, idx) - - @classmethod - def saved_experiments(cls, ctx: OCRExperimentContext, ocr_model: str) -> list[Self]: - return [exp for i in range(len(ctx.image_paths)) - if (exp := cls.from_image(ctx, ocr_model, i)) is not None] - - - def result(self, box_idx: BoxIdT, method: CropMethod, ocr: bool=True, rebuild: bool=False): - ctx, img_ctx = self.ctxs - return ctx.result(self.ocr_model, img_ctx.image_idx, box_idx, method, ocr, rebuild) - - def results(self): - ctx, img_ctx = self.ctxs - return cast(ResultSet, ctx.results(self.ocr_model, img_ctx.image_idx)) - - def has_run(self): - "at least one method has run" - img_ctx = self.img_ctx - return len(self.results()) == len(img_ctx.page_data.boxes) - - def best_results(self): - img_ctx = self.img_ctx - results = self.results() - if len(results) < len(img_ctx.page_data.boxes): # at least one method has run - return None - best = [] - for box_idx in results: - methods = results[box_idx] - best_method = max(methods, key=lambda m: methods[m].acc) # type: ignore - 
best.append((best_method, methods[best_method])) - return best - - def save_results_as_ground_truth(self, overwrite=False): - img_ctx = self.img_ctx - gts_path = ground_truth_path(img_ctx.page_data) - if overwrite or not gts_path.exists(): - best_results = self.best_results() - if best_results: - tt = [r.ocr for m,r in best_results] - gts_path.write_text('\n'.join(tt), encoding="utf-8") - img_ctx.setup_ground_truth() - logger.info(f"Ground truth data saved successfully to {gts_path}") - return True - else: - logger.info("No best results available to save.") - return False - else: - return False - - @property - def experiments(self): - if not hasattr(self, '_experiments'): - self._experiments = {} - return self._experiments - def method_experiment(self, method: CropMethod) -> ExperimentOCRMethod: - if method not in self.experiments: - self.experiments[method] = ExperimentOCRMethod(self, method) - return self.experiments[method] - - - def to_dataframe(self): - "Dataframe with crop methods as columns and box ids as rows" - methods = list(CropMethod.__members__.values()) - experiments = [self.method_experiment(m) for m in methods] - accuracies = [[result.acc for result in exp.results()] for exp in experiments] - # transpose accuracies - accuracies = list(zip(*accuracies)) - return pd.DataFrame(accuracies, columns=CropMethod.__display_names__()) - - def plot_accuracies(self, - methods: list[CropMethod] | None = None, - ): - "Plots a horizontal bar chart of the accuracies for a list of method experiments." 
- methods = methods or list(CropMethod.__members__.values()) - experiments = [self.method_experiment(m) for m in methods] - if not experiments: return - - ctx, img_ctx = self.ctxs - page_data = img_ctx.page_data - model = self.ocr_model - accuracies = [[result.acc for result in exp.results()] for exp in experiments] - accuracies = [np.mean(a) for a in accuracies] - # accuracies = [np.mean([result.acc for result in exp.results()]) for exp in experiments] - - _, ax = plt.subplots(figsize=(10, 5)) - - # Normalize the accuracies for color mapping - norm = plt.Normalize(min(accuracies), max(accuracies)) - # Color map from red to green - cmap = plt.get_cmap('RdYlGn') - colors = cmap(norm(accuracies)) - - ax.barh([m.value for m in methods], accuracies, color=colors) - - ax.set_xscale('log') # Set the x-axis to a logarithmic scale - ax.set_xlabel('Average Accuracy (log scale)', fontsize=12, fontweight='bold') - - ax.set_ylabel('Method', fontsize=12, fontweight='bold') - ax.set_yticks(range(len(methods))) - ax.set_yticklabels([f'{method.value} ({acc:.2f})' - for method, acc in zip(methods, accuracies)], fontsize=12) - max_acc_index = np.argmax(accuracies) - ax.get_yticklabels()[max_acc_index].set(color='blue', fontweight='bold') - - title_text = (f"{page_data.original_path} - OCR model: {model}") - ax.set_title(title_text, fontsize=12, fontweight='bold') - - plt.tight_layout() - plt.show() - - - def summary_box(self, box_idx: int): - results: list[tuple[CropMethod, ResultOCR]] = [] - pb = tqdm(CropMethod.__members__.values(), leave=False, desc=f"Box #{box_idx+1}") - for m in pb: - r = cast(ResultOCR, self.result(box_idx, m)) - results.append((m, r)) - methods, images, ocrs, accs = zip( - *map( - lambda t: (t[0].value, t[1].cache_image(), t[1].diff_tagged(), acc_as_html(t[1].acc)), - results)) - display_columns([methods, images, accs, ocrs], - headers=["Method", f"Box #{box_idx+1}", "Accuracy", "OCR"]) - - - def summary_method(self, method: CropMethod): - results = 
self.method_experiment(method).results() - methods, images, ocrs, accs = zip( - *map( - lambda r: (r.block_idx+1, r.cache_image(), r.diff_tagged(), acc_as_html(r.acc)), - results)) - display_columns([methods, images, accs, ocrs], - headers=["Box #", "Box", "Accuracy", f"{method.value} OCR"]) - - - def display(self): - out = [] - for method in CropMethod: - out.append(f"---------- {method.value} ----------") - results = self.method_experiment(method).results() - out.extend(results) - out.append('\n') - cprint(*out, soft_wrap=True) - - - def reset(self, box_idx: int | None = None, method: CropMethod | None = None): - ctx, img_ctx = self.ctxs - ctx.reset_results(None, img_ctx.image_idx, box_idx, method) - - def perform_methods(self, - methods: CropMethod | list[CropMethod] | None = None, - box_idxs: BoxIdT | list[BoxIdT] | None = None, - rebuild: bool = False, - plot_acc: bool = False - ): - if methods is None: - methods = [*CropMethod.__members__.values()] - elif isinstance(methods, CropMethod): - methods = [methods] - if rebuild: - _methods = tqdm(methods, desc="Methods") - else: - _methods = methods - for method in _methods: - method_exp = self.method_experiment(method) - if method_exp: - if rebuild: - method_exp(box_idxs, rebuild=rebuild) - if plot_acc: - self.plot_accuracies() - - def __call__(self, - box_idxs: BoxIdT | list[BoxIdT] | None = None, - methods: CropMethod | list[CropMethod] | None = None, - save: bool = True, - display=False, - rebuild: bool=False, - save_as_ground_truth=False): - self.perform_methods(methods, box_idxs, rebuild=rebuild) - if save_as_ground_truth: - self.save_results_as_ground_truth(overwrite=True) - if save: - self.to_json() - if display: - self.display() - - -@dataclasses.dataclass -class ExperimentOCRMethod: - ctx: ExperimentOCR - method: CropMethod - - @property - def exp_ctx(self): return self.ctx - @property - def img_ctx(self): return self.ctx.ctx - @property - def ctxs(self): - img_ctx = self.img_ctx - return 
cast(OCRExperimentContext, img_ctx.exp), img_ctx, self.ctx - - def result(self, box_idx: BoxIdT, ocr: bool=True, rebuild: bool=False) -> ResultOCR | None: - ctx, img_ctx, exp_ctx = self.ctxs - return ctx.result(exp_ctx.ocr_model, img_ctx.image_idx, box_idx, self.method, ocr, rebuild) - - def results(self, - box_idxs: BoxIdT | list[BoxIdT] | None = None, - ocr: bool=True, rebuild: bool=False) -> list[ResultOCR]: - ctx, img_ctx, exp_ctx = self.ctxs - if box_idxs is None: - box_idxs = list(range(len(img_ctx.boxes))) - elif isinstance(box_idxs, int): - box_idxs = [box_idxs] - model = exp_ctx.ocr_model - results = ctx.method_results(model, img_ctx.image_idx, self.method) - results = {i:results[i] if i in results else None for i in box_idxs} - pb = rebuild or not results or any(r is None for r in results.values()) - if pb and len(results) > 2: - progress_bar = tqdm(list(results.keys()), desc=f"{self.method.value} - {model}") - else: - progress_bar = list(results.keys()) - results = [] - for i in progress_bar: - results.append(self.result(i, ocr, rebuild=rebuild)) - return results - - - def get_results_html(self, - box_idxs: BoxIdT | list[BoxIdT] | None = None, - max_image_width: int | None = None): - _, img_ctx, exp_ctx = self.ctxs - results: list[ResultOCR] = self.results(box_idxs) - accs = np.array([r.acc for r in results]) - mean_accuracy = np.mean(accs) - mean_trimmed = trimmed_mean(accs, 0.1) - # filtered_data = mad_based_outlier(accs) - # mean_mad = np.mean(filtered_data) - # filtered_data = iqr_outlier_removal(accs) - # mean_iqr = np.mean(filtered_data) - - descriptions, images, ocrs, accs = zip(*map( - lambda r: ( - r.block_idx+1, - r.cache_image(), - r.diff_tagged(), - acc_as_html(r.acc) - ), results)) - non_breakin_space = u'\u00A0' - tmpl = "{}" - padded_s = lambda s,n: tmpl.format(s.rjust(n)) - acc_fmt = f"{mean_accuracy:.2f}/{mean_trimmed:.2f}" - w, h = img_ctx.base_image.size - dim, _dpi = size(w, h), dpi(w, h) - dim_fmt = f"{w}x{h} px: {dim[0]:.2f} x 
{dim[1]:.2f} in @ {_dpi:.2f} dpi" - return '\n
\n'.join([ - ("
" - f"{padded_s('Page', 24)}: {img_ctx.page_data.original_path}
" - f"{padded_s('Size', 24)}: {dim_fmt}
" - f"{padded_s('Model', 24)}: {exp_ctx.ocr_model}
" - f"{padded_s('Crop Method', 24)}: {self.method.value}
" - f"{padded_s('Accuracy Mean/Trimmed', 24)}: {acc_fmt}" - "
"), - get_columns_html( - [descriptions, images, accs, ocrs], - max_image_width, - headers=["Box #", "Image", "Accuracy", "OCR"]), - ]) - - def display(self, - box_idxs: BoxIdT | list[BoxIdT] | None = None, max_image_width: int | None = None): - display(HTML(self.get_results_html(box_idxs, max_image_width))) - - - def summary(self): - results = self.results() - methods, images, ocrs, accs = zip( - *map( - lambda r: (r.block_idx+1, r.cache_image(), r.diff_tagged(), acc_as_html(r.acc)), - results)) - display_columns([methods, images, accs, ocrs], - headers=["Box #", "Box", "Accuracy", f"{self.method.value} OCR"]) - - - def reset(self): - _, _, exp_ctx = self.ctxs - exp_ctx.reset(method=self.method) - - def __call__(self, box_idxs: BoxIdT | list[BoxIdT] | None = None, display=False, rebuild=False): - if isinstance(box_idxs, int): - result = self.result(cast(BoxIdT, box_idxs), rebuild=rebuild) - if result is not None and display: - result.display() - else: - results = self.results(box_idxs, rebuild=rebuild) - if results and display: - self.display(box_idxs) - - -# %% experiments.ipynb 141 -class ResultVisor(ContextVisor): - ctx: ExperimentOCR - control_names: list[str] = ['all_boxes', 'box_idx', 'all_methods', 'method'] - - _css = """ - .box_grp { - background-color: aliceblue; - } - .method_grp { - background-color: #ededed; - } - """ - - def best_results(self): - ll = self.ctx.best_results() - if ll: - cprint([(m.value, f"{r.acc:.3f}", r.ocr) for m,r in ll]) - - def pd_to_html(self): - df = self.ctx.to_dataframe() - # set float precision - df = df.round(3) - # display floats with 3 decimal digits - df = df.applymap(lambda x: f"{x:.3f}") - # highlight max value in each row - stl = df.style.highlight_max(axis=0) - display(HTML(stl.to_html())) - - def update_output(self, **kwargs): - all_boxes: bool = self.values['all_boxes'] - box_idx: int = self.values['box_idx'] - all_methods: bool = self.values['all_methods'] - method: CropMethod = self.values['method'] - - # 
cprint(f"all_boxes: {all_boxes}, box_idx: {box_idx}, all_methods: {all_methods}, method: {method}") - - if all_boxes and all_methods: - self.ctx.plot_accuracies() - elif all_boxes: - self.ctx.summary_method(method) - elif all_methods: - self.ctx.summary_box(box_idx) - else: - result = self.ctx.result(box_idx, method) - if result is not None: - result.display() - - def setup_controls(self): - _, img_ctx = self.ctx.ctxs - values = self.values - box_wdgt = W.BoundedIntText( - value=values['box_idx'], min=0, max=len(img_ctx.boxes)-1, step=1, - disabled=values['all_boxes'], - layout={'width': '50px'}, - style={'description_width': 'initial'}) - methods_wdgt = W.Dropdown( - options=CropMethod.__display_names__(), - value=values['method'], - layout={'width': '150px'}, - style={'description_width': 'initial'}) - all_boxes_wdgt = W.Checkbox(label='All', value=values['all_boxes'], - description="all", - layout={'width': 'initial'}, - style={'description_width': '0px'}) - all_methods_wdgt = W.Checkbox(label='All', value=values['all_methods'], - description="all", - layout={'width': 'initial'}, - style={'description_width': '0px'}) - return {'all_boxes': all_boxes_wdgt, 'box_idx': box_wdgt, - 'all_methods': all_methods_wdgt, 'method': methods_wdgt} - - def setup_ui(self): - ctls = self.controls - _, img_ctx = self.ctx.ctxs - box_label = W.Label( - value=f"Box # (of {len(img_ctx.boxes)}):", - layout={'width': 'initial', 'padding': '0px 0px 0px 10px'}) - method_label = W.Label(value='Method:', layout={'width': 'initial', 'padding': '0px 0px 0px 10px'}) - - box_grp = W.HBox([box_label, ctls['all_boxes'], ctls['box_idx']]) - box_grp.add_class('box_grp') - method_grp = W.HBox([method_label, ctls['all_methods'], ctls['method']]) - method_grp.add_class('method_grp') - - return W.HBox([box_grp, method_grp]) - - def __init__(self, - ctx: OCRExperimentContext | ExperimentOCR, - img_idx: int | str | Path | None = None, - all_boxes: bool = False, - box_idx: int = 0, - all_methods: bool = 
False, - method: CropMethod=CropMethod.INITIAL_BOX, - out: W.Output | None = None, - ): - if isinstance(ctx, OCRExperimentContext): - assert img_idx is not None, "img_idx must be provided if ctx is an ExperimentContext" - exp = ExperimentOCR.from_image(ctx, 'Tesseract', img_idx) - if not exp: - raise ValueError(f"Image {img_idx} not found in experiment context") - ctx = exp - else: - if not isinstance(ctx, ExperimentOCR): - raise ValueError("ctx must be an ExperimentOCR or OCRExperimentContext") - - super().__init__(ctx, {'all_boxes': all_boxes, 'box_idx': box_idx, - 'all_methods': all_methods, 'method': method}, out=out or self.out) - - -# %% experiments.ipynb 144 -class ExperimentVisor(ContextVisor): - ctx: ExperimentOCR - - def update_output(self, - image_idx: int | None = None, - **kwargs): - exp_ctx, img_ctx = self.ctx.ctxs - if image_idx is not None and image_idx != img_ctx.image_idx: - ctx = ImageContext(exp_ctx, image_idx) - assert ctx is not None - self.ctx.ctx = ctx - result_visor = self.comp('result_visor') - if result_visor is not None: - result_visor.update_output(**kwargs) - - def __init__(self, - ctx: OCRExperimentContext | ExperimentOCR, - img_idx: int | str | Path | None = None, - all_boxes: bool = False, - box_idx: int = 0, - all_methods: bool = False, - method: CropMethod=CropMethod.INITIAL_BOX, - out: W.Output | None = None, - ): - if isinstance(ctx, OCRExperimentContext): - assert img_idx is not None, "img_idx must be provided if ctx is an ExperimentContext" - exp = ExperimentOCR.from_image(ctx, 'Tesseract', img_idx) - if not exp: - raise ValueError(f"Image {img_idx} not found in experiment context") - ctx = exp - else: - if not issubclass(type(ctx), ExperimentOCR): - raise ValueError("ctx must be an ExperimentOCR or OCRExperimentContext") - - exp_ctx, img_ctx = ctx.ctxs - out = out or self.out - image_selector = ImageSelector(exp_ctx, image_idx=img_ctx.image_idx, out=out) - result_visor = ResultVisor(ctx, out=out, - all_boxes=all_boxes, 
box_idx=box_idx, all_methods=all_methods, method=method) - - super().__init__(ctx, {}, out=out, - ctxs={'image_selector': image_selector, 'result_visor': result_visor}, - hdlrs={'display_option': result_visor} - ) - - -# %% experiments.ipynb 198 -class ExperimentsVisor(ContextVisor): - ctx: OCRExperimentContext - - def update_output(self, - model: OCRModel | None = None, - image_idx: ImgIdT | None = None, - display_option: DisplayOptions | None = None, - **kwargs): - model_selector, image_selector, content_selector, result_visor = self._comps() - if model is not None: - exp_ctx = result_visor.ctx - exp_ctx.ocr_model = list(model_selector.models.keys())[model.value] - result_visor.ctx = exp_ctx - if image_idx is not None: - img_ctx = ImageContext(self.ctx, image_idx) - result_visor.ctx.ctx = img_ctx - display_option = content_selector.values['display_option'] - if display_option is not None and display_option != DisplayOptions.RESULTS: - result_visor.hide() - if display_option == DisplayOptions.BEST_RESULTS: - result_visor.best_results() - elif display_option == DisplayOptions.DATAFRAME: - result_visor.pd_to_html() - else: - content_selector.display_content(image_selector.image_ctx, display_option) - else: - result_visor.show() - result_visor.update_output(**kwargs) - - def _comps(self): - cc = self.comps - msel: ModelSelector = cc['model_selector'] # type: ignore - isel: ImageSelector = cc['image_selector'] # type: ignore - cs: ContentSelector = cc['content_selector'] # type: ignore - rv: ResultVisor = cc['result_visor'] # type: ignore - return msel, isel, cs, rv - - def setup_ui(self): - ctls = self.controls.values() - msw, isw, csw, rvw = [_.w for _ in self._comps()] - return W.VBox([W.HBox([msw, isw, csw, *ctls]), rvw,]) - - def __init__(self, - ctx: OCRExperimentContext, - image_idx: ImgIdT | str | Path = 0, - ocr_model: OCRModel = OCRModel.TESSERACT, - display_option: DisplayOptions = DisplayOptions.RESULTS, - all_boxes: bool = False, - box_idx: int = 0, - 
all_methods: bool = False, - method: CropMethod=CropMethod.INITIAL_BOX, - ocr_models: dict[str, OCRModel] = {'Tesseract': OCRModel.TESSERACT}, - out: W.Output | None = None, - ): - if not isinstance(ctx, OCRExperimentContext): - raise ValueError("ctx must be an OCRExperimentContext") - exp = ExperimentOCR.from_image(ctx, 'Tesseract', image_idx) - if not exp: - raise ValueError(f"Image {image_idx} not found in experiment context") - - out = out or self.out - model_selector = ModelSelector(ctx, ocr_model=ocr_model, - ocr_models=ocr_models, out=out) - image_selector = ImageSelector(ctx, image_idx=image_idx, out=out) - content_selector = ContentSelector(ctx, display_option=display_option, out=out) - result_visor = ResultVisor(exp, out=out, - all_boxes=all_boxes, box_idx=box_idx, all_methods=all_methods, method=method) - - super().__init__(ctx, {}, out=out, - ctxs={'model_selector': model_selector, 'image_selector': image_selector, 'content_selector': content_selector, - 'result_visor': result_visor} - ) - diff --git a/_testbed/helpers.ipynb b/_testbed/helpers.ipynb deleted file mode 100644 index 32a6fa6d..00000000 --- a/_testbed/helpers.ipynb +++ /dev/null @@ -1,835 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp helpers" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "# %reload_ext autoreload\n", - "# %autoreload 0\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# install (Colab)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# try: \n", - "# import fastcore as FC\n", - "# except ImportError: \n", - "# !pip install -q fastcore\n", - "# try:\n", - "# import rich\n", - "# except ImportError:\n", - "# !pip install -q rich\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - 
"outputs": [], - "source": [ - "# !pip install -q git+https://github.com/civvic/PanelCleaner.git@basic-tesseract" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Testing `Tesseract` OCR for Comics\n", - "> Accuracy Enhancements for OCR in `PanelCleaner`\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Prologue" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "from __future__ import annotations\n", - "\n", - "import base64\n", - "import json\n", - "import re\n", - "import sys\n", - "import uuid\n", - "from importlib import resources\n", - "from io import BytesIO\n", - "from pathlib import Path\n", - "from typing import Any\n", - "from typing import Iterable\n", - "from typing import Mapping\n", - "from typing import Sequence\n", - "\n", - "import pcleaner.data\n", - "import pcleaner.structures as st\n", - "from IPython.display import clear_output\n", - "from IPython.display import display\n", - "from IPython.display import HTML\n", - "from PIL import Image\n", - "from PIL import ImageDraw\n", - "from PIL import ImageFont\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "from collections import defaultdict\n", - "\n", - "import fastcore.xtras # patch Path with some utils\n", - "import ipywidgets as W\n", - "import rich\n", - "from fastcore.test import * # type: ignore\n", - "from rich.console import Console\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Helpers" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# pretty print by default\n", - "# %load_ext rich" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "console = Console(width=104, tab_size=4, force_jupyter=True)\n", - "cprint = console.print\n" - ] - }, 
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## dict helpers: _pops_\n" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "_all_ = ['_pops_', '_pops_values_', '_gets_']\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def _pops_(d: dict, ks: Iterable) -> dict: \n", - " \"Pops `ks` keys from `d` and returns them in a dict. Note: `d` is changed in-place.\"\n", - " return {k:d.pop(k) for k in ks if k in d}\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "test_eq(_pops_({'a': 1, 'b': 2, 'c': 3}, ['a', 'b']), {'a': 1, 'b': 2})\n", - "test_eq(_pops_({'a': 1, 'b': 2, 'c': 3}, ['d']), {})\n", - "test_eq(_pops_({'a': 1, 'b': 2, 'c': 3}, ['a', 'c', 'd']), {'a': 1, 'c': 3})\n", - "test_eq(_pops_({}, ['a']), {})\n", - "test_eq(_pops_({'a': 1}, ['a', 'a']), {'a': 1})\n" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def _pops_values_(d: dict, ks: Iterable) -> tuple:\n", - " \"Pops `ks` keys from `d` and returns them as a tuple. 
Note: `d` is changed in-place.\"\n", - " return tuple(d.pop(k, None) for k in ks)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "test_eq(_pops_values_({'a': 1, 'b': 2, 'c': 3}, ['a', 'b']), (1, 2))\n", - "test_eq(_pops_values_({'a': 1, 'b': 2, 'c': 3}, ['d']), (None,))\n", - "test_eq(_pops_values_({'a': 1, 'b': 2, 'c': 3}, ['a', 'c', 'd']), (1, 3, None))\n", - "test_eq(_pops_values_({}, ['a']), (None,))\n", - "test_eq(_pops_values_({'a': 1}, ['a', 'a']), (1, None))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def _gets_(d: Mapping[str, Any], ks: Iterable):\n", - " \"Fetches values from a mapping for a given list of keys, returning `None` for missing keys.\"\n", - " return (d.get(k, None) for k in ks)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "test_eq(_gets_({'a': 1, 'b': 2}, ('a', 'c', 'b')), [1, None, 2])\n", - "test_eq(_gets_({'a': 1, 'b': 2}, ()), [])\n", - "a, b = _gets_({'a': 1, 'b': 2}, ('b', 'a'))\n", - "test_eq((a, b), (2, 1))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## cleanupwidget\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "def _get_globals(mod: str):\n", - " if hasattr(sys, '_getframe'):\n", - " glb = sys._getframe(2).f_globals\n", - " else:\n", - " glb = sys.modules[mod].__dict__\n", - " return glb\n" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "# _all_ = ['_get_globals']\n", - "def _gtest():\n", - " return _get_globals(__name__)\n", - "g1 = _gtest()\n", - "g2 = globals()\n", - "test_eq(g1, g2)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def 
cleanupwidgets(*ws, mod: str|None=None, clear=True):\n", - " glb = _get_globals(mod or __name__)\n", - " if clear: clear_output(wait=True)\n", - " for w in ws:\n", - " _w = glb.get(w) if isinstance(w, str) else w\n", - " if _w:\n", - " try: _w.close() # type: ignore\n", - " except: pass\n" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "_b = W.Button()\n", - "test_ne(_b.comm, None)\n", - "cleanupwidgets('_b')\n", - "test_is(_b.comm, None)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Collapsable JSON in a notebook cell" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "class RenderJSON(object):\n", - " def __init__(self, json_data, max_height=200, init_level=0):\n", - " if isinstance(json_data, (Sequence, Mapping)):\n", - " s = json.dumps(json_data)\n", - " elif hasattr(json_data, 'to_dict'):\n", - " s = json.dumps(json_data.to_dict())\n", - " elif hasattr(json_data, 'to_json'):\n", - " s = json_data.to_json()\n", - " else:\n", - " s = json_data\n", - " self.json_str = s\n", - " self.uuid = str(uuid.uuid4())\n", - " self.max_height = max_height\n", - " self.init_level = init_level\n", - "\n", - " def display(self):\n", - " html_content = f\"\"\"\n", - "
\n", - "
\n", - " \n", - "
\n", - " \"\"\"\n", - " display(HTML(html_content))\n", - "\n", - " def _ipython_display_(self):\n", - " self.display()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "json_data = {\n", - " \"name\": \"Petronila\",\n", - " \"age\": 28,\n", - " \"interests\": [\"reading\", \"cycling\", \"technology\"],\n", - " \"education\": {\n", - " \"bachelor\": \"Computer Science\",\n", - " \"master\": \"Data Science\",\n", - " \"phd\": \"Not enrolled\"\n", - " }\n", - "}\n", - "\n", - "RenderJSON(json_data, init_level=1).display()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Visualize boxes on the page image\n", - "> adapted from `PageData.visualize` but returns the image instead of saving it." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def page_boxes(self: st.PageData, out_dir: Path | None = None) -> tuple[Image.Image, Path]:\n", - " \"\"\"\n", - " Visualize the boxes on an image.\n", - " Typically, this would be used to check where on the original image the\n", - " boxes are located.\n", - "\n", - " :param image_path: The path to the image to visualize the boxes on.\n", - " \"\"\"\n", - " image_path = Path(self.image_path)\n", - " image = Image.open(image_path)\n", - " draw = ImageDraw.Draw(image)\n", - " data_path = resources.files(pcleaner.data)\n", - " font_path = str(data_path / \"LiberationSans-Regular.ttf\")\n", - " # Figure out the optimal font size based on the image size. E.g. 
30 for a 1600px image.\n", - " font_size = int(image.size[0] / 50) + 5\n", - "\n", - " for index, box in enumerate(self.boxes):\n", - " draw.rectangle(box.as_tuple, outline=\"green\")\n", - " # Draw the box number, with a white background, respecting font size.\n", - " draw.text(\n", - " (box.x1 + 4, box.y1),\n", - " str(index + 1),\n", - " fill=\"green\",\n", - " font=ImageFont.truetype(font_path, font_size),\n", - " stroke_fill=\"white\",\n", - " stroke_width=3,\n", - " )\n", - "\n", - " for box in self.extended_boxes:\n", - " draw.rectangle(box.as_tuple, outline=\"red\")\n", - " for box in self.merged_extended_boxes:\n", - " draw.rectangle(box.as_tuple, outline=\"purple\")\n", - " for box in self.reference_boxes:\n", - " draw.rectangle(box.as_tuple, outline=\"blue\")\n", - "\n", - " # Save the image.\n", - " extension = \"_boxes\"\n", - " out_path = image_path.with_stem(image_path.stem + extension)\n", - " if out_dir is not None:\n", - " out_dir.mkdir(parents=True, exist_ok=True)\n", - " out_path = out_dir / image_path.name\n", - " image.save(out_path)\n", - "\n", - " return image, out_path" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Simple crop" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def crop_box(box: st.Box, image: Image.Image) -> Image.Image:\n", - " return image.crop(box.as_tuple)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Print size & resolution" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "PRINT_FORMATS = {\n", - " 'Golden Age': (7.75, 10.5), # (1930s-40s) \n", - " 'Siver Age': (7, 10.375), # (1950s-60s)\n", - " 'Modern Age': (6.625,10.25), # North American comic books\n", - " 'Magazine': (8.5, 11), \n", - " 'Digest': (5.5, 8.5), \n", - " 'Manga': (5.0, 7.5),\n", - "}\n", - "\n", - "\n", - "def 
size(w: int, h: int, unit: str = 'in', dpi: float = 300.) -> tuple:\n", - " \"\"\"\n", - " Calculate the print size of an image in inches or centimeters.\n", - "\n", - " Args:\n", - " w (int): Width of the image in pixels.\n", - " h (int): Height of the image in pixels.\n", - " unit (str): Unit of measurement ('in' for inches, 'cm' for centimeters).\n", - " dpi (float): Dots per inch (resolution).\n", - "\n", - " Returns:\n", - " tuple: Width and height of the image in the specified unit.\n", - " \"\"\"\n", - " if unit == 'cm':\n", - " return (w / dpi * 2.54, h / dpi * 2.54)\n", - " else: # default to inches\n", - " return (w / dpi, h / dpi)\n", - "\n", - "\n", - "def dpi(w: int, h: int, print_format: str = 'Modern Age') -> float:\n", - " \"\"\"\n", - " Calculate the dpi (dots per inch) needed to print an image at a specified format size.\n", - "\n", - " Args:\n", - " w (int): Width of the image in pixels.\n", - " h (int): Height of the image in pixels.\n", - " print_format (str): Print format as defined in the formats dictionary.\n", - "\n", - " Returns:\n", - " float: Required dpi to achieve the desired print format size.\n", - " \"\"\"\n", - " # Default to 'Modern Age' if format not found\n", - " format_size = PRINT_FORMATS.get(print_format, PRINT_FORMATS['Modern Age'])\n", - " width_inch, height_inch = format_size\n", - " dpi_w = w / width_inch\n", - " dpi_h = h / height_inch\n", - " return (dpi_w + dpi_h) / 2 # Average dpi for width and height\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Show images and texts on HTML tables" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def get_image_html(image: Image.Image | Path | str, max_width: int | None = None):\n", - " \"\"\"\n", - " Converts a PIL image to an HTML image tag containing the image as a base64 blob.\n", - "\n", - " :param image: A PIL Image object.\n", - " :param max_size: A PIL 
Image object.\n", - " :return: A string containing an HTML tag with the image.\n", - " \"\"\"\n", - " style = f' style=\"max-width: {max_width}px;\"' if max_width is not None else ''\n", - " if isinstance(image, (Path, str)):\n", - " return f''\n", - " else:\n", - " buffered = BytesIO()\n", - " image.save(buffered, format='PNG')\n", - " img_str = base64.b64encode(buffered.getvalue()).decode()\n", - " return f''\n", - "\n", - "\n", - "def get_columns_html(\n", - " columns: list[list], max_image_width: int | None = None, headers: list[str] | None = None\n", - "):\n", - " if not all(len(col) == len(columns[0]) for col in columns):\n", - " raise ValueError(\"All columns must have the same length.\")\n", - "\n", - " # Calculate the maximum width of images in each column\n", - " max_widths = []\n", - " for col_index in range(len(columns)):\n", - " max_col_width = 0\n", - " for item in columns[col_index]:\n", - " if isinstance(item, (Image.Image, Path)):\n", - " if isinstance(item, (Path, str)):\n", - " item = Image.open(item)\n", - " width, _ = item.size\n", - " max_col_width = max(max_col_width, width)\n", - " if max_col_width > 0:\n", - " max_widths.append(\n", - " f\"{min(max_col_width, max_image_width)}px\"\n", - " if max_image_width is not None else \n", - " f\"{max_col_width}px\"\n", - " )\n", - " else:\n", - " max_widths.append('auto')\n", - "\n", - " html_str = \"\"\n", - "\n", - " # Apply calculated column widths using and elements\n", - " html_str += \"\"\n", - " for width in max_widths:\n", - " html_str += f\"\"\n", - " html_str += \"\"\n", - "\n", - " if headers:\n", - " if len(headers) != len(columns):\n", - " raise ValueError(\"Headers list must match the number of columns.\")\n", - " html_str += (\n", - " \"\"\n", - " + \"\".join(\n", - " f\"\"\n", - " for header in headers\n", - " )\n", - " + \"\"\n", - " )\n", - "\n", - " for row_items in zip(*columns):\n", - " html_str += \"\"\n", - " for i, item in enumerate(row_items):\n", - " if isinstance(item, 
(Image.Image, Path)):\n", - " img_html = get_image_html(item, max_width=max_image_width)\n", - " html_str += f\"\"\n", - " else: # Assume the item is a string\n", - " style = \"font-weight: bold;\" if i == 0 else \"\"\n", - " html_str += f\"\"\n", - " html_str += \"\"\n", - "\n", - " html_str += \"
{header}
{img_html}{item}
\"\n", - " return html_str\n", - "\n", - "\n", - "def display_columns(\n", - " columns: list[list], max_image_width: int | None = None, headers: list[str] | None = None\n", - "):\n", - " \"\"\"\n", - " Displays a table with any combination of columns, which can be lists of strings or lists \n", - " of PIL Image objects, within a Jupyter notebook cell.\n", - "\n", - " :param columns: A list of lists, where each sublist represents a column in the table. \n", - " Each sublist can contain either strings or PIL Image objects.\n", - " :param max_image_width: The maximum size of the images in pixels. This controls the max-height \n", - " of the images.\n", - " :param headers: A list of header labels for the table. If None, no headers are displayed.\n", - " \"\"\"\n", - " return display(HTML(get_columns_html(columns, max_image_width, headers)))\n", - "\n", - "\n", - "def get_image_grid_html(\n", - " images: list[Image.Image | Path | str],\n", - " rows: int,\n", - " columns: int,\n", - " titles: list[str] | None = None,\n", - " max_image_width: int | None = None,\n", - " caption: str | None = None\n", - "):\n", - " if titles and len(titles) != len(images):\n", - " raise ValueError(\"Titles list must match the number of images if provided.\")\n", - "\n", - " html_str = \"\"\n", - "\n", - " if caption:\n", - " html_str += (f\"\")\n", - "\n", - " image_index = 0\n", - " for row in range(rows):\n", - " html_str += \"\"\n", - " for col in range(columns):\n", - " if image_index < len(images):\n", - " img_html = get_image_html(images[image_index], max_width=max_image_width)\n", - " title_html = (\n", - " f\"
{titles[image_index]}
\"\n", - " if titles\n", - " else \"\"\n", - " )\n", - " html_str += f\"\"\n", - " else:\n", - " html_str += \"\" # Empty cell if no more images\n", - " image_index += 1\n", - " html_str += \"\"\n", - "\n", - " html_str += \"
{caption}
{title_html}{img_html}
\"\n", - " return html_str\n", - "\n", - "\n", - "def display_image_grid(\n", - " images: list[Image.Image | Path | str],\n", - " rows: int,\n", - " columns: int,\n", - " titles: list[str] | None = None,\n", - " max_image_width: int | None = None,\n", - " caption: str | None = None,\n", - "):\n", - " \"\"\"\n", - " Displays a grid of images in a HTML table within a Jupyter notebook cell.\n", - "\n", - " :param images: A list of PIL Image objects to be displayed.\n", - " :param rows: The number of rows in the grid.\n", - " :param columns: The number of columns in the grid.\n", - " :param titles: An optional list of titles for each image. If provided, it must match the length \n", - " of the images list.\n", - " :param max_image_width: The maximum width of the images in pixels.\n", - " \"\"\"\n", - " display(HTML(get_image_grid_html(images, rows, columns, titles, max_image_width, caption)))\n", - "\n", - "\n", - "def acc_as_html(acc):\n", - " return f\"
{acc:.2f}
\"\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## UUIDs" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def strip_uuid(p: Path | str):\n", - " _p: Path = p if isinstance(p, Path) else Path(p)\n", - " new_stem = re.sub(r'(?i)[a-f0-9]{8}-([a-f0-9]{4}-){3}[a-f0-9]{12}', '', _p.stem).strip('_')\n", - " return _p.with_stem(new_stem)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Path('a/b/c/Strange Tales 172_boxes.png')" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "strip_uuid(Path(\"a/b/c/ac265dc1-51a0-46ca-9101-7195cbad33f2_Strange Tales 172_boxes.png\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Other" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "# Deep copy a defaultdict of defaultdicts to a dict of dicts if it is not already a dict\n", - "def defaultdict_to_dict(d) -> dict:\n", - " if not isinstance(d, defaultdict):\n", - " return d\n", - " return {k: defaultdict_to_dict(v) for k, v in d.items()}\n" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "aaa\n" - ] - } - ], - "source": [ - "print('aaa')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Colophon\n", - "----\n" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "import fastcore.all as FC\n", - "from nbdev.export import nb_export\n" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "if FC.IN_NOTEBOOK:\n", - " nb_export('helpers.ipynb', '.')\n" - ] - } - ], - "metadata": { - 
"kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/_testbed/helpers.py b/_testbed/helpers.py deleted file mode 100644 index a7095701..00000000 --- a/_testbed/helpers.py +++ /dev/null @@ -1,372 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: helpers.ipynb. - -# %% helpers.ipynb 7 -from __future__ import annotations - -import base64 -import json -import re -import sys -import uuid -from importlib import resources -from io import BytesIO -from pathlib import Path -from typing import Any -from typing import Iterable -from typing import Mapping -from typing import Sequence - -import pcleaner.data -import pcleaner.structures as st -from IPython.display import clear_output -from IPython.display import display -from IPython.display import HTML -from PIL import Image -from PIL import ImageDraw -from PIL import ImageFont - - -# %% auto 0 -__all__ = ['PRINT_FORMATS', 'cleanupwidgets', 'RenderJSON', 'page_boxes', 'crop_box', 'size', 'dpi', 'get_image_html', - 'get_columns_html', 'display_columns', 'get_image_grid_html', 'display_image_grid', 'acc_as_html', - 'strip_uuid', '_pops_', '_pops_values_', '_gets_'] - -# %% helpers.ipynb 13 -_all_ = ['_pops_', '_pops_values_', '_gets_'] - - -# %% helpers.ipynb 14 -def _pops_(d: dict, ks: Iterable) -> dict: - "Pops `ks` keys from `d` and returns them in a dict. Note: `d` is changed in-place." - return {k:d.pop(k) for k in ks if k in d} - - -# %% helpers.ipynb 16 -def _pops_values_(d: dict, ks: Iterable) -> tuple: - "Pops `ks` keys from `d` and returns them as a tuple. Note: `d` is changed in-place." 
- return tuple(d.pop(k, None) for k in ks) - - -# %% helpers.ipynb 18 -def _gets_(d: Mapping[str, Any], ks: Iterable): - "Fetches values from a mapping for a given list of keys, returning `None` for missing keys." - return (d.get(k, None) for k in ks) - - -# %% helpers.ipynb 21 -def _get_globals(mod: str): - if hasattr(sys, '_getframe'): - glb = sys._getframe(2).f_globals - else: - glb = sys.modules[mod].__dict__ - return glb - - -# %% helpers.ipynb 23 -def cleanupwidgets(*ws, mod: str|None=None, clear=True): - glb = _get_globals(mod or __name__) - if clear: clear_output(wait=True) - for w in ws: - _w = glb.get(w) if isinstance(w, str) else w - if _w: - try: _w.close() # type: ignore - except: pass - - -# %% helpers.ipynb 26 -class RenderJSON(object): - def __init__(self, json_data, max_height=200, init_level=0): - if isinstance(json_data, (Sequence, Mapping)): - s = json.dumps(json_data) - elif hasattr(json_data, 'to_dict'): - s = json.dumps(json_data.to_dict()) - elif hasattr(json_data, 'to_json'): - s = json_data.to_json() - else: - s = json_data - self.json_str = s - self.uuid = str(uuid.uuid4()) - self.max_height = max_height - self.init_level = init_level - - def display(self): - html_content = f""" -
-
- -
- """ - display(HTML(html_content)) - - def _ipython_display_(self): - self.display() - -# %% helpers.ipynb 29 -def page_boxes(self: st.PageData, out_dir: Path | None = None) -> tuple[Image.Image, Path]: - """ - Visualize the boxes on an image. - Typically, this would be used to check where on the original image the - boxes are located. - - :param image_path: The path to the image to visualize the boxes on. - """ - image_path = Path(self.image_path) - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - data_path = resources.files(pcleaner.data) - font_path = str(data_path / "LiberationSans-Regular.ttf") - # Figure out the optimal font size based on the image size. E.g. 30 for a 1600px image. - font_size = int(image.size[0] / 50) + 5 - - for index, box in enumerate(self.boxes): - draw.rectangle(box.as_tuple, outline="green") - # Draw the box number, with a white background, respecting font size. - draw.text( - (box.x1 + 4, box.y1), - str(index + 1), - fill="green", - font=ImageFont.truetype(font_path, font_size), - stroke_fill="white", - stroke_width=3, - ) - - for box in self.extended_boxes: - draw.rectangle(box.as_tuple, outline="red") - for box in self.merged_extended_boxes: - draw.rectangle(box.as_tuple, outline="purple") - for box in self.reference_boxes: - draw.rectangle(box.as_tuple, outline="blue") - - # Save the image. 
- extension = "_boxes" - out_path = image_path.with_stem(image_path.stem + extension) - if out_dir is not None: - out_dir.mkdir(parents=True, exist_ok=True) - out_path = out_dir / image_path.name - image.save(out_path) - - return image, out_path - -# %% helpers.ipynb 31 -def crop_box(box: st.Box, image: Image.Image) -> Image.Image: - return image.crop(box.as_tuple) - -# %% helpers.ipynb 33 -PRINT_FORMATS = { - 'Golden Age': (7.75, 10.5), # (1930s-40s) - 'Siver Age': (7, 10.375), # (1950s-60s) - 'Modern Age': (6.625,10.25), # North American comic books - 'Magazine': (8.5, 11), - 'Digest': (5.5, 8.5), - 'Manga': (5.0, 7.5), -} - - -def size(w: int, h: int, unit: str = 'in', dpi: float = 300.) -> tuple: - """ - Calculate the print size of an image in inches or centimeters. - - Args: - w (int): Width of the image in pixels. - h (int): Height of the image in pixels. - unit (str): Unit of measurement ('in' for inches, 'cm' for centimeters). - dpi (float): Dots per inch (resolution). - - Returns: - tuple: Width and height of the image in the specified unit. - """ - if unit == 'cm': - return (w / dpi * 2.54, h / dpi * 2.54) - else: # default to inches - return (w / dpi, h / dpi) - - -def dpi(w: int, h: int, print_format: str = 'Modern Age') -> float: - """ - Calculate the dpi (dots per inch) needed to print an image at a specified format size. - - Args: - w (int): Width of the image in pixels. - h (int): Height of the image in pixels. - print_format (str): Print format as defined in the formats dictionary. - - Returns: - float: Required dpi to achieve the desired print format size. 
- """ - # Default to 'Modern Age' if format not found - format_size = PRINT_FORMATS.get(print_format, PRINT_FORMATS['Modern Age']) - width_inch, height_inch = format_size - dpi_w = w / width_inch - dpi_h = h / height_inch - return (dpi_w + dpi_h) / 2 # Average dpi for width and height - - -# %% helpers.ipynb 35 -def get_image_html(image: Image.Image | Path | str, max_width: int | None = None): - """ - Converts a PIL image to an HTML image tag containing the image as a base64 blob. - - :param image: A PIL Image object. - :param max_size: A PIL Image object. - :return: A string containing an HTML tag with the image. - """ - style = f' style="max-width: {max_width}px;"' if max_width is not None else '' - if isinstance(image, (Path, str)): - return f'' - else: - buffered = BytesIO() - image.save(buffered, format='PNG') - img_str = base64.b64encode(buffered.getvalue()).decode() - return f'' - - -def get_columns_html( - columns: list[list], max_image_width: int | None = None, headers: list[str] | None = None -): - if not all(len(col) == len(columns[0]) for col in columns): - raise ValueError("All columns must have the same length.") - - # Calculate the maximum width of images in each column - max_widths = [] - for col_index in range(len(columns)): - max_col_width = 0 - for item in columns[col_index]: - if isinstance(item, (Image.Image, Path)): - if isinstance(item, (Path, str)): - item = Image.open(item) - width, _ = item.size - max_col_width = max(max_col_width, width) - if max_col_width > 0: - max_widths.append( - f"{min(max_col_width, max_image_width)}px" - if max_image_width is not None else - f"{max_col_width}px" - ) - else: - max_widths.append('auto') - - html_str = "" - - # Apply calculated column widths using and elements - html_str += "" - for width in max_widths: - html_str += f"" - html_str += "" - - if headers: - if len(headers) != len(columns): - raise ValueError("Headers list must match the number of columns.") - html_str += ( - "" - + "".join( - f"" - for 
header in headers - ) - + "" - ) - - for row_items in zip(*columns): - html_str += "" - for i, item in enumerate(row_items): - if isinstance(item, (Image.Image, Path)): - img_html = get_image_html(item, max_width=max_image_width) - html_str += f"" - else: # Assume the item is a string - style = "font-weight: bold;" if i == 0 else "" - html_str += f"" - html_str += "" - - html_str += "
{header}
{img_html}{item}
" - return html_str - - -def display_columns( - columns: list[list], max_image_width: int | None = None, headers: list[str] | None = None -): - """ - Displays a table with any combination of columns, which can be lists of strings or lists - of PIL Image objects, within a Jupyter notebook cell. - - :param columns: A list of lists, where each sublist represents a column in the table. - Each sublist can contain either strings or PIL Image objects. - :param max_image_width: The maximum size of the images in pixels. This controls the max-height - of the images. - :param headers: A list of header labels for the table. If None, no headers are displayed. - """ - return display(HTML(get_columns_html(columns, max_image_width, headers))) - - -def get_image_grid_html( - images: list[Image.Image | Path | str], - rows: int, - columns: int, - titles: list[str] | None = None, - max_image_width: int | None = None, - caption: str | None = None -): - if titles and len(titles) != len(images): - raise ValueError("Titles list must match the number of images if provided.") - - html_str = "" - - if caption: - html_str += (f"") - - image_index = 0 - for row in range(rows): - html_str += "" - for col in range(columns): - if image_index < len(images): - img_html = get_image_html(images[image_index], max_width=max_image_width) - title_html = ( - f"
{titles[image_index]}
" - if titles - else "" - ) - html_str += f"" - else: - html_str += "" # Empty cell if no more images - image_index += 1 - html_str += "" - - html_str += "
{caption}
{title_html}{img_html}
" - return html_str - - -def display_image_grid( - images: list[Image.Image | Path | str], - rows: int, - columns: int, - titles: list[str] | None = None, - max_image_width: int | None = None, - caption: str | None = None, -): - """ - Displays a grid of images in a HTML table within a Jupyter notebook cell. - - :param images: A list of PIL Image objects to be displayed. - :param rows: The number of rows in the grid. - :param columns: The number of columns in the grid. - :param titles: An optional list of titles for each image. If provided, it must match the length - of the images list. - :param max_image_width: The maximum width of the images in pixels. - """ - display(HTML(get_image_grid_html(images, rows, columns, titles, max_image_width, caption))) - - -def acc_as_html(acc): - return f"
{acc:.2f}
" - - -# %% helpers.ipynb 37 -def strip_uuid(p: Path | str): - _p: Path = p if isinstance(p, Path) else Path(p) - new_stem = re.sub(r'(?i)[a-f0-9]{8}-([a-f0-9]{4}-){3}[a-f0-9]{12}', '', _p.stem).strip('_') - return _p.with_stem(new_stem) - diff --git a/_testbed/media/.gitkeep b/_testbed/media/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/_testbed/model/.gitkeep b/_testbed/model/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/_testbed/ocr_idefics.py b/_testbed/ocr_idefics.py deleted file mode 100644 index 67a50c42..00000000 --- a/_testbed/ocr_idefics.py +++ /dev/null @@ -1,184 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: test_idefics.ipynb. - -# %% test_idefics.ipynb 12 -from __future__ import annotations - -import functools -from pathlib import Path - -import pcleaner.ocr.ocr as ocr -import torch -import transformers -from pcleaner.ocr.ocr_tesseract import TesseractOcr -from PIL import Image -from rich.console import Console -from transformers import AutoProcessor -from transformers import Idefics2ForConditionalGeneration -from transformers import PreTrainedModel - - -# %% auto 0 -__all__ = ['IdeficsOCR', 'IdeficsExperimentContext'] - -# %% test_idefics.ipynb 17 -console = Console(width=104, tab_size=4, force_jupyter=True) -cprint = console.print - - -# %% test_idefics.ipynb 20 -from experiments import * -from helpers import * -from ocr_metric import * - - -# %% test_idefics.ipynb 21 -def load_image(img_or_path) -> Image.Image: - if isinstance(img_or_path, (str, Path)): - return Image.open(img_or_path) - elif isinstance(img_or_path, Image.Image): - return img_or_path - else: - raise ValueError(f"img_or_path must be a path or PIL.Image, got: {type(img_or_path)}") - - -# %% test_idefics.ipynb 36 -processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b") - -# %% test_idefics.ipynb 37 -device = "mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu" - -model = 
Idefics2ForConditionalGeneration.from_pretrained( - "HuggingFaceM4/idefics2-8b", - torch_dtype=torch.bfloat16, - _attn_implementation="flash_attention_2", - ).to(device) # type: ignore - - -# %% test_idefics.ipynb 39 -prompt_text_tmpl = ( - "Please perform optical character recognition (OCR) on this image, which displays " - "speech balloons from a comic book. The text is in {}. Extract the text and " - "format it as follows: transcribe in standard sentence case, avoid using all capital " - "letters. Provide the transcribed text clearly and double check the sentence is not all capital letters.") - -# prompt_text_tmpl = ("Please perform optical character recognition (OCR) on this image, which displays " -# f"speech balloons from a manga comic. The text is in {}. Extract the text and " -# "format it without newlines. Provide the transcribed text clearly.") - -# prompt_text_tmpl = ("Please perform optical character recognition (OCR) on this image, which displays " -# "speech balloons from a comic book. The text is in {}. Extract the text and " -# "format it as follows: transcribe in standard sentence case (avoid using all capital " -# "letters) and use asterisks to denote any words that appear in bold within the image. " -# "Provide the transcribed text clearly.") - -# prompt_text_tmpl = ("Please perform optical character recognition (OCR) on this image, which displays " -# "speech balloons from a comic book. The text is in {}. Extract the text and " -# "format it as follows: transcribe in standard sentence case, capitalized. Avoid using " -# "all capital letters. In comics, it is common to use two hyphens '--' to interrupt a sentence. " -# "Retain any hyphens as they appear in the original text. Provide the transcribed text " -# "clearly, ensuring it is capitalized where appropriate, including proper nouns.") - -prompt_text_tmpl = ( - "Please perform optical character recognition (OCR) on this image, which displays " - "speech balloons from a comic book. 
The text is in {}. Extract the text and " - "format it as follows: transcribe in standard sentence case, capitalized. Avoid using " - "all capital letters, but ensure it is capitalized where appropriate, including proper nouns. " - "Provide the transcribed text clearly. Double check the text is not all capital letters.") - -default_prompt_text_tmpl = prompt_text_tmpl - -# %% test_idefics.ipynb 41 -class IdeficsOCR: - prompt_text_tmpl: str = default_prompt_text_tmpl - - def __init__(self, - lang: str | None = None, - prompt_text_tmpl: str|None = None, - device: str | None = None - ): - self.lang = lang - self.prompt_text_tmpl = prompt_text_tmpl or self.prompt_text_tmpl - self.device = (device or - "mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu") - - @staticmethod - def is_idefics_available() -> bool: - return True - - def _generation_args(self, image: Image.Image, resulting_messages: list[dict]): - prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True) - inputs = processor(text=prompt, images=[image], return_tensors="pt") - inputs = {k: v.to(self.device) for k, v in inputs.items()} - - max_new_tokens = 512 - repetition_penalty = 1.2 - decoding_strategy = "Greedy" - temperature = 0.4 - top_p = 0.8 - - generation_args = { - "max_new_tokens": max_new_tokens, - "repetition_penalty": repetition_penalty, - } - - assert decoding_strategy in [ - "Greedy", - "Top P Sampling", - ] - - if decoding_strategy == "Greedy": - generation_args["do_sample"] = False - elif decoding_strategy == "Top P Sampling": - generation_args["temperature"] = temperature - generation_args["do_sample"] = True - generation_args["top_p"] = top_p - - generation_args.update(inputs) - return prompt, generation_args - - def __call__( - self, - img_or_path: Image.Image | Path | str, - prompt_text: str | None = None, - lang: str | None = None, - config: str | None = None, - show_prompt: bool = False, - **kwargs, - ) -> str: - if 
not self.is_idefics_available(): - raise RuntimeError("Idefics is not installed or not found.") - resulting_messages = [ - { - "role": "user", - "content": [{"type": "image"}] + [ - {"type": "text", "text": prompt_text or self.prompt_text_tmpl.format(lang or self.lang)} - ] - } - ] - image = load_image(img_or_path) - prompt, generation_args = self._generation_args(image, resulting_messages) - generated_ids = model.generate(**generation_args) - generated_texts = processor.batch_decode( - generated_ids[:, generation_args["input_ids"].size(1):], skip_special_tokens=True) - if show_prompt: - cprint("INPUT:", prompt, "|OUTPUT:", generated_texts) - return generated_texts[0]#.strip('"') - - def postprocess_ocr(self, text): - return ' '.join(remove_multiple_whitespaces(text).splitlines()) - - -# %% test_idefics.ipynb 43 -class IdeficsExperimentContext(OCRExperimentContext): - @functools.lru_cache() - def mocr(self, ocr_model: str, lang: str): - if ocr_model == 'Idefics': - proc = IdeficsOCR(lang) - else: - engine = self.engines[ocr_model] - ocr_processor = ocr.get_ocr_processor(True, engine) - proc = ocr_processor[lang2pcleaner(lang)] - if isinstance(proc, TesseractOcr): - proc.lang = lang2tesseract(lang) - return proc - diff --git a/_testbed/ocr_metric.ipynb b/_testbed/ocr_metric.ipynb deleted file mode 100644 index 41f2fad4..00000000 --- a/_testbed/ocr_metric.ipynb +++ /dev/null @@ -1,276 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp ocr_metric" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "# %reload_ext autoreload\n", - "# %autoreload 0\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# install (Colab)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# try: \n", - "# import fastcore as FC\n", - "# except 
ImportError: \n", - "# !pip install -q fastcore\n", - "# try:\n", - "# import rich\n", - "# except ImportError:\n", - "# !pip install -q rich\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Developing a metric for OCR of Comics/Manga texts\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Prologue" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "from __future__ import annotations\n", - "\n", - "import difflib\n", - "import html\n", - "\n", - "from IPython.display import display\n", - "from IPython.display import HTML\n", - "from rich.console import Console\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "import fastcore.all as FC\n", - "import fastcore.xtras # patch Path with some utils\n", - "import rich\n", - "from fastcore.test import * # type: ignore\n", - "from loguru import logger\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Helpers" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# pretty print by default\n", - "# %load_ext rich" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "console = Console(width=104, tab_size=4, force_jupyter=True)\n", - "cprint = console.print\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## OCR metric\n", - "> Some basic ways to compare OCR results" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def get_text_diffs_html(str1, str2, ignore_align: bool = False):\n", - " matcher = difflib.SequenceMatcher(None, str1, str2)\n", - " html_str1, html_str2 = \"\", \"\"\n", - " _ch ='⎕' # ▿\n", - " ch = f'&#x{ord(_ch):x};'\n", - " span1_g = 
lambda l: f\"{ch*l}\" if l > 0 else \"\"\n", - " span1_r = lambda l: f\"{ch*l}\" if l > 0 else \"\"\n", - " span2 = lambda s: f\"{html.escape(s)}\" if s else \"\"\n", - "\n", - " for opcode in matcher.get_opcodes():\n", - " tag, i1, i2, j1, j2 = opcode\n", - " if tag == \"equal\":\n", - " html_str1 += html.escape(str1[i1:i2])\n", - " html_str2 += html.escape(str2[j1:j2])\n", - " elif tag == \"replace\":\n", - " max_span = max(i2 - i1, j2 - j1)\n", - " # str1_segment = str1[i1:i2].ljust(max_span)\n", - " html_str1 += html.escape(str1[i1:i2]) + span1_g(max_span - (i2 - i1))\n", - " html_str2 += span2(str2[j1:j2]) + (span1_r(max_span - (j2 - j1)) if not ignore_align else '')\n", - " elif tag == \"delete\":\n", - " deleted_segment = str1[i1:i2]\n", - " html_str1 += html.escape(deleted_segment)\n", - " if not ignore_align: html_str2 += span1_r(len(deleted_segment))\n", - " elif tag == \"insert\":\n", - " inserted_segment = str2[j1:j2].replace(\" \", _ch)\n", - " html_str1 += span1_g(len(inserted_segment))\n", - " html_str2 += span2(inserted_segment)\n", - " html_str1 = f\"
{html_str1}
\"\n", - " html_str2 = f\"
{html_str2}
\"\n", - " return html_str1, html_str2\n", - "\n", - "def display_text_diffs(str1, str2):\n", - " \"\"\"\n", - " Displays two strings one above the other, with differing characters highlighted in red in the \n", - " second string only, using difflib.SequenceMatcher to align the strings and ensure matching \n", - " sequences are vertically aligned.\n", - "\n", - " :param str1: The first string to compare.\n", - " :param str2: The second string to compare.\n", - " \"\"\"\n", - " html_str1, html_str2 = get_text_diffs_html(str1, str2)\n", - " display(HTML(f\"
{html_str1}
{html_str2}
\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
This is an awesome_⎕⎕⎕ test string.This is an awesome_⎕⎕⎕ test string.This is an awesome_⎕⎕⎕ test string.This is an awesome_⎕⎕⎕ test string.

This was an a⎕⎕⎕⎕mazing test spring.This was an a⎕⎕⎕⎕mazing test spring.This was an a⎕⎕⎕⎕mazing test spring.This was an a⎕⎕⎕⎕mazing test spring.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "str1 = \"This is an awesome_ test string.\"*4\n", - "str2 = \"This was an amazing test▿ spring.\"*4\n", - "display_text_diffs(str1, str2)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
I was in a bad mood, and Curt sensed it immediately...

I was in a bad mood, and Curt sensed it immediately...
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "str1 = \"I was in a bad mood, and Curt sensed it immediately...\"\n", - "str2 = \"I was in a bad mood, and Curt sensed it immediately ...\"\n", - "display_text_diffs(str1, str2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Colophon\n", - "----\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "import fastcore.all as FC\n", - "from nbdev.export import nb_export\n" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "if FC.IN_NOTEBOOK:\n", - " nb_export('ocr_metric.ipynb', '.')\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/_testbed/ocr_metric.py b/_testbed/ocr_metric.py deleted file mode 100644 index f81b9a5e..00000000 --- a/_testbed/ocr_metric.py +++ /dev/null @@ -1,64 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ocr_metric.ipynb. 
- -# %% ocr_metric.ipynb 6 -from __future__ import annotations - -import difflib -import html - -from IPython.display import display -from IPython.display import HTML -from rich.console import Console - - -# %% auto 0 -__all__ = ['get_text_diffs_html', 'display_text_diffs'] - -# %% ocr_metric.ipynb 10 -console = Console(width=104, tab_size=4, force_jupyter=True) -cprint = console.print - - -# %% ocr_metric.ipynb 12 -def get_text_diffs_html(str1, str2, ignore_align: bool = False): - matcher = difflib.SequenceMatcher(None, str1, str2) - html_str1, html_str2 = "", "" - _ch ='⎕' # ▿ - ch = f'&#x{ord(_ch):x};' - span1_g = lambda l: f"{ch*l}" if l > 0 else "" - span1_r = lambda l: f"{ch*l}" if l > 0 else "" - span2 = lambda s: f"{html.escape(s)}" if s else "" - - for opcode in matcher.get_opcodes(): - tag, i1, i2, j1, j2 = opcode - if tag == "equal": - html_str1 += html.escape(str1[i1:i2]) - html_str2 += html.escape(str2[j1:j2]) - elif tag == "replace": - max_span = max(i2 - i1, j2 - j1) - # str1_segment = str1[i1:i2].ljust(max_span) - html_str1 += html.escape(str1[i1:i2]) + span1_g(max_span - (i2 - i1)) - html_str2 += span2(str2[j1:j2]) + (span1_r(max_span - (j2 - j1)) if not ignore_align else '') - elif tag == "delete": - deleted_segment = str1[i1:i2] - html_str1 += html.escape(deleted_segment) - if not ignore_align: html_str2 += span1_r(len(deleted_segment)) - elif tag == "insert": - inserted_segment = str2[j1:j2].replace(" ", _ch) - html_str1 += span1_g(len(inserted_segment)) - html_str2 += span2(inserted_segment) - html_str1 = f"
{html_str1}
" - html_str2 = f"
{html_str2}
" - return html_str1, html_str2 - -def display_text_diffs(str1, str2): - """ - Displays two strings one above the other, with differing characters highlighted in red in the - second string only, using difflib.SequenceMatcher to align the strings and ensure matching - sequences are vertically aligned. - - :param str1: The first string to compare. - :param str2: The second string to compare. - """ - html_str1, html_str2 = get_text_diffs_html(str1, str2) - display(HTML(f"
{html_str1}
{html_str2}
")) diff --git a/_testbed/requirements.txt b/_testbed/requirements.txt deleted file mode 100644 index 4e3d1c68..00000000 --- a/_testbed/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -matplotlib -rich -fastcore -nbdev -ipywidgets diff --git a/_testbed/test_idefics.ipynb b/_testbed/test_idefics.ipynb deleted file mode 100644 index a8978d12..00000000 --- a/_testbed/test_idefics.ipynb +++ /dev/null @@ -1,1497 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp ocr_idefics" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "# %reload_ext autoreload\n", - "# %autoreload 0\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# install (Colab)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# try: \n", - "# import fastcore as FC\n", - "# except ImportError: \n", - "# !pip install -q fastcore\n", - "# try:\n", - "# import rich\n", - "# except ImportError:\n", - "# !pip install -q rich\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# !pip install -q git+https://github.com/civvic/PanelCleaner.git@basic-tesseract" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "need version >4.40 of transformers" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# %pip install git+https://github.com/huggingface/transformers" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Fash attention doesn't support Metal [#412](https://github.com/Dao-AILab/flash-attention/issues/412) (but see [metal-flash-attention](https://github.com/philipturner/metal-flash-attention))\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": 
[], - "source": [ - "# %env FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE\n", - "# %pip install flash-attn --no-build-isolation" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "notebookRunGroups": { - "groupValue": "" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fri May 10 18:32:10 2024 \n", - "+---------------------------------------------------------------------------------------+\n", - "| NVIDIA-SMI 535.161.08 Driver Version: 535.161.08 CUDA Version: 12.2 |\n", - "|-----------------------------------------+----------------------+----------------------+\n", - "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", - "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", - "| | | MIG M. |\n", - "|=========================================+======================+======================|\n", - "| 0 NVIDIA GeForce RTX 3090 Ti On | 00000000:65:00.0 Off | Off |\n", - "| 0% 50C P8 33W / 480W | 1MiB / 24564MiB | 0% Default |\n", - "| | | N/A |\n", - "+-----------------------------------------+----------------------+----------------------+\n", - " \n", - "+---------------------------------------------------------------------------------------+\n", - "| Processes: |\n", - "| GPU GI CI PID Type Process name GPU Memory |\n", - "| ID ID Usage |\n", - "|=======================================================================================|\n", - "| No running processes found |\n", - "+---------------------------------------------------------------------------------------+\n" - ] - } - ], - "source": [ - "!nvidia-smi" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Testing `Idefics` OCR for Comics\n", - "> Accuracy Enhancements for OCR in `PanelCleaner`\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Prologue" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - 
"#| export\n", - "from __future__ import annotations\n", - "\n", - "import functools\n", - "from pathlib import Path\n", - "\n", - "import pcleaner.ocr.ocr as ocr\n", - "import torch\n", - "import transformers\n", - "from pcleaner.ocr.ocr_tesseract import TesseractOcr\n", - "from PIL import Image\n", - "from rich.console import Console\n", - "from transformers import AutoProcessor\n", - "from transformers import Idefics2ForConditionalGeneration\n", - "from transformers import PreTrainedModel\n" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "from typing import cast\n", - "\n", - "import fastcore.xtras # patch Path with some utils\n", - "import pcleaner.config as cfg\n", - "from fastcore.test import * # type: ignore\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "notebookRunGroups": { - "groupValue": "" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'4.41.0.dev0'" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "transformers.__version__" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Helpers" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "notebookRunGroups": { - "groupValue": "" - } - }, - "outputs": [], - "source": [ - "# pretty print by default\n", - "# %load_ext rich" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "notebookRunGroups": { - "groupValue": "" - } - }, - "outputs": [], - "source": [ - "#| exporti\n", - "\n", - "console = Console(width=104, tab_size=4, force_jupyter=True)\n", - "cprint = console.print\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Force reload of `experiments` module" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "if 'experiments' in sys.modules:\n", - " 
import importlib; importlib.reload(experiments) # type: ignore\n", - "else:\n", - " import experiments\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "from experiments import *\n", - "from helpers import *\n", - "from ocr_metric import *\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "\n", - "def load_image(img_or_path) -> Image.Image:\n", - " if isinstance(img_or_path, (str, Path)):\n", - " return Image.open(img_or_path)\n", - " elif isinstance(img_or_path, Image.Image):\n", - " return img_or_path\n", - " else:\n", - " raise ValueError(f\"img_or_path must be a path or PIL.Image, got: {type(img_or_path)}\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "----\n", - "# Idefics basic usage\n", - "\n", - "not working, cuda memory error" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "# # Note that passing the image urls (instead of the actual pil images) to the processor is also possible\n", - "# # image1 = load_image(\"https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg\")\n", - "# # image2 = load_image(\"https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg\")\n", - "# # image3 = load_image(\"https://cdn.britannica.com/68/170868-050-8DDE8263/Golden-Gate-Bridge-San-Francisco.jpg\")\n", - "\n", - "# image1 = Image.open(\"media/Statue-of-Liberty-Island-New-York-Bay.webp\")\n", - "# image2 = Image.open(\"media/Skyline-Chicago.webp\")\n", - "# image3 = Image.open(\"media/Golden-Gate-Bridge-San-Francisco.webp\")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "# processor = AutoProcessor.from_pretrained(\"HuggingFaceM4/idefics2-8b\")" - ] - }, - { - "cell_type": "code", - 
"execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "# model = Idefics2ForConditionalGeneration.from_pretrained(\n", - "# \"HuggingFaceM4/idefics2-8b\",\n", - "# torch_dtype=torch.bfloat16,\n", - "# #_attn_implementation=\"flash_attention_2\",\n", - "# )\n" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "# assert isinstance(model, PreTrainedModel)\n", - "# model.to(DEVICE)\n", - "# type(model), model.device\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create inputs" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "# messages = [\n", - "# {\n", - "# \"role\": \"user\",\n", - "# \"content\": [\n", - "# {\"type\": \"image\"},\n", - "# {\"type\": \"text\", \"text\": \"What do we see in this image?\"},\n", - "# ]\n", - "# },\n", - "# {\n", - "# \"role\": \"assistant\",\n", - "# \"content\": [\n", - "# {\"type\": \"text\", \"text\": \"In this image, we can see the city of New York, and more specifically the Statue of Liberty.\"},\n", - "# ]\n", - "# },\n", - "# {\n", - "# \"role\": \"user\",\n", - "# \"content\": [\n", - "# {\"type\": \"image\"},\n", - "# {\"type\": \"text\", \"text\": \"And how about this image?\"},\n", - "# ]\n", - "# }, \n", - "# ]\n" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "# prompt = processor.apply_chat_template(messages, add_generation_prompt=True)\n", - "# inputs = processor(text=prompt, images=[image1, image2], return_tensors=\"pt\")\n", - "# inputs = {k: v.to(DEVICE) for k, v in inputs.items()}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Generate" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "# generated_ids = model.generate(**inputs, max_new_tokens=500)\n", - "# generated_texts = 
processor.batch_decode(generated_ids, skip_special_tokens=True)\n", - "\n", - "# print(generated_texts)\n", - "# # ['User: What do we see in this image? \\nAssistant: In this image, we can see the city of New York, and more specifically the Statue of Liberty. \\nUser: And how about this image? \\nAssistant: In this image we can see buildings, trees, lights, water and sky.']\n" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "# [\n", - "# 'User: What do we see in this image? '\n", - "# 'Assistant: In this image, we can see the city of New York, and more specifically the Statue of Liberty. '\n", - "# 'User: And how about this image? '\n", - "# 'Assistant: In this image we can see buildings, trees, lights, water and sky.'\n", - "# ]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "----\n", - "# Idefics experiments\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Idefics" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Idefics initialization" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" - ] - } - ], - "source": [ - "#| exporti\n", - "\n", - "processor = AutoProcessor.from_pretrained(\"HuggingFaceM4/idefics2-8b\")" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. 
Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5a21b61268674a159f210694841c2149", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Loading checkpoint shards: 0%| | 0/7 [00:00 bool:\n", - " return True\n", - "\n", - " def _generation_args(self, image: Image.Image, resulting_messages: list[dict]):\n", - " prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)\n", - " inputs = processor(text=prompt, images=[image], return_tensors=\"pt\")\n", - " inputs = {k: v.to(self.device) for k, v in inputs.items()}\n", - " \n", - " max_new_tokens = 512\n", - " repetition_penalty = 1.2\n", - " decoding_strategy = \"Greedy\"\n", - " temperature = 0.4\n", - " top_p = 0.8\n", - "\n", - " generation_args = {\n", - " \"max_new_tokens\": max_new_tokens,\n", - " \"repetition_penalty\": repetition_penalty,\n", - " }\n", - "\n", - " assert decoding_strategy in [\n", - " \"Greedy\",\n", - " \"Top P Sampling\",\n", - " ]\n", - "\n", - " if decoding_strategy == \"Greedy\":\n", - " generation_args[\"do_sample\"] = False\n", - " elif decoding_strategy == \"Top P Sampling\":\n", - " generation_args[\"temperature\"] = temperature\n", - " generation_args[\"do_sample\"] = True\n", - " generation_args[\"top_p\"] = top_p\n", - "\n", - " generation_args.update(inputs)\n", - " return prompt, generation_args\n", - "\n", - " def __call__(\n", - " self,\n", - " img_or_path: Image.Image | Path | str,\n", - " prompt_text: str | None = None,\n", - " lang: str | None = None,\n", - " config: str | None = None,\n", - " show_prompt: bool = False,\n", - " **kwargs,\n", - " ) -> str:\n", - " if not self.is_idefics_available():\n", - " raise RuntimeError(\"Idefics is not installed or not found.\")\n", - " resulting_messages = [\n", - " {\n", - " \"role\": \"user\",\n", - " \"content\": [{\"type\": \"image\"}] + [\n", - " {\"type\": 
\"text\", \"text\": prompt_text or self.prompt_text_tmpl.format(lang or self.lang)}\n", - " ]\n", - " }\n", - " ]\n", - " image = load_image(img_or_path)\n", - " prompt, generation_args = self._generation_args(image, resulting_messages)\n", - " generated_ids = model.generate(**generation_args)\n", - " generated_texts = processor.batch_decode(\n", - " generated_ids[:, generation_args[\"input_ids\"].size(1):], skip_special_tokens=True)\n", - " if show_prompt:\n", - " cprint(\"INPUT:\", prompt, \"|OUTPUT:\", generated_texts)\n", - " return generated_texts[0]#.strip('\"')\n", - "\n", - " def postprocess_ocr(self, text):\n", - " return ' '.join(remove_multiple_whitespaces(text).splitlines())\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## IdeficsExperimentContext" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "class IdeficsExperimentContext(OCRExperimentContext):\n", - " @functools.lru_cache()\n", - " def mocr(self, ocr_model: str, lang: str):\n", - " if ocr_model == 'Idefics':\n", - " proc = IdeficsOCR(lang)\n", - " else:\n", - " engine = self.engines[ocr_model]\n", - " ocr_processor = ocr.get_ocr_processor(True, engine)\n", - " proc = ocr_processor[lang2pcleaner(lang)]\n", - " if isinstance(proc, TesseractOcr):\n", - " proc.lang = lang2tesseract(lang)\n", - " return proc\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# PanelCleaner Configuration\n", - "> Adapt `PanelCleaner` `Config` current config to this notebook.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "config = cfg.load_config()\n", - "config.cache_dir = Path(\".\")\n", - "\n", - "cache_dir = config.get_cleaner_cache_dir()\n", - "\n", - "profile = config.current_profile\n", - "preprocessor_conf = profile.preprocessor\n", - "# Modify the profile to OCR all boxes.\n", - "# Make sure OCR is 
enabled.\n", - "preprocessor_conf.ocr_enabled = True\n", - "# Make sure the max size is infinite, so no boxes are skipped in the OCR process.\n", - "preprocessor_conf.ocr_max_size = 10**10\n", - "# Make sure the sus box min size is infinite, so all boxes with \"unknown\" language are skipped.\n", - "preprocessor_conf.suspicious_box_min_size = 10**10\n", - "# Set the OCR blacklist pattern to match everything, so all text gets reported in the analytics.\n", - "preprocessor_conf.ocr_blacklist_pattern = \".*\"\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test images\n", - "> `IMAGE_PATHS` is a list of image file paths that are used as input for testing the OCR methods." - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['00: Action_Comics_1960-01-00_(262).JPG',\n", - " '01: Adolf_Cap_01_008.jpg',\n", - " '02: Barnaby_v1-028.png',\n", - " '03: Barnaby_v1-029.png',\n", - " '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n", - " '05: Cannon-292.jpg',\n", - " '06: Contrato_con_Dios_028.jpg',\n", - " '07: Erase_una_vez_en_Francia_02_88.jpg',\n", - " '08: FOX_CHILLINTALES_T17_012.jpg',\n", - " '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n", - " '10: Galactus_12.jpg',\n", - " '11: INOUE_KYOUMEN_002.png',\n", - " '12: MCCALL_ROBINHOOD_T31_010.jpg',\n", - " '13: MCCAY_LITTLENEMO_090.jpg',\n", - " '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n", - " '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n", - " '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n", - " '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n", - " '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n", - " '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n", - " '20: Strange_Tales_172005.jpg',\n", - " '21: Strange_Tales_172021.jpg',\n", - " '22: Tarzan_014-21.JPG',\n", - " '23: 
Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n", - " '24: Transformers_-_Unicron_000-004.jpg',\n", - " '25: Transformers_-_Unicron_000-016.jpg',\n", - " '26: WARE_ACME_024.jpg',\n", - " '27: Yoko_Tsuno_T01_1972-10.jpg',\n", - " '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n", - " '29: manga_0033.jpg',\n", - " '30: ronson-031.jpg',\n", - " '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "media_path = Path(\"media/\")\n", - "\n", - "IMAGE_PATHS = sorted(\n", - " [_ for _ in media_path.glob(\"*\") if _.is_file() and _.suffix.lower() in [\".jpg\", \".png\", \".jpeg\"]])\n", - "\n", - "[f\"{i:02}: {_.name}\" for i,_ in enumerate(IMAGE_PATHS)]\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# CONTEXT\n" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current Configuration:\n", - "\n", - "Locale: System default\n", - "Default Profile: Built-in\n", - "Saved Profiles:\n", - "- victess: /home/vic/dev/repo/DL-mac/cleaned/victess.conf\n", - "- victmang: /home/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n", - "\n", - "Profile Editor: System default\n", - "Cache Directory: .\n", - "Default Torch Model Path: /home/vic/.cache/pcleaner/model/comictextdetector.pt\n", - "Default CV2 Model Path: /home/vic/.cache/pcleaner/model/comictextdetector.pt.onnx\n", - "GUI Theme: System default\n", - "\n", - "--------------------\n", - "\n", - "Config file located at: /home/vic/.config/pcleaner/pcleanerrc\n", - "System default cache directory: /home/vic/.cache/pcleaner\n" - ] - }, - { - "data": { - "text/html": [ - "
      cache_dir: Path('cleaner')\n",
-       "     model_path: Path('/home/vic/.cache/pcleaner/model/comictextdetector.pt')\n",
-       "         device: 'cuda'\n",
-       "
\n" - ], - "text/plain": [ - " cache_dir: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'cleaner'\u001b[0m\u001b[1m)\u001b[0m\n", - " model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/home/vic/.cache/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n", - " device: \u001b[32m'cuda'\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "CONTEXT = IdeficsExperimentContext(None, IMAGE_PATHS)\n", - "\n", - "gpu = torch.cuda.is_available() or torch.backends.mps.is_available()\n", - "model_path = CONTEXT.config.get_model_path(gpu)\n", - "DEVICE = (\"mps\" if torch.backends.mps.is_available() else \"cuda\") if model_path.suffix == \".pt\" else \"cpu\"\n", - "\n", - "CONTEXT.config.show()\n", - "cprint(\n", - " f\"{'cache_dir':>15}: {repr(cache_dir)}\\n\"\n", - " f\"{'model_path':>15}: {repr(model_path)}\\n\"\n", - " f\"{'device':>15}: {repr(DEVICE)}\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Base image\n", - "> Change `BASE_IMAGE_IDX` to select a different base image to use in the examples below." 
- ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "BASE_IMAGE_IDX: ImgIdT = cast(ImgIdT, CONTEXT.normalize_idx(\"Strange_Tales_172005.jpg\"))\n", - "assert CONTEXT.path_from_idx(BASE_IMAGE_IDX).exists()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualize images\n" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "IMAGE_CONTEXT = ImageContext(CONTEXT, BASE_IMAGE_IDX)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9c0379d4776a4e0f9facd7e0092c79f3", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output(layout=Layout(height='0px'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5f0ae88943a14ea38efb029c40183ffd", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HBox(children=(Dropdown(index=20, layout=Layout(width='fit-content'), options={'Action_Comics_1…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "26c6ecdbae6f4952aab7ff3106400f04", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "img_visor = ImageContextVisor(CONTEXT, BASE_IMAGE_IDX)\n", - "img_visor\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Box id\n", - "> change `BOX_IDX` to use any box to test crop methods" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "BOX_IDX = 0" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Idefics inference" - ] 
- }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, - "outputs": [], - "source": [ - "page_lang = IMAGE_CONTEXT.page_lang\n", - "\n", - "resulting_messages = [\n", - " {\n", - " \"role\": \"user\",\n", - " \"content\": [{\"type\": \"image\"}] + [\n", - " {\"type\": \"text\", \"text\": prompt_text_tmpl.format(page_lang)}\n", - " ]\n", - " }\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "def idefics_generation_args(image: Image.Image, resulting_messages: list[dict]):\n", - " prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)\n", - " inputs = processor(text=prompt, images=[image], return_tensors=\"pt\")\n", - " inputs = {k: v.to(DEVICE) for k, v in inputs.items()}\n", - " \n", - " max_new_tokens = 512\n", - " repetition_penalty = 1.2\n", - " decoding_strategy = \"Greedy\"\n", - " temperature = 0.4\n", - " top_p = 0.8\n", - "\n", - " generation_args = {\n", - " \"max_new_tokens\": max_new_tokens,\n", - " \"repetition_penalty\": repetition_penalty,\n", - " }\n", - "\n", - " assert decoding_strategy in [\n", - " \"Greedy\",\n", - " \"Top P Sampling\",\n", - " ]\n", - "\n", - " if decoding_strategy == \"Greedy\":\n", - " generation_args[\"do_sample\"] = False\n", - " elif decoding_strategy == \"Top P Sampling\":\n", - " generation_args[\"temperature\"] = temperature\n", - " generation_args[\"do_sample\"] = True\n", - " generation_args[\"top_p\"] = top_p\n", - "\n", - " generation_args.update(inputs)\n", - " return prompt, generation_args\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Crop methods" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "# image_experiment = ExperimentOCR(IMAGE_CONTEXT, 'Idefics')\n", - "image_experiment = ExperimentOCR.from_image(CONTEXT, 'Idefics', 
IMAGE_CONTEXT.image_idx) # use cache\n" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "method = CropMethod.INITIAL_BOX\n", - "\n", - "result = cast(ResultOCR, image_experiment.result(BOX_IDX, method, ocr=False))\n", - "image = cast(Image.Image, result.image)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "No chat template is set for this tokenizer, falling back to a default class-level template. This is very error-prone, because models are often trained with templates different from the class default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which point any code depending on them will stop working. We recommend setting a valid chat template before then to ensure that this model continues working without issues.\n", - "The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.\n" - ] - }, - { - "data": { - "text/html": [ - "
INPUT: User:<image>Please perform optical character recognition (OCR) on this image, which displays \n",
-       "speech balloons from a comic book. The text is in English. Extract the text and format it as follows: \n",
-       "transcribe in standard sentence case, capitalized. Avoid using all capital letters, but ensure it is \n",
-       "capitalized where appropriate, including proper nouns. Provide the transcribed text clearly. Double \n",
-       "check the text is not all capital letters.<end_of_utterance>\n",
-       "Assistant: |OUTPUT:\n",
-       "[\n",
-       "    'Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New \n",
-       "Orleans, kept tidy by a white-haired old man known only as Bambu.'\n",
-       "]\n",
-       "
\n" - ], - "text/plain": [ - "INPUT: User:\u001b[1m<\u001b[0m\u001b[1;95mimage\u001b[0m\u001b[39m>Please perform optical character recognition \u001b[0m\u001b[1;39m(\u001b[0m\u001b[39mOCR\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m on this image, which displays \u001b[0m\n", - "\u001b[39mspeech balloons from a comic book. The text is in English. Extract the text and format it as follows: \u001b[0m\n", - "\u001b[39mtranscribe in standard sentence case, capitalized. Avoid using all capital letters, but ensure it is \u001b[0m\n", - "\u001b[39mcapitalized where appropriate, including proper nouns. Provide the transcribed text clearly. Double \u001b[0m\n", - "\u001b[39mcheck the text is not all capital letters.\u001b[0m\n", - "Assistant: |OUTPUT:\n", - "\u001b[1m[\u001b[0m\n", - " \u001b[32m'Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New \u001b[0m\n", - "\u001b[32mOrleans, kept tidy by a white-haired old man known only as Bambu.'\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "prompt, generation_args = idefics_generation_args(image, resulting_messages)\n", - "generated_ids = model.generate(**generation_args)\n", - "\n", - "generated_texts = processor.batch_decode(\n", - " generated_ids[:, generation_args[\"input_ids\"].size(1):], skip_special_tokens=True)\n", - "cprint(\"INPUT:\", prompt, \"|OUTPUT:\", generated_texts)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.
1.00
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "result.ocr = generated_texts[0]\n", - "result\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "----" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
INPUT: User:<image>Please perform optical character recognition (OCR) on this image, which displays \n",
-       "speech balloons from a comic book. The text is in English. Extract the text and format it as follows: \n",
-       "transcribe in standard sentence case, capitalized. Avoid using all capital letters, but ensure it is \n",
-       "capitalized where appropriate, including proper nouns. Provide the transcribed text clearly. Double \n",
-       "check the text is not all capital letters.<end_of_utterance>\n",
-       "Assistant: |OUTPUT:\n",
-       "[\n",
-       "    'Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New \n",
-       "Orleans, kept tidy by a white-haired old man known only as Bambu.'\n",
-       "]\n",
-       "
\n" - ], - "text/plain": [ - "INPUT: User:\u001b[1m<\u001b[0m\u001b[1;95mimage\u001b[0m\u001b[39m>Please perform optical character recognition \u001b[0m\u001b[1;39m(\u001b[0m\u001b[39mOCR\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m on this image, which displays \u001b[0m\n", - "\u001b[39mspeech balloons from a comic book. The text is in English. Extract the text and format it as follows: \u001b[0m\n", - "\u001b[39mtranscribe in standard sentence case, capitalized. Avoid using all capital letters, but ensure it is \u001b[0m\n", - "\u001b[39mcapitalized where appropriate, including proper nouns. Provide the transcribed text clearly. Double \u001b[0m\n", - "\u001b[39mcheck the text is not all capital letters.\u001b[0m\n", - "Assistant: |OUTPUT:\n", - "\u001b[1m[\u001b[0m\n", - " \u001b[32m'Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New \u001b[0m\n", - "\u001b[32mOrleans, kept tidy by a white-haired old man known only as Bambu.'\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.
1.00
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "method = CropMethod.INITIAL_BOX\n", - "\n", - "result = cast(ResultOCR, image_experiment.result(BOX_IDX, method, ocr=False))\n", - "image = cast(Image.Image, result.image)\n", - "\n", - "mocr: IdeficsOCR = cast(IdeficsOCR, CONTEXT.mocr('Idefics', page_lang))\n", - "text = mocr(image, show_prompt=True)\n", - "result.ocr = mocr.postprocess_ocr(text)\n", - "result\n" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tidy by a white-haired old man known only as bambu.
0.98
\n", - "
\n", - "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tidy by a white-haired old man known only as bambu.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.result(BOX_IDX, CropMethod.PADDED_4)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "
\n", - "
0.94
\n", - "
\n", - "
Embow⎕⎕ered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Encountered by great charles cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tidy by a white-haired old man known only as bambu.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "image_experiment.result(BOX_IDX, CropMethod.PAD_8_FRACT_0_2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "----\n", - "# Visualize results" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "522220949ff540fdbd864dc8d8722cf0", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output(layout=Layout(height='0px'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "13a3ec1b0ea04630b339e5026e01eee2", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HBox(children=(Label(value='Box # (of 15):', layout=Layout(padding='0px 0px 0px 10px', width='i…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "989b4acac70848f2a5da0a74f99bc181", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "result_visor = ResultVisor(image_experiment)\n", - "result_visor\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "----\n", - "# Visualize Experiment" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [], - "source": [ - "# p, d = image_experiment.to_json()" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "bae922f0f53b47b5909334ca7f5d24fc", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HBox(children=(Dropdown(index=20, layout=Layout(width='fit-content'), 
options={'Action_Comics_1…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9c8a0db76d0a40eb9c3451129e803124", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "exp_visor = ExperimentVisor(image_experiment)\n", - "exp_visor\n" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [], - "source": [ - "# p, d = exp_visor.ctx.to_json()\n", - "# p" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "----\n", - "# EEAaO" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d441a9415ca94481a85523a6d30eca92", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HBox(children=(HBox(children=(Dropdown(index=1, layout=Layout(width='fit-content'), options={'T…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4f568a40794a4cba951fb6652b5446dd", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "idefics_experiment = ExperimentsVisor(CONTEXT, BASE_IMAGE_IDX, \n", - " box_idx=13, method=CropMethod.DEFAULT_GREY_PAD,\n", - " ocr_model=OCRModel.IDEFICS, \n", - " ocr_models={'Tesseract': OCRModel.TESSERACT, 'Idefics': OCRModel.IDEFICS})\n", - "idefics_experiment\n" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [], - "source": [ - "idefics_experiment.update(model=OCRModel.TESSERACT)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Colophon\n", - "----\n" - ] - }, - { - "cell_type": "code", - 
"execution_count": 56, - "metadata": {}, - "outputs": [], - "source": [ - "import fastcore.all as FC\n", - "from nbdev.export import nb_export\n" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [], - "source": [ - "if FC.IN_NOTEBOOK:\n", - " nb_export('test_idefics.ipynb', '.')\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "panel-cleaner", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/_testbed/test_tesseract.ipynb b/_testbed/test_tesseract.ipynb deleted file mode 100644 index 3cb1dc79..00000000 --- a/_testbed/test_tesseract.ipynb +++ /dev/null @@ -1,638 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# install (Colab)" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# try: \n", - "# import fastcore as FC\n", - "# except ImportError: \n", - "# !pip install -q fastcore\n", - "# try:\n", - "# import rich\n", - "# except ImportError:\n", - "# !pip install -q rich\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# !pip install -q git+https://github.com/civvic/PanelCleaner.git@testbed" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Testing `Tesseract` OCR for Comics\n", - "> Accuracy Enhancements for OCR in `PanelCleaner`\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Prologue" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from __future__ import annotations\n", - "\n", - "from pathlib import 
Path\n", - "from typing import cast\n", - "\n", - "import pcleaner.config as cfg\n", - "import torch\n", - "from rich.console import Console\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from experiments import *\n", - "from helpers import *\n", - "from ocr_metric import *\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "import fastcore.xtras # patch Path with some utils\n", - "from fastcore.test import * # type: ignore\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Helpers" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# pretty print by default\n", - "# %load_ext rich" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "console = Console(width=104, tab_size=4, force_jupyter=True)\n", - "cprint = console.print\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Tesseract installation" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['tesseract 5.3.4',\n", - " ' leptonica-1.84.1',\n", - " ' libgif 5.2.1 : libjpeg 8d (libjpeg-turbo 3.0.0) : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : libwebp 1.4.0 : libopenjp2 2.5.2',\n", - " ' Found NEON',\n", - " ' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6',\n", - " ' Found libcurl/8.4.0 SecureTransport (LibreSSL/3.3.6) zlib/1.2.11 nghttp2/1.51.0']" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "out = !tesseract --version\n", - "out\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Install jpn_vert tesserac lang\n", - "\n" - ] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "```bash\n", - "cd model\n", - "sudo ln -s jpn_vert_tessdata_best.traineddata /usr/share/tesseract-ocr/5/tessdata/jpn_vert.traineddata\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(Path('/opt/homebrew/share/tessdata'),\n", - " ['afr, amh, ara, asm, aze, aze_cyrl, bel, ben, bod, bos, bre, bul, cat, ceb, ces',\n", - " 'chi_sim, chi_sim_vert, chi_tra, chi_tra_vert, chr, cos, cym, dan, deu, div, dzo, ell, eng, enm, epo',\n", - " 'equ, est, eus, fao, fas, fil, fin, fra, frk, frm, fry, gla, gle, glg, grc',\n", - " 'guj, hat, heb, hin, hrv, hun, hye, iku, ind, isl, ita, ita_old, jav, jpn, jpn_vert',\n", - " 'kan, kat, kat_old, kaz, khm, kir, kmr, kor, kor_vert, lao, lat, lav, lit, ltz, mal',\n", - " 'mar, mkd, mlt, mon, mri, msa, mya, nep, nld, nor, oci, ori, osd, pan, pol',\n", - " 'por, pus, que, ron, rus, san, script/Arabic, script/Armenian, script/Bengali, script/Canadian_Aboriginal, script/Cherokee, script/Cyrillic, script/Devanagari, script/Ethiopic, script/Fraktur',\n", - " 'script/Georgian, script/Greek, script/Gujarati, script/Gurmukhi, script/HanS, script/HanS_vert, script/HanT, script/HanT_vert, script/Hangul, script/Hangul_vert, script/Hebrew, script/Japanese, script/Japanese_vert, script/Kannada, script/Khmer',\n", - " 'script/Lao, script/Latin, script/Malayalam, script/Myanmar, script/Oriya, script/Sinhala, script/Syriac, script/Tamil, script/Telugu, script/Thaana, script/Thai, script/Tibetan, script/Vietnamese, sin, slk',\n", - " 'slv, snd, snum, spa, spa_old, sqi, srp, srp_latn, sun, swa, swe, syr, tam, tat, tel',\n", - " 'tgk, tha, tir, ton, tur, uig, ukr, urd, uzb, uzb_cyrl, vie, yid, yor'])" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "out = !tesseract --list-langs\n", - "tessdata = Path(out[0].split('\"')[1])\n", - "tessdata, [', '.join(sub) for sub in [out[i:i 
+ 15] for i in range(1, len(out), 15)]]\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[\n",
-       "    Path('/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/eng_tessdata_best_410.traineddata'),\n",
-       "    Path('/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_vert_tessdata_best.traineddata'),\n",
-       "    Path('/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_tessdata_best.traineddata')\n",
-       "]\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\n", - " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/eng_tessdata_best_410.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_vert_tessdata_best.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n", - " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/dev/repo/DL-mac/PanelCleaner/_testbed/model/jpn_tessdata_best.traineddata'\u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "langs = tessdata.ls()\n", - "cprint([p.resolve() for p in langs if 'eng' in p.name] + [p.resolve() for p in langs if 'jpn' in p.name])\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "----\n", - "# Tesseract experiments" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# PanelCleaner Configuration\n", - "> Adapt `PanelCleaner` `Config` current config to this notebook.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "config = cfg.load_config()\n", - "config.cache_dir = Path(\".\")\n", - "\n", - "cache_dir = config.get_cleaner_cache_dir()\n", - "\n", - "profile = config.current_profile\n", - "preprocessor_conf = profile.preprocessor\n", - "# Modify the profile to OCR all boxes.\n", - "# Make sure OCR is enabled.\n", - "preprocessor_conf.ocr_enabled = True\n", - "# Make sure the max size is infinite, so no boxes are skipped in the OCR process.\n", - "preprocessor_conf.ocr_max_size = 10**10\n", - "# Make sure the sus box min size is infinite, so all boxes with \"unknown\" language are skipped.\n", - "preprocessor_conf.suspicious_box_min_size = 10**10\n", - "# Set the OCR blacklist pattern to match everything, so all text gets reported in the 
analytics.\n", - "preprocessor_conf.ocr_blacklist_pattern = \".*\"\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test images\n", - "> `IMAGE_PATHS` is a list of image file paths that are used as input for testing the OCR methods." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['00: Action_Comics_1960-01-00_(262).JPG',\n", - " '01: Adolf_Cap_01_008.jpg',\n", - " '02: Barnaby_v1-028.png',\n", - " '03: Barnaby_v1-029.png',\n", - " '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n", - " '05: Cannon-292.jpg',\n", - " '06: Contrato_con_Dios_028.jpg',\n", - " '07: Erase_una_vez_en_Francia_02_88.jpg',\n", - " '08: FOX_CHILLINTALES_T17_012.jpg',\n", - " '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n", - " '10: Galactus_12.jpg',\n", - " '11: INOUE_KYOUMEN_002.png',\n", - " '12: MCCALL_ROBINHOOD_T31_010.jpg',\n", - " '13: MCCAY_LITTLENEMO_090.jpg',\n", - " '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n", - " '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n", - " '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n", - " '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n", - " '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n", - " '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n", - " '20: Strange_Tales_172005.jpg',\n", - " '21: Strange_Tales_172021.jpg',\n", - " '22: Tarzan_014-21.JPG',\n", - " '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n", - " '24: Transformers_-_Unicron_000-004.jpg',\n", - " '25: Transformers_-_Unicron_000-016.jpg',\n", - " '26: WARE_ACME_024.jpg',\n", - " '27: Yoko_Tsuno_T01_1972-10.jpg',\n", - " '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n", - " '29: manga_0033.jpg',\n", - " '30: ronson-031.jpg',\n", - " '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - 
], - "source": [ - "media_path = Path(\"media/\")\n", - "\n", - "IMAGE_PATHS = sorted(\n", - " [_ for _ in media_path.glob(\"*\") if _.is_file() and _.suffix.lower() in [\".jpg\", \".png\", \".jpeg\"]])\n", - "\n", - "[f\"{i:02}: {_.name}\" for i,_ in enumerate(IMAGE_PATHS)]\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# CONTEXT\n", - "> `CONTEXT` is an `OCRExperimentContext` object that contains the configuration and the list of image paths.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can get the configuration with `OCRExperimentContext.get_config()`.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current Configuration:\n", - "\n", - "Locale: System default\n", - "Default Profile: Built-in\n", - "Saved Profiles:\n", - "- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n", - "- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n", - "\n", - "Profile Editor: cursor\n", - "Cache Directory: .\n", - "Default Torch Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n", - "Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n", - "GUI Theme: System default\n", - "\n", - "--------------------\n", - "\n", - "Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n", - "System default cache directory: /Users/vic/Library/Caches/pcleaner\n" - ] - }, - { - "data": { - "text/html": [ - "
      cache_dir: Path('cleaner')\n",
-       "     model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n",
-       "         device: 'mps'\n",
-       "
\n" - ], - "text/plain": [ - " cache_dir: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'cleaner'\u001b[0m\u001b[1m)\u001b[0m\n", - " model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n", - " device: \u001b[32m'mps'\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "CONTEXT = OCRExperimentContext(None, IMAGE_PATHS)\n", - "\n", - "gpu = torch.cuda.is_available() or torch.backends.mps.is_available()\n", - "model_path = CONTEXT.config.get_model_path(gpu)\n", - "DEVICE = (\"mps\" if torch.backends.mps.is_available() else \"cuda\") if model_path.suffix == \".pt\" else \"cpu\"\n", - "\n", - "CONTEXT.config.show()\n", - "cprint(\n", - " f\"{'cache_dir':>15}: {repr(cache_dir)}\\n\"\n", - " f\"{'model_path':>15}: {repr(model_path)}\\n\"\n", - " f\"{'device':>15}: {repr(DEVICE)}\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Base image\n", - "> Change `BASE_IMAGE_IDX` to select a different base image to use in the examples below." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "BASE_IMAGE_IDX: ImgIdT = cast(ImgIdT, CONTEXT.normalize_idx(\"Strange_Tales_172005.jpg\"))\n", - "# BASE_IMAGE_IDX = CONTEXT.normalize_idx(\"0033\")\n", - "# BASE_IMAGE_IDX = CONTEXT.normalize_idx(\"INOUE_KYOUMEN_002\")\n", - "# BASE_IMAGE_IDX = CONTEXT.normalize_idx(\"Action_Comics_1960-01-00_(262)\")\n", - "\n", - "assert BASE_IMAGE_IDX is not None\n", - "img_path = Path(CONTEXT.image_paths[BASE_IMAGE_IDX])\n", - "assert img_path.exists()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Empty cache\n", - "> Clear the image cache used profusely throughout the examples below." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "You will be warned before the cache is emptied." 
- ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "# CONTEXT.empty_cache_warn()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "# CONTEXT.empty_cache_warn(BASE_IMAGE_IDX)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Base image\n", - "> Change `BASE_IMAGE_IDX` to select a different base image to use in the examples below.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "BASE_IMAGE_IDX: ImgIdT = cast(ImgIdT, CONTEXT.normalize_idx(\"Strange_Tales_172005.jpg\"))\n", - "assert CONTEXT.path_from_idx(BASE_IMAGE_IDX).exists()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualize images\n" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3de3bd90585a452ab7bd9f5dce716e4e", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output(layout=Layout(height='0px'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6ab61afc65d84115b81c248ed1d0ab03", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HBox(children=(Dropdown(index=20, layout=Layout(width='fit-content'), options={'Action_Comics_1…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "8c79a1393b5a4feaaa8c6d7cf2b458bc", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "img_visor = ImageContextVisor(CONTEXT, BASE_IMAGE_IDX)\n", - "img_visor\n" - ] - }, - { - "cell_type": "markdown", - "metadata": 
{}, - "source": [ - "# Tesseract experiments\n" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d65bd435e6774637ac667defde594c4d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(HBox(children=(HBox(children=(Dropdown(layout=Layout(width='fit-content'), options={'Tesseract'…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f2ca36a4d81144d59ba835c40b990d34", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# tesseract_experiment = ExperimentsVisor(CONTEXT)\n", - "tesseract_experiment = ExperimentsVisor(CONTEXT, BASE_IMAGE_IDX)\n", - "\n", - "test_eq(tesseract_experiment.all_values, {\n", - " 'image_selector': {'image_idx': 20},\n", - " 'content_selector': {'display_option': DisplayOptions.RESULTS},\n", - " 'result_visor': {\n", - " 'all_boxes': False,\n", - " 'box_idx': 0,\n", - " 'all_methods': False,\n", - " 'method': CropMethod.INITIAL_BOX,\n", - " },\n", - " 'model_selector': {'model': OCRModel.TESSERACT},\n", - " 'self': {}\n", - "})\n", - "\n", - "tesseract_experiment\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From 0b14d8a84f93c1be3790bd75d267c6b84cae445a Mon Sep 17 00:00:00 2001 From: Spikey 
Date: Fri, 24 May 2024 19:24:24 +0200 Subject: [PATCH 05/27] Colab requirements --- setup-cli-gui.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup-cli-gui.cfg b/setup-cli-gui.cfg index b5c4642b..c00ba3fa 100644 --- a/setup-cli-gui.cfg +++ b/setup-cli-gui.cfg @@ -20,7 +20,7 @@ install_requires = opencv-python transformers manga_ocr - Pillow + Pillow == 9.4.0 # Colab torch torchvision tqdm From a3e0a6e80d45815973ad34b017c773a71ea76c4f Mon Sep 17 00:00:00 2001 From: Spikey Date: Fri, 24 May 2024 19:26:05 +0200 Subject: [PATCH 06/27] Downgrade to Python 10 as Colab not yet support 11 --- pcleaner/config.py | 19 ++++++++++++------- pcleaner/gui/profile_parser.py | 2 +- pcleaner/preprocessor.py | 12 ++++++------ pcleaner/structures.py | 4 ++-- 4 files changed, 21 insertions(+), 16 deletions(-) diff --git a/pcleaner/config.py b/pcleaner/config.py index 1a3066f3..4d4add4d 100644 --- a/pcleaner/config.py +++ b/pcleaner/config.py @@ -72,13 +72,17 @@ Percentage = NewType("Percentage", float) -class ReadingOrder(StrEnum): +class EnumStr_(Enum): + @classmethod + def display_names(cls) -> dict[str, 'EnumStr_']: + return {e.value: e for e in cls} + +class ReadingOrder(EnumStr_): AUTO = "auto" MANGA = "manga" COMIC = "comic" - -class OCREngine(StrEnum): +class OCREngine(EnumStr_): AUTO = "auto" MANGAOCR = "manga-ocr" TESSERACT = "tesseract" @@ -1309,13 +1313,14 @@ def try_to_load( return # check before: `StrEnum` is a `str` - elif isinstance(attr_type, type) and issubclass(attr_type, StrEnum): - if conf_data in attr_type.__members__.values(): - attr_value = conf_data + elif isinstance(attr_type, type) and issubclass(attr_type, EnumStr_): + names = attr_type.display_names() + if conf_data in names: # type: ignore + attr_value = names[conf_data] else: print( f"Option {attr_name} in section {section} should be a one " - f"of {', '.join(repr(str(_)) for _ in attr_type.__members__.values())}.\n" + f"of {', '.join(_.value for _ in attr_type)}.\n" f"Failed to 
parse '{conf_data}'" ) return diff --git a/pcleaner/gui/profile_parser.py b/pcleaner/gui/profile_parser.py index 4af9d5bb..9aaa48e2 100644 --- a/pcleaner/gui/profile_parser.py +++ b/pcleaner/gui/profile_parser.py @@ -206,7 +206,7 @@ def _get_text() -> str | None: enm = {EntryTypes.OCREngine: OCREngine, EntryTypes.ReadingOrder: ReadingOrder}[ entry_type ] - for member in enm.__members__.values(): + for member in enm: self._data_widget.addTextItemLinkedData(member.value, member) self._data_widget.setCurrentIndexByLinkedData(enm.AUTO) self._data_widget.currentIndexChanged.connect(self._value_changed) diff --git a/pcleaner/preprocessor.py b/pcleaner/preprocessor.py index 0f43ac3e..c15cd251 100644 --- a/pcleaner/preprocessor.py +++ b/pcleaner/preprocessor.py @@ -130,15 +130,15 @@ def prep_json_file( original_path: str = json_data["original_path"] scale: float = json_data["scale"] boxes: list[st.Box] = [] - page_langs: list[st.DetectedLang] = [] + page_langs: list[str] = [] # Define permitted languages based on strictness. # Since the OCR model is only trained to recognize Japanese, # we need to discard anything that isn't, and if strict, also # those that are unknown (likely a mix). - language_whitelist = [st.DetectedLang.JA, st.DetectedLang.ENG] + language_whitelist = [st.DetectedLang.JA.value, st.DetectedLang.ENG.value] if not preprocessor_conf.ocr_strict_language: - language_whitelist.append(st.DetectedLang.UNKNOWN) + language_whitelist.append(st.DetectedLang.UNKNOWN.value) for data in json_data["blk_list"]: # Check box language. 
@@ -155,9 +155,9 @@ def prep_json_file( page_langs.append(data["language"]) boxes.append(box) - page_lang: st.DetectedLang = ( - Counter(page_langs).most_common(1)[0][0] if boxes else st.DetectedLang.UNKNOWN - ) + page_lang: st.DetectedLang = st.DetectedLang.display_names()[( + Counter(page_langs).most_common(1)[0][0] if boxes else st.DetectedLang.UNKNOWN.value + )] logger.debug(f"Detected lang: {page_lang}") # reading_order = preprocessor_conf.reading_order diff --git a/pcleaner/structures.py b/pcleaner/structures.py index 02362bf1..8b3a1215 100644 --- a/pcleaner/structures.py +++ b/pcleaner/structures.py @@ -1,7 +1,7 @@ import json import re from enum import Enum -from enum import StrEnum +# from enum import StrEnum from importlib import resources from pathlib import Path from typing import Sequence @@ -15,7 +15,7 @@ import pcleaner.data -class DetectedLang(StrEnum): +class DetectedLang(cfg.EnumStr_): JA = "ja" ENG = "eng" UNKNOWN = "unknown" From 1c0a19bbd8de46cecc56df01bc94f209d80f852b Mon Sep 17 00:00:00 2001 From: Spikey Date: Fri, 24 May 2024 19:27:26 +0200 Subject: [PATCH 07/27] fixes, refactor for Colab --- pcleaner/_testbed/README.md | 61 + pcleaner/_testbed/__init__.py | 0 pcleaner/_testbed/experiment/.gitkeep | 0 pcleaner/_testbed/experiment/Idefics.json | 3825 +++++++++ pcleaner/_testbed/experiment/Tesseract.json | 4062 ++++++++++ pcleaner/_testbed/experiment/cache/.gitkeep | 0 pcleaner/_testbed/experiment/source/.gitkeep | 0 pcleaner/_testbed/nbs/experiments.ipynb | 7640 ++++++++++++++++++ pcleaner/_testbed/nbs/helpers.ipynb | 821 ++ pcleaner/_testbed/nbs/ocr_idefics.ipynb | 1618 ++++ pcleaner/_testbed/nbs/ocr_metric.ipynb | 276 + pcleaner/_testbed/nbs/test_web_server.ipynb | 755 ++ pcleaner/_testbed/nbs/visor.ipynb | 1024 +++ pcleaner/_testbed/nbs/web_server.ipynb | 1 + pcleaner/_testbed/pcleaner.png | Bin 0 -> 17709 bytes pcleaner/_testbed/requirements-colab.txt | 7 + pcleaner/_testbed/requirements-idefics.txt | 4 + 
pcleaner/_testbed/requirements.txt | 8 + pcleaner/_testbed/test_idefics.ipynb | 1 + pcleaner/_testbed/test_tesseract.ipynb | 1 + pcleaner/_testbed/testbed/__init__.py | 0 pcleaner/_testbed/testbed/bottle.py | 4417 ++++++++++ pcleaner/_testbed/testbed/experiments.py | 2285 ++++++ pcleaner/_testbed/testbed/helpers.py | 381 + pcleaner/_testbed/testbed/ocr_idefics.py | 310 + pcleaner/_testbed/testbed/ocr_metric.py | 64 + pcleaner/_testbed/testbed/visor.py | 391 + pcleaner/_testbed/testbed/web_server.py | 330 + 28 files changed, 28282 insertions(+) create mode 100644 pcleaner/_testbed/README.md create mode 100644 pcleaner/_testbed/__init__.py create mode 100644 pcleaner/_testbed/experiment/.gitkeep create mode 100644 pcleaner/_testbed/experiment/Idefics.json create mode 100644 pcleaner/_testbed/experiment/Tesseract.json create mode 100644 pcleaner/_testbed/experiment/cache/.gitkeep create mode 100644 pcleaner/_testbed/experiment/source/.gitkeep create mode 100644 pcleaner/_testbed/nbs/experiments.ipynb create mode 100644 pcleaner/_testbed/nbs/helpers.ipynb create mode 100644 pcleaner/_testbed/nbs/ocr_idefics.ipynb create mode 100644 pcleaner/_testbed/nbs/ocr_metric.ipynb create mode 100644 pcleaner/_testbed/nbs/test_web_server.ipynb create mode 100644 pcleaner/_testbed/nbs/visor.ipynb create mode 100644 pcleaner/_testbed/nbs/web_server.ipynb create mode 100644 pcleaner/_testbed/pcleaner.png create mode 100644 pcleaner/_testbed/requirements-colab.txt create mode 100644 pcleaner/_testbed/requirements-idefics.txt create mode 100644 pcleaner/_testbed/requirements.txt create mode 100644 pcleaner/_testbed/test_idefics.ipynb create mode 100644 pcleaner/_testbed/test_tesseract.ipynb create mode 100644 pcleaner/_testbed/testbed/__init__.py create mode 100644 pcleaner/_testbed/testbed/bottle.py create mode 100644 pcleaner/_testbed/testbed/experiments.py create mode 100644 pcleaner/_testbed/testbed/helpers.py create mode 100644 pcleaner/_testbed/testbed/ocr_idefics.py create mode 
100644 pcleaner/_testbed/testbed/ocr_metric.py create mode 100644 pcleaner/_testbed/testbed/visor.py create mode 100644 pcleaner/_testbed/testbed/web_server.py diff --git a/pcleaner/_testbed/README.md b/pcleaner/_testbed/README.md new file mode 100644 index 00000000..0bead663 --- /dev/null +++ b/pcleaner/_testbed/README.md @@ -0,0 +1,61 @@ +# PanelCleaner Testbed + +## Overview +The **PanelCleaner** testbed serves as a dedicated area for experimenting and testing new ideas with *PanelCleaner* using Jupyter Notebooks. Currently, it focuses on **OCR** technologies, primarily using **Tesseract** and **IDefics** models. The testbed also begins the development of an evaluation framework to support future experiments. This project utilizes the `nbdev` literate programming environment. + +## Installation +To get started with the notebooks, you'll need Jupyter Lab/Notebook or any Python IDE that supports Jupyter notebooks like *VSCode* or *Google Colab*. +The setup mostly shares the same requirements as PanelCleaner and its CLI, with a few additional dependencies. +Here’s how to set up your environment: +1. Activate a virtual environment. +2. Navigate to the `_testbed` directory: + ```bash + cd _testbed + ``` +3. Install the required dependencies: + ```bash + pip install -r requirements.txt + ``` +Note: Each notebook may require the installation of additional dependencies. + +## Google Colab Support +The notebooks are ready to use on Google Colab, allowing you to run them directly on the platform without any extra setup or local GPU rigs. +Instructions to use Google Colab are included in the notebooks. + +## Install Test Images +The test images are not included in the repository but can be downloaded from the following link: +- [Test images PCISet](https://drive.google.com/file/d/18TSXLCYAPxAlUsdHmgAe6FZM5d8K6gcT/view?usp=drive_link). 
The notebooks have also instructions and code to download directly the set + +After downloading, place the test images in the [source](source) directory. If you want to use your own, each image should have a corresponding text file with the same name, but with the extension `.txt`, which contains the ground truth data, one line per box (as calculated by PanelCleaner). Optionally, you can also include a `.json` file with the same name, specifying the language of the page: +```json +{ + "lang": "Spanish" +} +``` +If no language file is found, English will be used by default. In the near future, language detection will be automated. + +## Introduction to nbdev +[nbdev](https://nbdev.fast.ai/) is a **literate programming** environment that allows you to develop a Python library in Jupyter Notebooks, integrating exploratory programming, code, tests, and documentation into a single cohesive workflow. Inspired by **Donald Knuth**'s concept of literate programming, this approach not only makes the development process more intuitive but also eases the maintenance and understanding of the codebase. + +## Library Notebooks (WIP) + +#### [helpers.ipynb](helpers.ipynb) +This notebook includes utility functions and helpers that support the experiments in other notebooks, streamlining repetitive tasks and data manipulation. + +#### [ocr_metric.ipynb](ocr_metric.ipynb) +This notebook focuses on defining and implementing metrics to evaluate the performance and accuracy of OCR engines, crucial for assessing the effectiveness of OCR technologies in various scenarios. It currently develops a basic metric for evaluating OCR models. In the near future, additional metrics will be added, such as precision and recall using Levenshtein distance (edit distance). More importantly, it will introduce a metric tailored to the unique characteristics of Comics/Manga OCR, a topic currently unexplored in technical literature. 
+ +#### [experiments.ipynb](experiments.ipynb) +This notebook details the development of the evaluation framework used in other notebooks, with Tesseract as a case study to illustrate the evaluation process. It's a work in progress, and will be updated continuously. If you're only interested in visualizing the results of the experiments, go directly to `Test_tesseract.ipynb` or `Test_idefics.ipynb`, which are much shorter and more to the point. + +#### [visor.ipynb](visor.ipynb) +Base infrastructure of experiments visualization. Simple composition of Jupyter widgets. + + +## Test Notebooks (WIP) + +#### [test_tesseract.ipynb](test_tesseract.ipynb) +This notebook is dedicated to testing the Tesseract OCR engine, offering insights into its capabilities and limitations through hands-on experiments. + +#### [test_idefics.ipynb](test_idefics.ipynb) +Similar to `test_tesseract.ipynb`, this notebook focuses on the IDefics LVM model, evaluating its performance and accuracy under different conditions. Here you can compare the results of the Tesseract OCR engine with the IDefics LVM model to see how the two stand in terms of accuracy and performance. 
diff --git a/pcleaner/_testbed/__init__.py b/pcleaner/_testbed/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pcleaner/_testbed/experiment/.gitkeep b/pcleaner/_testbed/experiment/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/pcleaner/_testbed/experiment/Idefics.json b/pcleaner/_testbed/experiment/Idefics.json new file mode 100644 index 00000000..00a0f903 --- /dev/null +++ b/pcleaner/_testbed/experiment/Idefics.json @@ -0,0 +1,3825 @@ +{ + "ocr_model": "Idefics", + "runs": { + "Idefics-crop-post": { + "Strange_Tales_172005.jpg": { + "1": { + "DEFAULT_GREY_PAD": "The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their master comes home ...", + "INITIAL_BOX": "\"the house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their master comes home...\"", + "DEFAULT": "The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their master comes home ...", + "PADDED_4": "\"the house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their master comes home...\"", + "PADDED_8": "The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their master comes home ...", + "EXTRACTED_INIT_BOX": "The house and the old man are alike in many ways; tally-hoo! patient, contented always to wait until the master comes home ... 
\"", + "PADDED_4_EXTRACTED": "The house and the old man are alike in many ways; tally proud, patient, contented always to wait until they're naster comes home ...", + "PADDED_8_EXTRACTED": "The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until they master comes home ...", + "PADDED_8_DILATION_1": "The house and the old man are alike in many ways; \"tall, proud, patient, contented always to wait until their master comes home\" ...", + "PAD_8_FRACT_0_5": "The house and the old man are alike in many ways; tall, proud, patient, contented (they always to wait until they master comes home ...", + "PAD_8_FRACT_0_2": "The house and the old man are alike in many ways; tall, proud, patient, contented (they always to wait until their master comes home ..." + }, + "0": { + "DEFAULT_GREY_PAD": "Empowered by great charled cypress trees , the ancient manor stands alone on the outskirts of new orleans , kept tidy by a white-haired old man known only as bambu.", + "INITIAL_BOX": "Embodied by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tidy by a white-haired old man known only as bambu.", + "DEFAULT": "Embodied by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tidy by a white-haired old man known only as bambu.", + "PADDED_4": "Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tidy by a white-haired old man known only as bambu.", + "PADDED_8": "Empowered by great charled cypress trees , the ancient manor stands alone on the outskirts of new orleans , kept tidy by a white-haired old man known only as bambu.", + "EXTRACTED_INIT_BOX": "Encountered by great charled express trees, the ancient manor stands alone on the quaint skirts of new orleans kept tidy by a white-haired old man known only as bannibull.", + "PADDED_4_EXTRACTED": "Enthroned by great charles cypress trees, 
the ancient manor stands alone on the outskirts of new orleans, kept tidy by a white-hired and man known only as bambu.", + "PADDED_8_EXTRACTED": "Enchanted by great charled cypress trees, the ancient manor stands alone on the outskirts of new orleans kept tidy by a white-haired and man known only as bambu.", + "PADDED_8_DILATION_1": "Engineered by great charled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tidy by a white-hairied old man known only as bambu.", + "PAD_8_FRACT_0_5": "Encountered by great charles cypress trees, the ancient manor stands alone on the outskirts of new orleans kept tidy by a white-haired red man known only as bambu.", + "PAD_8_FRACT_0_2": "Encountered by great charles cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tidy by a white-haired old man known only as bambu." + }, + "2": { + "DEFAULT_GREY_PAD": "And one in need of some help, it would appear.", + "INITIAL_BOX": "And one in need of some help, it would appear...", + "DEFAULT": "And one in need of some help, it would appear.", + "PADDED_4": "And one in need of some help, it would appear...", + "PADDED_8": "And one in need of some help, it would appear.", + "EXTRACTED_INIT_BOX": "And one in need of some help, it would appear...", + "PADDED_4_EXTRACTED": "And one in need of some help. it would appear...", + "PADDED_8_EXTRACTED": "And one in need of some help it would appear...", + "PADDED_8_DILATION_1": "And one in need of some help, it would appear.", + "PAD_8_FRACT_0_5": "And one in need of some help it would appear.", + "PAD_8_FRACT_0_2": "And one in need of some help, it would appear." 
+ }, + "3": { + "DEFAULT_GREY_PAD": "Bambu - we have a guest.", + "INITIAL_BOX": "Bambi - we have a guest.", + "DEFAULT": "Bambu - we have a guest.", + "PADDED_4": "Bambi - we have a guest.", + "PADDED_8": "Bambu - we have a guest ...", + "EXTRACTED_INIT_BOX": "Banned by the law - we have a bluest.", + "PADDED_4_EXTRACTED": "Bam blam we have a guest.", + "PADDED_8_EXTRACTED": "Bambi - he have a guest.", + "PADDED_8_DILATION_1": "Bambi - we have a guest.", + "PAD_8_FRACT_0_5": "Bambi - we have a guest.", + "PAD_8_FRACT_0_2": "Bambi - we have a guest." + }, + "4": { + "DEFAULT_GREY_PAD": "\"and tonight, he comes most urgently, slamming open the oaken front doors!", + "INITIAL_BOX": "\"and tonight, he comes most urgently slamming open the oaken front doors!", + "DEFAULT": "And tonight, he comes most urgently, slamming open the oaken front doors!", + "PADDED_4": "And tonight, he comes most urgently, slamming open the oaken front doors!", + "PADDED_8": "And tonight, he comes most urgently, slamming open the oaken front doors!", + "EXTRACTED_INIT_BOX": "\"and tonight, he comes most largely slamming open the oaken front doors!", + "PADDED_4_EXTRACTED": "And tonight he comes most urgently, slamming open the oaken front doors!", + "PADDED_8_EXTRACTED": "And tonight, he comes most livelyly slamming open the oaken front doors!", + "PADDED_8_DILATION_1": "And tonight, he comes most urgently, slamming open the oaken front doors!", + "PAD_8_FRACT_0_5": "\"and tonight, he comes most lircently, slamming open the oaken front doors!", + "PAD_8_FRACT_0_2": "\"and to night, he comes most lircently, slamming open the oaken front doors!" + }, + "5": { + "DEFAULT_GREY_PAD": "Tell me master how may bambi serve?", + "INITIAL_BOX": "Tell me... master... how may bambi serve?", + "DEFAULT": "Tell me... master... how may bambu serve?", + "PADDED_4": "Tell me, master... how may bambi serve?", + "PADDED_8": "Tell me master how may bambi serve?", + "EXTRACTED_INIT_BOX": "Tell me master... 
how mary bambi serve?", + "PADDED_4_EXTRACTED": "Tell me master... how may bamouli serve?", + "PADDED_8_EXTRACTED": "Titell me master... how may bambi serve?", + "PADDED_8_DILATION_1": "Tell me master how may bamboli serve?", + "PAD_8_FRACT_0_5": "Tibell master... how may bambi serve?", + "PAD_8_FRACT_0_2": "Tell me master... how may bamoli serve?" + }, + "6": { + "DEFAULT_GREY_PAD": "Some blankets to keep her warm, bambi...and perhaps some dry clothes...?", + "INITIAL_BOX": "Some blankets to keep her warm, bambi- and perhaps some dry clothes?", + "DEFAULT": "Some blankets to keep her warm, bambi...and perhaps some dry clothes?", + "PADDED_4": "Some blankets to keep her warm, bambu- and perhaps some dry clothes?", + "PADDED_8": "Some blankets to keep her warm, bambi...and perhaps some dry clothes?", + "EXTRACTED_INIT_BOX": "Gome blankets to keep her warm, bambu-and perhaps some dry clothes", + "PADDED_4_EXTRACTED": "Gome blankets to keep her warm, bambu-and perhaps some dry clothes", + "PADDED_8_EXTRACTED": "Gone blankets to keep her warm, bamboo- and perhaps some dry clothes", + "PADDED_8_DILATION_1": "Some blankets to keep her warm, bambi- and perhaps some dry clothes", + "PAD_8_FRACT_0_5": "Some blankets to keep her warm, bambi- and perhaps some dry clothes", + "PAD_8_FRACT_0_2": "Some blankets to keep her warm, bambi- and perhaps some dry clothes" + }, + "7": { + "DEFAULT_GREY_PAD": "The echo of the old man's footsteps fades down the hall as...", + "INITIAL_BOX": "The echo of the old man's footsteps fades down the hall as...", + "DEFAULT": "The echo of the old man's footsteps fades down the hall as...", + "PADDED_4": "The echo of the old man's footsteps fades down the hall as...", + "PADDED_8": "The echo of the old man's footsteps fades down the hall as....", + "EXTRACTED_INIT_BOX": "The echo of the old man's footsteps fades down the hall ais...", + "PADDED_4_EXTRACTED": "The echo of the old man's footsteps fades down the hall aisw...", + "PADDED_8_EXTRACTED": 
"The echo of the old man's footsteps fades down the hall a6\"", + "PADDED_8_DILATION_1": "The echo of the old man's footsteps fades down the hall as", + "PAD_8_FRACT_0_5": "The echo of the old man's footsteps fades down the hall as", + "PAD_8_FRACT_0_2": "The echo of the old man's footsteps fades down the hall as..." + }, + "8": { + "DEFAULT_GREY_PAD": "How curious the whims of fate had i not chanced to stroll along the river tonight.", + "INITIAL_BOX": "How curious the whims of fate... had i not changed to stroll along the river tonight...", + "DEFAULT": "How curious the whims of fate... had i not changed to stroll along the river tonight...", + "PADDED_4": "How curious the whims of fate ... had i not changed to stroll along the river tonight ...", + "PADDED_8": "How curious the whims of fate... had i not changed to stroll along the river tonight...", + "EXTRACTED_INIT_BOX": "How curious the whims of fate! had i not changed to stroll along the river tonight ...", + "PADDED_4_EXTRACTED": "How curious the whims of fate ... had i not changed to stroll along the river tonight ... \"", + "PADDED_8_EXTRACTED": "How curious the whims of fate! had i not changed to stroll along the river tonight -?", + "PADDED_8_DILATION_1": "How curious the whims of fate. had i not changed to stroll along the river tonight--", + "PAD_8_FRACT_0_5": "How curious the whimings of fate! had i not chanced to stroll along the river tonight?", + "PAD_8_FRACT_0_2": "How curious the whims of fate! had i not chanced to stroll along the river tonight?" + }, + "9": { + "DEFAULT_GREY_PAD": "As quickly as i can, master.", + "INITIAL_BOX": "As quickly as i can master. \"", + "DEFAULT": "As quickly as i can, master.", + "PADDED_4": "As quickly as i can, master.", + "PADDED_8": "As quickly as i can, master.", + "EXTRACTED_INIT_BOX": "As quickly as i can, master.", + "PADDED_4_EXTRACTED": "As quickly as i can, master. 
\"", + "PADDED_8_EXTRACTED": "As quickly as i can, master!", + "PADDED_8_DILATION_1": "As quickly as i can, master.", + "PAD_8_FRACT_0_5": "As quickly as i can, master!", + "PAD_8_FRACT_0_2": "As quickly as i can, master!" + }, + "10": { + "DEFAULT_GREY_PAD": "\"the girl would most surely be dead by now.", + "INITIAL_BOX": "\"the girl would most surely be dead by now.", + "DEFAULT": "\"the girl would most surely be dead by now..\"", + "PADDED_4": "- the girl would most surely be dead by now...", + "PADDED_8": "\"the girl would most surely be dead by now.", + "EXTRACTED_INIT_BOX": "\"the girl would most surely be dead by now!", + "PADDED_4_EXTRACTED": "\"the girl would most surely be dead by now.", + "PADDED_8_EXTRACTED": "\"the girl, who'd most surely be dead by now...", + "PADDED_8_DILATION_1": "\"the girl would most surely be dead by now.", + "PAD_8_FRACT_0_5": "\"the girl would most surely be dead by now.", + "PAD_8_FRACT_0_2": "\"the girl would most surely be dead by now." + }, + "11": { + "DEFAULT_GREY_PAD": "Shede has been generous. the death god has given the girl a second chance at...\"", + "INITIAL_BOX": "Ghede has been generous. the death god has given the girl a second chance at...\"", + "DEFAULT": "Shede has been generous. the death god has given the girl a second chance at \"", + "PADDED_4": "Ghede has been generous. the death god has given the girl a second chance at...\"", + "PADDED_8": "Ghede has been generous...the death god has given the girl a second chance at...\"", + "EXTRACTED_INIT_BOX": "Chete has been gunned down the death god has given the girl a second chance at\"", + "PADDED_4_EXTRACTED": "Gwende has been generously the death (god has given the girl a second chance\" at--", + "PADDED_8_EXTRACTED": "One of his been generous the death; god has given the girl a second chance at\"", + "PADDED_8_DILATION_1": "Ghede has been generous. the death god has given the girl a second chance! 
at--", + "PAD_8_FRACT_0_5": "Cheers of has been generalizer the death; god has given the girl a second chance at\"", + "PAD_8_FRACT_0_2": "Chede has been generous the death god has given the girl a second chance at\"" + }, + "12": { + "DEFAULT_GREY_PAD": "Easy, girl-- there's nothing to scream about anymore.", + "INITIAL_BOX": "Easy, girl! \"\" there's nothing to scream about anymore.", + "DEFAULT": "Easy, girl--there's nothing to scream about anymore.", + "PADDED_4": "Easy, girl... there's nothing to scream about anymore.", + "PADDED_8": "Easy, girl-- there's nothing to scream about anymore.", + "EXTRACTED_INIT_BOX": "Easy, girl -- there's nothing to scream about anymore...", + "PADDED_4_EXTRACTED": "Easy, girl -- there's nothing to scream about anymore...", + "PADDED_8_EXTRACTED": "Easy, girl! there's nothing to scream about anymore.", + "PADDED_8_DILATION_1": "Easy, girl -- there's nothing to scream about anymore.", + "PAD_8_FRACT_0_5": "Easy, girl... there's nothing to scream about anymore.", + "PAD_8_FRACT_0_2": "Easy, girl...there's nothing to scream about anymore." + }, + "13": { + "DEFAULT_GREY_PAD": "You're among friends now. you're safe?", + "INITIAL_BOX": "You're among friends now. you're safe?", + "DEFAULT": "You're among friends now. you're safe?", + "PADDED_4": "You're among friends now. you're safe?", + "PADDED_8": "You're among friends now. you're safe?", + "EXTRACTED_INIT_BOX": "You're among friends now. you're safe!", + "PADDED_4_EXTRACTED": "You're among friends now! you're safe?", + "PADDED_8_EXTRACTED": "You're among friends now, you're safe!", + "PADDED_8_DILATION_1": "You're among friends now, you're safe!", + "PAD_8_FRACT_0_5": "You're among friends now, you're safe!", + "PAD_8_FRACT_0_2": "You're among friends now, you're safe!" 
+ }, + "14": { + "DEFAULT_GREY_PAD": "Continued after next page", + "INITIAL_BOX": "Continued after next page", + "DEFAULT": "Continued after next page", + "PADDED_4": "Continued after next page", + "PADDED_8": "Continued after next page.", + "EXTRACTED_INIT_BOX": "Continued in the next issue! \" but it's", + "PADDED_4_EXTRACTED": "Cognitively tuned into everything", + "PADDED_8_EXTRACTED": "Continued on next page", + "PADDED_8_DILATION_1": "Continued... text page", + "PAD_8_FRACT_0_5": "Continued on next page.", + "PAD_8_FRACT_0_2": "Continued... text page." + } + }, + "Action_Comics_1960-01-00_(262).JPG": { + "1": { + "DEFAULT_GREY_PAD": ">gasp:: everything's w-whirling around me! i can't stand up...", + "INITIAL_BOX": "Gasp! everything's whirling around me! i can't stand up...", + "DEFAULT": "Gasp! everything's w-whirling around me! i can't stand up...", + "PADDED_4": "Gasp! everything's w-whirling around me! i can't stand up...", + "PADDED_8": "Gaspl; everything's w-whirling around me! i can't stand up...", + "EXTRACTED_INIT_BOX": "\" yeah... everything's all-whirlin 'round me! i can't stand up.", + "PADDED_4_EXTRACTED": "Figs? everything's w-whirling around me! i can't stand up?", + "PADDED_8_EXTRACTED": "\" i guess everything's well whirling around me! i can't stand up.", + "PADDED_8_DILATION_1": "%gasp% everything s w-whirling around me! i can't stand up.", + "PAD_8_FRACT_0_5": "\" gasp! everything's whirling around me! i can't stand up.", + "PAD_8_FRACT_0_2": "! gasps! everything's whirling around me! i can't stand up?" + }, + "0": { + "DEFAULT_GREY_PAD": "Suddenly...", + "INITIAL_BOX": "Suddenly...", + "DEFAULT": "Suddenly...", + "PADDED_4": "Suddenly...", + "PADDED_8": "Suddenly...", + "EXTRACTED_INIT_BOX": "Suddenly yours.", + "PADDED_4_EXTRACTED": "Suddenly... \"", + "PADDED_8_EXTRACTED": "Suddenly...", + "PADDED_8_DILATION_1": "Suddenly...", + "PAD_8_FRACT_0_5": "Suddenly...", + "PAD_8_FRACT_0_2": "Suddenly..." 
+ }, + "2": { + "DEFAULT_GREY_PAD": "Clark! i'm falling! help?", + "INITIAL_BOX": "Clark! i'm falling! help?", + "DEFAULT": "Clark! i'm falling! help?", + "PADDED_4": "Clark! i'm falling! help?", + "PADDED_8": "Clark! i'm falling! help?", + "EXTRACTED_INIT_BOX": "\" clark, i'm falling! help? \"", + "PADDED_4_EXTRACTED": "\" clark, i'm falling! help? help? \"", + "PADDED_8_EXTRACTED": "' clark, i'm falling! help?", + "PADDED_8_DILATION_1": "Clark! i'm falling! help?", + "PAD_8_FRACT_0_5": "Clark! i'm falling! help?", + "PAD_8_FRACT_0_2": "Clark! i'm falling! help?" + }, + "3": { + "DEFAULT_GREY_PAD": "I-i'm passing out... ohhh...", + "INITIAL_BOX": "I'm passing out... ohhhh...", + "DEFAULT": "I-i'm passing out... ohhh...", + "PADDED_4": "I'm passing out... ohhhh...", + "PADDED_8": "I-i'm passing out... ohhh...", + "EXTRACTED_INIT_BOX": "I'm passing out... oh my god!", + "PADDED_4_EXTRACTED": "I'm passing out ohhhhmm...", + "PADDED_8_EXTRACTED": "I'm passing out... now?", + "PADDED_8_DILATION_1": "I-i'm passing out... ohhhh....", + "PAD_8_FRACT_0_5": "I'm passing out... original?", + "PAD_8_FRACT_0_2": "I'm passing out... oh no!" 
+ }, + "4": { + "DEFAULT_GREY_PAD": "Action comics", + "INITIAL_BOX": "Action comics", + "DEFAULT": "Action comics", + "PADDED_4": "Action comics", + "PADDED_8": "Action comics", + "EXTRACTED_INIT_BOX": "Action comics", + "PADDED_4_EXTRACTED": "Action comics", + "PADDED_8_EXTRACTED": "Action comics", + "PADDED_8_DILATION_1": "Action comics", + "PAD_8_FRACT_0_5": "Action comics", + "PAD_8_FRACT_0_2": "Action comics" + }, + "5": { + "DEFAULT_GREY_PAD": "Then, seconds later...", + "INITIAL_BOX": "Then seconds later...", + "DEFAULT": "Then seconds later...", + "PADDED_4": "Then seconds later...", + "PADDED_8": "Then, seconds later...", + "EXTRACTED_INIT_BOX": "It was seconds later...", + "PADDED_4_EXTRACTED": "\"the next few seconds later...", + "PADDED_8_EXTRACTED": "Timeworn sectoids later...", + "PADDED_8_DILATION_1": "Then, seconds later...", + "PAD_8_FRACT_0_5": "Then seconds later...", + "PAD_8_FRACT_0_2": "Then seconds later..." + }, + "6": { + "DEFAULT_GREY_PAD": "Great caesar's ghost! this is black magic! we've been transported to the weirdest world i ever saw?", + "INITIAL_BOX": "Great caesar's ghost! this is black magic! we've been transported to the weirdest world i ever saw?", + "DEFAULT": "Great caesar's ghost! this /is black magic! we 've been transported to the weirdest world i ever saw?", + "PADDED_4": "Great caesar's ghost! this /5 black magic! we 've been transported to the weirdest world i ever saw?", + "PADDED_8": "Great caesar's ghost! this is black magic! we've been transported to the weirdest world i ever saw?", + "EXTRACTED_INIT_BOX": "Great caesar's ghost! this is black magic! we've been transported to the weirdest world i ever saw?", + "PADDED_4_EXTRACTED": "Great caesar's ghost! this is black magic! we've been transported to the weirdest world i ever saw?", + "PADDED_8_EXTRACTED": "Great caesar's ghost! this as black magic! we've been transported to the weirdest world i ever saw?", + "PADDED_8_DILATION_1": "Great caesar's ghost! 
this /s black magic! we 've been transported to the weirdest world i ever saw?", + "PAD_8_FRACT_0_5": "Great caesar's ghost! this as black magic! we've been transported to the weirdest world i ever saw?", + "PAD_8_FRACT_0_2": "Great caesar's ghost! this as black magic, we 've been transported to the weirdest world i ever saw?" + }, + "7": { + "DEFAULT_GREY_PAD": "... it certainly isn't our earth, perry! look at the size of those bees! '", + "INITIAL_BOX": "\"... it certainly isn't our earth, perry! look at the size of those bees!", + "DEFAULT": "... it certainly isn't our earth, perry! look at the size of those bees?", + "PADDED_4": "... it certainly isn't our earth, perry! look at the size of those bees?", + "PADDED_8": "... it certainly isn't our earth, perry! look at the size of those bees!", + "EXTRACTED_INIT_BOX": "... it certainly isn't on earth! perry look at the size- of those bees! '", + "PADDED_4_EXTRACTED": "... it certainly isn't okay, earth! peter look at the shee- of those beees?", + "PADDED_8_EXTRACTED": "... it certainly isn't our earth, perry! look at the size of those bees?", + "PADDED_8_DILATION_1": "\"... it certainly isn't our earth, perry! look at the size of those bees?", + "PAD_8_FRACT_0_5": "... it certainly isn't our earth, perry! look at the curse of those bees?", + "PAD_8_FRACT_0_2": "... it certainly isn't our earth, perry! look at the size of those bees?" + }, + "8": { + "DEFAULT_GREY_PAD": "Watch out, clark!", + "INITIAL_BOX": "Watch out, clark?", + "DEFAULT": "Watch out, clark!", + "PADDED_4": "Watch out, clark!", + "PADDED_8": "Watch out, clark!", + "EXTRACTED_INIT_BOX": "Watch out, clark?", + "PADDED_4_EXTRACTED": "Watch out, clark!", + "PADDED_8_EXTRACTED": "Watch out, clark?", + "PADDED_8_DILATION_1": "Watch out, clark!", + "PAD_8_FRACT_0_5": "Watch out, clark!", + "PAD_8_FRACT_0_2": "Watch out, clark!" + }, + "9": { + "DEFAULT_GREY_PAD": "Owww ww!?", + "INITIAL_BOX": "Ow! ww! 
w!?", + "DEFAULT": "Own nw nw!?", + "PADDED_4": "Own nwn!?", + "PADDED_8": "Owww!?", + "EXTRACTED_INIT_BOX": "\" ow! whoa? !", + "PADDED_4_EXTRACTED": "Owww wwww!?", + "PADDED_8_EXTRACTED": "Owww wwwhh?", + "PADDED_8_DILATION_1": "Oh ww!?", + "PAD_8_FRACT_0_5": "Owwwww!?", + "PAD_8_FRACT_0_2": "Owwwww!" + }, + "10": { + "DEFAULT_GREY_PAD": "Yet the bee's stinger went right through my uniform and penetrated my skin! that means the fabric of my superman costume has become ordinary cloth!", + "INITIAL_BOX": "Yet the bee's stinger went right through my uniform and penetrated my skin! that means: the fabric of my superman costume has become ordinary cloth!", + "DEFAULT": "Yet the bee's stinger went right through my uniform and penetrated my skin! that means...the fabric of my superman costume has become ordinary cloth!", + "PADDED_4": "Yet the bee's stinger went right through my uniform and penetrated my skin! that means...the fabric of my superman costume has become ordinary cloth!", + "PADDED_8": "Yet the bee's stinger went right through my uniform and penetrated my skin! that means...the fabric of my superman costume has become ordinary cloth!", + "EXTRACTED_INIT_BOX": "Yet the bee's stinger worent mighty yithrough my uniform and penetrated my skin! that means, the fabric of my superman costume has become ordinary cloth! \"", + "PADDED_4_EXTRACTED": "Yet the bee's stinger went through my uniform and penetrated an skin! that means, the paired of my superhero suit has become ordinary cloth!", + "PADDED_8_EXTRACTED": "Yet the bee's stinger want high through my uniform, and penetrated an skin! that means, the fabric of my superhero costume has become ordinary cloth!", + "PADDED_8_DILATION_1": "Yet the bee's stinger went right through an uniform and penetrated an skin that means the fabric of an superman costume has become ordinary cloth!", + "PAD_8_FRACT_0_5": "Yet the bee's stinger went right through my uniform and penetrated my skin! 
that means, the fabric of my superman costume has become ordinary cloth!!", + "PAD_8_FRACT_0_2": "Yet the bee's stinger went right through my uniform and penetrated my skin! that means, the fabric of my superman costume has become ordinary cloth! \"" + }, + "11": { + "DEFAULT_GREY_PAD": "Hurry! let's beat it before we get stung, too?", + "INITIAL_BOX": "Hurry! let's beat it before we get stung, too?", + "DEFAULT": "Hurry! let's beat it before we get stung, too?", + "PADDED_4": "Hurry! let's beat it before we get stung, too?", + "PADDED_8": "Hurry! let's beat it before we get stung, too?", + "EXTRACTED_INIT_BOX": "Hurry! let's beat it before we get stung, too?", + "PADDED_4_EXTRACTED": "Hurry! let's beat it before we get stung, too?", + "PADDED_8_EXTRACTED": "Hurry! let's beat it before we get stung, too?", + "PADDED_8_DILATION_1": "Hurry! let's beat it before we get stung, too?", + "PAD_8_FRACT_0_5": "Hurry! let's beat it before we get stung, too?", + "PAD_8_FRACT_0_2": "Hurry! let's beat it before we get stung, too?" + }, + "12": { + "DEFAULT_GREY_PAD": "Ggreat guns! ... gasp... pain i feel pain! as superman, i should be invulnerable: i have unbreakable skin! under my clark kent clothes, i'm wearing an indestructible superman uniform!", + "INITIAL_BOX": "Great guns! ... gaspe... pain i feel pain! as superman, i should be invulnerable! i have breakable skin under my clark kent clothes. i'm wearing an indestructible superman uniform!", + "DEFAULT": "\" g-great guns! ... gaspe... pain i feel pain! as superman, i should be invulnerable! i have breakable skin under my clark kent clothes, wearing an indestructible superman uniform! \"", + "PADDED_4": "Great guns! ... gaspi... pain i feel pain! as superman, i should be invulnerable! i have breakable skin under my clark kent clothes, tim. wearing an indestructible superman uniform!", + "PADDED_8": "\"great guns! ... gasp... pain! i feel pain! as superman, i should be invulnerable: i have unbreakable skin! 
under my clark kent clothes, i'm wearing an indestructible superman uniform!", + "EXTRACTED_INIT_BOX": "Great guns! ... superman? ..... panic!!! i feel framed as superman; i should be amazingly awesome; i've undergone infinite hours of clark kent clothes, and undergoing an indestroctible superman uniform!!", + "PADDED_4_EXTRACTED": "Great guns ... good guys... pair it terf pants as superman, i should be able to wear them in clark kent clothes, the metamorphosis an inestigatable superhuman unknown! ?", + "PADDED_8_EXTRACTED": "\" great guns! ... you'll pay! i feel from this semi-barrage i should be able to understand how you managed to tie hinne injury with an indestructible suitsportman undronna !\"", + "PADDED_8_DILATION_1": "Great guns! ... gaspe... pain... i feel pain as superman! i should be invulnerable? i have unbreakable skin under my clark kent clothes; i'm wearing an indestructible superman uniform! ?", + "PAD_8_FRACT_0_5": "Great guns! ... yaffy pan! i feel fine as superman, y you should be involved newbie? i have unbreakable bon in under clark kent clothes, i'm wearing an indestroctible superman underwear! ?", + "PAD_8_FRACT_0_2": "Great guns! ... ygaspee! pain! i feel pain! as superman, i should be invulnerable! i have unbreakable skin under my clark kent clothes. i'm wearing an indestructible superman uniform!" + }, + "13": { + "DEFAULT_GREY_PAD": "Abruptly...", + "INITIAL_BOX": "Abruptly...", + "DEFAULT": "Abruptly...", + "PADDED_4": "Abruptly...", + "PADDED_8": "Abruptly...", + "EXTRACTED_INIT_BOX": "Absolutely! ...", + "PADDED_4_EXTRACTED": "Absolutely!", + "PADDED_8_EXTRACTED": "Absolutely...", + "PADDED_8_DILATION_1": "Abruptly...", + "PAD_8_FRACT_0_5": "Absolutely!", + "PAD_8_FRACT_0_2": "Absolutely..." + }, + "14": { + "DEFAULT_GREY_PAD": "Great caesar's ghost! he 's spinning a web of giant, silk strands ... as tough as steel!", + "INITIAL_BOX": "Great caesar's ghost! he's spinning a web of giant, silk strands ... 
as tough as steel!", + "DEFAULT": "Great caesar's ghost! he's spinning a web of giant, silk strands ... as tough as steel!", + "PADDED_4": "Great caesar's ghost! he's spinning a web of giant, silk strands ... as tough as steel!", + "PADDED_8": "Great caesar's ghost! he's spinning a web of giant, silk strands ... as tough as steel?", + "EXTRACTED_INIT_BOX": "Great caesar's ghost! he 's spinning a web of giant silk strands - as tough as steel?", + "PADDED_4_EXTRACTED": "Great caesar's ghost! he 's spinning a web of giant, silk strands - as tough as steel?", + "PADDED_8_EXTRACTED": "Great caesar's ghost! he 's spinning a web of giant, silk 5 strands - as tough as steel?", + "PADDED_8_DILATION_1": "Great caesar's ghost! he 's spinning a web of giant, silk strands ... as tough as steel?", + "PAD_8_FRACT_0_5": "Great caesar's ghost! he 's spinning a web of giant, silk strands ... as tough as steel?", + "PAD_8_FRACT_0_2": "Great caesar's ghost! he 's spinning a web of giant, silk strands ... as tough as steel?" + }, + "15": { + "DEFAULT_GREY_PAD": "I-i feel the heat of the sun...the pain of the bee-sting...the heavy weight of my pack! every human discomfort...good grief! i've lost all my super-powers...i've become an ordinary mortal in this world?", + "INITIAL_BOX": "I-i feel the heat of the sun...the pain of the bee-sting...the heavy weight of my pack! every human discomfort! good grief! i've lost all my super-powers! i've become an ordinary mortal in this world?", + "DEFAULT": "I-i feel the heat of the sun...the pain of the bee-sting...the heavy weight of my pack! every human discomfort! good grief! i've lost all my super-powers! i've become an ordinary mortal in this world?", + "PADDED_4": "I-i feel the heat of the sun...the pain of the bee-sting...the heavy weight of my pack! every human discomfort! good grief! i've lost all my super-powers! 
i've become an ordinary mortal in this world?", + "PADDED_8": "I-i feel the heat of the sun...the pain of the bee-sting...the heavy weight of my pack! every human discomfort...good grief! i've lost all my super-powers. i've become an ordinary mortal in this world?", + "EXTRACTED_INIT_BOX": "It-i feel the heat of the sun...the pain of the bee-sting...the heavy weight of my pack! every human recomfort...good grief! i've lost all my super-powers...and become an ordinary mortal...as this world?", + "PADDED_4_EXTRACTED": "I just feel the heat of the sun...the pain of the bee-sting...the heavy weight of my pack! everything hillman discomfort...good grievier? i've lost all my super-powers and now i'm an electric monkey at this world!", + "PADDED_8_EXTRACTED": "I-i feel the heat of the gun...the pain of the bee-string...the heavy weight of my own mortality! humanity has become an organism mortal to tyris world?", + "PADDED_8_DILATION_1": "I...i feel the heat of the sun...the pain of the bee-sting...the heavy weight of my pack! every human discomfort! good grief! i've lost all my super-powers! have become an ordinary mortal in this world?", + "PAD_8_FRACT_0_5": "I-i feel the heat of the sun...the pain of the bee-sting...the heavy weight of my pack! every human being good grief! i've lost all my super-powers. i've become an ordinary mortal on this world?", + "PAD_8_FRACT_0_2": "I-i feel the heat of the sun...the pain of the bee-sting...the heavy weight of my pack! every human discomfort...good grief! i've lost all my super-powers. i've become an ordinary mortal in this world?" + }, + "16": { + "DEFAULT_GREY_PAD": "Get back! that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning.", + "INITIAL_BOX": "Get back! that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning.", + "DEFAULT": "Get back! 
that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning!", + "PADDED_4": "Get back! that enormous spider- like creature is going berserk, as if the sight of him excited him into mad spinning.", + "PADDED_8": "Get back! that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning!", + "EXTRACTED_INIT_BOX": "Get back! that enormous spider-like creature is going berserk, as if the sight of us excited him into a frenzy of spinning.", + "PADDED_4_EXTRACTED": "Get back! that enormous spider- like creature is going berserk, as if the sight of him excited him into and spinning", + "PADDED_8_EXTRACTED": "Get back! that enormous spider-like creature is going to berserk, as if the sight of us excited him into mad spinning?", + "PADDED_8_DILATION_1": "Get back! that enormous spider-like creature is going berserk, as if the sight of his excited him into mad spinning. \"", + "PAD_8_FRACT_0_5": "Get back! that enormous spider-like creature is going berserk, as if the sight of us excited him into mad spinning.", + "PAD_8_FRACT_0_2": "Get back! that enormous spider-like creature is going berserk, as if the sight of us excited him into mad spinning." + } + }, + "Adolf_Cap_01_008.jpg": { + "1": { + "DEFAULT_GREY_PAD": "Fuerza de agua!", + "INITIAL_BOX": "Fuera de aqui!", + "DEFAULT": "Fuera de aqui!", + "PADDED_4": "Fuera de aqui!", + "PADDED_8": "Fueria de aguas!", + "EXTRACTED_INIT_BOX": "A fista de aguilas?", + "PADDED_4_EXTRACTED": "\u00bfqui\u00e9n es aqu\u00ed?", + "PADDED_8_EXTRACTED": "La rustia de agustin", + "PADDED_8_DILATION_1": "La fuera de agua?", + "PAD_8_FRACT_0_5": "Irustria de aguino?", + "PAD_8_FRACT_0_2": "Infierria de agua!" 
+ }, + "0": { + "INITIAL_BOX": "Ya le dije - a mi padre que lo en- contrre en un aglue- ro en un arbol.", + "DEFAULT": "Ya le dije - a mi padre que lo en- contrre en un aglue- ro en un arbol.", + "DEFAULT_GREY_PAD": "Ya le dije - a mi padre que lo en- contrre en un aglue- ro en un arbol.", + "PADDED_4": "Ya le dice a mi padre que lo en- ctrre en un agujle- ro en una arbol.", + "PADDED_8": "Ya le dije - a mi padre que lo en- contrre en un aglje- ro en un arbol.", + "EXTRACTED_INIT_BOX": "Xia l.g. dijes a \"mi padre quie lo en- contrrete en un adulto en un arbol-.", + "PADDED_4_EXTRACTED": "Ya las pujes a mi paquer quie lo en- contrien un agente en una aglitte pro en un arrolo.", + "PADDED_8_EXTRACTED": "Ya le dice a mi favor quie lo-en contrieo en un agujilla-ro en uin arsol.", + "PADDED_8_DILATION_1": "Ya le dice a mi padre que lo envie contriere en un aguje- ro eno un arbol.", + "PAD_8_FRACT_0_5": "Xa le due a mi padre que lo en contrte en un agujer-o en un arbol.", + "PAD_8_FRACT_0_2": "Ya le due a mi padre que lo en contre en un aguje- ro en un arsol." + }, + "2": { + "INITIAL_BOX": "Te he pregunta que quien escribio este.", + "DEFAULT": "Te he pregunta que quien escribio este.", + "DEFAULT_GREY_PAD": "Te he pregunta que quien escribio este.", + "PADDED_4": "Te he pregunta que quien escribio este.", + "PADDED_8": "Te he pregunta que quien escribio este.", + "EXTRACTED_INIT_BOX": "El his friy gilnstado calle gulien escalibio esteo.", + "PADDED_4_EXTRACTED": "El his fry glin-tado cue guillen escribio esteo.", + "PADDED_8_EXTRACTED": "Tis his free glin-tado que alque enscidio esto.", + "PADDED_8_DILATION_1": "Te he pregunta que quien escribio este libro?", + "PAD_8_FRACT_0_5": "Te has encontrado al que quieres descubrirlo este.", + "PAD_8_FRACT_0_2": "Te has encontrado al que quieres escribir este." + }, + "3": { + "INITIAL_BOX": "Ino lo se! no quiero hablar mas deello.", + "DEFAULT": "Ino lo se/ no quiero hablar mas deello.", + "DEFAULT_GREY_PAD": "Ino lo se! 
no quiero hablar mas deello.", + "PADDED_4": "No lo se! no quiero hablar mas deello.", + "PADDED_8": "Ino lo sej no quiero hablar mas deello.", + "EXTRACTED_INIT_BOX": "No lo saque de la calle hablaor m\u00e1s, 175 eld20.", + "PADDED_4_EXTRACTED": "Uno lo sa que no quiero hablar mas de este e.u..", + "PADDED_8_EXTRACTED": "Uno lo sa que no quiero hablar mas de esto bueno.", + "PADDED_8_DILATION_1": "No lo se/ no quiero hablar mas de ello.", + "PAD_8_FRACT_0_5": "No lo seo, no quiero hablar mas deello.", + "PAD_8_FRACT_0_2": "No lo se/ no quiero hablar mas deello." + }, + "4": { + "INITIAL_BOX": "Adolf, lo que dice ese papel es completa- mente falso. es demagogia peligrosa.", + "DEFAULT": "Adolf, lo que dice ese papel es completa-mente falso. es demagogia pelegrosa.", + "DEFAULT_GREY_PAD": "Adolf, lo que dice ese papel es completa- mente falsa. es demagogia peligrosa.", + "PADDED_4": "Adolf, lo que dice ese papel es completa-mente falso. es demagogia peligrosa.", + "PADDED_8": "Adolf, lo que dice ese papel es completa- mente falso. es demagogia peligrosa.", + "EXTRACTED_INIT_BOX": "Ahora, lo que m\u00e1s me pase f\u00e1cil- es completo - mente italiano.", + "PADDED_4_EXTRACTED": "Ahora, lo que dice es que pafel bis completa ta- mi nte italso. as letrinos gregos, la filigrosa.", + "PADDED_8_EXTRACTED": "Ahora, lo que dice base pafel- es completa- mente italiano: es dih-magogia publica.", + "PADDED_8_DILATION_1": "Adulto , lo que dice ese papel es completa- mente falso. es demagogia peligrosa.", + "PAD_8_FRACT_0_5": "Ahora, lo que dice es que falta bs. completa - mente italso. es demagogia pablosra.", + "PAD_8_FRACT_0_2": "Adolf, lo que dice esa papafele bs completa - mente falso. es demagogia peligrosa." 
+ }, + "5": { + "INITIAL_BOX": "Entonces,\" por que le preo- cupa tan to?", + "DEFAULT": "Entonces, \u00bfpor qu\u00e9 le preocupa cupa tan tanto?", + "DEFAULT_GREY_PAD": "Entonces,\" por que le preo- cupa tant\u00f3?", + "PADDED_4": "Entonces, \u00bfpor qu\u00e9 le preocupa cupa tan t\u00f3to?", + "PADDED_8": "Entonces, \u00bfpor qu\u00e9 le preocupa cupa tan tanto?", + "EXTRACTED_INIT_BOX": "Entonces cuesta dupor que los preco cupa tanto?", + "PADDED_4_EXTRACTED": "En tonto caso, antes de preocuparse por cuida tontot", + "PADDED_8_EXTRACTED": "Entonces ches\" uporr guie lee preco cupa tauntof", + "PADDED_8_DILATION_1": "Entonces, \u00bfpor que le preocupa cuya tanta?", + "PAD_8_FRACT_0_5": "Entonces, \u00bfpor que les preocupa cuanta tan top?", + "PAD_8_FRACT_0_2": "Entonces, \u00bfpor que les preocupa cuanta tan tontos?" + }, + "6": { + "INITIAL_BOX": "El consulado no permita la circulaci\u00f3n de ese tipo de rumores.", + "DEFAULT": "El consulado no permita la circulaci\u00f3n de ese tipo de rumores.", + "DEFAULT_GREY_PAD": "El consulado no permita la circulaci\u00f3n de ese tipo de rumores.", + "PADDED_4": "El consulado no permita la circulaci\u00f3n de ese tipo de rumores.", + "PADDED_8": "El consulado no permita la circulaci\u00f3n de ese tipo de rumores.", + "EXTRACTED_INIT_BOX": "El consulado no permite la circulacion del rosto tipo de trucos.", + "PADDED_4_EXTRACTED": "El con su amado no permiten la circulacion de este tipo de peligrosas.", + "PADDED_8_EXTRACTED": "El con su lenguaje no permitir\u00eda la circulaci\u00f3n de las razas tipo d'p3 krimmorges.", + "PADDED_8_DILATION_1": "El consulado no permita la circulacion de esos tipos de rumores.", + "PAD_8_FRACT_0_5": "El con su lago no pertenece la circulacion de ese tipo de rumores.", + "PAD_8_FRACT_0_2": "El con su lado no perwitte la circulacion de ese tipo de rumores." 
+ }, + "7": { + "INITIAL_BOX": "Ino se nada!", + "DEFAULT": "Ino se nada!", + "DEFAULT_GREY_PAD": "Ino se nada!", + "PADDED_4": "Ino se nada!", + "PADDED_8": "Ino se nada!", + "EXTRACTED_INIT_BOX": "Kind of naive", + "PADDED_4_EXTRACTED": "No se rindan!", + "PADDED_8_EXTRACTED": "No se rindan!", + "PADDED_8_DILATION_1": "No se nadar!", + "PAD_8_FRACT_0_5": "No se nadar!", + "PAD_8_FRACT_0_2": "No se nadar!" + }, + "8": { + "INITIAL_BOX": "Tengo que silenciar, ciarlo, jentien-des?", + "DEFAULT": "Tengo que silenciar a carlos, jentien-des?", + "DEFAULT_GREY_PAD": "Tengo que silenciar, ciarlo, sent\u00eden-des?", + "PADDED_4": "Tengo que silenciar a ciarlo, \u00bfentienes- des?", + "PADDED_8": "Tengo que silenciar a ciarlo, dentien-des?", + "EXTRACTED_INIT_BOX": "Tango glue silen- claro de mentien- daisy?", + "PADDED_4_EXTRACTED": "Tango que salon- charlos. zentien- dusty?", + "PADDED_8_EXTRACTED": "Tengo que salvar a charles. sienten-dise?", + "PADDED_8_DILATION_1": "Tengo que silenciar- ciarlos, sentien- des?", + "PAD_8_FRACT_0_5": "Tengo que salvar a carlos, sentimiento-desesper?", + "PAD_8_FRACT_0_2": "Tengo que silenciar a carlos, cient\u00eden-des?" + }, + "9": { + "INITIAL_BOX": "Ahora, \u00bfhabla?", + "DEFAULT": "Ahora, \u00a1habla!", + "DEFAULT_GREY_PAD": "Ahora, \u00a1habla!", + "PADDED_4": "Ahora, \u00a1h\u00e1bla!", + "PADDED_8": "Ahora, \u00a1habla!", + "EXTRACTED_INIT_BOX": "Algunos de mis hablabs?", + "PADDED_4_EXTRACTED": "Ahira! hablas?", + "PADDED_8_EXTRACTED": "Ahora y habla!", + "PADDED_8_DILATION_1": "Ahora, ihablav", + "PAD_8_FRACT_0_5": "Ahora, nhablan", + "PAD_8_FRACT_0_2": "Ahora, hablamos!" 
+ }, + "10": { + "INITIAL_BOX": "\u00bfc\u00f3mo est\u00e1s encu-briendo a alguien?", + "DEFAULT": "\u00bfest\u00e1s en cu- briendo a alguien?", + "DEFAULT_GREY_PAD": "\u00bfc\u00f3mo est\u00e1s encu- briendo a alguien?", + "PADDED_4": "\u00bfc\u00f3mo est\u00e1s enchu- briendo a alguien?", + "PADDED_8": "\u00bfest\u00e1s en cu- briendo a alguien?", + "EXTRACTED_INIT_BOX": "\u00bfqui\u00e9n es encuentro? es un alguien?", + "PADDED_4_EXTRACTED": "\u00bfqui\u00e9n es enrique? siempre lo llamo - aval - g\u00fcem\u00e9n?", + "PADDED_8_EXTRACTED": "Les tas enciu- bienido a al- guilen?", + "PADDED_8_DILATION_1": "\u00bfest\u00e1s encu-briendo a alguien?", + "PAD_8_FRACT_0_5": "Les tas enciu- briendo a al-quien?", + "PAD_8_FRACT_0_2": "\u00bfest\u00e1s en cu- briendo a alguien?" + } + }, + "Barnaby_v1-028.png": { + "1": { + "DEFAULT_GREY_PAD": "We haven't any cattle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all.", + "INITIAL_BOX": "We haven't any cattle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all.", + "DEFAULT": "We haven't any cattle and pop's victory garden is growing fine... and i don't think we've had much trouble with werewolves at all.", + "PADDED_4": "We haven't any cattle and pop's victory garden is growing fine... and i don't think we've had much trouble with werewolves at all.", + "PADDED_8": "We haven't any cattle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all.", + "EXTRACTED_INIT_BOX": "We haven't any castle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all!", + "PADDED_4_EXTRACTED": "We haven't any cattle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all.", + "PADDED_8_EXTRACTED": "We haven't any cattle and pop's victory garden is growing fine. 
and i don't think we've had much trouble with werewolves at all.", + "PADDED_8_DILATION_1": "We haven't any cattle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all.", + "PAD_8_FRACT_0_5": "We haven't any cattle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all.", + "PAD_8_FRACT_0_2": "We haven't any cattle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all." + }, + "0": { + "INITIAL_BOX": "Barnaby, i should like to bestow a boon upon this household in return for its hospitality ... shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? ...", + "DEFAULT": "Barnaby, i should like to bestow a boon upon this household in return for its hospitality ... shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? ...", + "DEFAULT_GREY_PAD": "Barnaby, i should like to bestow a boon upon this household in return for its hospitality. ... shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? ...", + "PADDED_4": "Barnaby, i should like to bestow a boon upon this household in return for its hospitality. ... shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? ...", + "PADDED_8": "Barnaby, i should like to bestow a boon upon this household in return for its hospitality. ... shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? ...", + "EXTRACTED_INIT_BOX": "Barnaby, i should like to bestow a boon upon this household in return for its hospitality ... shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? 
...", + "PADDED_4_EXTRACTED": "Barnaby, i should like to bestow a boon upon this household in return for its hospitality. ... shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? ...", + "PADDED_8_EXTRACTED": "Barnaby, i should like to bestow a boon upon this household in return for its hospitality. ... shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? ...", + "PADDED_8_DILATION_1": "Barnaby, i should like to bestow a boon upon this household in return for its hospitality ... shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? ...", + "PAD_8_FRACT_0_5": "Barnaby, i should like to bestow a boon upon this household in return for its hospitality. ... shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? ...", + "PAD_8_FRACT_0_2": "Barnaby, i should like to bestow a boon upon this household in return for its hospitality. ... shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? ..." + }, + "2": { + "INITIAL_BOX": "Then perhaps i can drive out snakes? or witch a well? ...", + "DEFAULT": "Then perhaps i can drive out snakes? or witch a well? ...", + "DEFAULT_GREY_PAD": "Then perhaps i can drive out snakes? or witch a well? ...", + "PADDED_4": "Then perhaps i can drive out snakes? or witch a well? ...", + "PADDED_8": "Then perhaps i can drive out snakes? or witch a well? ...", + "EXTRACTED_INIT_BOX": "Then perhaps i can drive out snakes? or switch a well? ...", + "PADDED_4_EXTRACTED": "Then perhaps i can drive out snakes? or switch a well? ...", + "PADDED_8_EXTRACTED": "Then perhaps i can drive out snakes? or witch a well? ...", + "PADDED_8_DILATION_1": "Then perhaps i can drive out snakes? or witch a well? ...", + "PAD_8_FRACT_0_5": "Then perhaps i can drive out snakes? or witch a well? 
...", + "PAD_8_FRACT_0_2": "Then perhaps i can drive out snakes? or witch a well? ..." + }, + "3": { + "INITIAL_BOX": "Where the end of this dividing rod turns to the earth we'll find water.", + "DEFAULT": "Where the end of this dividing rod turns to the earth we'll find water.", + "DEFAULT_GREY_PAD": "Where the end of this divining rod turns to the earth we'll find water.", + "PADDED_4": "Where the end of this divining rod turns to the earth we'll find water.", + "PADDED_8": "Where the end of this divining rod turns to the earth we'll find water.", + "EXTRACTED_INIT_BOX": "Where are the ends of this diverging road leads to the earth we'll find water...", + "PADDED_4_EXTRACTED": "Where are the end of this dividing road turns to the earth we'll find water.", + "PADDED_8_EXTRACTED": "Where the end of this dividing rod turns to the earth we'll find water.", + "PADDED_8_DILATION_1": "Where the end of this dividing rod turns to the earth we'll find water.", + "PAD_8_FRACT_0_5": "Where the end of this dividing rod turns to the earth we'll find water.", + "PAD_8_FRACT_0_2": "Where the end of this dividing rod turns to the earth we'll find water." + }, + "4": { + "INITIAL_BOX": "Gosh... don't go to a lot of trouble, mr.. o'malley", + "DEFAULT": "Gosh... don't go to a lot of trouble, mr.. o'malley", + "DEFAULT_GREY_PAD": "Gosh. don't go to a lot of trouble, mr. o'malley.", + "PADDED_4": "Gosh. don't go to a lot of trouble, mr. o'malley ...", + "PADDED_8": "Gosh. don't go to a lot of trouble, mr. o'malley.", + "EXTRACTED_INIT_BOX": "Gosh... don't go too far of trouble, mr. o'malley.", + "PADDED_4_EXTRACTED": "Gosh... don't go too far a look of trouble, mr. o'malley...", + "PADDED_8_EXTRACTED": "Gosh... don't go to a lot of trouble, mr. o'malley.", + "PADDED_8_DILATION_1": "Gosh... don't go to a lot of trouble, mr. o'malley.", + "PAD_8_FRACT_0_5": "Gosh... don't go to a lot of trouble, mr. o'malley.", + "PAD_8_FRACT_0_2": "Gosh... don't go to a lot of trouble, mr. o'malley." 
+ }, + "5": { + "INITIAL_BOX": "When you grow to manhood and inherit this estate you 'll thank your fairy godfather for this well right in your cellar, m'lad.", + "DEFAULT": "When you grow to manhood and inherit this estate you 'll thank your fairy godfather for this well right in your cellar, m'lad.", + "DEFAULT_GREY_PAD": "When you grow to manhood and inherit this estate you 'll thank your fairy godfather for this well right in your cellar, m'lad.", + "PADDED_4": "When you grow to manhood and inherit this estate you 'll thank your fairy godfather for this well right in your cellar, m'lad.", + "PADDED_8": "When you grow to manhood and inherit this estate you 'll thank your fairy godfather for this well right in your cellar, m'lad.", + "EXTRACTED_INIT_BOX": "When you grow to manhood and inherit this estate you 'll thank your fairy godfather for this well right in your cellar, m'lord.", + "PADDED_4_EXTRACTED": "When you grow to manhood and inherit this estate you 'll thank your fairy godfather for this well right in your cellar, m'lord.", + "PADDED_8_EXTRACTED": "When you grow to manhood and inherit this estate you 'll thank your fairy godfather for this well right in your cellar, m'lad.", + "PADDED_8_DILATION_1": "When you grow to manhood and inherit this estate you'll thank your fairy godfather for this well right in your cellar, m'lad.", + "PAD_8_FRACT_0_5": "When you grow to manhood and inherit this estate you 'll thank your fairy godfather for this well right in your cellar, m'lad.", + "PAD_8_FRACT_0_2": "When you grow to manhood and inherit this estate you 'll thank your fairy godfather for this well right in your cellar, m'lad." + }, + "6": { + "INITIAL_BOX": "Ah, that's it! i shall bestow the blessing of a never-failing water supply upon this plot of land ... fetch me a forked stick, m'boy.", + "DEFAULT": "Ah, that's it! i shall bestow the blessing of a never-failing water supply upon this plot of land ... 
feych me a forked stick, m'boy.", + "DEFAULT_GREY_PAD": "Ah, that's it! i shall bestow the blessing of a never-failing water supply upon this plot of land ... fetch me a forked stick, m'boy.", + "PADDED_4": "Ah, that's it! i shall bestow the blessing of a never-failing water supply upon this plot of land ... fetch me a forked stick, m'boy.", + "PADDED_8": "Ah, that's it! i shall bestow the blessing of a never-failing water supply upon this plot of land ... fetch me a forked stick, m'boy.", + "EXTRACTED_INIT_BOX": "'ah, that's it! i shall bestow the blessing of a never-failing water supply upon this pilot of land ... feron me a forked stick, m'boy.", + "PADDED_4_EXTRACTED": "Ah, that's in it! i shall bestow the blessing of a never-failing water supply upon this plot of land ... feron me a forked stick, m'boy.", + "PADDED_8_EXTRACTED": "Ah, that's it! i shall bestow the blessing of a never-failing water supply upon this plot of land ... fetch me a forked stick, m'boy.", + "PADDED_8_DILATION_1": "Ah, that's it! i shall bestow the blessing of a never-failing water supply upon this plot of land ... fehch me a forked stick, m'boy.", + "PAD_8_FRACT_0_5": "Ah, that's it! i shall bestow the blessing of a never-failing water supply upon this plot of land ... fetch me a forked stick, m'boy.", + "PAD_8_FRACT_0_2": "Ah, that's it! i shall bestow the blessing of a never-failing water supply upon this plot of land ... fetch me a forked stick, m'boy." + }, + "7": { + "INITIAL_BOX": "But we've got city water, mr. o'malley ...", + "DEFAULT": "But we've got city water, mr. o'malley.", + "DEFAULT_GREY_PAD": "But we've got city matter, mr. o'malley.", + "PADDED_4": "But we've got city writter, mr. o'malley.", + "PADDED_8": "But we've got city whisker, mr. o'malley.", + "EXTRACTED_INIT_BOX": "But we've got fifty wrestlers, mr. o'malley ...", + "PADDED_4_EXTRACTED": "But we've got city workers, mr.. o'malley.", + "PADDED_8_EXTRACTED": "But we've got city winter, mr. 
o'malley.", + "PADDED_8_DILATION_1": "But we've got city writter, mr. o'malley.", + "PAD_8_FRACT_0_5": "But we've got city weather, mr. o'malley.", + "PAD_8_FRACT_0_2": "But we've got city water, mr. o'malley." + }, + "8": { + "INITIAL_BOX": "Er ... is there any beer on ice, m'boy?", + "DEFAULT": "Er ... is there any beer on ice, mi'boy?", + "DEFAULT_GREY_PAD": "Er ... is there any beer on ice, mi'boy?", + "PADDED_4": "Er ... is there any beer on ice, mi'boy?", + "PADDED_8": "Er ... is there any beer on ice, mi'boy?", + "EXTRACTED_INIT_BOX": "Er ... is there any heavier ice, m'boy?", + "PADDED_4_EXTRACTED": "Ehr ... is there any heavier on ice, m'boy?", + "PADDED_8_EXTRACTED": "Er ... is there any beer on ice, m'boy?", + "PADDED_8_DILATION_1": "Br ... is there any beer on ice, m'boy?", + "PAD_8_FRACT_0_5": "Ehr ... is there any beer on ice, m'boy?", + "PAD_8_FRACT_0_2": "Er ... is there any beer on ice, m'boy?" + }, + "9": { + "INITIAL_BOX": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth ... makes me thirsty merely to describe it ....", + "DEFAULT": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth ... makes me thirstily merely to describe it ....", + "DEFAULT_GREY_PAD": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth ... makes me thirsty merely to describe it ....", + "PADDED_4": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth ... makes me thirsty merely to describe it ....", + "PADDED_8": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth ... makes me thirsty merely to describe it ....", + "EXTRACTED_INIT_BOX": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth ... makes me thirsty merely to describe it ...", + "PADDED_4_EXTRACTED": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth ... 
makes me thirsty merely to describe it ...", + "PADDED_8_EXTRACTED": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth ... makes me thirsty merely to describe it ....", + "PADDED_8_DILATION_1": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth ... makes me thirsty merely to describe it ...", + "PAD_8_FRACT_0_5": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth ... makes me thirsty merely to describe it ....", + "PAD_8_FRACT_0_2": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth ... makes me thirsty merely to describe it ...." + }, + "10": { + "INITIAL_BOX": "The rockety johnson", + "DEFAULT": "The rockett johnson", + "DEFAULT_GREY_PAD": "Grockett johnson", + "PADDED_4": "Crockett johnson", + "PADDED_8": "Crockett johnson", + "EXTRACTED_INIT_BOX": "The rocky johnson family", + "PADDED_4_EXTRACTED": "G-rock: hey john! son!", + "PADDED_8_EXTRACTED": "E-roc mey john m son", + "PADDED_8_DILATION_1": "Grocket johnson", + "PAD_8_FRACT_0_5": "E rock me? john mason", + "PAD_8_FRACT_0_2": "G rocket? 
john mason" + }, + "11": { + "INITIAL_BOX": "We can really get along all right with the city water supply ...", + "DEFAULT": "We can really get along all right with the city water supply ....", + "DEFAULT_GREY_PAD": "We can really get along all right with the city water supply ....", + "PADDED_4": "We can really get along all right with the city water supply ...", + "PADDED_8": "We can really get along all right with the city water supply ....", + "EXTRACTED_INIT_BOX": "We can really get along all right with the city water supply ...", + "PADDED_4_EXTRACTED": "\"we can really get along all right within the city water supply ...", + "PADDED_8_EXTRACTED": "\"we can really get along all right with the city water supply ...", + "PADDED_8_DILATION_1": "We can really get along all right with the city water supply ...", + "PAD_8_FRACT_0_5": "\" we can really get along all right with the city water supply ...", + "PAD_8_FRACT_0_2": "\" we can really get along all right with the city water supply ..." + }, + "12": { + "INITIAL_BOX": "Hush, m'boy. this is a time of conservation.", + "DEFAULT": "Hush, m'boy. this is a time of conservation.", + "DEFAULT_GREY_PAD": "Hush, m'boy. this is a time of conservation..", + "PADDED_4": "Hush, m'boy. this is a time of conservation..", + "PADDED_8": "Hush, m'boy. this is a time of conservation..", + "EXTRACTED_INIT_BOX": "Hush, m'boy. this is a time of conservation...", + "PADDED_4_EXTRACTED": "Hush, m'boy. this is a time of conservation.", + "PADDED_8_EXTRACTED": "Mush, m'booy. this is a time of conservation.", + "PADDED_8_DILATION_1": "Hush, m'boy! this is a time of conservation.", + "PAD_8_FRACT_0_5": "Hush, m'booy. this is a time of conservation.", + "PAD_8_FRACT_0_2": "Hush, m'thoy. this is a time of conservation." 
+ }, + "13": { + "INITIAL_BOX": "We're being patriotic.", + "DEFAULT": "We're being patriotic.", + "DEFAULT_GREY_PAD": "We're being patriotic.", + "PADDED_4": "We're being patriotic.", + "PADDED_8": "We're being patriotic -.", + "EXTRACTED_INIT_BOX": "We 're being patronizing --", + "PADDED_4_EXTRACTED": "We 're being patriotic --", + "PADDED_8_EXTRACTED": "We 're being patriotic.", + "PADDED_8_DILATION_1": "We 're leaving paradise -.", + "PAD_8_FRACT_0_5": "We're being patriotic.", + "PAD_8_FRACT_0_2": "We 're being patriotic." + }, + "14": { + "INITIAL_BOX": "Ah! here's the place! bring the pickaxe.", + "DEFAULT": "Ah! here's the place! bring the pickaxe.", + "DEFAULT_GREY_PAD": "Aha! here's the place! bring the pickaxe.", + "PADDED_4": "Ah! here's the place! bring the pickaxe.", + "PADDED_8": "Ah! here's the place! bring the pickaxe.", + "EXTRACTED_INIT_BOX": "Ah! here's the place! bringing the pickoxxe.", + "PADDED_4_EXTRACTED": "Aah! here's the place! bring the pickaxe.", + "PADDED_8_EXTRACTED": "Aha! here's the place! bring the pickaxe.", + "PADDED_8_DILATION_1": "Ah! here's the place! bring the pickaxe.", + "PAD_8_FRACT_0_5": "Aha! here's the place! bring the pickaxe.", + "PAD_8_FRACT_0_2": "Aha! here's the place! bring the pickaxe." 
+ }, + "15": { + "INITIAL_BOX": "Crockett johnson", + "DEFAULT": "Crockett johnson", + "DEFAULT_GREY_PAD": "Crockett johnson", + "PADDED_4": "Crockett johnson", + "PADDED_8": "Crockett johnson", + "EXTRACTED_INIT_BOX": "Crockett johnson.", + "PADDED_4_EXTRACTED": "Crockett johnson", + "PADDED_8_EXTRACTED": "Crockett john sonei", + "PADDED_8_DILATION_1": "Crockett johnson", + "PAD_8_FRACT_0_5": "Crockett johnson", + "PAD_8_FRACT_0_2": "Crockett johnson" + } + }, + "Barnaby_v1-029.png": { + "1": { + "DEFAULT_GREY_PAD": "Maybe pop doesn't want a well.", + "INITIAL_BOX": "Maybe pop doesn't want a well.", + "DEFAULT": "Maybe pop doesn't want a well..", + "PADDED_4": "Maybe pop doesn't want a well.", + "PADDED_8": "Maybe pop doesn't want a well.", + "EXTRACTED_INIT_BOX": "Maybe pop doesn't want a wreath.", + "PADDED_4_EXTRACTED": "Maybobo pop doesn't want a woll?", + "PADDED_8_EXTRACTED": "A mayo-pop pop doesn't want a well-.", + "PADDED_8_DILATION_1": "Maybob pop doesn't want a woll.", + "PAD_8_FRACT_0_5": "Maybebob pop doesn't want a well.", + "PAD_8_FRACT_0_2": "Maybob pop doesn't want a well." + }, + "0": { + "INITIAL_BOX": "Trust your old fairy godfather to find the best spot in the cellar for a well. right handy to the stairs. bring the pick.", + "DEFAULT": "Trust your old fairy godfather to find the best spot in the cellar for a well. right handy to the stairs. bring the pick.", + "DEFAULT_GREY_PAD": "Trust your old fairy godfather to find the best spot in the cellar for a well. right handy to the stairs. bring the pick.", + "PADDED_4": "Trust your old fairy godfather to find the best spot in the cellar for a well... right handy to the stairs... bring the pick,.", + "PADDED_8": "Trust your old fairy godfather to find the best spot in the cellar for a well. right handy to the stairs. bring the pick.", + "EXTRACTED_INIT_BOX": "Trust your old fairy godfather to find the best spot in the cellar for a well. right handy to the stairs. 
bring the pick,.", + "PADDED_4_EXTRACTED": "Trust your old fairy godfather to find the best spot in the cellar for a well. right handy to the stairs, bring the pick,", + "PADDED_8_EXTRACTED": "Trust your old fairy godfathers to find the best sport in the collar for a well! right handy man with fine stains. bring the pick,", + "PADDED_8_DILATION_1": "Trust your old fairy godfather to find the best spot in the cellar for a well. right handy man to find the stain. bring the pick,", + "PAD_8_FRACT_0_5": "Trust your old fairy godfather to find the best spot in the collar for a well. right handy fella to find stains. bring the pick.", + "PAD_8_FRACT_0_2": "Trust your old fairy godfather to find the best spot in the collar for a well. right handy fella to find stains. bring the pick." + }, + "2": { + "INITIAL_BOX": "Nonsense, mi'boy. your whole family will be delighted.", + "DEFAULT": "Nonsense, mi'boy. your whole family will be delighted.", + "DEFAULT_GREY_PAD": "Nonsense, m'boy. your whole family will be delighted.", + "PADDED_4": "Nonsense, m'boy. your whole family will be delighted.", + "PADDED_8": "Nonsense, m'boy. your whole family will be delighted.", + "EXTRACTED_INIT_BOX": "Nonnenseo, m'boy. your whole family will be delighted.", + "PADDED_4_EXTRACTED": "Monsense, m'lord! your whole family will be delighted.", + "PADDED_8_EXTRACTED": "Nonsense, m'boy. your whole family will be delighted.", + "PADDED_8_DILATION_1": "Nonsense, m'boy. your whole family will be delighted.", + "PAD_8_FRACT_0_5": "Nonsense, m'boy. your whole family will be delighted.", + "PAD_8_FRACT_0_2": "Nonsense, m'boy. your whole family will be delighted." + }, + "3": { + "INITIAL_BOX": "That 's not water, mr. o'malley! did something go wrong?", + "DEFAULT": "That 's not water, mr. o'malley: did something go wrong?", + "DEFAULT_GREY_PAD": "That 's not water, mr. o'malley! did something go wrong?", + "PADDED_4": "That 's not water, mr. o'malley! 
did something go wrong?", + "PADDED_8": "That 's not water, mr. o'malley! did something go wrong?", + "EXTRACTED_INIT_BOX": "That 's not weather, mr. o'malley! did something go wrong?", + "PADDED_4_EXTRACTED": "That 's not water, mr. o'malley! did something go wrong?", + "PADDED_8_EXTRACTED": "\" that 's not water, mr. o'malley! did something go wrong?", + "PADDED_8_DILATION_1": "That's not water, mr. o'malley! did something go wrong?", + "PAD_8_FRACT_0_5": "That 's not water, mr. o'malley! did something go wrong?", + "PAD_8_FRACT_0_2": "That 's not water, mr. o'malley! did something go wrong?" + }, + "4": { + "INITIAL_BOX": "Cushlamochree?", + "DEFAULT": "Cushlamochree?", + "DEFAULT_GREY_PAD": "Cushlamochree?", + "PADDED_4": "Cushlamochree?", + "PADDED_8": "Cushlamochree!", + "EXTRACTED_INIT_BOX": "Crystal-ammo! charge?", + "PADDED_4_EXTRACTED": "Cushlie-larnum! oh dearie!", + "PADDED_8_EXTRACTED": "Cushlieannn@ch3ree?", + "PADDED_8_DILATION_1": "Cushlamoochree!", + "PAD_8_FRACT_0_5": "Cushla! oh ghreee?", + "PAD_8_FRACT_0_2": "Cushla! oh no?" + }, + "5": { + "INITIAL_BOX": "It does seem a bit brackish, doesn't it, m'boy. perhaps some ill-natured demon or?", + "DEFAULT": "It does seem a bit brackish, doesn't it, m'boy. perhaps some ill-natured demon ...?", + "DEFAULT_GREY_PAD": "It does seem a bit brackish, doesn't it, m'boy. perhaps some ill-natured demon or ...", + "PADDED_4": "It does seem a bit brackish, doesn't it, m'boy. perhaps some ill-natured demon ...?", + "PADDED_8": "It does seem a bit brackish, doesn't it, m'boy. perhaps some ill-natured demon ...?", + "EXTRACTED_INIT_BOX": "It does seem a bit brockish, doesn't it, milady. perhaps some ill-matured demon ...?", + "PADDED_4_EXTRACTED": "It does seem a bit backish, doesn't it, m'boy. perhaps some ill-natured demon? \"", + "PADDED_8_EXTRACTED": "It does seem a bit barbarickish, doesn't it, m'boy. perhaps some ill-natured demon ?", + "PADDED_8_DILATION_1": "It does seem a bit backassish, doesn't it, m'boy. 
perhaps some ill-natured demon ...? \"", + "PAD_8_FRACT_0_5": "It does seem a bit barbarickish, doesn't it, m'boy. perhaps some ill-natured demon so ...", + "PAD_8_FRACT_0_2": "It does seem a bit backackish, doesn't it, m'boy. perhaps some ill-natured demon so>" + }, + "6": { + "INITIAL_BOX": "May 22 - 23", + "DEFAULT": "May 22 - 23", + "DEFAULT_GREY_PAD": "May 22 - 23", + "PADDED_4": "May 22 - 23", + "PADDED_8": "May 22 - 23", + "EXTRACTED_INIT_BOX": "Molly - 22 - 23", + "PADDED_4_EXTRACTED": "J'mary 22 - 23", + "PADDED_8_EXTRACTED": "Myory 22 - 23", + "PADDED_8_DILATION_1": "May 22 - 23", + "PAD_8_FRACT_0_5": "Mary 22 - 23", + "PAD_8_FRACT_0_2": "Mary 22 - 23" + }, + "7": { + "INITIAL_BOX": "This well will put the city's water supply to shame. a few blows of the pick will usher up the coolest, clearest ...", + "DEFAULT": "This well will put the city's water supply to shame. a few blows of the pick will usher up the coolest, clearest ...", + "DEFAULT_GREY_PAD": "This well will put the city's water supply to shame. a few blows of the pick will usher up the coolest, clearest ...", + "PADDED_4": "This well will put the city's water supply to shame. a few blows of the pick will usher up the coolest, clearest ...", + "PADDED_8": "This well will put the city's water supply to shame. a few blows of the pick will usher up the coolest, clearest ...", + "EXTRACTED_INIT_BOX": "This well will put the city's water supply to shame. a few blows of the pick will usher up the coolest, clearest water.", + "PADDED_4_EXTRACTED": "This well will put the city's water supply to shame. a few blows of the pick will usher up the coolest, clearest water", + "PADDED_8_EXTRACTED": "This well will put the city's water supply to shame. a few blows of the pick will usher up the coolest, clearest ...", + "PADDED_8_DILATION_1": "This well will put the city's water supply to shame. 
a few blows of the pick will usher up the coolest, clearest ...", + "PAD_8_FRACT_0_5": "This well will put the city's water supply to shame. a few blows of the pick will usher up the coolest, clearest ...", + "PAD_8_FRACT_0_2": "This well will put the city's water supply to shame. a few blows of the pick will usher up the coolest, clearest ..." + }, + "8": { + "INITIAL_BOX": "Crockett johnson", + "DEFAULT": "Crocksby johnson", + "DEFAULT_GREY_PAD": "Crockett johnson", + "PADDED_4": "Crockett johnson", + "PADDED_8": "Crockett johnson", + "EXTRACTED_INIT_BOX": "Crooked by john sonney", + "PADDED_4_EXTRACTED": "Crosby johnson", + "PADDED_8_EXTRACTED": "Crockett johnson", + "PADDED_8_DILATION_1": "Grockit johnston", + "PAD_8_FRACT_0_5": "Crockett johnson", + "PAD_8_FRACT_0_2": "Crockett johnson" + }, + "9": { + "INITIAL_BOX": "Or could my divining rod have been short of the new regulation witching standards?", + "DEFAULT": "Or could my divining rod have been short of the new regulation water witching standards?", + "DEFAULT_GREY_PAD": "Or could my divining rod have been short of the new regulation witching standards?", + "PADDED_4": "Or could my divining rod have been short of the new regulation water witching standards?", + "PADDED_8": "Or could my divining rod have been short of the new regulation witching standards?", + "EXTRACTED_INIT_BOX": "Or could my dividing road have been short of the new regulation wichiting standards?", + "PADDED_4_EXTRACTED": "Or could my divining rod have been short of the new regulation whatever witching standards?", + "PADDED_8_EXTRACTED": "Or could my divining rod have been short of the new regulation width w/itching standards?", + "PADDED_8_DILATION_1": "Or could my divining rod have been short of the new regulation wattching standards?", + "PAD_8_FRACT_0_5": "Or could my divining rod have been short of the new regulation width witching standards?", + "PAD_8_FRACT_0_2": "Or could my divining rod have been short of the new regulation 
witching standards?" + }, + "10": { + "INITIAL_BOX": "Mr., o'malley!?", + "DEFAULT": "Mr., o'malley!?", + "DEFAULT_GREY_PAD": "Mr. o'malley!?", + "PADDED_4": "Mr. o'malley!?", + "PADDED_8": "Mr. o'malley!", + "EXTRACTED_INIT_BOX": "\" mr. o'malley!?", + "PADDED_4_EXTRACTED": "Mr. o'malletley!?", + "PADDED_8_EXTRACTED": "Mr. . o'malletley!?", + "PADDED_8_DILATION_1": "Mr. o'malley!?", + "PAD_8_FRACT_0_5": "Mr. o'maltley!?", + "PAD_8_FRACT_0_2": "Mr. o'maltley!?" + }, + "11": { + "INITIAL_BOX": "We've struck oil?", + "DEFAULT": "We've struck oil?", + "DEFAULT_GREY_PAD": "We've struck oil?", + "PADDED_4": "We've struck oil?", + "PADDED_8": "We've struck oil?", + "EXTRACTED_INIT_BOX": "How 've we been sherlock?", + "PADDED_4_EXTRACTED": "We 've struck oil?", + "PADDED_8_EXTRACTED": "We've struck oil?", + "PADDED_8_DILATION_1": "We've struck oil?", + "PAD_8_FRACT_0_5": "We've struck oil?", + "PAD_8_FRACT_0_2": "We've struck oil?" + }, + "12": { + "INITIAL_BOX": "Crockett johnson", + "DEFAULT": "Crockett johnson", + "DEFAULT_GREY_PAD": "Crockett johnson", + "PADDED_4": "Crockett johnson", + "PADDED_8": "Crockett johnson", + "EXTRACTED_INIT_BOX": "Crockett johnson", + "PADDED_4_EXTRACTED": "Chockett johnson", + "PADDED_8_EXTRACTED": "Chockett johnson", + "PADDED_8_DILATION_1": "Crockett johnson", + "PAD_8_FRACT_0_5": "Crockett johnson", + "PAD_8_FRACT_0_2": "Crockett johnson" + } + }, + "FOX_CHILLINTALES_T17_012.jpg": { + "1": { + "DEFAULT_GREY_PAD": "At last the door swung open to reveal a cadaverous old man who peered out into the darkness at me ...", + "INITIAL_BOX": "At last the door swung open to reveal a cadaverous old man who peered out into the darkness at me ...", + "DEFAULT": "At last the door swung open to reveal a gadaverous old man who peered out into the darkness at me ...", + "PADDED_4": "At last the door swung open to reveal a cadaverous old man who peered out into the darkness at me ...", + "PADDED_8": "At last the door swung open to reveal a cadaverous 
old man who peered out into the darkness at me ...", + "EXTRACTED_INIT_BOX": "At last the door swung open to reveal a canaverous old man who peered out into the darkness at me.", + "PADDED_4_EXTRACTED": "At last the door swung open to reveal a dastardly old man who peered out into the darkness at me.", + "PADDED_8_EXTRACTED": "At last the door swung open to reveal a dastardly old man who peered out into the darkness at me.", + "PADDED_8_DILATION_1": "At last the door swung open to reveal a gadabout old man who peered out into the darkness at me.", + "PAD_8_FRACT_0_5": "At last the door swung open to reveal a dastardly old man who peered out into the darkness at me.", + "PAD_8_FRACT_0_2": "At last the door swung open to reveal a dastardly old man who peered out into the darkness at me." + }, + "0": { + "INITIAL_BOX": "Who are you? what do you want?", + "DEFAULT": "Who are you? what do you want?", + "DEFAULT_GREY_PAD": "Who are you? what do you want?", + "PADDED_4": "Who are you? what do you want?", + "PADDED_8": "Who are you? what do you want?", + "EXTRACTED_INIT_BOX": "Who are you? what do you want?", + "PADDED_4_EXTRACTED": "Who are you? what do you want?", + "PADDED_8_EXTRACTED": "Who are you? what do you want?", + "PADDED_8_DILATION_1": "Who are you? what do you want?", + "PAD_8_FRACT_0_5": "Who are you? what do you want?", + "PAD_8_FRACT_0_2": "Who are you? what do you want?" + }, + "2": { + "INITIAL_BOX": "I want to see professor blutress. the agency sent me.", + "DEFAULT": "I want to see professor blutress. the agency sent me.", + "DEFAULT_GREY_PAD": "I want to see professor blutress. the agency sent me.", + "PADDED_4": "I want to see professor blutress . the agency sent me.", + "PADDED_8": "I want to see professor blutress. the agency sent me.", + "EXTRACTED_INIT_BOX": "I want to see professor blutress . the agency sent me.", + "PADDED_4_EXTRACTED": "I want to see professor blutress . 
the agency sent me.", + "PADDED_8_EXTRACTED": "I want to see professor blutress . the agency sent me.", + "PADDED_8_DILATION_1": "I want to see professor blutriss. the agency sent me.", + "PAD_8_FRACT_0_5": "I want to see professor blutress . the agency sent me.", + "PAD_8_FRACT_0_2": "I want to see professor blutress . the agency sent me." + }, + "3": { + "INITIAL_BOX": "Oh, yes. you must be the secretary i sent for. i'm professor blutress -- come in.", + "DEFAULT": "Oh, yes. you must be the secretary i sent for. i'm professor blutress--- come in. come in.", + "DEFAULT_GREY_PAD": "Oh, yes. you must be the secretary i sent for. i'm professor blutress -- come in. come in.", + "PADDED_4": "Oh, yes, you must be the secretary i sent for. i'm professor blutress -- come in. come in.", + "PADDED_8": "Oh, yes. you must be the secretary i sent for. i'm professor blutress -- come in. come in.", + "EXTRACTED_INIT_BOX": "Oh, yes, you must be the secretary i sent for. i'm professor blutress -- come in. come in.", + "PADDED_4_EXTRACTED": "Oh, yes, you must be the secretary i sent for. i'm professor blutress-- come in. come in.", + "PADDED_8_EXTRACTED": "Oh, yes, you must be the secretary i sent for. i'm professor blutress-- come in, come in.", + "PADDED_8_DILATION_1": "Oh, yes, you must be the secretary i sent for. i'm professor blutress -- come in, come in.", + "PAD_8_FRACT_0_5": "Oh, yes, you must be the secretary i sent for. i'm professor blutress-- come in, come in.", + "PAD_8_FRACT_0_2": "Oh, yes, you must be the secretary i sent for. i'm professor blutress-- come in, come in." 
+ }, + "4": { + "INITIAL_BOX": "As we entered the pro- fessor's study an unearthly scream came from the dark recesses of the house ...", + "DEFAULT": "As we entered the pro- fessor's study an unearthly scream came from the dark recesses of the house ...", + "DEFAULT_GREY_PAD": "As we entered the pro- fessor's study an unearthly scream came from the dark recesses of the house ...", + "PADDED_4": "As we entered the pro- fessor's study an unearthly scream came from the dark recesses of the house ...", + "PADDED_8": "As we entered the pro- fessor's study an unearthly scream came from the dark recesses of the house ...", + "EXTRACTED_INIT_BOX": "We're ed ro's reunion of romark ecesses of the e house.", + "PADDED_4_EXTRACTED": "We ed ro's re form ark recesses of e house.", + "PADDED_8_EXTRACTED": "We ed ro's re rom ark excesses of e house.", + "PADDED_8_DILATION_1": "We ed ro's re rom ark ecgeses of e house.", + "PAD_8_FRACT_0_5": "We ed ro's re rom ark excesses of e house.", + "PAD_8_FRACT_0_2": "We ed ro's re rom e ark ecesses of e house." + }, + "5": { + "INITIAL_BOX": "Good lord! what was that? it sounded like a scream?", + "DEFAULT": "Good lord! what was that? it sounded like a scream?", + "DEFAULT_GREY_PAD": "Good lord! what was that? it sounded like a scream?", + "PADDED_4": "Good lord! what was that? it sounded like a scream?", + "PADDED_8": "Good lord! what was that? it sounded like a scream?", + "EXTRACTED_INIT_BOX": "Good lord! what was that? it sounded like a scream!", + "PADDED_4_EXTRACTED": "Good lord! what was that? it sounded like a scream?", + "PADDED_8_EXTRACTED": "Good lord! what was that? it sounded like a scream?", + "PADDED_8_DILATION_1": "Good lord! what was that? it sounded like a scream?", + "PAD_8_FRACT_0_5": "Good lord! what was that? it sounded like a scream?", + "PAD_8_FRACT_0_2": "Good lord! what was that? it sounded like a scream?" + }, + "6": { + "INITIAL_BOX": "A scream? i heard nothing, mr. howe.", + "DEFAULT": "A scream? 
i heard nothing, mr. howe.", + "DEFAULT_GREY_PAD": "A scream? i heard nothing, mr. howe.", + "PADDED_4": "A scream? i heard nothing, mr. howe.", + "PADDED_8": "A scream? i heard nothing, mr. howe.", + "EXTRACTED_INIT_BOX": "A scream? i heard nothing, mr. howe.", + "PADDED_4_EXTRACTED": "A scream? i heard nothing, mr. howe.", + "PADDED_8_EXTRACTED": "A scream? i heard nothing, mr. howe..", + "PADDED_8_DILATION_1": "A scream? i heard nothing, mr. howe.", + "PAD_8_FRACT_0_5": "A scream? i heard nothing, mr. howe..", + "PAD_8_FRACT_0_2": "A scream? i heard nothing, mr. howe." + }, + "7": { + "INITIAL_BOX": "I could have sworn - perhaps it was just my imagination.", + "DEFAULT": "I could have sworn ... perhaps it was just my imagination.", + "DEFAULT_GREY_PAD": "I could have sworn-- perhaps it was just my imagination.", + "PADDED_4": "I could have sworn ... perhaps it was just my imagination.", + "PADDED_8": "I could have sworn ... perhaps it was just my imagination.", + "EXTRACTED_INIT_BOX": "I could have sworn - perhaps it was just my imagination.", + "PADDED_4_EXTRACTED": "I could have sworn --- perhaps it was just my imagination.", + "PADDED_8_EXTRACTED": "I could have sworn -- perhaps it was just my imagination.", + "PADDED_8_DILATION_1": "I could've have sworn -- perhaps it was just my imagination.", + "PAD_8_FRACT_0_5": "I could have sworn-- perhaps it was just my imagination.", + "PAD_8_FRACT_0_2": "I could have sworn -- perhaps it was just my imagination." + }, + "8": { + "INITIAL_BOX": "\" you are tired. let me take you to your room. we 'll discuss your duties in the morning ... come ...", + "DEFAULT": "\" you are tired. let me take you to your room. we 'll discuss your duties in the morning ... come ...", + "DEFAULT_GREY_PAD": "\" you are tired. let me take you to your room. we 'll discuss your duties in the morning ... come ...", + "PADDED_4": "\" you are tired. let me take you to your room. we 'll discuss your duties in the morning ... 
come ...", + "PADDED_8": "\" you are tired. let me take you to your room. we 'll discuss your duties in the morning ... come ...", + "EXTRACTED_INIT_BOX": "--- you are tired. let me take you to your room. we'll discuss your duties in the morning ... come ...", + "PADDED_4_EXTRACTED": "\"you are tired. let me take you to your room. we'll discuss your duties in the morning ... come ...", + "PADDED_8_EXTRACTED": "\"you are tired. let me take you to your room. we'll discuss your duties in the morning... come...\".", + "PADDED_8_DILATION_1": "\" you are tired. just let me take you to your room. we 'll discuss your duties in the morning ... come ...", + "PAD_8_FRACT_0_5": "\" you are tired. just let me take you to your room. we 'll discuss your duties in the morning ... come ...", + "PAD_8_FRACT_0_2": "\" turn -- you are tired. just let me take you to your room. we 'll discuss your duties in the morning ... come ..." + }, + "9": { + "INITIAL_BOX": "\"the next morning i woke early and started downstairs. on the way down i passed a heavy barred door ...", + "DEFAULT": "\"the next morning i woke early and started downstairs, on the way down i passed a heavy barred door ...", + "DEFAULT_GREY_PAD": "He next morning i awoke early and started downstairs, on the way down i passed a heavy barred door ...", + "PADDED_4": "The next morning i woke early and started downstairs. on the way down i passed a heavy barred door ...", + "PADDED_8": "They next morning i woke early and started downstairs, on the way down i passed a heavy barred door ...", + "EXTRACTED_INIT_BOX": "Working early and started downstairs, on the way down i passed a heavy barred door", + "PADDED_4_EXTRACTED": "Morning - early and started downstairs. on the way down i passed a heavy # warred dogo", + "PADDED_8_EXTRACTED": "Morning - early and started com'stains. on the way down i passed a heavy barred door", + "PADDED_8_DILATION_1": "Morning! 
a few minutes ago i passed a heavy barred door", + "PAD_8_FRACT_0_5": "Morning - early and started c*ckwinstains. on the way down i passed a heavy barred door", + "PAD_8_FRACT_0_2": "Morning - early and started c'ownstairs. on the way down i passed a heavy barred door" + }, + "10": { + "INITIAL_BOX": "Ugh! it's awful...", + "DEFAULT": "Ugh! it's awful...", + "DEFAULT_GREY_PAD": "Ugh! it's awful...", + "PADDED_4": "Ugh! it's awful...", + "PADDED_8": "Ugh! it's awful...", + "EXTRACTED_INIT_BOX": "Ugh! it's awful...", + "PADDED_4_EXTRACTED": "Ugh! it's awful...", + "PADDED_8_EXTRACTED": "Ugh! it's awful...", + "PADDED_8_DILATION_1": "Ugh! it's awful...", + "PAD_8_FRACT_0_5": "Ugh! it's awful...", + "PAD_8_FRACT_0_2": "Ugh! it's awful..." + }, + "11": { + "INITIAL_BOX": "That 's strange; i wonder what's behind it? it 's thick enough to be a vault.", + "DEFAULT": "That's strange! i wonder what's behind it? it's thick enough to be a vault.", + "DEFAULT_GREY_PAD": "That 's strange! i wonder what's behind it? it 's thick enough to be a vault.", + "PADDED_4": "That 's strange! i wonder what's behind it? it 's thick enough to be a vault.", + "PADDED_8": "That's strange! i wonder what's behind it? it's thick enough to be a vault.", + "EXTRACTED_INIT_BOX": "That's... strange! i wonder what's behind it? it's thick enough to be a vault.", + "PADDED_4_EXTRACTED": "That's...strange! i wonder what's behind it? it's thick enough to be a vault.", + "PADDED_8_EXTRACTED": "That's...strange! i wonder what's behind it? it's thick enough to be a vault.", + "PADDED_8_DILATION_1": "That's... strange? i wonder what's behind it? it's thick enough to be a vault.", + "PAD_8_FRACT_0_5": "That's... strange! i wonder what's behind it? it's thick enough to be a vault.", + "PAD_8_FRACT_0_2": "That's... strange! i wonder what's behind it? it's thick enough to be a vault." + }, + "12": { + "INITIAL_BOX": "Suddenly i became aware of a frightful odor that seemed to come from behind the door. 
the nauseating stench of rotting flesh...", + "DEFAULT": "Suddenly i became aware of a frightful odor that seemed to come from behind the door. the nauseating stench of rotting flesh...", + "DEFAULT_GREY_PAD": "Suddenly i became aware of a frightful odor that seemed to come from behind the door. the nauseating stench of decayed, rotting flesh...", + "PADDED_4": "Suddenly i became aware of a frightful odor that seemed to come from behind the door. the nauseating stench of decayed, rotting flesh...", + "PADDED_8": "Suddenly i became aware of a frightful odor that seemed to come from behind the door. the nauseating stench of decayed, rotting flesh...", + "EXTRACTED_INIT_BOX": "Suddenly i became aware of a frightful door that seemed to come from behind the door. the nauseating stench of decayed, rotting flesh...", + "PADDED_4_EXTRACTED": "Suddenly i became aware of a frightful odor that seemed to come from behind the door. the nauseating stench of decaying flesh...", + "PADDED_8_EXTRACTED": "Suddenly i became aware of a frightful door that seemed to come from behind the door. the nauseating stench of decayed, rotting flesh...", + "PADDED_8_DILATION_1": "Suddenly i became aware of a frightful odor that seemed to come from beneath the door. the house eating of its attendants decayed, rotting flesh...", + "PAD_8_FRACT_0_5": "Suddenly i became aware of a frightful odor that seemed to come from behind the door. the nauseating stench of decayed, rotting flesh...", + "PAD_8_FRACT_0_2": "Suddenly i became aware of a frightful odor that seemed to come from behind the door. the nauseating stench of decayed, rotting flesh..." + } + }, + "MCCALL_ROBINHOOD_T31_010.jpg": { + "1": { + "DEFAULT_GREY_PAD": "Seize them! cut them down! don't let them escape!", + "INITIAL_BOX": "Seize them! cut them down! don't let them escape?", + "DEFAULT": "Seize them! cut them down! don't let them escape?", + "PADDED_4": "Seize them! cut them down! don't let them escape?", + "PADDED_8": "Seize them! 
cut them down! don't let them escape?", + "EXTRACTED_INIT_BOX": "Seize them! cut them down! don't let them escape!", + "PADDED_4_EXTRACTED": "Seize them! cut them down! don't let them escape!", + "PADDED_8_EXTRACTED": "Seize them! / cut them down! don't let them escape!", + "PADDED_8_DILATION_1": "Squeeze them! cut them down! don't let them escape!", + "PAD_8_FRACT_0_5": "Seize them! cut them down! don't let them escape!", + "PAD_8_FRACT_0_2": "Seize them! cut them down! don't let them escape !" + }, + "0": { + "INITIAL_BOX": "Like a flash, robin and luthor john race for an exit, the guards close on their heels ...and at that moment, little john's feet slip on the wood strewn on the floor...", + "DEFAULT": "Like a flash, robin and little john race for an exit, the guards close on their heels ... and that moment, little john's feet slip on the food strewn on the floor...", + "DEFAULT_GREY_PAD": "Like a flash, robin and little john race for an exit. the guards close on their heels ...and at that moment, little-john's feet slip on the gooey stewen on the floor...", + "PADDED_4": "Like a flash, robin and little john race for an exit. the guards close on their heels ... and at that moment, little-john's feet slip on the good stewen on the floor...", + "PADDED_8": "Like a flash, robin and little john race for an exit. the guards close on their heels and at that moment, little-john's feet slip on the gooey stewen on the floor...", + "EXTRACTED_INIT_BOX": "Like a flash, robin and little john exit the guards close on their heels at that moment, little john's feet slip on the straw strewn on the floor.", + "PADDED_4_EXTRACTED": "I make a flash. robin and little john exit the race for an exit. the guards close on their heels and at that moment, little john's feet slip on the strewn straw on the floor.", + "PADDED_8_EXTRACTED": "I make a flash robin and little john race for an exit. 
the guards close on their heels and at that moment, little john's feet slip on the straw-laden floor...", + "PADDED_8_DILATION_1": "Like a flash, robin and little johnin race for the gin exit, but the guards close on their heels at that moment, little john's feet slip on the greasy floor on the third step down to the good room.", + "PAD_8_FRACT_0_5": "I make a flash, robin and little john race for an exit, the guards close on their heels and at that moment, 'little' john's feet slip on the straw strewn on the floor...", + "PAD_8_FRACT_0_2": "I make a flash, robin and little john race for an exit. the guards close on their heels and at that moment, little john's feet slip on the straw strewn on the floor..." + }, + "2": { + "INITIAL_BOX": "Whoops?", + "DEFAULT": "Whoops?", + "DEFAULT_GREY_PAD": "Whoo-0ops?", + "PADDED_4": "Whoo-ops?", + "PADDED_8": "Whoops?", + "EXTRACTED_INIT_BOX": "Whoops?", + "PADDED_4_EXTRACTED": "Whoops?", + "PADDED_8_EXTRACTED": "Whoops?", + "PADDED_8_DILATION_1": "Whoops?", + "PAD_8_FRACT_0_5": "Whoops?", + "PAD_8_FRACT_0_2": "Whoops?" + }, + "3": { + "INITIAL_BOX": "Go on! get away if you can! don't bother about me?", + "DEFAULT": "Go on! get away if you can! don't bother about me?", + "DEFAULT_GREY_PAD": "Go on! get away if you can! don't bother about me?", + "PADDED_4": "Go on! get away if you can! don't bother about me?", + "PADDED_8": "Go on! get away if you can! don't bother about me?", + "EXTRACTED_INIT_BOX": "Go on! get away if you can! don't bother about me?", + "PADDED_4_EXTRACTED": "Go on! get away if you can! don't bother about me?", + "PADDED_8_EXTRACTED": "Go on! get away if you can! don't bother about me?", + "PADDED_8_DILATION_1": "Go on! get away if you can! don't bother about me?", + "PAD_8_FRACT_0_5": "Go on! get away if you can! don't bother about me?", + "PAD_8_FRACT_0_2": "Go on! get away if you can! don't bother about me?" + }, + "4": { + "INITIAL_BOX": "\"the struggle is brief ... soon they are helpless prisoners... 
and the earl dancing in rage, is heaping abuse on them...\".", + "DEFAULT": "\"the struggle is brief ... soon they are helpless prisoners ... and the earl dancing in rage ... is heaping abuse on them ...\".", + "DEFAULT_GREY_PAD": "\"the struggle is brief... soon they are helpless prisoners... and the earl, dancing in rage ... is heaping abuse on them...\".", + "PADDED_4": "\"the struggle is brief.... soon they are helpless prisoners... and the earl dancing in rage..... he's heaping abuse on them...\".", + "PADDED_8": "\"the struggle is brief... soon they are helpless prisoners... and the earl dancing in rage... is heaping abuse on them...\".", + "EXTRACTED_INIT_BOX": "\"the struggle is brief... soon they are welded into prisoners. and the girl dancing in rage ... is resting. rise up on them!", + "PADDED_4_EXTRACTED": "\"the struggle is brief ... soon they are helpless prisoners. find the girl dancing in the ring. she is weeping. abuse them.\"", + "PADDED_8_EXTRACTED": "T he struggle is brief... soon they are helpless... prisoners... finding the end in unbridge... is respiring abuse on them...", + "PADDED_8_DILATION_1": "\"the struggle is brief... soon they are helpless prisoners.. and the earl dancing in, in rage .. is cheaping abuse on them...\".", + "PAD_8_FRACT_0_5": "\"the struggle is brief...\" soon they are helpless prisoners... and the final dance is unhinged violence... abuse on them...\"", + "PAD_8_FRACT_0_2": "\"the struggle is brief... soon they are helpless prisoners. and the final dancing in rage ... is cheating abuse on them...\"." + }, + "5": { + "INITIAL_BOX": "Fiercely the guards fall on the prone giant ... 
and robin, almost clear of the room, gallantly turns back to help his friend ...", + "DEFAULT": "Fiercely the guards fall on the prone giant...and robin, almost clear of the room, graciously turns back to help his friends...", + "DEFAULT_GREY_PAD": "Fiercely the guards fall on the prone giant...and robin, almost clear of the room, brilliantly turns back to help his friend...", + "PADDED_4": "Fiercely the guards fall on the prone giant ... and robin, almost clear of the room, grillantly turns back to help his friend ...", + "PADDED_8": "Fiercely the guards fall on the prone giant...and robin, almost clear of the room, brilliantly turns back to help his friend...", + "EXTRACTED_INIT_BOX": "Ferociously the guards fall on the poone giant... and robin, almost clear of the room, gallantly turns back to help his friend.", + "PADDED_4_EXTRACTED": "Prestow the cumaps fall on the probe giant...and robin, almost clear of the room, gallantly turns back to help his friends.", + "PADDED_8_EXTRACTED": "Friendly the guards fall on the prone giant...and robin, almost clear of the monolithically turning batcave to help his friend...\"", + "PADDED_8_DILATION_1": "Fiercely the guards fall on the prone giant...and robin, almost clear of the room gratefully turns back to help his friend... \"", + "PAD_8_FRACT_0_5": "Fiendly the guards fall on the prone giant...and robin, almost clear of the manor brilliantly turns back to help his friend. \"", + "PAD_8_FRACT_0_2": "Fiendly the guards fall on the prone giant...and robin, almost clear of the room grillantly turns back to help his friend: \"" + }, + "6": { + "INITIAL_BOX": "Wretches! doltz! assassins! you've made me the laughing stock of my friends! to the dungeons with them! on the morrow they suffer for this?", + "DEFAULT": "Wretches! doltz ! assassins! you've made me the laughing stock of my friends! to the dungeons with them! on the morrow they suffer for this?", + "DEFAULT_GREY_PAD": "Wretches! doltz! assassins! 
you've made me the laughing stock of my friends / to the dungeons with them / on the morrow they suffer for this!", + "PADDED_4": "Wretches! doltz! assassins! you've made me the laughing stock of my friends! to the dungeons with them! on to the morrow they suffer for this?", + "PADDED_8": "Wretches! doltz! assassins! you've made me the laughing stock of my friends / to the dungeons with them / on the morrow they suffer for this!", + "EXTRACTED_INIT_BOX": "Wretchies! / doltz! / assassining! // you've made me the laughing stock of any friends ! to the dungeon with them ! on the morrow they suffer for this!", + "PADDED_4_EXTRACTED": "Wretchnes! ! doltz! nassasinning! you've made me the laughing stock of my friends ! to the dungeons with them on the morrow they suffer for this?", + "PADDED_8_EXTRACTED": "Wretches! dolts! massacres! i've lived to make me the laughing stock of my friends / to the dungeoned with them ! on the morrow they suffer for this!", + "PADDED_8_DILATION_1": "Wrestlers ! dolts! resserins! / you've made me the laughing stock of mia' friends ! to the dungeons with them ! on the morrow they suffer for this!", + "PAD_8_FRACT_0_5": "Wretches! dolts! massacrings! i've made me the laughing stock of my friends ! to the dungeons with them ! on the morrow they suffer for this?", + "PAD_8_FRACT_0_2": "Wretches! dolts! massacringss! you've made me the laughing stock of my friends ! to the dungeons with them ! on the morrow they suffer for this?" + }, + "7": { + "INITIAL_BOX": "With fists lashing out right and left, the out- law chief hurls himself at the guards ... but he knows the odds are hopeless ...", + "DEFAULT": "With fists lashing out right and left, the out- law chief hurls himself at the guards ... but he knows the odds are hopeless ...", + "DEFAULT_GREY_PAD": "\" with fists lashing out right and left, the out- law chief hurls himself at the guards ... 
but he knows the odds are hopeless ...", + "PADDED_4": "With fists lashing out right and left, the out- law chief hurls himself at the guards ... but he knows the odds are hopeless ...", + "PADDED_8": "Cloth fists lashing out right and left, the out- law chief hurls himself at the guards ... but he knows the odds are hopeless ...", + "EXTRACTED_INIT_BOX": "Cyborg fists lashing out right and left, the out- law chief hurls himself in at the guards ... but he knows the odds are hopeless...", + "PADDED_4_EXTRACTED": "Sixth fists lashing out right and left, the out- law chief hurls himself at the guards ... but he knows the odds are hopeless...", + "PADDED_8_EXTRACTED": "Mouth fists lashing out right and left, the outline law chief hurls himself at the guards ... but he knows the odds are hopless ...", + "PADDED_8_DILATION_1": "\"young fists lashing out right and left, the cult- law chief hurls himself at the guards ... but he knows the odds are against himself...", + "PAD_8_FRACT_0_5": "Mouth fists lashing out right and left, the out- law chief hurls himself at the guards ... but he knows the odds are hopeless ...", + "PAD_8_FRACT_0_2": "Muth fists lashing out right and left, the oult- law chief hurls himself at the guards ... but he knows the oodds are hopeless..." + }, + "8": { + "INITIAL_BOX": "Roughly, they are hurled into a dungeon cell ...", + "DEFAULT": "Roughly, they are hurled into a dun-geon cell ...", + "DEFAULT_GREY_PAD": "Roughly, they are hurled into a din-geon cell ...", + "PADDED_4": "Roughly, they are hurled into a dun-geon cell ...", + "PADDED_8": "Roughly, they are hurled into a dun-geon cell ...", + "EXTRACTED_INIT_BOX": "Brougham, there's no time to hesitate! hurled into a dungeon cell...", + "PADDED_4_EXTRACTED": "Inough! 
they 've hurled into a din- owom cellar!", + "PADDED_8_EXTRACTED": "Roughly, they're hurled into a dungeon cell...", + "PADDED_8_DILATION_1": "Roughly, they're hurtled into a dun-geon cell ...", + "PAD_8_FRACT_0_5": "Roughly, they raced hurried into a dungeon cell...", + "PAD_8_FRACT_0_2": "Roughly, they're hurled into a dungeon cell ..." + }, + "9": { + "INITIAL_BOX": "'tis a pretty fix we're in, sir! what is the penalty for spilling soup on an earl?", + "DEFAULT": "'tis a pretty fix we're in, sir! what is the penalty for spilling soup on an earl?", + "DEFAULT_GREY_PAD": "'tis a pretty fix we're in, sir! what is the penalty for spilling soup on an ear?", + "PADDED_4": "'tis a pretty fix we're in, sir! what is the penalty for spilling soup on an ear?", + "PADDED_8": "'tis a pretty fix we're in, sir! what is the penalty for spilling soup on an ear?", + "EXTRACTED_INIT_BOX": "'tis a pretty fix we're in, sir! what if the penalty for spilling soup on an earl?", + "PADDED_4_EXTRACTED": "'tis a pretty fix we're in, sir! what is the penalty? for spilling soup on an earl?", + "PADDED_8_EXTRACTED": "'tis a pretty fix we're in; side! what is the penalty for spilling soup on an earl?", + "PADDED_8_DILATION_1": "'tis a pretty fix we're in, sir! what's the penalty for spilling soup on an earl?", + "PAD_8_FRACT_0_5": "'tis a pretty fix we're in, sir! what is the penalty for spilling soup on an earl?", + "PAD_8_FRACT_0_2": "'tis a pretty fix we're in, sir! what is the penalty for spilling soup on an earl?" + }, + "10": { + "INITIAL_BOX": "Dire tortures, i fear me, littlejohn/ rh, well, i came to see the earl's castle and see it did... even to the dungeon....", + "DEFAULT": "Dire tortures, i fear me, littlejohn! rh, well, i came to see the earl's castle and see it i did... even to the dungeon...", + "DEFAULT_GREY_PAD": "Dire tortures, i fear me, littlejohn/ rh, well, i came to see the earl's castle and see it i did... 
even to the dungeon....", + "PADDED_4": "Dire tortures, i fear me, littlejohn/ rh. well, i came to see the earl's castle and see it i did... even to the dungeon....", + "PADDED_8": "Dire tortures, i fear me, littlejohn/ rh, well... i came to see the earl's castle and see it i did... even to the dungeon....", + "EXTRACTED_INIT_BOX": "Dire tortures, i fear me, little john/yh, well! i came to see the earl's castle and see it stood... even to the dungeon....", + "PADDED_4_EXTRACTED": "Dire tortures, i fear me, little john/! rh. well: i come to see the earl's castle and see it did... even to the dungeon....", + "PADDED_8_EXTRACTED": "Dirte tortures, i fear me, littlejohn/ rh, well, i came to see the earl's castle and see it did... even to the dungeon....", + "PADDED_8_DILATION_1": "Dire tortures, i fear me, little john/oh, well! i came to see the earl's castle and see it did... even to the dungeon....", + "PAD_8_FRACT_0_5": "Dirte tortures, i fear me, littlejohn/ rh, well, i came to see the earl's castle and see it did... even to the dungeon....", + "PAD_8_FRACT_0_2": "Dare tortures, i fear me, little john/ rh, well, i come to see the earl's castle and see it did... even to the dungeon...." + }, + "11": { + "INITIAL_BOX": "Ruefully but undaunted they consider their plight.", + "DEFAULT": "Ruffianly disdained their considerate, dutiful.", + "DEFAULT_GREY_PAD": "Ruefully bit unenthusiastic they consider their plight...", + "PADDED_4": "Rifffully ruffled at undaunted they consider their plight.", + "PADDED_8": "Ruefully bit unenthusiastic they consider their plight...", + "EXTRACTED_INIT_BOX": "Sufficiently bet uncomplained they consider, their manners?", + "PADDED_4_EXTRACTED": "Correctly but undoubtedly they consider them valians. 
\"", + "PADDED_8_EXTRACTED": "Sufficiently bit uncounted they consider their actions.", + "PADDED_8_DILATION_1": "Kneefully, but undaunted they consider, their plight?", + "PAD_8_FRACT_0_5": "Awfully bit unmanly they consider their duties?", + "PAD_8_FRACT_0_2": "Awfully bit unaccounted they consider, their darlings?" + } + }, + "MCCAY_LITTLENEMO_090.jpg": { + "1": { + "DEFAULT_GREY_PAD": "Mon palais est tout pres juste une petite promenade dans la jungle.", + "INITIAL_BOX": "Mon palais est tout pres juste une petite promenade dans la jungle.", + "DEFAULT": "Mon palais est tout pres juste une petite promenade dans la jungle.", + "PADDED_4": "Mon palais est tout pres juste une petite promenade dans la jungle.", + "PADDED_8": "Mon palais est tout pres juste une petite promenade dans la jungle.", + "EXTRACTED_INIT_BOX": "Mon palais est tout fini juste une petite promenade j'ai pris la plage.", + "PADDED_4_EXTRACTED": "\"mon palais est tout pr\u00eat juste comme une petite promenade de jungle\"", + "PADDED_8_EXTRACTED": "Mon palais est tout fres juste avec petite promenade dambala jungle.", + "PADDED_8_DILATION_1": "\"mon palais est tout pr\u00eat juste une petite promenade dans la jungle.\".", + "PAD_8_FRACT_0_5": "\"mon palais est tout\" fres juste une petite fromenade jungle", + "PAD_8_FRACT_0_2": "\"mon palais est tout pres juste une petite promenade dans la jungle.\"." + }, + "2": { + "DEFAULT_GREY_PAD": "Vous allez avoir ce que vous cherchez chez et bien encore!", + "INITIAL_BOX": "\"vous allez avoir ce que vous cherchez - chez eux et bien encore\"", + "DEFAULT": "Vous allez avoidre ce que vous cherchez chez eux et bien encore", + "PADDED_4": "Vous allez avoirdre que vous cher-chez et bien encore", + "PADDED_8": "Vous allez avoirdre que vous cher- chez et bien encore", + "EXTRACTED_INIT_BOX": "\" well then i 'll just have to find another way of getting you outta here! \"", + "PADDED_4_EXTRACTED": "Us? 
we're us.", + "PADDED_8_EXTRACTED": "\" we 're here to help you. \"", + "PADDED_8_DILATION_1": "Us : \" re - us?", + "PAD_8_FRACT_0_5": "\" we 're here to help you. \"", + "PAD_8_FRACT_0_2": "\" we 're us! \"" + }, + "0": { + "DEFAULT_GREY_PAD": "Si vous tenez une rester en vie ... nevez approchez pas de moi!", + "INITIAL_BOX": "Si vous tenez une rester en vie ... nevez approchez pas de moi?", + "DEFAULT": "Si vous tenez une rester en vie ... nevez approchez pas de moi?", + "PADDED_4": "Sioux tenez a rester en vie... nevous approchez pas de moi?", + "PADDED_8": "Si vous tenez une rester en vie... nevez approchez pas de moi?", + "EXTRACTED_INIT_BOX": "Si vous tenez une rester en vie ... nous approchez pas de moi?", + "PADDED_4_EXTRACTED": "Si vous tenez un r\u00easter en vie ... nous approchez pas de moi?", + "PADDED_8_EXTRACTED": "Si vous tenez \u00e0 rester en vie... nous approchez pas de moi?", + "PADDED_8_DILATION_1": "Si vous tenez \u00e0 rester en vie ... ne vous approchez pas de moi!", + "PAD_8_FRACT_0_5": "Si vous tenez \u00e0 rester en vie... nous approchez pas de moi?", + "PAD_8_FRACT_0_2": "Si vous voulez tenez une rester en vie ... ne vous approchez pas de moi?" + }, + "3": { + "INITIAL_BOX": "Et maintenant, je veux parler de toi ! suivez-moi ?", + "DEFAULT": "And besides, i don't want to be alone! so?", + "DEFAULT_GREY_PAD": "Et mainte, vant par ic! suivez-moi?", + "PADDED_4": "And besides, i don't want to be alone! so?", + "PADDED_8": "And besides, i don't want to be alone! so what?", + "EXTRACTED_INIT_BOX": "Et maintenant je veux parler de toi ! suivez-moi ?", + "PADDED_4_EXTRACTED": "Et maintenant je veux parler de toi ! - suivez-moi?", + "PADDED_8_EXTRACTED": "Et maintenant je vant par ki! suivez-moi?", + "PADDED_8_DILATION_1": "Et mainte- nant par ic! suivez- moi?", + "PAD_8_FRACT_0_5": "Et maintenant je veux parler avec toi ! suivez-moi ?", + "PAD_8_FRACT_0_2": "Et maintenant n'want par ici ! suivez-moi?" 
+ }, + "4": { + "INITIAL_BOX": "\"mon palais est moins bien que celui du roi mais il est joli fil'nest pas lun\"", + "DEFAULT": "Mon palais est moins bien que celui du roi mais il est joli l'n'est pas 1oin", + "DEFAULT_GREY_PAD": "\"mon palais est moins bien que celui du roi mais il est joli l'nest pas la vin\"", + "PADDED_4": "Mon palais est moins bien que celui du roi mais il est joli il n'est pas loin.", + "PADDED_8": "Mon palais est moins bien que celui du roi mais il est joli il n'est pas loin.", + "EXTRACTED_INIT_BOX": "Moon palace est maintenant bien que c'est du roi mais il est joli l'west pas long", + "PADDED_4_EXTRACTED": "Moon palais est mais bien que celui du roi 'monsieur il est joli l'nest pas un", + "PADDED_8_EXTRACTED": "\" mon palais bst moins bien que celui du roi ' monsieur il est joli l' nest pas un clin", + "PADDED_8_DILATION_1": "\"mon palais est moins bien que celui du roi\" \"mais il est joli l'nest pas qui\"", + "PAD_8_FRACT_0_5": "\"mon palais bst moins bien que celui du roi ' mus il est joli l' nbbt pas ' qui", + "PAD_8_FRACT_0_2": "\"mon palais bst moins bien que celui du roi 'mais il est joli l'nest pas 'quin\"" + }, + "5": { + "INITIAL_BOX": "Je croyais que les ma- rins dewaient nous accompa- gner? il s'en suivent?", + "DEFAULT": "Je crois que les ma- rins devaient nous accompagner sien? il s'en suiveit?", + "DEFAULT_GREY_PAD": "Je crois que les ma- rins devaient nous accompa gner? il s'en suive t?", + "PADDED_4": "Je crois que les ma- rins devaient nous accompagner sner? il s'en suiveit?", + "PADDED_8": "Je crois que les ma- rins devaient nous accompa gner? il s'en suiveit?", + "EXTRACTED_INIT_BOX": "Je crois que les mr king jeweler m'ont ?ils ne comprenent pas?", + "PADDED_4_EXTRACTED": "Je crois que les mi kings sont enfermer no ? 
/ lesner?ils ne suivent pas?", + "PADDED_8_EXTRACTED": "Je crois que les murs riment j'en suis certain ?", + "PADDED_8_DILATION_1": "Je crois que les ma rins devaient nos s'enner pils no suivent?", + "PAD_8_FRACT_0_5": "Je crois que les mi kings power ni s ! super? il n'en suive pas ?", + "PAD_8_FRACT_0_2": "Je crois que les miens sont apr\u00e8s toi ! m's ? il s'en sortent?" + }, + "6": { + "INITIAL_BOX": "We regardez pas ces singes ou il sont toujours bombarder avee des noix de coco.", + "DEFAULT": "We regard these passages singes ou il s'avont vus bombarder avee des noix de coco.", + "DEFAULT_GREY_PAD": "\"ne regardez pas ces singes ou il sont vous bombarder avec des noix de coco\"", + "PADDED_4": "We regardez pas ces singes ou il sont vous bombarder avec des noix de coco.", + "PADDED_8": "\"ne regardez pas ces singes ou ils vont vous bombarder avec des noix de coco\"", + "EXTRACTED_INIT_BOX": "We recorded these cbs singes on 13/wont wave bombardier away das nox de coc", + "PADDED_4_EXTRACTED": "We regarder les cbs singes du riwont ving d'hommardier avec des noix de coco.", + "PADDED_8_EXTRACTED": "We regardz les ces singes d'ici wont ving bom barner avec des noix de coc", + "PADDED_8_DILATION_1": "Me regardez pas ces singes ou il sont tous bombarder avec des noia de codec", + "PAD_8_FRACT_0_5": "We regard these ces singers who won't vote for bombarier ainc as noix de coc.", + "PAD_8_FRACT_0_2": "We regret to inform you that your services are no longer required by bombardier aerospace." + }, + "7": { + "INITIAL_BOX": "Oui ! ouil! mais vous n'avez rien \u00e0 craindre et ne failles pas attention \u00e0 ce que vous pourriez voir?", + "DEFAULT": "Oui ! oui! mais vous n'avez rien \u00e0 craindre et ne failles pas attention \u00e7aque que vous pourriez voir?", + "DEFAULT_GREY_PAD": "Oui ! oui! mais vous n'avez rien \u00e0 craindre et ne faite pas attenter tion \u00e0 ce que vous pourriez voir !", + "PADDED_4": "Oui ! oui! 
mais vous n'avez rien \u00e0 craindre et ne faites pas attenter tion ace que vous pourriez voir?", + "PADDED_8": "Oui !!! mais vous n'avez rien \u00e0 craindre et ne faite pas attenter tion \u00e0 ce que vous pourriez voir ?", + "EXTRACTED_INIT_BOX": "' oui! oui ma'am, vous havaet rien a craindr de me futter in a knack age '.", + "PADDED_4_EXTRACTED": "\" oui' on ma hous n'avet rien a brager d'etre puttes en loage film\"", + "PADDED_8_EXTRACTED": "\" nou'ui ma 'nous n'avaz rien a brainor de participer a un age \" po", + "PADDED_8_DILATION_1": "\" oui\" oui ma , vous n'aviez rien a craindr b le faites pa a lion age po", + "PAD_8_FRACT_0_5": "\" oui\" oui mais n'avaz rien a ecraser d'une pantin faites pa how a long time ago!?", + "PAD_8_FRACT_0_2": "\" oui\" d'ou mais wus nyanzez rue a crainvor b he fantas fa a how a che po" + }, + "8": { + "INITIAL_BOX": "Non l'hai mai passato per ca flip! non?", + "DEFAULT": "Now i never fails pas ca flip! non?", + "DEFAULT_GREY_PAD": "Non! ne faites pas \u00e7a flip ! non?", + "PADDED_4": "Non l'ine faites pas \u00e7a flip ! non?", + "PADDED_8": "Non! n'ai faites pas \u00e7a flip ! non?", + "EXTRACTED_INIT_BOX": "Now i've failed as caflip! now?", + "PADDED_4_EXTRACTED": "Now i've failed as ga flip! now?", + "PADDED_8_EXTRACTED": "Now i we failtes pas sea flip! now?", + "PADDED_8_DILATION_1": "Now i'm never fatties pas sa flip! now?", + "PAD_8_FRACT_0_5": "Now i ne faites pas sa flip! now?", + "PAD_8_FRACT_0_2": "Now i see faites pas sa flip! now?" + }, + "9": { + "INITIAL_BOX": "\" oooh! arretez! arret\u00e9zei! \"", + "DEFAULT": "\" oooh! arretez arettez! oohh!", + "DEFAULT_GREY_PAD": "Oooh! arretez arretezi oooh?", + "PADDED_4": "Oooh! arretez! arretetez! oooh?", + "PADDED_8": "\" oooh! arretez aarretez! \"", + "EXTRACTED_INIT_BOX": "\" oh! arctez arcteus! do you think i'm stupid?", + "PADDED_4_EXTRACTED": "\" i 'll arrest you! \" arcturus shouted.", + "PADDED_8_EXTRACTED": "\" ha! arctez arreterei! 
\"", + "PADDED_8_DILATION_1": "\"000h! arrettez. arretetez! dooa hi\"", + "PAD_8_FRACT_0_5": "\"00h! arrettez! arrettezi !\"", + "PAD_8_FRACT_0_2": "\"000h! arretez! arretetez! 000h?" + }, + "10": { + "INITIAL_BOX": "Pourquoi donc nemo's agite-t-il tant la nuit? ce soir il ne faut que se retourner!", + "DEFAULT": "Pourquoi donc nemo's agite-t-il tant la nuit? ce soir il ne faut que se retourner!", + "DEFAULT_GREY_PAD": "Pourquoi donc nemo s'agite-t-il tant la nuit? ce soir il ne faut que se retourner!", + "PADDED_4": "Pourquoi donc memo s'agite-t-il tant la nuit? ce soir il ne faut que se retourner!", + "PADDED_8": "Pourquoi donc nemo s'agite-t-il tant la nuit? ce soir il ne faut que se retourner!", + "EXTRACTED_INIT_BOX": "Pourquoi dois-je m'humilier? - eh! tu veux que je te laisse tranquille? ce soir, il ne faut qu'que ses retours se terminent !", + "PADDED_4_EXTRACTED": "Pourquoi est-ce que je ne peux pas m'en aller?", + "PADDED_8_EXTRACTED": "Pourquoi dois-je m'humilier? -tu veux me agitter? -il faut la lune? ce soir, il ne fait qu'enfer! -retourner?", + "PADDED_8_DILATION_1": "Pourquoi done memo stagite-t il tanti la nutt? ce soir il ne part que se retourner!", + "PAD_8_FRACT_0_5": "Pourquoi dois-je m\u00e9mo sa git\u00e9-t-7 il t'ant la nuit? ce soir il ne faut que \u00e7a s'arr\u00eate!", + "PAD_8_FRACT_0_2": "Pouvons-nous diner avec ma s\u0153ur? -7 il vaut la peine de le dire! ce soir il ne faut que 5$ pour rouler !" + } + }, + "Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg": { + "0": { + "DEFAULT_GREY_PAD": "On stage" + } + }, + "PIKE_BOYLOVEGIRLS_T41_012.jpg": { + "0": { + "DEFAULT_GREY_PAD": "Curt! oh... i forgot we were going to play tennis!", + "INITIAL_BOX": "Curt! oh... i forgot we were going to play tennis?", + "DEFAULT": "Curt! oh... i forgot we were going to play tennis?", + "PADDED_4": "Curt! oh... i forgot we were going to play tennis!", + "PADDED_8": "Curt! oh... i forgot we were going to play tennis!", + "EXTRACTED_INIT_BOX": "Curt! oh... 
i forgot we were going to play tennis?", + "PADDED_4_EXTRACTED": "Gurt! oh... i forgot we were going to play tennis?", + "PADDED_8_EXTRACTED": "Curt! oh... i forgot we were going to play tennis?", + "PADDED_8_DILATION_1": "Curt! oh... i forgot we were going to play tennis?", + "PAD_8_FRACT_0_5": "Curt! oh... i forgot we were going to play tennis?", + "PAD_8_FRACT_0_2": "Curt! oh... i forgot we were going to play tennis?" + }, + "1": { + "INITIAL_BOX": "Well ... okay, honey! i 'd just as soon go for a drive, anyway?", + "DEFAULT": "Well ... okay, honey! i 'd just as soon go for a drive, anyway?", + "DEFAULT_GREY_PAD": "Well... okay, honey! i 'd just as soon go for a drive, anyway?", + "PADDED_4": "Well ... okay, honey! i 'd just as soon go for a drive, anyway !", + "PADDED_8": "Well... okay, honey! i 'd just as soon go for a drive, anyway?", + "EXTRACTED_INIT_BOX": "Well ... okay, honey! i 'd just as soon go for a drive, anyway?", + "PADDED_4_EXTRACTED": "Well ... okay, honey! i 'd just as soon go for a drive, anyway?", + "PADDED_8_EXTRACTED": "Well ... okay, honey! i 'd just as soon go for a drive, anyway?", + "PADDED_8_DILATION_1": "Well ... okay, honey! i 'd just as soon go for a drive anyway?", + "PAD_8_FRACT_0_5": "Well ... okay, honey! i 'd just as soon go for a drive, anyway?", + "PAD_8_FRACT_0_2": "Well ... okay, honey! i 'd just as soon go for a drive, anyway?" 
+ }, + "2": { + "INITIAL_BOX": "I was in a bad mood, and curt sensed it immediately ...", + "DEFAULT": "I was in a bad mood, and curt sensed it immediately ...", + "DEFAULT_GREY_PAD": "I was in a bad mood, and curt sensed it immediately ...", + "PADDED_4": "I was in a bad mood, and curt sensed it immediately ...", + "PADDED_8": "I was in a bad mood, and curt sensed it immediately ...", + "EXTRACTED_INIT_BOX": "I was pa a bad wood, and court sensed \" immediately ......\"", + "PADDED_4_EXTRACTED": "I was in a bad mood, and curt sensed it immediately ...", + "PADDED_8_EXTRACTED": "I was at a bad mood, and curt sensed it immediately ...", + "PADDED_8_DILATION_1": "I was in a bad mood, and gurt seemed angry immediately ...", + "PAD_8_FRACT_0_5": "I was at a bad mood, and curt sensed it immediately ...", + "PAD_8_FRACT_0_2": "I was in a bad mood, and curt sensed it immediately ..." + }, + "3": { + "INITIAL_BOX": "You've seemed so unhappy lately, cynthya! i wish there was something i could do! i wish you'd let me try and make you happy?", + "DEFAULT": "You've seemed so unhappy lately, cynthya! i wish there was something i could do! i wish you'd let me try and make you happy?", + "DEFAULT_GREY_PAD": "You've seemed so unhappy lately, cynthya! i wish there was something i could do! i wish you'd let me try and make you happy!", + "PADDED_4": "You've seemed so unhappy lately, cynthyl i wish there was something i could do i wish you 'd let me try and make you happy!", + "PADDED_8": "You've seemed so unhappy lately, cynthya! i wish there was something i could do! i wish you'd let me try and make you happy!", + "EXTRACTED_INIT_BOX": "You've seemed so unhappy lately, cynthia! i wish there was something i could do! i wish you'd let me try and make you happy?", + "PADDED_4_EXTRACTED": "You've seemed so unhappy lately, cynthya! i wish there was something i could do to make you happy! 
i wish you'd let me try and make you happy?", + "PADDED_8_EXTRACTED": "You've seemed so unhappy lately, cynthya! i wish there was something i could do! i wish you'd let me try and make you happy?", + "PADDED_8_DILATION_1": "You've seemed so unhappy lately, cynthya! i wish there was something i could do! i wish you'd let me try and make you happy!", + "PAD_8_FRACT_0_5": "You've seemed so unhappy lately, cynthya! i wish there was something i could do! i wish you'd let me try and make you happy?", + "PAD_8_FRACT_0_2": "You've seemed so unhappy lately, cynthya! i wish there was something i could do! i wish you'd let me try and make you happy?" + }, + "4": { + "INITIAL_BOX": "I knew what was coming, but i didn't want to give him an answer ... not then! i tried to change the mood!", + "DEFAULT": "I knew what was coming, but i didn't want to give him an answer ... not then! i tried to change the wood!", + "DEFAULT_GREY_PAD": "I knew what was coming, but i didn't want to give him an answer ... not then. i tried to change the mood.", + "PADDED_4": "I knew what was coming, but i didn't want to give him an answer ... not then. i tried to change the mood.", + "PADDED_8": "I knew what was coming, but i didn't want to give him an answer ... not then. i tried to change the mood.", + "EXTRACTED_INIT_BOX": "I knew what was coming, but i didn't want to give him an answer ... not theanswer i tried to change - the wood!", + "PADDED_4_EXTRACTED": "I knew what was coming, but i didn't want to give him an answer ... not then! i tried to change the wood,", + "PADDED_8_EXTRACTED": "I knew what was coming, but i didn't want to give him an answer ... not then! i tried to change the wood.", + "PADDED_8_DILATION_1": "I knew what was coming, but i didn't want to give him any answer ... not then! i tried to change the mood.", + "PAD_8_FRACT_0_5": "I knew what was coming, but i didn't want to give him an answer ... not then! 
i tried to change the wood.", + "PAD_8_FRACT_0_2": "I knew what was coming, but i didn't want to give him an answer ... not then! i tried to change the wood." + }, + "5": { + "INITIAL_BOX": "Goodness, curt, you worry about me too much! c'mon, let's go for a swim?", + "DEFAULT": "Goodness, curt, you worry about me too much! c'mon, let's go for a swim?", + "DEFAULT_GREY_PAD": "Goodness, curt, you worry about me too much! c'mon, let's go for a swim?", + "PADDED_4": "Goodness, curt, you worry about me too much! c'mon, let's go for a swim?", + "PADDED_8": "Goodness, curt, you worry about me too much! c'mon, let's go for a swim?", + "EXTRACTED_INIT_BOX": "Goodness, curt, you worry about me too much! c'mon, let's go for a swim?", + "PADDED_4_EXTRACTED": "Goodness, curt, you worry about me too much! c'mon, let's go for a swim?", + "PADDED_8_EXTRACTED": "Goodness, curt, you worry about me too much! c'mon, let's go for a swim?", + "PADDED_8_DILATION_1": "Goodness, curt, you worry about me too much! canon, let's go for a swim!", + "PAD_8_FRACT_0_5": "Goodness, curt, you worry about me too much! c'mon, let's go for a swim?", + "PAD_8_FRACT_0_2": "Goodness, curt, you worry about me too much! c'mon, let's go for a swim?" + }, + "6": { + "INITIAL_BOX": "I liked curt but i didn't love him, and i knew that marrying him would only be a surrender to my parents. i had to lick this problem myself - not give in to it later that evening, when we dropped in at gloria's. everybody gathered at gloria's -", + "DEFAULT": "I liked curt but i didn't love him, and i knew that marrying him would only be a surrender to my parents. i had to lick this problem myself - not give in to it later that evening, when we dropped in at gloria's. everybody gathered at gloria's...", + "DEFAULT_GREY_PAD": "I liked curt but i didn't love him, and i knew that marrying him would only be a surrender to my parents. 
i had to lick this problem myself not give in to it later that evening, we dropped in at gloria's everybody gathered at gloria's...", + "PADDED_4": "I liked curt but i didn't love him, and i knew that marrying him would only be a surrender to my parents. i had to lick this problem myself - not give in to it later that evening, when he dropped in at gloria's. everybody gathered at gloria's--", + "PADDED_8": "I liked curt but i didn't love him, and i knew that marrying him would only be a surrender to my parents. i had to lick this problem myself - not give in to it later that evening, when we dropped in at gloria's. everybody gathered at gloria's...", + "EXTRACTED_INIT_BOX": "I liked curt but i didn't love him, and i knew that marrying him would only be a surrender to my parents. i had to lick this problem myself not give in to it! later that evening, at gloria's everybody gathered at gloria's...", + "PADDED_4_EXTRACTED": "I liked curt but i didn't love him, and i knew that carrying his child would only be to surrender to my parents. i had to lick this problem myself. not give in to it! later that evening, at gloria's everybody gathered at gloria's - everyone except me.", + "PADDED_8_EXTRACTED": "I liked curt but i didn't love him, and i knew that warrying him would only be a surrender to my parents. i had to lick this problem myself not give in to it! later that evening, at 10 pm, in everybody's gloria's everybody gathered at gloria's...", + "PADDED_8_DILATION_1": "I liked curt but i didn't love him, and i knew that marrying him would only be to surrender to my parents. i had to lick this problem myself later that evening, in fact he dropped in at gloria's everybody gathered at gloria's--", + "PAD_8_FRACT_0_5": "I liked curt but i didn't love him, and i knew that warring his would only be a surrender to my parents. i had to lick this problem myself not give in to it! 
later that evening, we dropped in at gloria's everybody gazed at gloria's...", + "PAD_8_FRACT_0_2": "I liked curt but i didn't love him, and i knew that carrying his child would only be to surrender to my parents. i had to lick this problem myself. not give in to it! later that evening, at gloria's everybody gathered at gloria's..." + }, + "7": { + "INITIAL_BOX": "Hi, kids! what's up?", + "DEFAULT": "Hi, kids! what's up?", + "DEFAULT_GREY_PAD": "Hi, kids! what's up?", + "PADDED_4": "Hi, kids! what's up?", + "PADDED_8": "Hi, kids! what's up?", + "EXTRACTED_INIT_BOX": "Hi, kids! what's up?", + "PADDED_4_EXTRACTED": "Hi, kids! what's up?", + "PADDED_8_EXTRACTED": "Hi, kids! what's up?", + "PADDED_8_DILATION_1": "Hi, kids! what's up?", + "PAD_8_FRACT_0_5": "Hi, kids! what's up?", + "PAD_8_FRACT_0_2": "Hi, kids! what's up?" + }, + "8": { + "INITIAL_BOX": "Sparkling, wit... brilliant conversation... penetrating thought... what did you expect?", + "DEFAULT": "Sparkling, wit... brilliant conversation... penetrating thought... what did you expect?", + "DEFAULT_GREY_PAD": "Sparkling wit... brilliant conversation... penetrating thought... what did you expect?", + "PADDED_4": "Sparkling wit... brilliant conversation... penetrating thought... what did you expect?", + "PADDED_8": "Sparkling wit... brilliant conversation... penetrating thought... what did you expect?", + "EXTRACTED_INIT_BOX": "Sparkling...w.t....brilliant conversation.....penetrating thought....what did you expect?", + "PADDED_4_EXTRACTED": "Sparkling...wit...brilliant conversation....penetrating thought....what did you expect?", + "PADDED_8_EXTRACTED": "Sparkling...wit...brilliant conversation....penetrating thought....what did you expect?", + "PADDED_8_DILATION_1": "Sparkling wit ... brilliant ant conversation ... penetrating thought ... what did you expect?", + "PAD_8_FRACT_0_5": "Sparkling...wit...brilliant conversation....penetrating thought....what did you expect?", + "PAD_8_FRACT_0_2": "Sparkling wit ... 
brilliant conversation ... penetrating thought ... what did you expect?" + }, + "9": { + "INITIAL_BOX": "He means it 's the usual dull evening!", + "DEFAULT": "He means it 's the usual dull evening!", + "DEFAULT_GREY_PAD": "He means it 's the usual dull evening!", + "PADDED_4": "He means it 's the usual dull evening!", + "PADDED_8": "He means it's the usual dull evening!", + "EXTRACTED_INIT_BOX": "He means it 's the usual dull evening!", + "PADDED_4_EXTRACTED": "He means it 's the usual dull evening!", + "PADDED_8_EXTRACTED": "He means it 's the usual dull evening!", + "PADDED_8_DILATION_1": "He means it 's the usual dull evening!", + "PAD_8_FRACT_0_5": "He means it 's the usual dull evening!", + "PAD_8_FRACT_0_2": "He means it 's the usual dull evening!" + }, + "10": { + "INITIAL_BOX": "My old man says i'm going to have to take that advertising job in new york!", + "DEFAULT": "My old man says i'm going to have to take that advertising job in new york!", + "DEFAULT_GREY_PAD": "My old man says i'm going to have to take that advertising job in new york!", + "PADDED_4": "My old man says i'm going to have to take that advertising job in new york!", + "PADDED_8": "My old man says i'm going to have to take that advertising job in new york!", + "EXTRACTED_INIT_BOX": "My old man says i'm going to have to take that advertising job in new york!", + "PADDED_4_EXTRACTED": "Any old man says i'm going to have to take that advertising job in new york!", + "PADDED_8_EXTRACTED": "Any old man says i'm going to have to take that advertising job in new york!", + "PADDED_8_DILATION_1": "My old man says i'm going to have to take that advertising job in new york!", + "PAD_8_FRACT_0_5": "Any old man says i'm going to have to take that advertising job in new york!", + "PAD_8_FRACT_0_2": "My old man says i'm going to have to take that advertising job in new york!" 
+ }, + "11": { + "INITIAL_BOX": "The conversation wandered aimlessly ...", + "DEFAULT": "The conversation wandered aimlessly ...", + "DEFAULT_GREY_PAD": "The conversation wandered aimlessly ...", + "PADDED_4": "The conversation wandered aimlessly ...", + "PADDED_8": "The conversation wandered aimlessly ...", + "EXTRACTED_INIT_BOX": "The colossus against whom he waged war was ordered on an angle essentially...", + "PADDED_4_EXTRACTED": "\" the conversations from mankind originated on an amorphously shaped planet.", + "PADDED_8_EXTRACTED": "\" the conversation was narrated on an ambulance bed \".", + "PADDED_8_DILATION_1": "\" the college examinations wangled on in a lessless? ...", + "PAD_8_FRACT_0_5": "The conversation wandered on amiable essy...", + "PAD_8_FRACT_0_2": "The conversation wandered on amiable essyl..." + }, + "12": { + "INITIAL_BOX": "Yeah, me, too! hell take me on at the bank, but i think i'd rather work in my uncle's publishing house! did it ever", + "DEFAULT": "Yeah, me, too! hell take me on at the bank, but i think i'd rather work in my uncle's publishing house! did it ever?", + "DEFAULT_GREY_PAD": "Yeah, me, too! he'll take me on at the bank, but i think i'd rather work in my uncle's publishing house! did it ever?", + "PADDED_4": "Yeah, me, too! he'll take me on at the bank, but i think i 'd rather work in my uncle's publishing house! did it ever new york?", + "PADDED_8": "Yeah, me, too! he'll take me on at the bank, but i think i 'd rather work in my uncle's publishing house?", + "EXTRACTED_INIT_BOX": "Yeah, me, too! he'll take me on at the bank, but i think i'd rather work in my uncle's publishing house! did it ever", + "PADDED_4_EXTRACTED": "Yeah, me, too! he'll take me on at the bank, but i think i 'd rather work in my uncle's publishing house! did it ever occur to you that he might be interested?", + "PADDED_8_EXTRACTED": "Yeah, me...100! 
he'll take me on at the bank, but i think i 'd rather work in my uncle's publishing house?", + "PADDED_8_DILATION_1": "Yeah, me too! he'll take me on at the bank, but i think i 'd rather work in my uncle's publishing house?", + "PAD_8_FRACT_0_5": "Yeah, me, too! he'll take me on at the bank, but i think i 'd rather work in my uncle's publishing house?", + "PAD_8_FRACT_0_2": "Yeah, me, too! he'll take me on at the bank, but i think i 'd rather work in my uncle's publishing house?" + }, + "13": { + "INITIAL_BOX": "Lucky? my old man's got enough dough to support half this town! why should i want to work?", + "DEFAULT": "Lucky? my old man's got enough dough to support half this town! why should i want to work?", + "DEFAULT_GREY_PAD": "Lucky? my old man's got enough dough to support half this town! why should i want to work?", + "PADDED_4": "Lucky? my old man's got enough dough to support half this town! why should i want to work?", + "PADDED_8": "Lucky? my old man's got enough dough to support half this town! why should i want to work?", + "EXTRACTED_INIT_BOX": "Lucky? my old man's got enough dough to support half this town! why should i want to work?", + "PADDED_4_EXTRACTED": "Lucky? my old man's got enough dough to support half this town! why should i want to work?", + "PADDED_8_EXTRACTED": "Lucky? my old man's got enough dough to support half this town! why should i want to work?", + "PADDED_8_DILATION_1": "Lucky? my old man's got enough dough to support half this town! why should i want to work?", + "PAD_8_FRACT_0_5": "Lucky? my old man's got enough dough to support half this town! why should i want to work?", + "PAD_8_FRACT_0_2": "Lucky? my old man's got enough dough to support half this town! why should i want to work?" 
+ }, + "14": { + "INITIAL_BOX": "If you don't know, i'm not going to tell you?", + "DEFAULT": "If you don't know, i'm not going to tell you?", + "DEFAULT_GREY_PAD": "If you don't know, i'm not going to tell you!", + "PADDED_4": "If you don't know, i'm not going to tell you!", + "PADDED_8": "If you don't know, i'm not going to tell you!", + "EXTRACTED_INIT_BOX": "If you don't know, i'm not going to tell you?", + "PADDED_4_EXTRACTED": "If you don't know, i'm not going to tell you?", + "PADDED_8_EXTRACTED": "If you don't know, i'm not going to tell you?", + "PADDED_8_DILATION_1": "If you don't know, i am not going to tell you?", + "PAD_8_FRACT_0_5": "If you don't know, i'm not going to tell you?", + "PAD_8_FRACT_0_2": "If you don't know, i'm not going to tell you?" + }, + "15": { + "INITIAL_BOX": "Are you speaking to me?", + "DEFAULT": "Are you speaking to me?", + "DEFAULT_GREY_PAD": "Are you speaking to me?", + "PADDED_4": "Are you speaking to me?", + "PADDED_8": "Are you speaking to me?", + "EXTRACTED_INIT_BOX": "Are you speaking to me?", + "PADDED_4_EXTRACTED": "Are you speaking to me?", + "PADDED_8_EXTRACTED": "Are you speaking to me?", + "PADDED_8_DILATION_1": "Are you speaking to me?", + "PAD_8_FRACT_0_5": "Are you speaking to me?", + "PAD_8_FRACT_0_2": "Are you speaking to me?" 
+ }, + "16": { + "INITIAL_BOX": "Did it ever occur to you that you're lucky to get jobs?", + "DEFAULT": "Did it ever occur to you that you 're lucky to get jobs?", + "DEFAULT_GREY_PAD": "Did it ever occur to you that you 're lucky to get jobs?", + "PADDED_4": "Did it ever occur to you that you 're lucky to get jobs?", + "PADDED_8": "Did it ever occur to you that you 're lucky to get jobs?", + "EXTRACTED_INIT_BOX": "Did it ever occur to you that you 're lucky to get jobs?", + "PADDED_4_EXTRACTED": "Did it ever occur to you that you 're lucky to get jobs?", + "PADDED_8_EXTRACTED": "Did it ever occur to you that you 're lucky to get jobs?", + "PADDED_8_DILATION_1": "Did it ever occur to you that you 're lucky to get jobs?", + "PAD_8_FRACT_0_5": "Did it ever occur to you that you 're lucky to get jobs?", + "PAD_8_FRACT_0_2": "Did it ever occur to you that you 're lucky to get jobs?" + } + }, + "Strange_Tales_172021.jpg": { + "0": { + "DEFAULT_GREY_PAD": "A blur--only a blur at first, dark menacing...greets brother voodoo's eyes as he rises from the darkness......", + "INITIAL_BOX": "A blur -- only a blur at first, dark menacing...greets brother voodoo's eyes as he rises from the darkness....\"", + "DEFAULT": "A blur--only a blur at first, dark menacing...greets brother voodoo's eyes as he rises from the darkness...\"", + "PADDED_4": "A blur--only a blur at first, dark menacing...greets brother voodoo's eyes as he rises from the darkness......", + "PADDED_8": "A blur--only a blur at first, dark menacing...greets brother voodoo's eyes as he rises from the darkness......", + "EXTRACTED_INIT_BOX": "A billion -- only a billion at first, dark menacing--\" greets another voodoo's eyes as he rises from the darkness... \"", + "PADDED_4_EXTRACTED": "A blur -- only a blair at first, dark menacing--\" greets another / voodoo's eyes as he rodes from the darknesses... 
\"", + "PADDED_8_EXTRACTED": "A billion--only a blur at first, dark menacing...greets brother vodooodoo's eyes as he howls from the darkness......", + "PADDED_8_DILATION_1": "A blur -- only a blur at first, dark menacing greets brother voodoo's eyes as he rises from the darkness, ......", + "PAD_8_FRACT_0_5": "A blur -- only a blizz at first, dark menacing greys behind him. vooodoo is eyes as he nods from the darkness. ...", + "PAD_8_FRACT_0_2": "A blur -- only a blur at first, dark mannacing ... ghosts brother voodoo is eyes as he names from the darkness ... \"" + }, + "1": { + "INITIAL_BOX": "-\" but brother voodoo is not yet ready to sit at the table on the dead! \"", + "DEFAULT": "\" but brother voodoo is not yet ready to sit at the table on the dead! \"", + "DEFAULT_GREY_PAD": "\" but brother voodoo is not yet ready to sit at the table of the dead! \"", + "PADDED_4": "\" but brother voodoo is not yet ready to sit at the table of the dead! \"", + "PADDED_8": "\" but brother voodoo is not yet ready to sit at the table for the dead! \"", + "EXTRACTED_INIT_BOX": "\" but brother-in-law is not yet ready to sit at this table on the dead! \"", + "PADDED_4_EXTRACTED": "\" suit yourself wyatt is not ready to sit at the tables of the dead! \"", + "PADDED_8_EXTRACTED": "\" but brother wisedom is not yet ready to sit at the table of the dead! \"", + "PADDED_8_DILATION_1": "A - butt brother voodoo is not ready to sit at this table on the dead! \"", + "PAD_8_FRACT_0_5": "- but brother who'd have thought you were ready to sit at the tables of this dead! \"", + "PAD_8_FRACT_0_2": "-\" but brother whooosh! we're not ready to git at this trouble of this dead! 
\"" + }, + "2": { + "INITIAL_BOX": "--and immediately regrets it!?", + "DEFAULT": "--and immediately regrets it!?", + "DEFAULT_GREY_PAD": "--and immediately regrets it!?", + "PADDED_4": "--and immediately regrets it!?", + "PADDED_8": "--and immediately regrets it!?", + "EXTRACTED_INIT_BOX": "--and unleashed days!?!!?", + "PADDED_4_EXTRACTED": "--\"and unleash diversity defence-yes it!?", + "PADDED_8_EXTRACTED": "--and unleashed dynamicity begins it? !", + "PADDED_8_DILATION_1": "--and immediately regrets it!", + "PAD_8_FRACT_0_5": "--and unanswered diary entries!!?", + "PAD_8_FRACT_0_2": "--and immediately afterretes it! ?" + }, + "3": { + "INITIAL_BOX": "W-h-a? m-my throat! ?", + "DEFAULT": "W-h-a? m-my throat!?", + "DEFAULT_GREY_PAD": "W-h-a? m-my throat! ?", + "PADDED_4": "W-h-a? m-my throat!?", + "PADDED_8": "W-h-a? m-my throat!?", + "EXTRACTED_INIT_BOX": "W-w-what?!! my m-my throat! ?", + "PADDED_4_EXTRACTED": "What's that? my throat? !", + "PADDED_8_EXTRACTED": "What?!! my throat!?", + "PADDED_8_DILATION_1": "What? m-my throat?", + "PAD_8_FRACT_0_5": "What?!! my throat!?", + "PAD_8_FRACT_0_2": "Why-ha?--my throat!?" + }, + "4": { + "INITIAL_BOX": "\" the hooded men have not fled, \" thinks the still-dazed voodoo-lord: \" one, at least, has remained behind to deliver the final stroke...", + "DEFAULT": "\" the hooded men have not fled, \" thinks the still-dazed voodoo-lord: \" one, at least, has remained behind to deliver the final stroke...", + "DEFAULT_GREY_PAD": "\" the hooded men have not fled, \" thinks the still-dazed voodoo-lord: \" one, at least, has remained behind to deliver the final stroke...", + "PADDED_4": "\"the hooded men have not fled, \" thinks the still-dazed voodoo-lord? \" one, at least, has remained behind to deliver the final stroke...", + "PADDED_8": "\"the hooded men have not fled, \" thinks the still-dazed voodoo-lord? 
\" one, at least, has remained behind to deliver the final stroke--\"", + "EXTRACTED_INIT_BOX": "\" the hooded men have not fled, \" tinks the still-dazed woodman-lord. \" one, at least, has remained behind to another the final strike -", + "PADDED_4_EXTRACTED": "\" the hooded men have not fled, \" it thinks the still - dazed woodman lord. \" one, at least, has remained behind to deliver the final stroke-\"", + "PADDED_8_EXTRACTED": "\" the hooded men have not fled, \" i thinks the still- dazed woodman - lord? one, at least has remained behind to answer the final stroke -", + "PADDED_8_DILATION_1": "\" the hooded men have not fled, \" thinks the still-dazed voodoo-horror, \" one, at least, has remained behind to acheive the final stroke-", + "PAD_8_FRACT_0_5": "\" the hooded men have not fled, \" it thinks the still- dazed woodman lord, \" one, at least has remained behind to maneuver the final stroke.", + "PAD_8_FRACT_0_2": "\" the hooded men have not flinched, \" it thinks the still- dazed wood-lord, one, at least, has remained behind to face the final strokes." + }, + "5": { + "INITIAL_BOX": "Inspector tate? i ...", + "DEFAULT": "Inspector tate? i ...", + "DEFAULT_GREY_PAD": "Inspector tate? i ...", + "PADDED_4": "Inspector tate? i ...", + "PADDED_8": "Inspector tate? i ...", + "EXTRACTED_INIT_BOX": "Inspector tate? i'm...", + "PADDED_4_EXTRACTED": "Inspector tate? i...", + "PADDED_8_EXTRACTED": "Inspector tate? i ...", + "PADDED_8_DILATION_1": "Inspector tate? i...", + "PAD_8_FRACT_0_5": "Inspector tate? i ...", + "PAD_8_FRACT_0_2": "Inspector tate? i ..." + }, + "6": { + "INITIAL_BOX": "I can imagine what you thought, man!", + "DEFAULT": "I can imagine what you thought, man!", + "DEFAULT_GREY_PAD": "I can imagine what you thought, man?", + "PADDED_4": "I can imagine what you thought, man?", + "PADDED_8": "I can imagine what you thought, man?", + "EXTRACTED_INIT_BOX": "I can imagine what you thought, man! 
?", + "PADDED_4_EXTRACTED": "I can imagine what you thought, man!", + "PADDED_8_EXTRACTED": "I can imagine what you thought, man!", + "PADDED_8_DILATION_1": "I can imagine what you thought, man!", + "PAD_8_FRACT_0_5": "I can imagine what you thought, man!", + "PAD_8_FRACT_0_2": "I can imagine what you thought, man!" + }, + "7": { + "INITIAL_BOX": "With this, the lord of the loa thrusts out a wise-like hand -", + "DEFAULT": "With this, the lord of the loa thrusts out a wise-like hand -", + "DEFAULT_GREY_PAD": "With this, the lord of the loa thrusts out a wise-like hand -", + "PADDED_4": "With this, the lord of the loa thrusts out a wise-like hand -", + "PADDED_8": "With this, the lord of the loa thrusts out a vise-like hand.", + "EXTRACTED_INIT_BOX": "With this, the lord of the loa thrusts out a wise-like hand...", + "PADDED_4_EXTRACTED": "With this, the lords of the loa thrust out a vice-like hand -", + "PADDED_8_EXTRACTED": "With this, the lord of the loa thrusts out a vice-like hand!", + "PADDED_8_DILATION_1": "With this, the lord of the loa thrusts out a wise-like hand ...", + "PAD_8_FRACT_0_5": "With this, the lord of the loa thrusts out a vice-like hand!", + "PAD_8_FRACT_0_2": "With this, the lord of the loa thrusts out a vice-like hand! \"" + }, + "8": { + "INITIAL_BOX": "\"forgive me, sir... but i thought...\".", + "DEFAULT": "Forgive me, sir... but i thought...", + "DEFAULT_GREY_PAD": "\"forgive me, sir... but i thought...\".", + "PADDED_4": "\"forgive me, sir... but i thought...\".", + "PADDED_8": "I forgive you.", + "EXTRACTED_INIT_BOX": "\"forgive me, sir... but i thought...\".", + "PADDED_4_EXTRACTED": "\"forgive me, sir... but i thought...\".", + "PADDED_8_EXTRACTED": "\"forgive me, sir - but i thought...\".", + "PADDED_8_DILATION_1": "\"forgive me, sir... but i thought...\".", + "PAD_8_FRACT_0_5": "\"forgive me, sir... but i thought...\".", + "PAD_8_FRACT_0_2": "\"forgive me, sir... but i thought...\"." 
+ }, + "9": { + "INITIAL_BOX": "So's this one, sam... a little groggy -- but he'll survive.", + "DEFAULT": "So's this one, sam. a little groggy--but he'll survive.", + "DEFAULT_GREY_PAD": "So's this one, sam. a little groggy--but he'll survive.", + "PADDED_4": "So's this one, sam. a little groggy--but he'll survive.", + "PADDED_8": "So's this one, sam. a little groggy--but he'll survive.", + "EXTRACTED_INIT_BOX": "So's this one, sam, a little groogy-- but we'll survive.", + "PADDED_4_EXTRACTED": "90's this one, gam... a little groogy-- but he'll survive.", + "PADDED_8_EXTRACTED": "So's this one, sam. a little groogy--but we'll survive.", + "PADDED_8_DILATION_1": "So's this one, sam. a little groggy--but he'll survive.", + "PAD_8_FRACT_0_5": "So's this one, gam... a little groogy-- but he'll survive.", + "PAD_8_FRACT_0_2": "20's this one, sam... a little groogy-- but we'll survive." + }, + "10": { + "INITIAL_BOX": "And your name...", + "DEFAULT": "And your name?", + "DEFAULT_GREY_PAD": "And your name?", + "PADDED_4": "And your name?", + "PADDED_8": "And your name?", + "EXTRACTED_INIT_BOX": "And your name is...", + "PADDED_4_EXTRACTED": "And your name?", + "PADDED_8_EXTRACTED": "And your name is?", + "PADDED_8_DILATION_1": "And your name?", + "PAD_8_FRACT_0_5": "And your name?", + "PAD_8_FRACT_0_2": "And your name?" + }, + "11": { + "INITIAL_BOX": "Enough intro-ductions.", + "DEFAULT": "Enough intro-ductions.", + "DEFAULT_GREY_PAD": "Enough intro-ductions.", + "PADDED_4": "Enough intro-ductions.", + "PADDED_8": "Enough intro-ductions.", + "EXTRACTED_INIT_BOX": "Enough intro-actions.", + "PADDED_4_EXTRACTED": "Enough intro-auctions...", + "PADDED_8_EXTRACTED": "Enough intro-auctions.", + "PADDED_8_DILATION_1": "Enough intro-auctions.", + "PAD_8_FRACT_0_5": "Enough intro-auctions.", + "PAD_8_FRACT_0_2": "Enough intro-auctions." 
+ }, + "12": { + "INITIAL_BOX": "I only bent down to see how you were...", + "DEFAULT": "I only bent down to see how you were...", + "DEFAULT_GREY_PAD": "I only bent down to see how you were...", + "PADDED_4": "I only bent down to see how you were...", + "PADDED_8": "I only bent down to see how you were...", + "EXTRACTED_INIT_BOX": "I only bent down to see how you were...", + "PADDED_4_EXTRACTED": "I only bent down to see how you were...", + "PADDED_8_EXTRACTED": "I only bent down to see how you were...", + "PADDED_8_DILATION_1": "I only bent down to see how you were...", + "PAD_8_FRACT_0_5": "I only bent down to see how you were...", + "PAD_8_FRACT_0_2": "I only bent down to see how you were..." + }, + "13": { + "INITIAL_BOX": "... but, judging by the bruises on my neck, i did say you're fine!", + "DEFAULT": "--- but, judging by the bruises on my neck, i'd say you're fine!", + "DEFAULT_GREY_PAD": "\"\" but, judging by the bruises on my neck, i did say you're fine! \"", + "PADDED_4": "--- but, judging by the bruises on my neck, i'd say you're fine!", + "PADDED_8": "\"\" but, judging by the bruises on my neck, i did say you're fine! \"", + "EXTRACTED_INIT_BOX": "--- but, judging by the bruises on my neck, i did say you're fine! \"", + "PADDED_4_EXTRACTED": "--- but, judging by the bruises on my neck, i say you're fine!", + "PADDED_8_EXTRACTED": "-\" but, judging by the bruises on my neck, i did say you're fine! \"", + "PADDED_8_DILATION_1": "--\" but, judging by the bruises on my neck, i'd say you're fine! \"", + "PAD_8_FRACT_0_5": "... but, judging by the bruises on my neck, i did say you're fine!", + "PAD_8_FRACT_0_2": "... but, judging by the bruises on my neck, i did say you're fine!" 
+ }, + "14": { + "INITIAL_BOX": "We know who you are, mister: jericho drumm, psychologist - turned-voodoo quack -", + "DEFAULT": "We know who you are, mister: jericho drumm, psychologist - turned-voodoo quack ---", + "DEFAULT_GREY_PAD": "We know who you are, mister: jericho drumm, psychologist - turned-voodoo quick ---", + "PADDED_4": "We know who you are, mister : jericho drumm, psychologist - turned-voodoo quick ---", + "PADDED_8": "We know who you are, mister: jericho drumm, psychologist - turned-voodoo quick ---", + "EXTRACTED_INIT_BOX": "We know who you are, mister; jerkicho drummer, psychologist - turned-yokoddo quick! \"", + "PADDED_4_EXTRACTED": "We know who you are, mister : jerk/cho drummer, psychologist - turned- voodoo quick ---", + "PADDED_8_EXTRACTED": "We know who you are, mister : jericho drummer, psychologist - turned-voodoo shaman. \"", + "PADDED_8_DILATION_1": "We know who you are, mister: jericho drummm, psychologist- turned- voodoo quack! -", + "PAD_8_FRACT_0_5": "We know who you are, mister : jericho drummer, psychologist - turned- voodoo shaman. \"", + "PAD_8_FRACT_0_2": "We know who you are, mister; jericho drumman, psychologist - turned-voodoo shaman -\"" + }, + "15": { + "INITIAL_BOX": "Hawkins, detective first-class... but you can call me pete.", + "DEFAULT": "Hawkins, detective first-class... but you can call me pete...", + "DEFAULT_GREY_PAD": "Hawkins, detective first-class... but you can call me pete.", + "PADDED_4": "Hawkins, detective first-class... but you can call me pete...", + "PADDED_8": "Hawkins, detective first-class... 
but you can call me pete.", + "EXTRACTED_INIT_BOX": "Hawking, detective first-class-- but you can call me pete", + "PADDED_4_EXTRACTED": "Hawking, detective first-class-- but you call me pete", + "PADDED_8_EXTRACTED": "Hawking, detective first-class-- but you can call me pete", + "PADDED_8_DILATION_1": "Hawking, detective first-class-- but you can call me pete", + "PAD_8_FRACT_0_5": "Hawking, detective first-class... but you can call me pete?", + "PAD_8_FRACT_0_2": "Hawking, detective first-class... but you can call me pete?" + }, + "16": { + "INITIAL_BOX": "Easy, sam--the man wouldn't have been crazy enough to call us here if he'd hurt lorry.", + "DEFAULT": "Easy, sam--the man wouldn't have been crazy enough to call us here if he'd hurt lorry.", + "DEFAULT_GREY_PAD": "Easy, sam--the man wouldn't have been crazy enough to call us here if he'd hurt lorry.", + "PADDED_4": "Easy, sam--the man wouldn't have been crazy enough to call us here if he'd hurt lorry.", + "PADDED_8": "Easy, sam--the man wouldn't have been crazy enough to call us here if he'd hurt lorry.", + "EXTRACTED_INIT_BOX": "Easy, sam--the man wouldn't have been crazy enough to call us here if he'd hurt lorry.", + "PADDED_4_EXTRACTED": "Easy, sam--the man wouldn't have been crazy enough to call us here if he'd hurt larry.", + "PADDED_8_EXTRACTED": "Easy, sam--the man wouldn't have been crazy enough to call us here if he'd hurt lorry.", + "PADDED_8_DILATION_1": "Easy, sam - the man wouldn't have been crazy enough to call us here if he'd hurt lorry.", + "PAD_8_FRACT_0_5": "Easy, sam--the man wouldn't have been crazy enough to call us here if he'd hurt lorry.", + "PAD_8_FRACT_0_2": "Easy, sam--the man wouldn't have been crazy enough to call us here if he'd hurt lorry." + }, + "17": { + "INITIAL_BOX": "Very well then... pete?", + "DEFAULT": "Very well then... pete?", + "DEFAULT_GREY_PAD": "Very well then ... pete?", + "PADDED_4": "Very well then... pete!?", + "PADDED_8": "Very well then... 
pete!?", + "EXTRACTED_INIT_BOX": "Very well then... pete!", + "PADDED_4_EXTRACTED": "Very well then ... pete?", + "PADDED_8_EXTRACTED": "Very well then ... pete?", + "PADDED_8_DILATION_1": "Very well then ... pete!", + "PAD_8_FRACT_0_5": "Very well then ... pete!", + "PAD_8_FRACT_0_2": "Very well then ... pete!" + }, + "18": { + "INITIAL_BOX": "I ... wish i knew, inspector she was kidnapped shortly before you arrived -- by the same men who overpowered bambu and myself ---", + "DEFAULT": "I ... wish i knew, inspector she was kidnapped shortly before you arrived - by the same men who overpowered bambu and myself ....", + "DEFAULT_GREY_PAD": "I ... wish i knew, inspector she was kidnapped shortly before you arrived -- by the same men who overpowered bambu and myself ...", + "PADDED_4": "I ... wish i knew, inspector she was kidnapped shortly before you arrived ... by the same men who overpowered bambu and myself ...", + "PADDED_8": "I ... wish i knew, inspector she was kidnapped shortly before you arrived -- by the same men who overpowered bambu and myself ...", + "EXTRACTED_INIT_BOX": "\" ... i wish i knew, inspector\" she was kidnapped shortly before you arrived - by the same men who overpowered him and myself - \"", + "PADDED_4_EXTRACTED": "I ... wish i knew, inspector she was kidnapped shortly before you arrived - by the same men who overpowered symbol and myself ...", + "PADDED_8_EXTRACTED": "... i wish i knew, inspector she was kidnapped shortly before you arrived - by the same men who overpowered both you and myself ...", + "PADDED_8_DILATION_1": "I ... wish i knew, inspector she was kidnapped shortly before you arrived - by the same men who overpowered bambu and myself.", + "PAD_8_FRACT_0_5": "... 
wish i knew, inspector she was kidnapped shortly before you arrived - by the same men who overpowered sholou and myself...", + "PAD_8_FRACT_0_2": "\" i wish i knew, inspector she was kidnapped shortly before you arrived - by the same men who overpowered symbol and myself." + }, + "19": { + "INITIAL_BOX": "\"and i'm warning you now--if you've done anything to hurt my loralee, i'll...\"", + "DEFAULT": "\"and i'm warning you now--if you've done anything to hurt my lorelee, i'll...", + "DEFAULT_GREY_PAD": "\" and i'm warning you now-- if you've done anything to hurt my lorelee, i'll ...", + "PADDED_4": "\"and i'm warning you now--if you've done anything to hurt my coralee, i'll...\"", + "PADDED_8": "\"and i'm warning you now--if you've done anything to hurt my lorelee, i'll...\"", + "EXTRACTED_INIT_BOX": "And i'm warning you now-- if you've done anything to hurt ivan in any way, i'll...", + "PADDED_4_EXTRACTED": "And i'm warning you now--if you've done anything to hurt my daughter, i'll...", + "PADDED_8_EXTRACTED": "And i'm warning you now--if you've done anything to hurt my daughter, i'll...", + "PADDED_8_DILATION_1": "And i'm warning you now--if you've done anything to hurt my nagralee, i'll...", + "PAD_8_FRACT_0_5": "And i'm warning you now-- if you've done anything to hurt my grahlee, i'll ...", + "PAD_8_FRACT_0_2": "And i'm warning you now-- if you've done anything to hurt my graleee, i'll..." + }, + "20": { + "INITIAL_BOX": "Then where is she, drum?", + "DEFAULT": "Then where is she? drumm?", + "DEFAULT_GREY_PAD": "Then where is she, drum?", + "PADDED_4": "Then where is she, drum?", + "PADDED_8": "Then where is she, drum?", + "EXTRACTED_INIT_BOX": "Then where is she? drummer?", + "PADDED_4_EXTRACTED": "Then where is she, drummer?", + "PADDED_8_EXTRACTED": "Then where is she? drummer?", + "PADDED_8_DILATION_1": "Then where is she, drum?", + "PAD_8_FRACT_0_5": "Then where is she? drummer?", + "PAD_8_FRACT_0_2": "Then where is she? drummer?" 
+ }, + "21": { + "INITIAL_BOX": "Men who claimed she was marked as sacrifice to the dark lord...", + "DEFAULT": "Men who claimed she was marked as sacrifice to the dark lord...", + "DEFAULT_GREY_PAD": "--men who claimed she was marked as sacrifice to the dark lord--", + "PADDED_4": "---men who claimed she was marked as sacrifice to the dark lord---", + "PADDED_8": "Men who claimed she was marked as sacrifice to the dark lord...", + "EXTRACTED_INIT_BOX": "--- men who claimed she was marked as sacrifice to the dark lord --\"", + "PADDED_4_EXTRACTED": "--- men who claimed she was marked as sacrifice to the dark lord --\"", + "PADDED_8_EXTRACTED": "---\"men who claimed she was marked as sacrifice to the dark lord\"----", + "PADDED_8_DILATION_1": "---men who claimed she was marked as sacrifice to the dark lord---\"", + "PAD_8_FRACT_0_5": "--- men who claimed she was marked as sacrifice to the dark lord --", + "PAD_8_FRACT_0_2": "--- men who claimed she was marked as sacrifice to the dark lord --" + }, + "22": { + "INITIAL_BOX": "Continued after next page.", + "DEFAULT": "Continued after next page", + "DEFAULT_GREY_PAD": "Continued after next page.", + "PADDED_4": "Continued after next page", + "PADDED_8": "Continued after next page.", + "EXTRACTED_INIT_BOX": "\" countdown to extinction was one of my favorite albums when i was growing up.", + "PADDED_4_EXTRACTED": "Continued as per exit", + "PADDED_8_EXTRACTED": "And he continued as if next", + "PADDED_8_DILATION_1": "Bounded configuration ap ... er exit", + "PAD_8_FRACT_0_5": "And i'm continued as next?", + "PAD_8_FRACT_0_2": "But now # configured ar_er exit?" + }, + "23": { + "INITIAL_BOX": "Marked by the sign of...the black rooster!?", + "DEFAULT": "Marked by the sign of...the black rooster!?", + "DEFAULT_GREY_PAD": "Marked by the sign of... the black rooster!?", + "PADDED_4": "Marked by the sign of...the black rooster!?", + "PADDED_8": "Marked by the sign of... 
the black rooster!?", + "EXTRACTED_INIT_BOX": "Marked by the sign of...the black rooster!?", + "PADDED_4_EXTRACTED": "Marked by the sign of... the black rooster!?", + "PADDED_8_EXTRACTED": "Marked by the sign of... the black rooster? !", + "PADDED_8_DILATION_1": "Marked by the sign of... the black rooster!?", + "PAD_8_FRACT_0_5": "Marked by the sign of the black rooster!", + "PAD_8_FRACT_0_2": "Marked by the sign of the black rooster!" + } + }, + "Tarzan_014-21.JPG": { + "0": { + "DEFAULT_GREY_PAD": "Toog? he is brave to come back here, after fighting with our leader...", + "INITIAL_BOX": "Toog? he is brave to come back here, after fighting with our leader...", + "DEFAULT": "Toog? he is brave to come back here, after fighting with our leader...", + "PADDED_4": "Toog? he is brave to come back here, after fighting with our leader.", + "PADDED_8": "Toog? he is brave to come back here, after fighting with our leader...", + "EXTRACTED_INIT_BOX": "Doog? me he is brave to come back here, after fighting with our leader.", + "PADDED_4_EXTRACTED": "Tyrog p? me is brave to come back here, after fighting with our leader.", + "PADDED_8_EXTRACTED": "Yiodog? me he is brave! to come back here, after fighting with our leader.", + "PADDED_8_DILATION_1": "Toodg p me is brave to come back here, after fighting with our leader.", + "PAD_8_FRACT_0_5": "Yodog p the is brave to come back again, after fighting with our leader.", + "PAD_8_FRACT_0_2": "Tyrog p the great braves to come back after fighting with our leader." + }, + "1": { + "INITIAL_BOX": "But the she will be a pretty one... when all her cuts heal... she will...", + "DEFAULT": "But the she will be a pretty one... when all her cuts heal... she will...", + "DEFAULT_GREY_PAD": "But the she will be a pretty one... when all her cuts heal... she will...", + "PADDED_4": "But she will be a pretty one... when all her cuts heal... she will...", + "PADDED_8": "But she will be a pretty one... when all her cuts heal... 
she will...", + "EXTRACTED_INIT_BOX": "But the gang will be a pretty one... when all her customs are gone... she will... \"", + "PADDED_4_EXTRACTED": "But time will be a pretty one... when all her cuts heal... same huh? ...hmmm....", + "PADDED_8_EXTRACTED": "But time will be a pretty one ... when all her cuts has healed ... she will ...", + "PADDED_8_DILATION_1": "But this she will be a pretty one... when all her cuts heal... she will...", + "PAD_8_FRACT_0_5": "But time will be a pretty one ... haimen all over cuts assail. one wills ...", + "PAD_8_FRACT_0_2": "Butt that game will be a pretty one... when all her cuts heal. some will ..." + }, + "2": { + "INITIAL_BOX": "Teeka will die before she will mate with toog--or with anyone but taung, of the tribe of merchants!", + "DEFAULT": "Teeka will die before she will mate with toog--or with anyone but taung, of the tribe of merchants!", + "DEFAULT_GREY_PAD": "Teeka will die before she will mate with toog--or with anyone but taung, of the tribe of merchak!?", + "PADDED_4": "Teeka will die before she will mate with toog--or with anyone but taung, of the tribe of merchants!", + "PADDED_8": "Teeka will die before she will mate with toog--or with anyone but taung, of the tribe of merchak!?", + "EXTRACTED_INIT_BOX": "Teefya will die before she will make with tooc--or with anyone buying of the virtue of miesmosmann!", + "PADDED_4_EXTRACTED": "Tiffanya will die before she will make with tobacco--or with anyone of the daughters of meroman!", + "PADDED_8_EXTRACTED": "Teeka will die before she will mate with troggs--or with anyone dying, of the property of aeronoman!", + "PADDED_8_DILATION_1": "Teeka will die before she will mate with toog--or with anyone but taung, of the tribe of meroman!?", + "PAD_8_FRACT_0_5": "Treesma will die before she makes it with troops -- or with anyone only dying of the poison of american!?", + "PAD_8_FRACT_0_2": "Teresa will die before she whines. 
matte with toots--or with anyone on the outside of her chain!" + }, + "3": { + "INITIAL_BOX": "Stay out of her reach, mangani! she is a fierce one!", + "DEFAULT": "Stay out of her reach, mangani! she is a fierce one!", + "DEFAULT_GREY_PAD": "Stay out of her reach, mangani! she is a fierce one!", + "PADDED_4": "Stay out of her reach! mangani! she is a fierce one!", + "PADDED_8": "Stay out of her reach, mangani! she is a fierce one!", + "EXTRACTED_INIT_BOX": "Stay out of her francis! marygani is she as a firebomb one?", + "PADDED_4_EXTRACTED": "Stay out of her business! mangamily she is a firebomb one!?", + "PADDED_8_EXTRACTED": "Stay out of her reach! manganiis! she is a fiend one! ?", + "PADDED_8_DILATION_1": "Stay out of her reach, manganelis! she is a fiend one!?", + "PAD_8_FRACT_0_5": "Stay out of her freaking business! manicanni? she is a firebot one! ?", + "PAD_8_FRACT_0_2": "Stay out of her freaking, mango-bannis! she is a fiend one! ?" + }, + "4": { + "INITIAL_BOX": "Just then...", + "DEFAULT": "Just then...", + "DEFAULT_GREY_PAD": "Just then...", + "PADDED_4": "Just then...", + "PADDED_8": "Just then...", + "EXTRACTED_INIT_BOX": "Just then...", + "PADDED_4_EXTRACTED": "\"just then\" he said.", + "PADDED_8_EXTRACTED": "Just then...", + "PADDED_8_DILATION_1": "Just then...", + "PAD_8_FRACT_0_5": "Just then...", + "PAD_8_FRACT_0_2": "Just then..." + }, + "5": { + "INITIAL_BOX": "Mangani! a pair of strange apes comes this way.", + "DEFAULT": "Mangani! a pair of strange apes comes this way.", + "DEFAULT_GREY_PAD": "Mangani! a pair of strange apes comes this way.", + "PADDED_4": "Mangani! a pair of strange apes comes this way.", + "PADDED_8": "Mangani! a pair of strange apes comes this way.", + "EXTRACTED_INIT_BOX": "Maggie ann! a pair of strange apes cries that's nutty?", + "PADDED_4_EXTRACTED": "Manning! a pair of strange apes comes this way?", + "PADDED_8_EXTRACTED": "Mapped ann! a pair of starranger apes monkeys this way?", + "PADDED_8_DILATION_1": "Mangani! 
a pair of strange apes comes this way.", + "PAD_8_FRACT_0_5": "Mappers ami! ? a pair of stranger apes comics this week.", + "PAD_8_FRACT_0_2": "Mapmaking game! a pair of stranger apes comics this way." + }, + "6": { + "INITIAL_BOX": "One is an ape...the other, a hairless monster.", + "DEFAULT": "One is an ape ... the other, a hairless monster.", + "DEFAULT_GREY_PAD": "One is an ape ... the other, a hairless monster.", + "PADDED_4": "One is an ape...the other, a hairless monster.", + "PADDED_8": "One is an ape ... the other, a hairless monster.", + "EXTRACTED_INIT_BOX": "One as an alpe --- the other, a miami egg monster.", + "PADDED_4_EXTRACTED": "Omif is an aide to the other, a hairless monster.", + "PADDED_8_EXTRACTED": "One as an ape -- the other, a hairless monster...", + "PADDED_8_DILATION_1": "One is an ape --- the other, a hairless monster.", + "PAD_8_FRACT_0_5": "One is an ape -- \"the other, \" a hairyless monster?", + "PAD_8_FRACT_0_2": "One is an ape -- \"the other, a hairless monster\"." + }, + "7": { + "INITIAL_BOX": "You hear, brothers? will you let strangers take toog's she away?", + "DEFAULT": "You hear, brothers? will you let strangers take toog's she away?", + "DEFAULT_GREY_PAD": "You hear, brothers? will you let strangers take toog's she away?", + "PADDED_4": "You hear, brothers? will you let strangers take todg's she away?", + "PADDED_8": "You hear, brothers? will you let strangers take toog's she away?", + "EXTRACTED_INIT_BOX": "You'll hear, brothers? will you live to see tomorrow morning for tom's bar away?", + "PADDED_4_EXTRACTED": "You hear, brothers? will you let strommebjer get twice todd's share away?", + "PADDED_8_EXTRACTED": "You mean, brothers? will you let cyborgers take todd's gun away?", + "PADDED_8_DILATION_1": "You hear, brothers? will you let strangers take todd's life away?", + "PAD_8_FRACT_0_5": "You hear, brothers? unveil you! let o'bannion take those frogs' car away?", + "PAD_8_FRACT_0_2": "You hear, brothers? 
will you let o'banioners take toodog's share away?" + }, + "8": { + "INITIAL_BOX": "Your she is no concern of ours--but we will not let any invade our part of the jungle.", + "DEFAULT": "Your she is no concern of ours--but we will not let any invade our part of the jungle.", + "DEFAULT_GREY_PAD": "Your she is no concern of ours--but we will not let any invade our part of the jungle.", + "PADDED_4": "Your she is no concern of ours--but we will not let any invade our part of the jungle.", + "PADDED_8": "Your she is no concern of ours--but we will not let any invade our part of the jungle.", + "EXTRACTED_INIT_BOX": "Your she is no conquest of ours --\" but he will not lift any invade our part of the jungle.", + "PADDED_4_EXTRACTED": "Your sister is no concern of ours--buy he will not let any invade our party of the jungle.", + "PADDED_8_EXTRACTED": "Nousr shie is no conversation of ours--buy we will not let any invaders on our part of the jungle", + "PADDED_8_DILATION_1": "Your ship is no concern of ours--but we will not let any invade our part of the jungle.", + "PAD_8_FRACT_0_5": "Your ship is no match for ours--but we will not let any invader take our part of the jungle!", + "PAD_8_FRACT_0_2": "Your ship is no connection of ours --\" but we will not let any invader cut our part of the jungle!" + }, + "9": { + "INITIAL_BOX": "Come ... and drag the she along! we will leap on them from hiding.", + "DEFAULT": "Come ... and drag the she along! we will leap on them from hiding.", + "DEFAULT_GREY_PAD": "Come ... and drag the she along! we will leap on them from hiding.", + "PADDED_4": "Come ... and drag the she along! we will leap on them from hiding.", + "PADDED_8": "Come ... and drag the she along! we will leap on them from hiding.", + "EXTRACTED_INIT_BOX": "\"come --\" and drag the she along! we will leap on them from hiding.", + "PADDED_4_EXTRACTED": "Coming -- and drag the she along! 
we will leap on them from hiding.", + "PADDED_8_EXTRACTED": "Coming --and dragging them she along! we will leap on them from hidings.", + "PADDED_8_DILATION_1": "Comes...and drag, the she along! we will leap on them from hiding.", + "PAD_8_FRACT_0_5": "Coming --and dragging the shit along! we will leap on them from hiding.", + "PAD_8_FRACT_0_2": "Coming --\"and drag the shit along! we will leap on them from hiding\"." + }, + "10": { + "INITIAL_BOX": "They are following toog's spoor!", + "DEFAULT": "They are following toog's spoor!", + "DEFAULT_GREY_PAD": "They are following toog's spoor!", + "PADDED_4": "They are following toog's spoor!", + "PADDED_8": "They are following toog's spoor!", + "EXTRACTED_INIT_BOX": "They are following toog's spoor!", + "PADDED_4_EXTRACTED": "They are following toog's smooor?", + "PADDED_8_EXTRACTED": "They are following toog's spoor!", + "PADDED_8_DILATION_1": "They are following toog's spoor!", + "PAD_8_FRACT_0_5": "They are following too's spoor?", + "PAD_8_FRACT_0_2": "They were following too's yooor?" + }, + "11": { + "INITIAL_BOX": "Teeka was here.", + "DEFAULT": "Teeka was here.", + "DEFAULT_GREY_PAD": "Teeka was here.", + "PADDED_4": "Teeka was here.", + "PADDED_8": "Teeka was here.", + "EXTRACTED_INIT_BOX": "Yeehaw was here.", + "PADDED_4_EXTRACTED": "Yeeka was here.", + "PADDED_8_EXTRACTED": "Yeeka was here.", + "PADDED_8_DILATION_1": "Yeeka was here.", + "PAD_8_FRACT_0_5": "Yeena was here.", + "PAD_8_FRACT_0_2": "Yeva was here." 
+ }, + "12": { + "INITIAL_BOX": "Soon afterward, as tarzan and tug reach the selves same grove ...", + "DEFAULT": "Soon afterward, as tarzan and tang reach the selfsame grove ...", + "DEFAULT_GREY_PAD": "Soon afterward, as tarzan and tang reach the selfsame grove ...", + "PADDED_4": "Soon afterward, as tarzan and taj reach the selvesame grove ...", + "PADDED_8": "Soon afterward, as tarzan and tang reach the selfsame grove ...", + "EXTRACTED_INIT_BOX": "Soon afterworded as three-fourths of you reach the seaside once more ...", + "PADDED_4_EXTRACTED": "Soon afterwards, as thirty years ago, king george iii reached the age of...", + "PADDED_8_EXTRACTED": "Soon after that, as thirty-two and talking preach the selfsame song...", + "PADDED_8_DILATION_1": "Soon afterword, as tarkazan and takmes reach time selfsame grove ...", + "PAD_8_FRACT_0_5": "Soon after that, as tarazen and talus reach the shelter of same roof ...", + "PAD_8_FRACT_0_2": "Soon after shishimaro, as tarzan and tajun reach the selfsame grove ..." + }, + "13": { + "INITIAL_BOX": "Taugg! tarzan?", + "DEFAULT": "Taugg! tarzan?", + "DEFAULT_GREY_PAD": "Taug! tarzan?", + "PADDED_4": "Taug! tarzan?", + "PADDED_8": "Taug! tarzan?", + "EXTRACTED_INIT_BOX": "\" whoa!?", + "PADDED_4_EXTRACTED": "\"yahoo? darzan\"", + "PADDED_8_EXTRACTED": "Twug? darzany?", + "PADDED_8_DILATION_1": "Taug! tarzan", + "PAD_8_FRACT_0_5": "Yaz? darzan!", + "PAD_8_FRACT_0_2": "Tanug! tarzan" + }, + "14": { + "INITIAL_BOX": "Kretech4h?", + "DEFAULT": "Kretoah?", + "DEFAULT_GREY_PAD": "Kretegah?", + "PADDED_4": "Kre-o-ah?", + "PADDED_8": "Kretegah?", + "EXTRACTED_INIT_BOX": "Kre g'ah! *", + "PADDED_4_EXTRACTED": "Kreel: \"ah!", + "PADDED_8_EXTRACTED": "Kreel \"ah!", + "PADDED_8_DILATION_1": "Kreee-ahh?", + "PAD_8_FRACT_0_5": "Kreel \"ah!", + "PAD_8_FRACT_0_2": "Kre! o-ah?" 
+ }, + "15": { + "INITIAL_BOX": "Toog will!?", + "DEFAULT": "Toog will!", + "DEFAULT_GREY_PAD": "Toog will!", + "PADDED_4": "Toog will!", + "PADDED_8": "Toog will!", + "EXTRACTED_INIT_BOX": "Toog will?", + "PADDED_4_EXTRACTED": "Toog will?", + "PADDED_8_EXTRACTED": "Toog will?", + "PADDED_8_DILATION_1": "Toog will!?", + "PAD_8_FRACT_0_5": "Tood hell?", + "PAD_8_FRACT_0_2": "Tood hymall?" + }, + "16": { + "INITIAL_BOX": "Yes! i smell her spoor...and the other's.", + "DEFAULT": "Yes! i smell her spoor...and the other's.", + "DEFAULT_GREY_PAD": "Yes! i smell her spoor...and the other's.", + "PADDED_4": "Yes! i smell her spoor...and the other's.", + "PADDED_8": "Yes! i smell her spoor...and the other's.", + "EXTRACTED_INIT_BOX": "Yes! i smell merr spook and me o'the", + "PADDED_4_EXTRACTED": "This! i smell mer spoor and he's out of time.", + "PADDED_8_EXTRACTED": "Yipes! i smell her spor and me odyne", + "PADDED_8_DILATION_1": "Yes! i smell her spoor and he othe", + "PAD_8_FRACT_0_5": "Yep! i smell their spoor and me oyna?", + "PAD_8_FRACT_0_2": "Yes! i smell her spoor and me owime?" + }, + "17": { + "INITIAL_BOX": "But, where can--?", + "DEFAULT": "But, where can--?", + "DEFAULT_GREY_PAD": "But, where can--?", + "PADDED_4": "But, where can--?", + "PADDED_8": "But, where can--?", + "EXTRACTED_INIT_BOX": "\" biggie, i'm sorry! \"", + "PADDED_4_EXTRACTED": "Booth, by the way...", + "PADDED_8_EXTRACTED": "But, nowhere! a", + "PADDED_8_DILATION_1": "But, where?", + "PAD_8_FRACT_0_5": "But, nowhere? a", + "PAD_8_FRACT_0_2": "But, nowhere? a" + }, + "18": { + "INITIAL_BOX": "Toog! keep her quiet?", + "DEFAULT": "Toog! keep her quiet?", + "DEFAULT_GREY_PAD": "Toog! keep her quiet?", + "PADDED_4": "Toog! keep her quiet?", + "PADDED_8": "Toog! keep her quiet?", + "EXTRACTED_INIT_BOX": "Toog! keep her quiet?", + "PADDED_4_EXTRACTED": "Toog! keep her quiet?", + "PADDED_8_EXTRACTED": "Toog! keep her quiet?", + "PADDED_8_DILATION_1": "Toog! 
keep her quiet?", + "PAD_8_FRACT_0_5": "Too bad! keep her quiet?", + "PAD_8_FRACT_0_2": "Toog! keep her quiet?" + }, + "19": { + "INITIAL_BOX": "Continue... after next page", + "DEFAULT": "Continue after next page", + "DEFAULT_GREY_PAD": "Continue... after next page", + "PADDED_4": "Continue after next page", + "PADDED_8": "Continue after next page.", + "EXTRACTED_INIT_BOX": "\" i don't know\" he muttered next page", + "PADDED_4_EXTRACTED": "Login vuger matter next page", + "PADDED_8_EXTRACTED": "Giant mole\" after next page.", + "PADDED_8_DILATION_1": "Continuer after next page", + "PAD_8_FRACT_0_5": "Continue? after next page.", + "PAD_8_FRACT_0_2": "Continue? after next page." + }, + "20": { + "INITIAL_BOX": "But he is an instant too late...", + "DEFAULT": "But he is an instant too late ...", + "DEFAULT_GREY_PAD": "But he is an instant too late ...", + "PADDED_4": "But he is an instant too late...", + "PADDED_8": "But he is an instant too late ...", + "EXTRACTED_INIT_BOX": "But he is an instant too late ...", + "PADDED_4_EXTRACTED": "But he is an instant two late ...", + "PADDED_8_EXTRACTED": "But he is an instant too late ...", + "PADDED_8_DILATION_1": "But he is an instant too late ...", + "PAD_8_FRACT_0_5": "But he is an instant too late ...", + "PAD_8_FRACT_0_2": "But he is an instant too late ..." 
+ } + }, + "Transformers_-_Unicron_000-004.jpg": { + "0": { + "DEFAULT_GREY_PAD": "With the light of hope there is always a chance for victory.", + "INITIAL_BOX": "With the light of hope, there is always a chance for victory.", + "DEFAULT": "With the light of hope, there is always a chance for victory.", + "PADDED_4": "With the light of hope, there is always a chance for victory.", + "PADDED_8": "With the light of hope there is always a chance for victory.", + "EXTRACTED_INIT_BOX": "Anytime the light of hope fades there is always a chance for victory.", + "PADDED_4_EXTRACTED": "Kitty: the light of hope there is always a chance for victory.", + "PADDED_8_EXTRACTED": "With the light of hope there is always a chance for victory.", + "PADDED_8_DILATION_1": "With the light of hope there is always a chance for victory.", + "PAD_8_FRACT_0_5": "With the light of hope there is always a chance for victory.", + "PAD_8_FRACT_0_2": "With the light of hope there is always a chance for victory." + }, + "1": { + "INITIAL_BOX": "Platitudes are useless, prime. we have to hit that thing--now.", + "DEFAULT": "Platitudes are useless, prime. we have to hit that thing--now.", + "DEFAULT_GREY_PAD": "Platitudes are useless, prime. we have to hit that thing--now.", + "PADDED_4": "Platitudes are useless, prime. we have to hit that thing--now.", + "PADDED_8": "Platitudes are useless, prime. we have to hit that thing--now.", + "EXTRACTED_INIT_BOX": "Platitudes are useless primes. we have to hit that thing--anyway.", + "PADDED_4_EXTRACTED": "Platitudes are useless prime. we have to hit that thing--now.", + "PADDED_8_EXTRACTED": "Flatulences are messages prime. we have to hit that t-hings--more.", + "PADDED_8_DILATION_1": "Platitudes are useless, prime. we have to hit that thing--now.", + "PAD_8_FRACT_0_5": "Platitudes are useless to hit that thing--wow.", + "PAD_8_FRACT_0_2": "Flattery is a weapon primarily used to hit that things--now." 
+ }, + "2": { + "INITIAL_BOX": "Pyra magna torchbearer.", + "DEFAULT": "Pyra magna, torchbearer.", + "DEFAULT_GREY_PAD": "Pyra magna torchbearer.", + "PADDED_4": "Pyra magna, torchbearer.", + "PADDED_8": "Pyra magna, torchbearer.", + "EXTRACTED_INIT_BOX": "Python. helghagn hj. to rgb e-mailer.", + "PADDED_4_EXTRACTED": "Pyhf. hjalgh. turch belier.", + "PADDED_8_EXTRACTED": "Python high middle eastern.", + "PADDED_8_DILATION_1": "Pyra magna torchbearer.", + "PAD_8_FRACT_0_5": "Pity him magg. tough brewer.", + "PAD_8_FRACT_0_2": "Pytha imagina, to righe bearer." + }, + "3": { + "INITIAL_BOX": "You know i've never run from a fight, pyra.", + "DEFAULT": "You know i've never run from a fight, pyra.", + "DEFAULT_GREY_PAD": "You know i've never run from a fight, pyra.", + "PADDED_4": "You know i've never run from a fight, pyra.", + "PADDED_8": "You know i've never run from a fight, pyra.", + "EXTRACTED_INIT_BOX": "You always say i've never run from a fight, pyra.", + "PADDED_4_EXTRACTED": "You know how i've never run from a fight, pytha.", + "PADDED_8_EXTRACTED": "You always have never been down from a fight, pyra.", + "PADDED_8_DILATION_1": "You know i've never run from a fight, pyra.", + "PAD_8_FRACT_0_5": "You know how i've never run from a fight, dyra?", + "PAD_8_FRACT_0_2": "You know i've never been in from a fight, dyra." + }, + "4": { + "INITIAL_BOX": "But even if this was winable--and it clearly is not--ask yourself ....", + "DEFAULT": "But even if this was winnable--and it clearly is not--ask yourself...", + "DEFAULT_GREY_PAD": "But even if this was winnable--and it clearly is not--ask yourself...", + "PADDED_4": "But even if this was winnable--and it clearly is not--ask yourself...", + "PADDED_8": "But even if this was winnable--and it clearly is not--ask yourself...", + "EXTRACTED_INIT_BOX": "But even if this was viable--and it clearly is not! ask yourself?", + "PADDED_4_EXTRACTED": "But even if this was viable--and it clearly is not! 
ask yourself...", + "PADDED_8_EXTRACTED": "But even if this pass was maneuverable - and it clearly is not - ask yourself, ...", + "PADDED_8_DILATION_1": "But even if this was winnable -and it clearly is not- ask yourself...", + "PAD_8_FRACT_0_5": "\" but even if this has happened, i'm still capable - and it clearly is! not ask yourself ...?", + "PAD_8_FRACT_0_2": "But even if this was viable--and it certainly is not! ask yourself, ..." + }, + "5": { + "INITIAL_BOX": "... did we come here to end lives or save them?", + "DEFAULT": "... did we come here to end lives or save them?", + "DEFAULT_GREY_PAD": "... did we come here to end lives or save them?", + "PADDED_4": "... did we come here to end lives or save them?", + "PADDED_8": "... did we come here to end lives or save them?", + "EXTRACTED_INIT_BOX": "Did we come here to end lives of slaves? them?", + "PADDED_4_EXTRACTED": "Did i come here to end limits or save them?", + "PADDED_8_EXTRACTED": "... did who come here to end or save them?", + "PADDED_8_DILATION_1": "Did we come here to end lives or save them?", + "PAD_8_FRACT_0_5": "Did what come here to end lives or save them?", + "PAD_8_FRACT_0_2": "... did what? come here to end lives or save them?" + }, + "6": { + "INITIAL_BOX": "That's where i come in. this doohickey\"ll create a world-spannin' space bridge-- and get every living being outta there.", + "DEFAULT": "That's where i come in. this doohickey'll create a world-spannin' space bridge - and get every living being outta there.", + "DEFAULT_GREY_PAD": "That's where i come in. this doohickey'll create a world-spannin' space bridge-- and get every living being outta there.", + "PADDED_4": "That's where i come in. this doohickey'll create a world-spannin' space bridge-- and get every living being outta there.", + "PADDED_8": "That's where i come in. this doohickey'll create a world-spannin' space bridge-- and get every living being outta there.", + "EXTRACTED_INIT_BOX": "That's where i come in! 
this moonickyll create a world-spannin', space bridge - and get every living being outta there?", + "PADDED_4_EXTRACTED": "That's somewhere i come in. this boomwickedly l. creates a world shannin' space bridge - and gets every living being outta there!", + "PADDED_8_EXTRACTED": "That's me! i came in this worldly fashion - a space bridge - and get every living being outta here?", + "PADDED_8_DILATION_1": "That's where i come in. this doohickey /ll create a world spanning space bridge - and get every living being outta there", + "PAD_8_FRACT_0_5": "That's pretty cool i come in this dope hickerylul creates a world dannin' space bridge - and get every living being outta there!", + "PAD_8_FRACT_0_2": "That 's where i come in this doohickey will create a world-shattering space bridge - and get every living thing outta thaer" + }, + "7": { + "INITIAL_BOX": "We don't know what this anomaly is, or why it's doing this--let alone how to stop something bigger than a planet.", + "DEFAULT": "We don't know what this anomaly is, or why it's doing this--let alone how to stop something bigger than a planet.", + "DEFAULT_GREY_PAD": "We don't know what this anomaly is, or why it's doing this--let alone how to stop something bigger than a planet.", + "PADDED_4": "We don't know what this anomaly is, or why it's doing this--let alone how to stop something bigger than a planet.", + "PADDED_8": "We don't know what this anomaly is, or why it's doing this--let alone how to stop something bigger than a planet.", + "EXTRACTED_INIT_BOX": "We don't know what this is or why we're doing this - let alone how to stop anything bigger than a plummet.", + "PADDED_4_EXTRACTED": "We don't know what this anomaly is or why it's doing this - list all one to stop something bigger than a planet?", + "PADDED_8_EXTRACTED": "We don't know what this anomaly is or why it's doing this - left alone how to stop something bigger than a planet?", + "PADDED_8_DILATION_1": "We don't know what this anomaly is or why its 
doing this-let alone how to stop something bigger than a planet.", + "PAD_8_FRACT_0_5": "We don't know what this anomaly is or why it's doing this-- let alone how to stop something bigger than a planet?", + "PAD_8_FRACT_0_2": "We don't know what this anomaly is or why it's doing this--it's alone how to stop something bigger than a planet?" + }, + "8": { + "INITIAL_BOX": "The anomaly is here because our world called it.", + "DEFAULT": "The anomaly is here because our world called it.", + "DEFAULT_GREY_PAD": "The anomaly is here because our world called it.", + "PADDED_4": "The anomaly is here because our world called it.", + "PADDED_8": "The anomaly is here because our world called it.", + "EXTRACTED_INIT_BOX": "\"the anomaly is here because our world called it.", + "PADDED_4_EXTRACTED": "The anomaly is here because our world called it.", + "PADDED_8_EXTRACTED": "The anomaly is here because our world called it.", + "PADDED_8_DILATION_1": "\"the anomaly is here because our world called it.\".", + "PAD_8_FRACT_0_5": "\"the anomaly is here because our world called it.", + "PAD_8_FRACT_0_2": "The anomaly is here because our world called it." + }, + "9": { + "INITIAL_BOX": "I will not let another civilization die because of cybertron's actions.", + "DEFAULT": "I will not let another civilization die because of cybertron's actions.", + "DEFAULT_GREY_PAD": "I will not let another civilization die because of cybertron's actions.", + "PADDED_4": "I will not let another civilization die because of cybertron's actions.", + "PADDED_8": "I will not let another civilization die because of cybertron's actions.", + "EXTRACTED_INIT_BOX": "I will not let another civilization die because of cybertron's actions.", + "PADDED_4_EXTRACTED": "I will. 
not let another civilization die because of cyborg actions!", + "PADDED_8_EXTRACTED": "I will not let another civilization die because of evertron's actions.", + "PADDED_8_DILATION_1": "I will not let another civilization die because of cybertron's actions.", + "PAD_8_FRACT_0_5": "I will not let another civilization die because of egotron's actions.", + "PAD_8_FRACT_0_2": "I will not list another civilization dies because of cybertron's actions." + }, + "10": { + "INITIAL_BOX": "I appreciate the aid, optimus prime...", + "DEFAULT": "I appreciate the aid, optimus prime ...", + "DEFAULT_GREY_PAD": "I appreciate the aid optimus prime...", + "PADDED_4": "I appreciate the aid, optimus prime ...", + "PADDED_8": "I appreciate the aid, optimus prime ...", + "EXTRACTED_INIT_BOX": "J. apparently type a optimists few", + "PADDED_4_EXTRACTED": "I appreciate this aid, optimus prime...", + "PADDED_8_EXTRACTED": "I appreciate time, and optimum prime...", + "PADDED_8_DILATION_1": "I appreciate the aid, optimus prime ...", + "PAD_8_FRACT_0_5": "I appreciate the aid, optimus prime...", + "PAD_8_FRACT_0_2": "I appreciate the aid, optimus prime..." 
+ }, + "11": { + "INITIAL_BOX": "We just need about thirty of these suckers placed at strategic, uh, places.", + "DEFAULT": "We just need about thirty of these suckers placed at strategic, uh, places.", + "DEFAULT_GREY_PAD": "We just need about thirty of these suckers placed at strategic, uh, places.", + "PADDED_4": "We just need about thirty of these suckers placed at strategic, uh, places.", + "PADDED_8": "We just need about thirty of these suckers placed at strategic, uh, places.", + "EXTRACTED_INIT_BOX": "We just need about thirty / of these suckers placed at strategic points in places.", + "PADDED_4_EXTRACTED": "We just need about thirty of these suckers placed at strategic points.", + "PADDED_8_EXTRACTED": "We just need about twenty of these guys placed at strategic points.", + "PADDED_8_DILATION_1": "We just need about thirty of these suckers placed at strategic uh places.", + "PAD_8_FRACT_0_5": "We just need about three weeks / of that time to place at strategic locations.", + "PAD_8_FRACT_0_2": "We just need about three seconds to place at strategic points along the way of their march." + }, + "12": { + "INITIAL_BOX": "On elonia, if that wasn't implied.", + "DEFAULT": "On elonia, if that wasn't implied.", + "DEFAULT_GREY_PAD": "On elonia, if that wasn't implied.", + "PADDED_4": "On elonia, if that wasn't implied.", + "PADDED_8": "On elonia, if that wasn't implied.", + "EXTRACTED_INIT_BOX": "On earth, if that wasn't impolite.", + "PADDED_4_EXTRACTED": "On zeck ossa, if that hasn't implied.", + "PADDED_8_EXTRACTED": "Can i be honest, if that hasn't impaired you?", + "PADDED_8_DILATION_1": "On elonia, if that wasn't implied.", + "PAD_8_FRACT_0_5": "On becomes, if that isn't impudent enough?", + "PAD_8_FRACT_0_2": "On bloom! if that isn't impudent?" 
+ }, + "13": { + "INITIAL_BOX": "Wheeljack, mad scientist.", + "DEFAULT": "Wheeljack, mad scientist.", + "DEFAULT_GREY_PAD": "Wheeljack, mad scientist.", + "PADDED_4": "Wheeljack, mad scientist.", + "PADDED_8": "Wheeljack, mad scientist.", + "EXTRACTED_INIT_BOX": "In the wheel-jack who scifiest...", + "PADDED_4_EXTRACTED": "My wheel-jack, who sci-fist.", + "PADDED_8_EXTRACTED": "In the wheeljack who scifist.", + "PADDED_8_DILATION_1": "W.h.e.l.j.a.c.k., m.d. scientist.", + "PAD_8_FRACT_0_5": "\" wheel-jack, who scientist.", + "PAD_8_FRACT_0_2": "Why wheel-jack? mad scientist." + }, + "14": { + "INITIAL_BOX": "... yet surely you did not believe the solstar order would be without its own defenses.", + "DEFAULT": "... yet surely you did not believe the solstar order would be without its own defenses.", + "DEFAULT_GREY_PAD": "... yet surely you did not believe the solstar order would be without its own defenses.", + "PADDED_4": "... yet surely you did not believe the solstar order would be without its own defenses.", + "PADDED_8": "... yet surely you did not believe the solstar order would be without its own defenses.", + "EXTRACTED_INIT_BOX": "\" yet surely you did not believe the seas-tair would be without its own defenders.", + "PADDED_4_EXTRACTED": "... yet surely you did not believe the sons-of-the-star would be without its own defenders.", + "PADDED_8_EXTRACTED": "Yet surely you did not believe the sons of starfire would be without its own defenders.", + "PADDED_8_DILATION_1": "...yet surely you did not believe the solstar order would be without its own defenses.", + "PAD_8_FRACT_0_5": "... yet surely you did not believe the gangstar would be without its own defense?", + "PAD_8_FRACT_0_2": "... yet surely you did not believe the golltar order would be without its own deepspaceboats." 
+ }, + "15": { + "INITIAL_BOX": "Rom was the first of the greatest warriors the cosmos has ever known......", + "DEFAULT": "Rom was the first of the greatest warriors the cosmos has ever known....", + "DEFAULT_GREY_PAD": "Rom was the first of the greatest warriors the cosmos has ever known ...", + "PADDED_4": "Rom was the first of the greatest warriors the cosmos has ever known....", + "PADDED_8": "Rom was the first of the greatest warriors the cosmos has ever known....", + "EXTRACTED_INIT_BOX": "Roam maks the first of the greatest warriors the clanshas has ever known", + "PADDED_4_EXTRACTED": "Rom was the first of the great boy warriors. the cosmos has ever known.", + "PADDED_8_EXTRACTED": "Roman was the first of the great boy warriors the cosmos has ever known...", + "PADDED_8_DILATION_1": "Rom was the first of the greatest warriors the cosmos has ever known...", + "PAD_8_FRACT_0_5": "Rom was the first of the greatest boy warriors the cosmos has ever known...", + "PAD_8_FRACT_0_2": "Romm was the first of the greatest warriors the cosmos has ever known..." + } + }, + "Transformers_-_Unicron_000-016.jpg": { + "0": { + "DEFAULT_GREY_PAD": "... if you please, windblade.", + "INITIAL_BOX": "\"\"if you please, windblade.", + "DEFAULT": "\"\"... if you please, windblade.", + "PADDED_4": "... if you please, windblade.", + "PADDED_8": "... if you please, windblade.", + "EXTRACTED_INIT_BOX": "If you please, know yourself - and die?", + "PADDED_4_EXTRACTED": ". if you please, wait until i've finished?", + "PADDED_8_EXTRACTED": ". if you please, mister--i'm yours?", + "PADDED_8_DILATION_1": "If you please, windblade.", + "PAD_8_FRACT_0_5": ". if you please, www.dell-us.", + "PAD_8_FRACT_0_2": ". if you please, mister beladee?" 
+ }, + "1": { + "INITIAL_BOX": "Our allies have been tracking this anomaly since the destruction of a planet called lv-117 some years ago.", + "DEFAULT": "Our allies have been tracking this anomaly since the destruction of a planet called lv-1117 some years ago.", + "DEFAULT_GREY_PAD": "Our allies have been tracking this anomaly since the destruction of a planet called lv-117 some years ago.", + "PADDED_4": "Our allies have been tracking this anomaly since the destruction of a planet called lv-117 some years ago.", + "PADDED_8": "Our allies have been tracking this anomaly since the destruction of a planet called lv-1117 some years ago.", + "EXTRACTED_INIT_BOX": "Mr. allan's have been tracking this anomaly/ since the destruction of a planet called alvary some years ago", + "PADDED_4_EXTRACTED": "Our allies have been tracking this anomaly since the destruction of a planet called "vega" 300 years ago.", + "PADDED_8_EXTRACTED": "Our allies have been tracking this anomaly since the destruction of a planet called vega 50 years ago.", + "PADDED_8_DILATION_1": "Our allies have been tracking this anomaly since the destruction of a planet called livistag some years ago.", + "PAD_8_FRACT_0_5": "Our allies have been tracking this anomaly since the destruction of a planet called cythera five years ago.", + "PAD_8_FRACT_0_2": "Our allies have been tracking this anomaly since the destruction of a flanny called eve back in 2017. bombs years ago!" 
+ }, + "2": { + "INITIAL_BOX": "But lv-117 was unstuck in time.", + "DEFAULT": "But lv-117 was unstuck in time.", + "DEFAULT_GREY_PAD": "But lv-117 was unstuck in time.", + "PADDED_4": "But lv-117 was unstuck in time.", + "PADDED_8": "But lv-117 was unstuck in time.", + "EXTRACTED_INIT_BOX": "Alt jw-177 was stricken in 3712.", + "PADDED_4_EXTRACTED": "Bwt lw-117 kms6 chemtrails in time.", + "PADDED_8_EXTRACTED": "But lt.-117 was completely wrong in time.", + "PADDED_8_DILATION_1": "But ly-17 was unstuck in time.", + "PAD_8_FRACT_0_5": "But lw-117 was somewhere else in time.", + "PAD_8_FRACT_0_2": "But lw-177 was no threat in time." + }, + "3": { + "INITIAL_BOX": "What does that mean?", + "DEFAULT": "What does that mean?", + "DEFAULT_GREY_PAD": "What does that mean?", + "PADDED_4": "What does that mean?", + "PADDED_8": "What does that mean?", + "EXTRACTED_INIT_BOX": "What does that mean?", + "PADDED_4_EXTRACTED": "What does that mean?", + "PADDED_8_EXTRACTED": "What does that mean?", + "PADDED_8_DILATION_1": "What does that mean?", + "PAD_8_FRACT_0_5": "What does that mean?", + "PAD_8_FRACT_0_2": "\"what does that mean?" 
+ }, + "4": { + "INITIAL_BOX": "The world was infused with a regenesis ore like the one that forged your armor--this one altering the flow of time.", + "DEFAULT": "The world was infused with a regenesis ore like the one that forged your armor--this one altering the flow of time.", + "DEFAULT_GREY_PAD": "The world was infused with a regenesis ore like the one that forged your armor--this one altering the flow of time.", + "PADDED_4": "The world was infused with a regenesis ore like the one that forged your armor--this one altering the flow of time.", + "PADDED_8": "The world was infused with a regenesis ore like the one that forged your armor--this one altering the flow of time.", + "EXTRACTED_INIT_BOX": "The world was infused with a brutalism so like the one that forged your armor - this one altering the flow of time.", + "PADDED_4_EXTRACTED": "The world was infused with a blazing sense of one like the one that opened you up armor - this one altering the flow of time.", + "PADDED_8_EXTRACTED": "The world was infused with a bewildering sense of like the one that forced your armore - thus one altering the flow of time.", + "PADDED_8_DILATION_1": "The world was infused with a regenesis or like the one that forged your armor--this one altering the flow of time.", + "PAD_8_FRACT_0_5": "The world was infused with a breathtaking sense of like the one that forged your armor--this one altering the flow of time.", + "PAD_8_FRACT_0_2": "The world was infused with a bio-energy so like the one that would armor--this or altering the flow of time." 
+ }, + "5": { + "INITIAL_BOX": "While the effects of lv-117's destruction were apparent long ago, the actual attack occurred only last week.", + "DEFAULT": "While the effects of lv-177's destruction were apparent long ago, the actual attack occurred only last week.", + "DEFAULT_GREY_PAD": "While the effects of lv-117's destruction were apparent long ago, the actual attack occurred only last week.", + "PADDED_4": "While the effects of lv-117's destruction were apparent long ago, the actual attack occurred only last week.", + "PADDED_8": "While the effects of lv-117's destruction were apparent long ago, the actual attack occurred only last week.", + "EXTRACTED_INIT_BOX": "While the effects of ux-nyms destruction were apparent long ago, the actual attack occurred only last week.", + "PADDED_4_EXTRACTED": "While the effects of lw-19's destruction were apparent coming along, the actual attack occurred only last week.", + "PADDED_8_EXTRACTED": "While the effects of ultron's destruction were apparent & long ago, the actual attack occurred only last week.", + "PADDED_8_DILATION_1": "While the effects of livh'ys destruction were apparent long ago, the actual attack occurred only last week.", + "PAD_8_FRACT_0_5": "While the effects of uv-rays destruction were apparent along ago, the actual attack was only last week.", + "PAD_8_FRACT_0_2": "While the effects of lv-4198's destruction were apparent long ago, the actual attack occurred only last week." 
+ }, + "6": { + "INITIAL_BOX": "After cybertron's invasion by human forces, a signal was sent into space: \"welcome death.\".", + "DEFAULT": "After cybertron's invasion by human forces, a signal was sent into space: \"welcome death.\".", + "DEFAULT_GREY_PAD": "After cybertron's invasion by human forces, a signal was sent into space: \"welcome death.\".", + "PADDED_4": "After cybertron's invasion by human forces, a signal was sent into space: \"welcome death.\".", + "PADDED_8": "After cybertron's invasion by human forces, a signal was sent into space: \"welcome death.\".", + "EXTRACTED_INIT_BOX": "After cybertron's invasion by human forcex a signal was sent into space: \"welcome to death...\".", + "PADDED_4_EXTRACTED": "After cybertron's invasion by human forces a signal was sent into space that was picked up by \"death\"!?", + "PADDED_8_EXTRACTED": "After cybertron's invasion by human forces a signal was sent into space warning them of our impending death.", + "PADDED_8_DILATION_1": "After cybertron's invasion by human forces signal was sent into space \"welcome death\"", + "PAD_8_FRACT_0_5": "After cybertron's invasion by human forces a signal was sent into space stating \"welcome to earth\".", + "PAD_8_FRACT_0_2": "After cybertron's invasion by human forces, a signal was sent into space, \"well come death\"." + }, + "7": { + "INITIAL_BOX": "Not terribly helpful.", + "DEFAULT": "Not terribly helpful.", + "DEFAULT_GREY_PAD": "Not terribly helpful.", + "PADDED_4": "Not terribly helpful.", + "PADDED_8": "Not terribly helpful.", + "EXTRACTED_INIT_BOX": "Not terribly helpful.", + "PADDED_4_EXTRACTED": "Not terribly helpful.", + "PADDED_8_EXTRACTED": "Not terribly helpful.", + "PADDED_8_DILATION_1": "Not terribly helpful.", + "PAD_8_FRACT_0_5": "Not terribly helpful.", + "PAD_8_FRACT_0_2": "Not terribly helpful." 
+ }, + "8": { + "INITIAL_BOX": "But a second, more recent transmission was one word, \" something i 'd never heard before ...", + "DEFAULT": "But a second, more recent transmission was one word, something i'd never heard before...", + "DEFAULT_GREY_PAD": "But a second, more recent transmission was one word, something i'd never heard before...", + "PADDED_4": "But a second, more recent transmission was one word, something i'd never heard before...", + "PADDED_8": "But a second, more recent transmission was one word, something i'd never heard before...", + "EXTRACTED_INIT_BOX": "But a second transmission was one word, something i'd never heard before...", + "PADDED_4_EXTRACTED": "But a second agency transmission was one word i'd never heard before...", + "PADDED_8_EXTRACTED": "But a second big transmission was one word, something i've never heard before...", + "PADDED_8_DILATION_1": "But a second more recent transmission was one word, something i'd never heard before.", + "PAD_8_FRACT_0_5": "But a second more recent transmission was one word, something it never heard before...", + "PAD_8_FRACT_0_2": "But a second more powerful transformation was one word, something it never heard before..." + }, + "9": { + "INITIAL_BOX": "...unicron.", + "DEFAULT": "...unicron.", + "DEFAULT_GREY_PAD": "...unicron.", + "PADDED_4": "...unicron.", + "PADDED_8": "...unicron.", + "EXTRACTED_INIT_BOX": "I 'm expecting you both to be here at 8 o'clock sharp?", + "PADDED_4_EXTRACTED": "I'm going to go with you?", + "PADDED_8_EXTRACTED": "@#$%^&*()?", + "PADDED_8_DILATION_1": "Unicron.", + "PAD_8_FRACT_0_5": "Laughinggrowl?", + "PAD_8_FRACT_0_2": "...lingggrow?" 
+ }, + "10": { + "INITIAL_BOX": "We learned of the destruction of an uninhabited world, prion, where a titan had died.", + "DEFAULT": "We learned of the destruction of an uninhabited world, prion, where a titan had died.", + "DEFAULT_GREY_PAD": "We learned of the destruction of an uninhabited world, prion, where a titan had died.", + "PADDED_4": "We learned of the destruction of an uninhabited world, prion, where a titan had died.", + "PADDED_8": "We learned of the destruction of an uninhabited world, prion, where a titan had died.", + "EXTRACTED_INIT_BOX": "We learned of the destruction of an unhabited world, known somewhere a star had died.", + "PADDED_4_EXTRACTED": "We learned of the destruction of an unhabited world, far beyond any human realm where a titan had died.", + "PADDED_8_EXTRACTED": "We learned of the destruction of an unhabited world, delta zero, where a 77-straw had died.", + "PADDED_8_DILATION_1": "We learned of the destruction of an uninhabited world, prion, where a titan had died.", + "PAD_8_FRACT_0_5": "We learned of the destruction of an unnamed world, where a titan had died.", + "PAD_8_FRACT_0_2": "We learned of the destruction of an uninhabited world, prison world, where a titan had died." 
+ }, + "11": { + "INITIAL_BOX": "Then another dead planet was consumed - gorlam prime, where i had raised a titan myself.", + "DEFAULT": "Then another dead planet was consumed-- gorlam prime, where i had raised a titan myself.", + "DEFAULT_GREY_PAD": "Then another dead planet was consumed - gorlam prime, where i had raised a titan myself.", + "PADDED_4": "Then another dead planet was consumed - gorlam prime, where i had raised a titan myself.", + "PADDED_8": "Then another dead planet was consumed-- gorlam prime, where i had raised a titan myself.", + "EXTRACTED_INIT_BOX": "Then another dead planet was conquerued by forallan prime, whereas i had raised a titan myself.", + "PADDED_4_EXTRACTED": "Then another dead planet was consumed - godzilla prime, wherever it had raised a titan myself.", + "PADDED_8_EXTRACTED": "Then another dead planet was consumed -- forlorn planet prime, where i had raised a titan myself.", + "PADDED_8_DILATION_1": "Then another dead planet was consumed - gorlamp prime, where i had raised a titan myself.", + "PAD_8_FRACT_0_5": "Then another dead planet was consumed -- gorgon prime, where i had raised a titan myself.", + "PAD_8_FRACT_0_2": "Then another dead planet was conquered - gormlam prime, somewhere i had besieged a titan myself." + }, + "12": { + "INITIAL_BOX": "Next came velocitron - a cybertronian colony. you saw what was left, rom.", + "DEFAULT": "Next came velocitron-- a cybertronian colony. you saw what was left, rom.", + "DEFAULT_GREY_PAD": "Next came velocitron-- a cybertronian colony. you saw what was left, rom.", + "PADDED_4": "Next came velocitron - a cybertronian colony. you saw what was left, rom.", + "PADDED_8": "Next came velocitron-- a cybertronian colony. you saw what was left, rom.", + "EXTRACTED_INIT_BOX": "Next came vergo's tronch - a cybertroian colonize you ain't what was left form", + "PADDED_4_EXTRACTED": "Next came virus/gtron= a cybertonian colony. you saw what was left. 
eom.", + "PADDED_8_EXTRACTED": "Next came veronica-- a cybertronian colonist you saw what he was left.", + "PADDED_8_DILATION_1": "Next came velocitron-- a cybertronian colony. you saw what was left, rom.", + "PAD_8_FRACT_0_5": "Next came vulcanization-- a cybertrojanian colony: \" you saw what was left, zoma.", + "PAD_8_FRACT_0_2": "Next came velocitorion--- a cybertronian colony: you know what was left, bomba." + }, + "13": { + "INITIAL_BOX": "Unicorn is building itself out of the worlds it kills, becoming bigger--more powerful--with each extinction.", + "DEFAULT": "Unicron is building itself out of the worlds it kills, becoming bigger--more powerful--with each extinction.", + "DEFAULT_GREY_PAD": "Unicron is building itself out of the worlds it kills, becoming bigger--more powerful--with each extinction.", + "PADDED_4": "Unicron is building itself out of the worlds it kills, becoming bigger--more powerful--with each extinction.", + "PADDED_8": "Unicron is building itself out of the worlds it kills, becoming bigger--more powerful--with each extinction.", + "EXTRACTED_INIT_BOX": "Junction is building itself out of the worlds it kills, becoming more powerful - with each extinction.", + "PADDED_4_EXTRACTED": "Uniceon is embarrassing itself out of the world's affairs, becoming bigger - more powerful - with each extinction.", + "PADDED_8_EXTRACTED": "Unicorn is building itself out of the world's talks, becoming bigger -- more powerful-- with each extinction.", + "PADDED_8_DILATION_1": "Unicron is building itself out of the world's kills, becoming bigger=more powerful=with each extinction.", + "PAD_8_FRACT_0_5": "Unicon is building itself out of the world's talents, becoming bigger and more powerful with each extinction.", + "PAD_8_FRACT_0_2": "Unicorn is building itself out of the world's bills, becoming bigger--now more powerful--with each extinction." + }, + "14": { + "INITIAL_BOX": "It's murdering cybertron's colonies. 
which means--", + "DEFAULT": "It's murdering cybertron's colonies. which means--", + "DEFAULT_GREY_PAD": "It's murdering cybertron's colonies. which means -", + "PADDED_4": "It's murdering cybertron's colonies. which means--", + "PADDED_8": "It's murdering cybertron's colonies. which means -", + "EXTRACTED_INIT_BOX": "It's mulldezing cybeltron's con games. which means -", + "PADDED_4_EXTRACTED": "It 's mulibeeing cybertron's colonies. which means -", + "PADDED_8_EXTRACTED": "It's mulding cybertron's colonies, which means -", + "PADDED_8_DILATION_1": "It's murdering cybertron's colonies. which means--", + "PAD_8_FRACT_0_5": "It 's muled-deeling cybersition's colonies, which means -", + "PAD_8_FRACT_0_2": "It's mulder being overzealous colonel, which means -" + }, + "15": { + "INITIAL_BOX": "In addition to the rest of our worlds,\" earth is in danger. i have dragged the humans into mortal danger.", + "DEFAULT": "In addition to the rest of our worlds, earth is in danger. i have dragged the humans into mortal danger.", + "DEFAULT_GREY_PAD": "In addition to the rest of our worlds, earth is in danger. i have dragged the humans into mortal danger.", + "PADDED_4": "In addition to the rest of our worlds, earth is in danger. i have dragged the humans into mortal danger.", + "PADDED_8": "In addition to the rest of our worlds, earth is in danger. i have dragged the humans into mortal danger.", + "EXTRACTED_INIT_BOX": "In addition to the rest of que worlds, earth is in danger! i have pragged the humans into more tek dangers?", + "PADDED_4_EXTRACTED": "In addition to the rest of our worlds, earth is in danger! i have fragile humans into more than 764 damage?", + "PADDED_8_EXTRACTED": "In addition to the best of the worlds, earth is in range! i have flown among the humans into more than david's.", + "PADDED_8_DILATION_1": "In addition to the rest of our worlds, earth! 
i have dragged the humans into mortal danger.", + "PAD_8_FRACT_0_5": "In addition to the rest of our worlds, earth is in danger! we have peace among the humans into more than twice their number.", + "PAD_8_FRACT_0_2": "In addition to the rest of our world, earth is in panic! you have plagued the humans into more than i dareber." + }, + "16": { + "INITIAL_BOX": "Three billion elonians died today. ...", + "DEFAULT": "Three billion elonians died today ...", + "DEFAULT_GREY_PAD": "Three billion elonians died today ...", + "PADDED_4": "Three billion elonians died today. ...", + "PADDED_8": "Three billion elonians died today ...", + "EXTRACTED_INIT_BOX": "Three billion elonians died today...", + "PADDED_4_EXTRACTED": "Three billion elonians died today...", + "PADDED_8_EXTRACTED": "Three billion elonians died today...", + "PADDED_8_DILATION_1": "Three billion elonians died today.", + "PAD_8_FRACT_0_5": "Three billion elonians died today...", + "PAD_8_FRACT_0_2": "Three billion flonians died today..." + } + }, + "WARE_ACME_024.jpg": { + "0": { + "DEFAULT_GREY_PAD": "Lil' glyph.", + "INITIAL_BOX": "Lil' glyph", + "DEFAULT": "Lil' glyph.", + "PADDED_4": "Lil' glyph", + "PADDED_8": "Lil' glyph.", + "EXTRACTED_INIT_BOX": "\" glyphh", + "PADDED_4_EXTRACTED": "I' glyphh", + "PADDED_8_EXTRACTED": "I' glyphh", + "PADDED_8_DILATION_1": "I' glyphh", + "PAD_8_FRACT_0_5": "I' glyphh", + "PAD_8_FRACT_0_2": "I' glyph" + }, + "1": { + "INITIAL_BOX": "Almost done...", + "DEFAULT": "Almost done...", + "DEFAULT_GREY_PAD": "Almost done...", + "PADDED_4": "Almost done...", + "PADDED_8": "Almost done...", + "EXTRACTED_INIT_BOX": "Almost done...", + "PADDED_4_EXTRACTED": "Almost done...", + "PADDED_8_EXTRACTED": "Almost done...", + "PADDED_8_DILATION_1": "Almost done...", + "PAD_8_FRACT_0_5": "Almost done...", + "PAD_8_FRACT_0_2": "Almost done..." 
+ }, + "2": { + "INITIAL_BOX": "Oh hey, i -", + "DEFAULT": "Oh hey, i - -", + "DEFAULT_GREY_PAD": "Oh hey, i -", + "PADDED_4": "Oh hey, i -", + "PADDED_8": "Oh hey, i -", + "EXTRACTED_INIT_BOX": "Oh hey, i", + "PADDED_4_EXTRACTED": "Oh hey, i", + "PADDED_8_EXTRACTED": "Oh hey, i", + "PADDED_8_DILATION_1": "Oh hey, i", + "PAD_8_FRACT_0_5": "Oh hey, i", + "PAD_8_FRACT_0_2": "Oh hey, i" + }, + "3": { + "INITIAL_BOX": "Pinnacle of", + "DEFAULT": "Pinnacle of", + "DEFAULT_GREY_PAD": "Pinnacle.", + "PADDED_4": "Pinnacle of", + "PADDED_8": "Pinnacle of", + "EXTRACTED_INIT_BOX": "Pinnacle.", + "PADDED_4_EXTRACTED": "Pinnacle.", + "PADDED_8_EXTRACTED": "Pinnacle of", + "PADDED_8_DILATION_1": "Pinnacle of", + "PAD_8_FRACT_0_5": "Pinnacle of", + "PAD_8_FRACT_0_2": "Pinnacle of" + }, + "4": { + "INITIAL_BOX": "Man oh man ... that sure was one heckuva last night ... whew!", + "DEFAULT": "Man oh man ... that sure was one heckuva orgy last night ... whew!", + "DEFAULT_GREY_PAD": "Man oh man ... that sure was one heckuva orgy last night ... whew!", + "PADDED_4": "Man oh man ... that sure was one heckuva last night ... whew!", + "PADDED_8": "Man oh man ... that sure was one heckuva orgy last night ... whew!", + "EXTRACTED_INIT_BOX": "Man oh man ... that sure was one heck of a night last night ... where?", + "PADDED_4_EXTRACTED": "Man oh man ... that sure was one heckuvva last night ... where?", + "PADDED_8_EXTRACTED": "Man oh man ... that sure was one heckluva drag last night ... where?", + "PADDED_8_DILATION_1": "Man oh man ... that sure was one heckuva big night ... when?", + "PAD_8_FRACT_0_5": "Man oh man ... that sure was one heckuva drigy last night ... where?", + "PAD_8_FRACT_0_2": "Man oh man ... that sure was one heckuva orgy last night ... wow?" 
+ }, + "5": { + "INITIAL_BOX": "Summer", + "DEFAULT": "Summer", + "DEFAULT_GREY_PAD": "Summer", + "PADDED_4": "Summer", + "PADDED_8": "Summer", + "EXTRACTED_INIT_BOX": "Summer", + "PADDED_4_EXTRACTED": "Summer", + "PADDED_8_EXTRACTED": "Summer", + "PADDED_8_DILATION_1": "Summer", + "PAD_8_FRACT_0_5": "Summer", + "PAD_8_FRACT_0_2": "Summer" + }, + "6": { + "INITIAL_BOX": "Oh, those jews", + "DEFAULT": "Oh, those jews", + "DEFAULT_GREY_PAD": "Oh, those jews", + "PADDED_4": "Oh, those jews", + "PADDED_8": "Oh, those jews", + "EXTRACTED_INIT_BOX": "Oh, those jews!", + "PADDED_4_EXTRACTED": "Oh, those jews!", + "PADDED_8_EXTRACTED": "Oh, those jews", + "PADDED_8_DILATION_1": "Oh, those jews!", + "PAD_8_FRACT_0_5": "Oh, those jews", + "PAD_8_FRACT_0_2": "Oh, those jews" + }, + "7": { + "INITIAL_BOX": "Man, fuck this shit", + "DEFAULT": "Man, fuck this shit", + "DEFAULT_GREY_PAD": "Man, fuck this shit", + "PADDED_4": "Man, fuck this shit", + "PADDED_8": "Man, fuck this shit", + "EXTRACTED_INIT_BOX": "Man, fuck this shit", + "PADDED_4_EXTRACTED": "Man, fuck this shit.", + "PADDED_8_EXTRACTED": "Man, fuck this shit", + "PADDED_8_DILATION_1": "Man, this shit", + "PAD_8_FRACT_0_5": "Man, fuck this shit", + "PAD_8_FRACT_0_2": "Man, fuck this shit" + }, + "8": { + "INITIAL_BOX": "I'm sick of hauling ass for those fucking pharaohs.", + "DEFAULT": "I'm sick of hauling ass for those fucking pharaohs.", + "DEFAULT_GREY_PAD": "I'm sick of hauling ass for those fucking pharaohs.", + "PADDED_4": "I'm sick of hauling ass for those fucking pharaohs.", + "PADDED_8": "I'm sick of hauling ass for those fucking pharaohs.", + "EXTRACTED_INIT_BOX": "I'm sick of hauling ass for those fucking pharohs.", + "PADDED_4_EXTRACTED": "I'm sick of hauling ass for those fucking pharohs.", + "PADDED_8_EXTRACTED": "I'm sick of hauling ass for those fucking pharmaholes.", + "PADDED_8_DILATION_1": "I'm sick of hauling ass for those fucking pharaohs.", + "PAD_8_FRACT_0_5": "I'm sick of hauling ass for those 
fucking pharaohs.", + "PAD_8_FRACT_0_2": "I'm sick of hauling ass for those fucking pharaohs." + }, + "9": { + "INITIAL_BOX": "Our new altarpiece\"", + "DEFAULT": "Our new altarpiecetm", + "DEFAULT_GREY_PAD": "Our new altarpiecetm", + "PADDED_4": "Our new altarpiecetm", + "PADDED_8": "Our new altarpiece\"tm", + "EXTRACTED_INIT_BOX": "Our new altarpiece", + "PADDED_4_EXTRACTED": "Our new altarpiece", + "PADDED_8_EXTRACTED": "Lir new altarpiece", + "PADDED_8_DILATION_1": "Our new altarpiece", + "PAD_8_FRACT_0_5": "Lir new altarpiece", + "PAD_8_FRACT_0_2": "Her new altarpiece." + }, + "10": { + "INITIAL_BOX": "And over here are the twelve stations of the cross ...", + "DEFAULT": "And over here are the twelve stations of the cross ...", + "DEFAULT_GREY_PAD": "And over here are the twelve stations of the cross ...", + "PADDED_4": "And over here are the twelve stations of the cross ...", + "PADDED_8": "And over here are the twelve stations of the cross ...", + "EXTRACTED_INIT_BOX": "And over here are the twelve stations of time cross.", + "PADDED_4_EXTRACTED": "And over here we are the twelve stations of the cross ...", + "PADDED_8_EXTRACTED": "And over hereie are the twelve staylions of the cross\"", + "PADDED_8_DILATION_1": "And over hereie are the twelve stations of the cross. \"", + "PAD_8_FRACT_0_5": "And over here are the twelve stations of the cross ...", + "PAD_8_FRACT_0_2": "And over here are the twelve stations of the cross ..." 
+ }, + "11": { + "INITIAL_BOX": "I'll just sneak a little doodle on this wall to believe my anger.", + "DEFAULT": "I'll just sneak a little doodle on this wall to believe my anger.", + "DEFAULT_GREY_PAD": "I'll just sneak a little doodle on this wall to relieve my anger.", + "PADDED_4": "I'll just sneak a little doodle on this wall to relieve my anger.", + "PADDED_8": "I'll just sneak a little doodle on this wall to relieve my anger.", + "EXTRACTED_INIT_BOX": "I'll just sneak a little doodle on this wall to believe my anger.", + "PADDED_4_EXTRACTED": "I'll just sneak a little doodle on this wall to relieve my anger.", + "PADDED_8_EXTRACTED": "I 'll just sneak a little doodle on this wall to believe my anger.", + "PADDED_8_DILATION_1": "I'll just sneak a little doodle on this wall to relieve my anger.", + "PAD_8_FRACT_0_5": "I 'll just sneak a little doodle on this wall to believe my anger.", + "PAD_8_FRACT_0_2": "I 'll just sneak a little doodle on this wall to believe my anger." + }, + "12": { + "INITIAL_BOX": "Ha ha -- they'll never find it here!", + "DEFAULT": "Ha ha -- they'll never find it here!", + "DEFAULT_GREY_PAD": "Ha ha -- they'll never find it here!", + "PADDED_4": "Ha ha -- they'll never find it here!", + "PADDED_8": "Ha ha -- they'll never find it here!", + "EXTRACTED_INIT_BOX": "Hah hah 'they'll never find t here! '", + "PADDED_4_EXTRACTED": "Ha ha! they'll never find 'em here?", + "PADDED_8_EXTRACTED": "Ha ha! they'll never find it here? !", + "PADDED_8_DILATION_1": "Ha ha they'll never find 'em here!", + "PAD_8_FRACT_0_5": "Ha ha! they'll never find 'em here? !", + "PAD_8_FRACT_0_2": "Ha ha they'll never find t here!" 
+ }, + "13": { + "INITIAL_BOX": "Why do you paint twelve christ's, when we know there is only one?", + "DEFAULT": "Why do you paint twelve christ's, when we know there is only one?", + "DEFAULT_GREY_PAD": "Why do you paint twelve christ's, when we know there is only one?", + "PADDED_4": "Why do you paint twelve christ's, when we know there is only one?", + "PADDED_8": "Why do you paint twelve christ's, when we know there is only one?", + "EXTRACTED_INIT_BOX": "Why do you paint twelve christ's, when we know there is only one?", + "PADDED_4_EXTRACTED": "Why do you paint twelvelove christ's, when we know there is only one?", + "PADDED_8_EXTRACTED": "Why do you paint twelve christ's, when we know there is only one?", + "PADDED_8_DILATION_1": "Why do you paint twelve christ's, when we know there is only one?", + "PAD_8_FRACT_0_5": "Why do you paint twelve christ's, when we know there is only one?", + "PAD_8_FRACT_0_2": "Why do you paint twelve christ's, when we know there is only one?" + }, + "14": { + "INITIAL_BOX": "Civilization.", + "DEFAULT": "Civilization.", + "DEFAULT_GREY_PAD": "Civilization.", + "PADDED_4": "Civilization.", + "PADDED_8": "Civilization.", + "EXTRACTED_INIT_BOX": "Iv lizaton", + "PADDED_4_EXTRACTED": "\"iv\" lizaton", + "PADDED_8_EXTRACTED": "Iv lization", + "PADDED_8_DILATION_1": "Iv lization", + "PAD_8_FRACT_0_5": "Iv lization", + "PAD_8_FRACT_0_2": "Iv lization" + }, + "15": { + "INITIAL_BOX": "Oy!", + "DEFAULT": "Oy!", + "DEFAULT_GREY_PAD": "Oy!", + "PADDED_4": "Oy!", + "PADDED_8": "Oy!", + "EXTRACTED_INIT_BOX": "I'm sorry i didn't mean to hurt you.", + "PADDED_4_EXTRACTED": "I'm sorry i didn't mean to hurt you.", + "PADDED_8_EXTRACTED": "I'm sorry i didn't mean to hurt you.", + "PADDED_8_DILATION_1": "I'm sorry i didn't mean to hurt you.", + "PAD_8_FRACT_0_5": "I'm sorry i didn't mean to hurt you.", + "PAD_8_FRACT_0_2": "I'm sorry i didn't mean to hurt you." 
+ }, + "16": { + "INITIAL_BOX": "Match the stereotype with his or her appropriate physiognomic template:", + "DEFAULT": "Match the stereotype with his or her appropriate physiognomic template;", + "DEFAULT_GREY_PAD": "Match the stereotype with his or her appropriate physiognomic template;", + "PADDED_4": "Match the stereotype with his or her appropriate physiognomic template;", + "PADDED_8": "Match the stereotype with his or her appropriate physiognomic template;", + "EXTRACTED_INIT_BOX": "Amatch the sttergo ypee with his or her apporriate physiognomic template?", + "PADDED_4_EXTRACTED": "Amych? the stereo viper with his or her appropriate phisiologic template?:", + "PADDED_8_EXTRACTED": "Matcha! the stergo yipes with his or her appropriate physionomic template?", + "PADDED_8_DILATION_1": "Matcha! the stereo type ype with his or her appropriate physiologic templates;", + "PAD_8_FRACT_0_5": "Mastermind: the stereo type yipes with his or her appropriate physiognomic template.", + "PAD_8_FRACT_0_2": "Matcha! the stereo type ype with his or her appropriate physiognomic template." + }, + "17": { + "INITIAL_BOX": "Sir! mr. magnate, sir! i have solved the problem of railcar ennui, master magnate!", + "DEFAULT": "Sir! mr. magnate, sir! i have solved the problem of railcar ennui, master magnate!", + "DEFAULT_GREY_PAD": "Sir! mr. magnate, sir! i have solved the problem of railcar ennui, master magnate!", + "PADDED_4": "Sir! mr. magnate, sir! i have solved the problem of railcar ennui, master magnate!", + "PADDED_8": "Sir! mr. magnate, sir! i have solved the problem of railcar ennui, master magnate!", + "EXTRACTED_INIT_BOX": "Our! mr. magnate, sir! i have solved the problem of malcolm ennui, master magnate?", + "PADDED_4_EXTRACTED": "Oir? mr. magnate, sir! i have solved the problem of railcar ennui, master magnate?", + "PADDED_8_EXTRACTED": "Sir! mr. magnate, sir! i have solved the problem of railcar ennell, master magnate?", + "PADDED_8_DILATION_1": "Our!? mr. magnate, sir! 
/ i have solved the problem of railcar ennuji, master magnate?", + "PAD_8_FRACT_0_5": "Our? mr. magnate, sir! i have solved the problem of railcar ennui, master magnate?", + "PAD_8_FRACT_0_2": "Our? mr. magnate, sir! i have solved the problem of railcar ennuj, master magnate?" + }, + "18": { + "INITIAL_BOX": "Uncle rodolphe", + "DEFAULT": "Uncle rodolpho", + "DEFAULT_GREY_PAD": "Uncle rodolphe", + "PADDED_4": "Uncle rodolpho", + "PADDED_8": "Uncle rodolphe", + "EXTRACTED_INIT_BOX": "Uncle rodolpho", + "PADDED_4_EXTRACTED": "Uncle rodolpho", + "PADDED_8_EXTRACTED": "Uncle rodolpho", + "PADDED_8_DILATION_1": "Uncle rodolpho", + "PAD_8_FRACT_0_5": "Uncle rodolpho", + "PAD_8_FRACT_0_2": "Uncle rodolpho" + }, + "19": { + "INITIAL_BOX": "The kindly old swiss guy who invented comics", + "DEFAULT": "The kindly old swiss guy who invented comics", + "DEFAULT_GREY_PAD": "The kindly old swiss guy who invented comics", + "PADDED_4": "The kindly old swiss guy who invented comics", + "PADDED_8": "The kindly old swiss guy who invented comics", + "EXTRACTED_INIT_BOX": "The kindly old swiss guy who invented comics", + "PADDED_4_EXTRACTED": "The kindly old swiss guy who invented comics", + "PADDED_8_EXTRACTED": "The kindly old swiss guy who invented comics", + "PADDED_8_DILATION_1": "The kindly old swiss guy who invented comics", + "PAD_8_FRACT_0_5": "The kindly old swiss guy who invented comics", + "PAD_8_FRACT_0_2": "The kindly old swiss guy who invented comics" + }, + "20": { + "INITIAL_BOX": "William randolph hearst", + "DEFAULT": "William randolph hearst", + "DEFAULT_GREY_PAD": "William randolph hearst", + "PADDED_4": "William randolph hearst", + "PADDED_8": "William randolph hearst", + "EXTRACTED_INIT_BOX": "William randolph hearst", + "PADDED_4_EXTRACTED": "William randolph hearst", + "PADDED_8_EXTRACTED": "William randolph hearst", + "PADDED_8_DILATION_1": "William randolph hearst", + "PAD_8_FRACT_0_5": "William randolph hearst", + "PAD_8_FRACT_0_2": "William randolph hearst" 
+ }, + "21": { + "INITIAL_BOX": "Lissen ... i don't care if international copyright law doesn't recognize picture stories ... i want to nail their goddamn american asses!", + "DEFAULT": "Lissen ... i don't care if international copyright law doesn't recognize picture stories ... i want to nail their goddamn american asses!", + "DEFAULT_GREY_PAD": "Lissen ... i don't care if international copyright law doesn't recognize picture stories ... i want to nail their goddamn american asses!", + "PADDED_4": "Lissen ... i don't care if international copyright law doesn't recognize picture stories ... i want to nail their goddamn american asses!", + "PADDED_8": "Lissen ... i don't care if international copyright law doesn't recognize picture stories ... i want to nail their goddamn american asses!", + "EXTRACTED_INIT_BOX": "Lissen ... i don't care if international copyright law doesn't recognize picture stories ... i want to mail their goldmann american asses?", + "PADDED_4_EXTRACTED": "Liggen... i don't care if international copyright law presents recognizable picture stories... i want to mail their goddamn american asses!", + "PADDED_8_EXTRACTED": "Lussen ... i don't care if international copyright law pushes my recognize picture stories ... i want to nail their goddamn american asses!", + "PADDED_8_DILATION_1": "Lissen ... i don't care if international copyright law doesn't recognize picture stories ... i want to mail their goddamn american asses!", + "PAD_8_FRACT_0_5": "Lissen ... i don't care if international copyright law doesn't recognize picture stories ... i want to nail their goddamn american asses!", + "PAD_8_FRACT_0_2": "Lissen ... i don't care if international copyright law doesn't recognize picture stories ... i want to nail their goddamn american asses!" 
+ }, + "22": { + "INITIAL_BOX": "What our democracy needs is its own native artform!", + "DEFAULT": "What our democracy needs is its own native artform!", + "DEFAULT_GREY_PAD": "What our democracy needs is its own native artform!", + "PADDED_4": "What our democracy needs is its own native artform!", + "PADDED_8": "What our democracy needs is its own native artform!", + "EXTRACTED_INIT_BOX": "What our democracy needs is its own native anti-form!?", + "PADDED_4_EXTRACTED": "What our democracy needs is its own native art-form!", + "PADDED_8_EXTRACTED": "What our democracy needs is its own native art-form!", + "PADDED_8_DILATION_1": "Hat our democracy needs is its own native artform!", + "PAD_8_FRACT_0_5": "Hat our democracy needs is its own native artform!?", + "PAD_8_FRACT_0_2": "Hat our democracy needs is its own native artform!?" + }, + "23": { + "INITIAL_BOX": "Yes, but...", + "DEFAULT": "Yes, but...", + "DEFAULT_GREY_PAD": "Yes, but...", + "PADDED_4": "Yes, but...", + "PADDED_8": "Yes, but...", + "EXTRACTED_INIT_BOX": "Y.", + "PADDED_4_EXTRACTED": "Y.", + "PADDED_8_EXTRACTED": "Y", + "PADDED_8_DILATION_1": "Y", + "PAD_8_FRACT_0_5": "Y", + "PAD_8_FRACT_0_2": "Y" + }, + "24": { + "INITIAL_BOX": "The thief in my picture story doesn't look mean and stupid enough", + "DEFAULT": "The thief in my picture story doesn't look mean and stupid enough", + "DEFAULT_GREY_PAD": "The thief in my picture story doesn't look mean and stupid enough", + "PADDED_4": "The thief in my picture story doesn't look mean and stupid enough", + "PADDED_8": "The thief in my picture story doesn't look mean and stupid enough", + "EXTRACTED_INIT_BOX": "The thief in my picture story doesn't look mean and stupid enough!", + "PADDED_4_EXTRACTED": "The thief in my picture story doesn't look mean and stupid enough!", + "PADDED_8_EXTRACTED": "The thief in my picture story doesn't look mean and stupid enough", + "PADDED_8_DILATION_1": "The thief in my picture story doesn't look mean and stupid enough.", + 
"PAD_8_FRACT_0_5": "The thief in my picture story doesn't look mean and stupid enough", + "PAD_8_FRACT_0_2": "The thief in my picture story doesn't look mean and stupid enough." + }, + "25": { + "INITIAL_BOX": "Something like those german and swiss pic- ture books i loved as a kid... \"", + "DEFAULT": "Something like those german and swiss pic- ture books i loved as a kid... \"", + "DEFAULT_GREY_PAD": "Something like those german and swiss pic- ture books i loved as a kid... \"", + "PADDED_4": "Something like those german and swiss pic- ture books i loved as a kid... \"", + "PADDED_8": "Something like those german and swiss pic- ture books i loved as a kid... \"", + "EXTRACTED_INIT_BOX": "Something uk house german and swiss pie to make books i loved as a kid.", + "PADDED_4_EXTRACTED": "Methinks uk house german and swiss pics toga books i loved as a kid.", + "PADDED_8_EXTRACTED": "Metthing uk mosie german and swiss pie tome books\" i thought as a kid,", + "PADDED_8_DILATION_1": "Methling uk hose german and swiss pig ture books i loved as a kid.", + "PAD_8_FRACT_0_5": "Something like mosie german and swiss pie tore books i loved as a kid.", + "PAD_8_FRACT_0_2": "Something like mose german and swiss pie tore books i loved as a kid." + }, + "26": { + "INITIAL_BOX": "Hey, that's it?", + "DEFAULT": "Hey, that's it?", + "DEFAULT_GREY_PAD": "Hey, that's it?", + "PADDED_4": "Hey, that's it?", + "PADDED_8": "Hey, that's it?", + "EXTRACTED_INIT_BOX": "Hey, that's it?", + "PADDED_4_EXTRACTED": "Hey, that's it?", + "PADDED_8_EXTRACTED": "Hey, that's it?", + "PADDED_8_DILATION_1": "Hey, that's it?", + "PAD_8_FRACT_0_5": "Hey, that's it?", + "PAD_8_FRACT_0_2": "Hey, that's it?" 
+ }, + "27": { + "INITIAL_BOX": "I'll make him look more like one of those filthy, good- for- nothing american sons of bitches.", + "DEFAULT": "I'll make him look more like one of those filthy, good- for- nothing american sons of bitches.", + "DEFAULT_GREY_PAD": "I'll make him look more like one of those filthy, good-for-nothing american sons of bitches", + "PADDED_4": "I'll make him look more like one of those filthy, good- for-nothing american sons of bitches.", + "PADDED_8": "I'll make him look more like one of those filthy, good-for-nothing american sons of bitches.", + "EXTRACTED_INIT_BOX": "It'll make him look more like one of those filthy, good-for-nothing american sons of bitches.", + "PADDED_4_EXTRACTED": "It'll make him look more like one of those filthy, good-for-nothing american sons of bitches.", + "PADDED_8_EXTRACTED": "It'll make him look more like one of those filthy, good-for-nothing american sons of bitches.", + "PADDED_8_DILATION_1": "It'll make him look more like one of those filthy, good-for-nothing american sons of bitches.", + "PAD_8_FRACT_0_5": "It'll make him look more like one of those filthy, good-for-nothing american sons of bitches.", + "PAD_8_FRACT_0_2": "It'll make him look more like one of those filthy, good-for-nothing american sons of bitches." + }, + "28": { + "INITIAL_BOX": "I'll just have one of my staff artists plagiarize them?", + "DEFAULT": "I'll just have one of my staff artists plagiarize them!", + "DEFAULT_GREY_PAD": "I'll just have one of my staff artists plagiarize them!", + "PADDED_4": "I'll just have one of my staff artists plagiarize them!", + "PADDED_8": "I'll just have one of my staff artists plagiarize them!", + "EXTRACTED_INIT_BOX": "Raul must blame one of his staff artists! 
plagiarize me?", + "PADDED_4_EXTRACTED": "Paul must make one of his staff artists plagiarize max!", + "PADDED_8_EXTRACTED": "Zahl just blane one of my staff artists plugarville!", + "PADDED_8_DILATION_1": "Well just have one of my staff artists plagiarize henry!", + "PAD_8_FRACT_0_5": "Ranal just blame one of his staff artists plagiarizes madey!", + "PAD_8_FRACT_0_2": "I'm just blaming one of my staff artists! plagiarizes everyone?" + }, + "29": { + "INITIAL_BOX": "Jazz, comics, booze & college songs", + "DEFAULT": "Jazz, comics, booze & college songs", + "DEFAULT_GREY_PAD": "Jazz comics, booze & college songs", + "PADDED_4": "Jazz, comics, booze & college songs", + "PADDED_8": "Jazz comics, booze & college songs", + "EXTRACTED_INIT_BOX": "Jazz, comics, booze & college songs", + "PADDED_4_EXTRACTED": "Jazz, comics, booze & college songs", + "PADDED_8_EXTRACTED": "Jazz, comics, booze & college songs", + "PADDED_8_DILATION_1": "Jazz, comics, booze & college songs", + "PAD_8_FRACT_0_5": "Jazz, comics, booze & college songs", + "PAD_8_FRACT_0_2": "Jazz, comics, booze & college songs" + }, + "30": { + "INITIAL_BOX": "1 criminal plagiarist sodomite junkie caveman senator jew irishman negro honky cartoonist god", + "DEFAULT": "1 criminal plagiarist sodomite junkie caveman senator jew irishman negro honky cartoonist god", + "DEFAULT_GREY_PAD": "1 criminal plagiarist sodomite junkie caveman senator jew irishman negro honky cartoonist god", + "PADDED_4": "1 criminal plagiarist sodomite junkie caveman senator jew irishman negro honky cartoonist god", + "PADDED_8": "1 criminal plagarist sodomite junkie caveman senator jew irishman negro honky cartoonist god", + "EXTRACTED_INIT_BOX": "A criminalist flagrantly sidodomite junkie cavern senator jewish irish negro honky cartoonist godd", + "PADDED_4_EXTRACTED": "One two three five seven eight eleven twelve.", + "PADDED_8_EXTRACTED": "1 criminal flagaristic socialite junkie caveman senator jewish negro honky cartoonist god?", + 
"PADDED_8_DILATION_1": "1 criminal plagiarist flagrantly sodomite junkie caveman senator jew fushiman negro honky cartoonist god.", + "PAD_8_FRACT_0_5": "1 criminal flagariost 2 slobmitte 3 junkie 4 caverman senator 5 jew 6 irish negro honky cartoonist 7 11 12 god", + "PAD_8_FRACT_0_2": "1 criminal plagiarist flagrantly sodomite junkie cavern senator jewish negro honky cartoonist god 2 3 4 5 6 7 8 9 10 11 12" + }, + "31": { + "INITIAL_BOX": "Answers next week.", + "DEFAULT": "Answers next week.", + "DEFAULT_GREY_PAD": "Answers: next week.", + "PADDED_4": "Answers: next week.", + "PADDED_8": "Answers: next week.", + "EXTRACTED_INIT_BOX": "Answers next week", + "PADDED_4_EXTRACTED": "Answers next week", + "PADDED_8_EXTRACTED": "Answers next week", + "PADDED_8_DILATION_1": "Answers next week", + "PAD_8_FRACT_0_5": "Answers next week", + "PAD_8_FRACT_0_2": "Answers next week" + }, + "32": { + "INITIAL_BOX": "Edweard", + "DEFAULT": "Edweard", + "DEFAULT_GREY_PAD": "E.dweard", + "PADDED_4": "Edweard", + "PADDED_8": "E.dweard", + "EXTRACTED_INIT_BOX": "Edweard", + "PADDED_4_EXTRACTED": "E.d.weard", + "PADDED_8_EXTRACTED": "F.d.wearard", + "PADDED_8_DILATION_1": "F.d.weard", + "PAD_8_FRACT_0_5": "F.d.wearard", + "PAD_8_FRACT_0_2": "F.d.weard" + }, + "33": { + "INITIAL_BOX": "Oh, those jews", + "DEFAULT": "Oh, those jews", + "DEFAULT_GREY_PAD": "Oh, those jews", + "PADDED_4": "Oh, those jews", + "PADDED_8": "Oh, those jews", + "EXTRACTED_INIT_BOX": "Oh, those jews!", + "PADDED_4_EXTRACTED": "Oh, those jews!", + "PADDED_8_EXTRACTED": "Oh, those jews?", + "PADDED_8_DILATION_1": "Oh, those jews?", + "PAD_8_FRACT_0_5": "Oh, those jews", + "PAD_8_FRACT_0_2": "Oh, those jews" + }, + "34": { + "INITIAL_BOX": "See? he pretends to be this waspy christian guy...", + "DEFAULT": "See? he pretends to be this waspy christian guy...", + "DEFAULT_GREY_PAD": "See? he pretends to be this waspy christian guy...", + "PADDED_4": "See? 
he pretends to be this waspy christian guy...", + "PADDED_8": "See? he pretends to be this waspy christian guy...", + "EXTRACTED_INIT_BOX": "See? he pretends to be this waspy christian guy...", + "PADDED_4_EXTRACTED": "See? his pretends to be this waspy christian guy...", + "PADDED_8_EXTRACTED": "See? he pretends to be this waspy christian guy ...", + "PADDED_8_DILATION_1": "See? he pretends to be this waspy christian guy...", + "PAD_8_FRACT_0_5": "See? he pretends to be this waspy christian guy ...", + "PAD_8_FRACT_0_2": "See? he pretends to be this waspy christian guy ..." + }, + "35": { + "INITIAL_BOX": "\" but really he 's a cape - wearing ubermensch! it 'll sell like hotcakes! \"", + "DEFAULT": "\"\"... but really he's a cape-wearing ubermensch! it'll sell like hotcakes! \"", + "DEFAULT_GREY_PAD": "\"\" but really he 's a cape-wearing ubermensch! it 'll sell like hotcakes! \"", + "PADDED_4": "... but really he 's a cape - wearing ubermensch! it 'll sell like hotcakes?", + "PADDED_8": "\"\" but really he 's a cape-wearing ubermensch! it 'll sell like hotcakes! \"", + "EXTRACTED_INIT_BOX": "\" but really he 's a cape wearing ubermensch! i 'll sell like hotcakes! \"", + "PADDED_4_EXTRACTED": "\" but really he 's a cape wearing ubermenschen! it 'll sell like hotcakes! \"", + "PADDED_8_EXTRACTED": "\"\" but really he 's a cape wearing ubermenschen! i'll sell like hotcakes! \"", + "PADDED_8_DILATION_1": "\"\" but really he 's a cape wearing ubermensch! i 'll sell like hotcakes! \"", + "PAD_8_FRACT_0_5": "\"\" but really he 's a cape wearing ubermenschen! i 'll sell like hotcakes! \"", + "PAD_8_FRACT_0_2": "\"\" but really he 's a cape wearing ubermenschen! i 'll sell like hotcakes! 
\"" + }, + "36": { + "INITIAL_BOX": "Cheerleaders for the cause.", + "DEFAULT": "Cheerleaders for the cause.", + "DEFAULT_GREY_PAD": "Cheerleaders for the cause", + "PADDED_4": "Cheerleaders for the cause.", + "PADDED_8": "Cheerleaders for the cause", + "EXTRACTED_INIT_BOX": "Leaders he ause", + "PADDED_4_EXTRACTED": "Leaders heause", + "PADDED_8_EXTRACTED": "Leaders make the cause.", + "PADDED_8_DILATION_1": "Leaders use.", + "PAD_8_FRACT_0_5": "Leaders heause", + "PAD_8_FRACT_0_2": "Leaders heause" + }, + "37": { + "INITIAL_BOX": "Haha ... why yes, of course ... i used to love to read them as a boy!", + "DEFAULT": "Haha ... why yes, of course... i used to love to read them as a boy!", + "DEFAULT_GREY_PAD": "Haha ... why yes, of course ... i used to love to read them as a boy!", + "PADDED_4": "Haha ... why yes, of course ... i used to love to read them as a boy!", + "PADDED_8": "Haha ... why yes, of course ... i used to love to read them as a boy!", + "EXTRACTED_INIT_BOX": "Hannah... awy nes, of course ... i used to love to read them as a boy?", + "PADDED_4_EXTRACTED": "Mama ... why yes, of course ... i used to love to read them as a boy?", + "PADDED_8_EXTRACTED": "Mahir ... why yes, of course ... i used to love to read them as a boy!", + "PADDED_8_DILATION_1": "Haha ... why yes, of course ... i used to love to read them as a boy?", + "PAD_8_FRACT_0_5": "Haha ... why yes, of course ... i used to love to read them as a boy?", + "PAD_8_FRACT_0_2": "Haha ... why yes, of course ... i used to love to read them as a boy!" + }, + "38": { + "INITIAL_BOX": "I dunno...", + "DEFAULT": "I dunno...", + "DEFAULT_GREY_PAD": "I dunno...", + "PADDED_4": "I dunno...", + "PADDED_8": "I dunno...", + "EXTRACTED_INIT_BOX": "I dunno...", + "PADDED_4_EXTRACTED": "I dunno...", + "PADDED_8_EXTRACTED": "I dunno...", + "PADDED_8_DILATION_1": "I dunno...", + "PAD_8_FRACT_0_5": "I dunno...", + "PAD_8_FRACT_0_2": "I dunnno..." 
+ }, + "39": { + "INITIAL_BOX": "I use them as a symbol for the spiritual poverty of american culture.", + "DEFAULT": "I use them as a symbol for the spiritual poverty of american culture.", + "DEFAULT_GREY_PAD": "I use them as a symbol for the spiritual poverty of american culture.", + "PADDED_4": "I use them as a symbol for the spiritual poverty of american culture.", + "PADDED_8": "I use them as a symbol for the spiritual poverty of american culture.", + "EXTRACTED_INIT_BOX": "I use them as a symbol for the spiritual poverty of america! an culture", + "PADDED_4_EXTRACTED": "I use them as a symbol for the spiritual poverty of america and our culture.", + "PADDED_8_EXTRACTED": "I use them as a symbol for the spiritual poverty of america an culture", + "PADDED_8_DILATION_1": "I use them as a symbol for the spiritual poverty of america an culture", + "PAD_8_FRACT_0_5": "I use them as a symbol for the spiritual poverty of america an culture", + "PAD_8_FRACT_0_2": "I use them as a symbol for the spiritual poverty of america an culture" + }, + "40": { + "INITIAL_BOX": "What... you gotta better idea how to get laid?", + "DEFAULT": "What... you gotta better idea how to get laid?", + "DEFAULT_GREY_PAD": "What ... you gotta better idea how to get laid?", + "PADDED_4": "What... you gotta better idea how to get laid?", + "PADDED_8": "What... you gotta better idea how to get laid?", + "EXTRACTED_INIT_BOX": "What ... you gotta better idea how to get laio?", + "PADDED_4_EXTRACTED": "What ... you gotta better idea how to get laid?", + "PADDED_8_EXTRACTED": "What ... you gotta better idea how to get land?", + "PADDED_8_DILATION_1": "What ... you gotta better idea how to get laid?", + "PAD_8_FRACT_0_5": "What ... you gotta better idea how to get laid?", + "PAD_8_FRACT_0_2": "What ... you gotta better idea how to get laid?" + }, + "41": { + "INITIAL_BOX": "I have absolutely nothing against an art for children ... 
on the contrary, i'm for it!", + "DEFAULT": "I have absolutely nothing against an art for children ... on the contrary, i'm for it!", + "DEFAULT_GREY_PAD": "I have absolutely nothing against an art for children ... on the contrary, i'm for it!", + "PADDED_4": "I have absolutely nothing against an art for children......on the contrary, i'm for it!", + "PADDED_8": "I have absolutely nothing against an art for children ... on the contrary, i'm for it!", + "EXTRACTED_INIT_BOX": "I have absolutely nothing against an art for children ... on the contrary, i'm for it!", + "PADDED_4_EXTRACTED": "I have absolutely nothing against an art for children......on the contrary, i'm for it!", + "PADDED_8_EXTRACTED": "I have absolutely nothing against an art for children ... on the contrary, i'm for it!", + "PADDED_8_DILATION_1": "I have absolutely nothing against an art for children ... on the contrary, i'm for it!", + "PAD_8_FRACT_0_5": "I have absolutely nothing against an art for children ... on the contrary, i'm for it!", + "PAD_8_FRACT_0_2": "I have absolutely nothing against an art for children ... on the contrary, i'm for it!" + }, + "42": { + "INITIAL_BOX": "Movies on paper hang in there, mosley?", + "DEFAULT": "Movies on paper hang in there, mosley?", + "DEFAULT_GREY_PAD": "Movies on paper hang in there mosley?", + "PADDED_4": "Movies on paper hang in there, mosley?", + "PADDED_8": "Movies on paper hang in there mosley?", + "EXTRACTED_INIT_BOX": "Hang in there, mossley! movies on paper?", + "PADDED_4_EXTRACTED": "Hang in there mossley! movies on paper?", + "PADDED_8_EXTRACTED": "Hang in there, mossley's! movies on paper?", + "PADDED_8_DILATION_1": "Hang in there, mosley! \"", + "PAD_8_FRACT_0_5": "Movies on paper hang in there mosley's?", + "PAD_8_FRACT_0_2": "Movies on paper hang in there mosley's?" 
+ }, + "43": { + "INITIAL_BOX": "I think they're wonder- ful, because they get people interested in real reading!", + "DEFAULT": "I think they're wonder- ful, because they get people interested in real reading!", + "DEFAULT_GREY_PAD": "I think they're wonder- ful, because they get people interested in real reading!", + "PADDED_4": "I think they're wonder- ful, because they get people interested in real reading!", + "PADDED_8": "I think they're wonder- ful, because they get people interested in real reading!", + "EXTRACTED_INIT_BOX": "Think they're wonders - full-, because they get people interested in real reading!", + "PADDED_4_EXTRACTED": "Think they're wonderful - full, because they get people interested in real reading?", + "PADDED_8_EXTRACTED": "Think they're the wonder- ful , because they get people interested in avail reading!", + "PADDED_8_DILATION_1": "Think they're wonder- full, because they get people interested in real reading!", + "PAD_8_FRACT_0_5": "Think they're wondering - full, because they get people interested in several readings!", + "PAD_8_FRACT_0_2": "Think they're wonder- ful, because they get people interested in actual reading!" + }, + "44": { + "INITIAL_BOX": "Hang in there, mosley!", + "DEFAULT": "Hang in there, mosley!", + "DEFAULT_GREY_PAD": "Hang in there, mosley!", + "PADDED_4": "Hang in there, mosley!", + "PADDED_8": "Hang in there, mosley!", + "EXTRACTED_INIT_BOX": "Hang 'em there, mosley?", + "PADDED_4_EXTRACTED": "Hang in there, mosley!?", + "PADDED_8_EXTRACTED": "Hang in there mosley!?", + "PADDED_8_DILATION_1": "Hang in there mosley!?", + "PAD_8_FRACT_0_5": "Hang in there mosley?", + "PAD_8_FRACT_0_2": "Hang in there, mosley?" + }, + "45": { + "INITIAL_BOX": "I'm hit! i'm hit?", + "DEFAULT": "I'm hit! i'm hit?", + "DEFAULT_GREY_PAD": "I'm hit! i'm hit?", + "PADDED_4": "I'm hit! i'm hit?", + "PADDED_8": "I'm hit! i'm hit?", + "EXTRACTED_INIT_BOX": "I'm hitt! i'm hitt?", + "PADDED_4_EXTRACTED": "I'm hit! 
?", + "PADDED_8_EXTRACTED": "I'm hit! i'm hit?", + "PADDED_8_DILATION_1": "I'm hit!?", + "PAD_8_FRACT_0_5": "I'm hit! i'm hit?", + "PAD_8_FRACT_0_2": "I'm hit! i'm hit?" + }, + "46": { + "INITIAL_BOX": "Mean while", + "DEFAULT": "Mean while", + "DEFAULT_GREY_PAD": "Mean while", + "PADDED_4": "Mean while", + "PADDED_8": "Mean while", + "EXTRACTED_INIT_BOX": "Mean while", + "PADDED_4_EXTRACTED": "Mean while", + "PADDED_8_EXTRACTED": "Mean while", + "PADDED_8_DILATION_1": "Mean while", + "PAD_8_FRACT_0_5": "Mean while", + "PAD_8_FRACT_0_2": "Mean while" + }, + "47": { + "INITIAL_BOX": "A short history of canada begins:", + "DEFAULT": "A son history of carnage", + "DEFAULT_GREY_PAD": "A son history of a carnivorous plant.", + "PADDED_4": "A short history of a long time", + "PADDED_8": "A short history of a continuing", + "EXTRACTED_INIT_BOX": "Assassins creed unity an game of art and war.", + "PADDED_4_EXTRACTED": "A messenger of the gods?", + "PADDED_8_EXTRACTED": "A southern hospitality go an dine!", + "PADDED_8_DILATION_1": "Astonishingly enough, captain marvel was an american hero of the golden age!", + "PAD_8_FRACT_0_5": "Astonishingly one of them is!", + "PAD_8_FRACT_0_2": "A son of krypton!" + }, + "48": { + "INITIAL_BOX": "Note: sumerians didn't invent picture- writing, only regular writing, and so are of little interest. they seem to have been perfectly nice people, however.", + "DEFAULT": "Note: sumerians didn't invent picture- writing, only regular writing, and so are of little interest. they seem to have been perfectly nice people, however.", + "DEFAULT_GREY_PAD": "Note: sumerians didn't invent picture- writing, only regular writing, and so are of little interest. they seem to have been perfectly nice people, however.", + "PADDED_4": "Note: sumerians didn't invent picture- writing, only regular writing, and so are of little interest. 
they seem to have been perfectly nice people, however.", + "PADDED_8": "Note: sumerians didn't invent picture- writing, only regular writing, and so are of little interest. they seem to have been perfectly nice people, however.", + "EXTRACTED_INIT_BOX": "Note! summerians wrote in different picture-writing, daily regular writing, and so are of little interest. they say screen no native was born perfectly nice people somewhere", + "PADDED_4_EXTRACTED": "Note: summerians didn't invent pictures; writing, only regular writing, and so are of little interest. they seem to have been perfectly nice people however", + "PADDED_8_EXTRACTED": "Note! summerians didn't invent picture-writing; only regular writing, and so are of little interest. they seem to have begun perfectly nice people however", + "PADDED_8_DILATION_1": "Note! summerians didn't invent picture- writing, only regular writing, and so are of little interest. they seem to have been perfectly nice people however", + "PAD_8_FRACT_0_5": "Note: sumerians didn't invent picture-writing; only regular writing, and so are of little interest. they seem to have been perfectly nice people however", + "PAD_8_FRACT_0_2": "Note? sumerians didn't invent picture- writing, only regular writing, and so are of little interest. they seem to have been perfectly nice people however" + }, + "49": { + "INITIAL_BOX": "Guess i 'd better get down to brass tacks and finish up that\" dialogue on democracy\" before the senator gets here ... then i 'll sodomize my 12-year - old slave boy?", + "DEFAULT": "Guess i 'd better get down to brass tacks and finish up that \"dialogue on democracy\" before the senator gets here ... then i 'll sodomize my 12-year - old slave boy?", + "DEFAULT_GREY_PAD": "Guess i'd better get down to brass tacks and finish up that \"dialogue on democracy\" before the senator gets here ... 
then i 'll sodomize my 12-year- old slave boy?", + "PADDED_4": "Guess i 'd better get down to brass tacks and finish up that \"dialogue on democracy\" before the senator gets here ... then i 'll sodomize my 12-year - old slave boy?", + "PADDED_8": "Guess i'd better get down to brass tacks and finish up that \"dialogue on democracy\" before the senator gets here ... then i 'll sodomize my 12-year- old slave boy?", + "EXTRACTED_INIT_BOX": "Guess i'd better get down to brass tacks and finish up that \"dialogue on democracy\" before the senator gets here... then pull suddomize my 12-year- old slaye boy?", + "PADDED_4_EXTRACTED": "Guess i'd better get down to brass tacks and finish up that \"dialogue on democracy\" before the senator gets here... then i'll soondomize my 12-year- old slaye boy!", + "PADDED_8_EXTRACTED": "Guess i'd better get down to brass tacks and finish up that \"dialogue on democracy\" before the senator gets here... then pull someonmize my 12-year old slaye boy?", + "PADDED_8_DILATION_1": "Guess i'd better get down to brass tacks and finish up that \"dialogue on democracy\" before the senator gets here... then i'll soon domitrize my 12-year- old slaye boy?", + "PAD_8_FRACT_0_5": "Guess i'd better get down to brass tacks and finish up that \"dialogue on democracy\" before the senator gets here... then i'll summonize my 12-year-old slaye boy?", + "PAD_8_FRACT_0_2": "Guess i'd better get down to brass tacks and finish up that \"dialogue on democracy\" before the senator gets here... then i'll someday my 12-year- old slave boy!" + }, + "50": { + "INITIAL_BOX": "Monsieur! norman, sir! i believe i have found a way to occupy the men through the long wait between our military encounters!!", + "DEFAULT": "Monsieur! norman, sir! i believe i have found a way to occupy the men through the long wait between our military encounters!", + "DEFAULT_GREY_PAD": "Monsieur! norman, sir! 
i believe i have found a way to occupy the men through the long wait between our military encounters!", + "PADDED_4": "Monsieur! norman, sir! i believe i have found a way to occupy the men through the long wait between our military encounters!", + "PADDED_8": "Monsieur! norman, sir! i believe i have found a way to occupy the men through the long wait between our military encounters!", + "EXTRACTED_INIT_BOX": "Monsieur! norman, sir! i believe i have found a way to occ ppy the men through the long wait between our military encounters!", + "PADDED_4_EXTRACTED": "Monsieur! norman, sir! i believe i have found a way to occ py the men through the long what between our military encounters?", + "PADDED_8_EXTRACTED": "Monsieur! norman, sir! i believe i have found a way to occ py the men through the long wait between our military encounters?", + "PADDED_8_DILATION_1": "Monsieur! norman, sir! i believe i have found a way to occ py the men through the long wait between our mlitary encounters!!", + "PAD_8_FRACT_0_5": "Monsieur! norman, sir! i believe i have found a way to occ py the men through the long wait between our military encounters!", + "PAD_8_FRACT_0_2": "Monsieur! norman, sir! i believe i have found a way to occ py the men through the long wait between our military encounters!" + }, + "51": { + "INITIAL_BOX": "See? here on this vellum ... crass, hateful drawings of the ugly, fat anglo-saxons we all hate so much! we can pass it around and share a hearty laugh together", + "DEFAULT": "See? here on this vellum ... crass, hateful drawings of the ugly, fat anglo-saxons we all hate so much! we can pass it around and share a hearty laugh together?", + "DEFAULT_GREY_PAD": "See? here on this vellum ... crass, hateful drawings of the ugly, fat anglo-saxons we all hate so much! we can pass it around and share a hearty laugh together", + "PADDED_4": "See? here on this vellum ... crass, hateful drawings of the ugly, fat anglo-saxons we all hate so much! 
we can pass it around and share a hearty laugh together.", + "PADDED_8": "See? here on this vellum ... crass, hateful drawings of the ugly, fat anglo-saxons we all hate so much! we can pass it around and share a hearty laugh together.", + "EXTRACTED_INIT_BOX": "Gee? here on this vellium grass, hazeful drawings of the ugly, fat anglo saxons we all hate so much! we can pass it around and smile a hearty laugh together", + "PADDED_4_EXTRACTED": "See? here on this wellum class, hatred drownness of the ugly, fat anglo saxons we all hate so much! we can pass it around and share a hearty laugh together.", + "PADDED_8_EXTRACTED": "Free? here on this vellium cross, hateful drawness of the ugly, fat anglo saxons we all hate so much! / we can pass it around and say a hearty laugh together.", + "PADDED_8_DILATION_1": "Sree? herie on this vellum crass, hateful drawings of the ugly, fat anglo saxons we all hate so much/ we can pass it around and gain a hearty laugh together...", + "PAD_8_FRACT_0_5": "Sweet? here on this vellium cross, hateful drawings of the ugly, fat anglo saxons we all hate so much! we can pass it around and share a hearty laugh together...", + "PAD_8_FRACT_0_2": "Sleep? here on this vellium crass, hateful drawings of the ugly, fat anglo saxons we all hate so much! we can pass it around and we're a hearty laugh together..." + }, + "52": { + "INITIAL_BOX": "Ah ... poorly... methinks the block too worn for any further impressing ...", + "DEFAULT": "Ah ... poorly... methinks the block too worn for any further impressing ...", + "DEFAULT_GREY_PAD": "Ah ... poorly ... methinks the block too worn for any further impressing ...", + "PADDED_4": "Ah ... poorly... methinks the block too worn for any further impressing ...", + "PADDED_8": "Ah ... poorly... methinks the block too worn for any further impressing ...", + "EXTRACTED_INIT_BOX": "Ah ... poodly ... anethinks the block too worrin for any further impressing ...", + "PADDED_4_EXTRACTED": "Ah ... poopy l... 
anethiks the block too worn for any further impressing ...", + "PADDED_8_EXTRACTED": "Ah ... poorly... anethinks the block too worn for any further impressing ...", + "PADDED_8_DILATION_1": "Ah ... poorly jetthinks the block too worn for any further impressing. \"", + "PAD_8_FRACT_0_5": "Ah ... poorly... anethinks the block too worn for any further impressing...", + "PAD_8_FRACT_0_2": "Ah ... poorly... jmethinks the block too worn for any further impressing. \"" + }, + "53": { + "INITIAL_BOX": "We have over three miles soldiers to entertain!", + "DEFAULT": "We have over three miles soldiers to entertain!", + "DEFAULT_GREY_PAD": "We have over trois milles soldiers to entertain!", + "PADDED_4": "We have over three miles soldiers to entertain!", + "PADDED_8": "We have over three miles soldiers to entertain!", + "EXTRACTED_INIT_BOX": "Ha! over trois mille's soldier to en tain?", + "PADDED_4_EXTRACTED": "\" ha! ovei trois mille soldier to en tain! \"", + "PADDED_8_EXTRACTED": "Ha! ove trous milles soldier to en train?", + "PADDED_8_DILATION_1": "\" ha ! ovet trois milles soldier to en \"\"tain! \"", + "PAD_8_FRACT_0_5": "Ha! ovet trois mille soldier to en tain?", + "PAD_8_FRACT_0_2": "\" ha! ovet trois mille soldier to en tain! \"" + }, + "54": { + "INITIAL_BOX": "Idiot!", + "DEFAULT": "Idiot!", + "DEFAULT_GREY_PAD": "Idiot!", + "PADDED_4": "Idiot!", + "PADDED_8": "Idiot!", + "EXTRACTED_INIT_BOX": "\" idiot!", + "PADDED_4_EXTRACTED": "\" i did it!", + "PADDED_8_EXTRACTED": "I did it?", + "PADDED_8_DILATION_1": "Idiot?", + "PAD_8_FRACT_0_5": "No!?", + "PAD_8_FRACT_0_2": "No!?" 
+ }, + "55": { + "INITIAL_BOX": "Do not bother me again until the invention of printing!", + "DEFAULT": "Do not bother me again until the invention of printing!", + "DEFAULT_GREY_PAD": "Do not bother me again until the invention of printing!", + "PADDED_4": "Do not bother me again until the invention of printing!", + "PADDED_8": "Do not bother me again until the invention of printing!", + "EXTRACTED_INIT_BOX": "Do not bother me again until the invention of printing!", + "PADDED_4_EXTRACTED": "Do not bother me again until the invention of printing!", + "PADDED_8_EXTRACTED": "Do not both her me again until the invention of printing!", + "PADDED_8_DILATION_1": "Do not bother me again until the invention of printing!", + "PAD_8_FRACT_0_5": "Do not bother me again until the invention of printing!", + "PAD_8_FRACT_0_2": "Do not bother me again until the invention of printing!" + }, + "56": { + "INITIAL_BOX": "Nah ... these stupid poor people know no better -- we shall certainly sell them all!", + "DEFAULT": "Nah ... these stupid poor people know no better -- we shall certainly sell them all!", + "DEFAULT_GREY_PAD": "Nah ... these stupid poor people know no better -- we shall certainly sell them all?", + "PADDED_4": "Nah ... these stupid poor people know no better -- we shall certainly sell them all!", + "PADDED_8": "Nah ... these stupid poor people know no better -- we shall certainly sell them all!", + "EXTRACTED_INIT_BOX": "What they stormed into wasn't just a bunch of people buttering up to her-- it was all cordoned off secretly.", + "PADDED_4_EXTRACTED": "What they should knowing poor people are better off with all circumstances see themselves.", + "PADDED_8_EXTRACTED": "With these strict rules, people know no better than to sell themselves.", + "PADDED_8_DILATION_1": "Nah... 
these stupid poor people know no better--we shall certainly sell them all.", + "PAD_8_FRACT_0_5": "\" well, these people know no better - we 'll all be selling them off!", + "PAD_8_FRACT_0_2": "Nigeria! these stupid poor people knowing better we should all vernatically seek remal." + }, + "57": { + "INITIAL_BOX": "Breadside", + "DEFAULT": "Broadside", + "DEFAULT_GREY_PAD": "Broaden your mind?", + "PADDED_4": "E. side", + "PADDED_8": "Broadsidede", + "EXTRACTED_INIT_BOX": "\"i'm sorry.\"", + "PADDED_4_EXTRACTED": "Cheers?", + "PADDED_8_EXTRACTED": "Alright?", + "PADDED_8_DILATION_1": "Brilliant!", + "PAD_8_FRACT_0_5": "\" i don't know what to do! \"", + "PAD_8_FRACT_0_2": "\"i'm sorry.\"" + }, + "58": { + "INITIAL_BOX": "How goes the woodcut bible printing, brother broadside?", + "DEFAULT": "How goes the woodcut bible printing, brother broadside?", + "DEFAULT_GREY_PAD": "How goes the woodcut bible printing, brother broadside?", + "PADDED_4": "How goes the woodcut bible printing, brother broadside?", + "PADDED_8": "How goes the woodcut bible printing, brother broadside?", + "EXTRACTED_INIT_BOX": "How goes the woodcut bible printing, brother broadside?", + "PADDED_4_EXTRACTED": "How goes the woodcut bible printing, brother broadside?", + "PADDED_8_EXTRACTED": "How goes the woodcut bible printing, brother broadside?", + "PADDED_8_DILATION_1": "How goes the woodcut bible printing, brother broadside?", + "PAD_8_FRACT_0_5": "How goes the woodcut bible printing, brother broadside?", + "PAD_8_FRACT_0_2": "How goes the woodcut bible printing, brother broadside?" + }, + "59": { + "INITIAL_BOX": "Look ... see? picture stories, imported from switzerland, which seem to move before the eyes -- perfect for our ever-accelerating world and the increasingly affluent middle class ... they're all the rage in europe!", + "DEFAULT": "Look ... see? 
picture stories, imported from switzerland, which seemed to move before the eyes -- perfect for our ever-accelerating world and the increasingly affluent middle class ... they're all the rage in europe!", + "DEFAULT_GREY_PAD": "Look ... see? picture stories imported from switzerland, which seem to move before the eyes -- perfect for our ever-accelerating world and the increasingly affluent middle class... they're all the rage in europe!", + "PADDED_4": "Look ... see? picture stories, imported from switzerland, which seem to move before the eyes -- perfect for our ever-accelerating world and the increasingly affluent middle class ... they're all the rage in europe!", + "PADDED_8": "Look ... see? picture stories imported from switzerland, which seem to move before the eyes -- perfect for our ever-accelerating world and the increasingly affluent middle class ... they're all the rage in europe!", + "EXTRACTED_INIT_BOX": "Look ... see? picture story imported from switzerland, which seemed to move before the eyes - perfect for our ever accelerating world and the increasingly efficient middle class - they 're all the rage in europe!", + "PADDED_4_EXTRACTED": "Look ... see? picture stohl imported from switzerland, which seemed to move towards the eyes - perfect for our ever accelerating world and the increasingly argumentative middle class ... everybody's all the rage in europe!", + "PADDED_8_EXTRACTED": "Look ... see? picture stunned, imported from switzerland, which seems to move before the eyes - perfect for our ever accelerating world and the increasingly affuent middle class... everybody's all the rage in europe!", + "PADDED_8_DILATION_1": "Look ... see? picture storied, imported from switzerlandland, which i seem to move before the eyes - perfect for our ever-accelerating world and the increasingly affluent middle class people everywhere - all the rage in europe! \"", + "PAD_8_FRACT_0_5": "Look ... seep? 
picture storm imported from switzerland, which i seem to move before the eyes ... perfect for our ever accelerating world and the increasingly afflient middle class... they're all the rage in europe!", + "PAD_8_FRACT_0_2": "Look...... seep? picture story imported from switzerland, which i seem to move before the eyes ... perfect for our ever accelerating world and the increasingly afflient middle class... they're all the rage in europe!" + }, + "60": { + "INITIAL_BOX": "Y-yes, but we think it shall sell quite well!", + "DEFAULT": "Y-yes, but we think it shall sell quite well!", + "DEFAULT_GREY_PAD": "Y-yes, but we think it shall sell quite well!", + "PADDED_4": "Y-yes, but we think it shall sell quite well!", + "PADDED_8": "Y-yes, but we think it shall sell quite well!", + "EXTRACTED_INIT_BOX": "Y yes, but we hink it shall sell quite well.", + "PADDED_4_EXTRACTED": "It yes, but we think it shall cell quite well.", + "PADDED_8_EXTRACTED": "Y yes, but we think it shall cell quite well.", + "PADDED_8_DILATION_1": "Y yes, but we hink it shall sell quite well.", + "PAD_8_FRACT_0_5": "Y yes, but we hink it shall cell quite well.", + "PAD_8_FRACT_0_2": "Y yes, but we hink it shall sell quite well." + }, + "61": { + "INITIAL_BOX": "But this is just puerile swill!", + "DEFAULT": "But this is just puerile swill!", + "DEFAULT_GREY_PAD": "But this is just puerile swill!", + "PADDED_4": "But this is just puerile swill!", + "PADDED_8": "But this is just puerile swill!", + "EXTRACTED_INIT_BOX": "But this is just puerile swill!", + "PADDED_4_EXTRACTED": "But this is just puerile swill!", + "PADDED_8_EXTRACTED": "But this is just puerile swill!", + "PADDED_8_DILATION_1": "But this is just puerile swill!", + "PAD_8_FRACT_0_5": "But this is just puerile swill!", + "PAD_8_FRACT_0_2": "But this is just puerile swill!" + }, + "62": { + "INITIAL_BOX": "Some of the finer cuts are almost besmircht beyond our gaze, see? 
is it not an insult to god, as such?", + "DEFAULT": "Some of the finer cuts are almost besmircht beyond our gaze? is it not an insult to god, as such?", + "DEFAULT_GREY_PAD": "Some of the finer cuts are almost besmircht beyond our gaze. see? is it not an insult to god as such?", + "PADDED_4": "Some of the finer cuts are almost besmircht beyond our gaze. see? is it not an insult to god, as such?", + "PADDED_8": "Some of the finer cuts are almost besmircht beyond our gaze. see? is it not an insult to god, as such?", + "EXTRACTED_INIT_BOX": "Some of the finer cuts are almost besmirched beyond our gaze? is it not an insult to god, as such?", + "PADDED_4_EXTRACTED": "Some of the finer cuts are almost beyond our gaze, see? is it not an insult to good, as such?", + "PADDED_8_EXTRACTED": "Some of the finer cuts are almost beyond our gaze see? is it not an insult to god, as such?", + "PADDED_8_DILATION_1": "Some of the finer cuts are almost beyond our gaze, see? is it not an insult to god, as such?", + "PAD_8_FRACT_0_5": "Some of the finer cuts are almost besmircht beyond our gaze see? is it not an insult to god, as such?", + "PAD_8_FRACT_0_2": "Some of the finer cuts are almost besmircht beyond our gaze see? is it not an insult to god, as such?" 
+ }, + "63": { + "INITIAL_BOX": "Don't bother me again until cinema has superseded theater!", + "DEFAULT": "Don't bother me again until cinema has superseded theater!", + "DEFAULT_GREY_PAD": "Don't bother me again until cinema has supperseeded theater!", + "PADDED_4": "Don't bother me again until cinema has supeseded theater!", + "PADDED_8": "Don't bother me again until cinema has supperseeded theater!", + "EXTRACTED_INIT_BOX": "Don't bother me again until cinema has superseded theater?", + "PADDED_4_EXTRACTED": "Don't bother me again until cinema has superseded theater?", + "PADDED_8_EXTRACTED": "Don't bother me again until cinema has supervised theater!", + "PADDED_8_DILATION_1": "Don't bother me again, until cinema has supervised theater!?", + "PAD_8_FRACT_0_5": "Don't bother me again until cinema has super-sized theater!", + "PAD_8_FRACT_0_2": "Don't bother me again until cinema has super-sized theater!" + }, + "64": { + "INITIAL_BOX": "Haha - tis true?", + "DEFAULT": "Haha - tis true!", + "DEFAULT_GREY_PAD": "Haha - tis true?", + "PADDED_4": "Haha - tis true!", + "PADDED_8": "Haha - tis true!", + "EXTRACTED_INIT_BOX": "Haha - this trade", + "PADDED_4_EXTRACTED": "Haha - this true", + "PADDED_8_EXTRACTED": "Nia! this trade?", + "PADDED_8_DILATION_1": "Ha ha - this true", + "PAD_8_FRACT_0_5": "Ha ha - his true", + "PAD_8_FRACT_0_2": "Ha ha - his true" + }, + "65": { + "INITIAL_BOX": "Comics a history", + "DEFAULT": "Comics a history", + "DEFAULT_GREY_PAD": "Comics a history", + "PADDED_4": "Comics a history", + "PADDED_8": "Comics a history", + "EXTRACTED_INIT_BOX": "Omic - a history", + "PADDED_4_EXTRACTED": "Omic - a history", + "PADDED_8_EXTRACTED": "Omic - a history", + "PADDED_8_DILATION_1": "Omic - a history", + "PAD_8_FRACT_0_5": "Omic - a history", + "PAD_8_FRACT_0_2": "Omic - a history" + }, + "66": { + "INITIAL_BOX": "Wonderful, sweetheart; wonnerful ... yer a natch'ril!", + "DEFAULT": "Wonderful, sweetheart, wonnerful... 
yer a natchril!", + "DEFAULT_GREY_PAD": "Wonderful, sweetheart, wonnerful ... yer a natch'ril!", + "PADDED_4": "Wonderful, sweetheart, wonnerful ... yer a natch'ril!", + "PADDED_8": "Wonnerful, sweetart, wonnerful ... yer a natch'ril!", + "EXTRACTED_INIT_BOX": "A wonderful, sweetheart, wonnerful ... yer a matchme!l?", + "PADDED_4_EXTRACTED": "Wonnerful, sweetstart, wonnerful ... yer a matchmi!?", + "PADDED_8_EXTRACTED": "Wonnerful, sweetart, wonnerful ... yera a matchmade!?", + "PADDED_8_DILATION_1": "Wonnerful, sweeetart, wonnerful ... yer a matchril!?", + "PAD_8_FRACT_0_5": "Wonnerful, sweetart, wonnerful ... yera matchmi!?", + "PAD_8_FRACT_0_2": "Wonnerful, sweetart, wonnerful ... yera matchril!?" + }, + "67": { + "INITIAL_BOX": "This time, though, let's lose the clothes, whattadoyasay? there's an extra fifty in it for ya!", + "DEFAULT": "This time, though, let's lose the clothes, whaddyasay? there's an extra fifty in it for ya!", + "DEFAULT_GREY_PAD": "This time, though, let's lose the clothes, whaddyasay? there's an extra fifty in it for ya!", + "PADDED_4": "This time, though, let's lose the clothes, whaddyasay? there's an extra fifty in it for ya!", + "PADDED_8": "This time, though, let's lose the clothes, whaddyasay? there 's an extra fifty in it for ya!", + "EXTRACTED_INIT_BOX": "This time through, let's lose the clothes, whattadamsay? there 's an extra fifty in it for ya!", + "PADDED_4_EXTRACTED": "This time though, let's lose the clothes, whatdidyasay? there's an extra fifty in it for ya!", + "PADDED_8_EXTRACTED": "This time through, let's lose the clothes, whatta say? there's an extra empty in it for ya!", + "PADDED_8_DILATION_1": "This time though, let's lose the clothes, whatdyasay? there's an extra fifty in it for ya!", + "PAD_8_FRACT_0_5": "This time though, let's lose the clothes, whatta say? there's an extra empty in it for ya!", + "PAD_8_FRACT_0_2": "This time though, let's lose the clothes, whattadayasay? there's an extra empty in it for ya!" 
+ }, + "68": { + "INITIAL_BOX": "Dude! hey, mr. score! check it out -- comics are now, like, a respected language, with an aesthetic grounding all their own?", + "DEFAULT": "Dude! hey, mr. score! check it out -- comics are now, like, a respected language, with an aesthetic grounding all their own?", + "DEFAULT_GREY_PAD": "Dude! hey, mr. score! check it out -- comics are now, like, a respected language, with an aesthetic grounding all their own?", + "PADDED_4": "Dude! hey, mr. score! check it out -- comics are now. like, a respected language, with an aesthetic grounding all their own?", + "PADDED_8": "Dude! hey, mr. score! check it out -- comics are now, like, a respected language, with an aesthetic grounding all their own?", + "EXTRACTED_INIT_BOX": "Dude! hey, mr. scope! check out - comics are now like, a respected language with an aesthetic grounding all their own?", + "PADDED_4_EXTRACTED": "Dude! hey, mr. scope! check tout - comics are now, like, a respected language, with an aesthetic groundflooring all their own?", + "PADDED_8_EXTRACTED": "Dude? hey, mr. scope! check tout - comics are now like a respected language, with an aesthetic groundiing all their own?", + "PADDED_8_DILATION_1": "Dude! hey, mr. score! check tout - comics are now, like, a respected language; with an aesthetic groundiing all their own?", + "PAD_8_FRACT_0_5": "Dude! hey, mr. score! check tout - comics are now, like, a respected language, with an aesthetic grounding all their own?", + "PAD_8_FRACT_0_2": "Dude! hey, mr. score! check tout - comics are now, like, a respected language, with an aesthetic grounding all their own?" 
+ }, + "69": { + "INITIAL_BOX": "Doomed am i to this life of no respect (illustrated books) and soon, i shall die.", + "DEFAULT": "Doomed am i to this life of no respect (illustrated books) and soon, i shall die.", + "DEFAULT_GREY_PAD": "Doomed am i to this life of no respect (illustrated books) and soon, i shall die.", + "PADDED_4": "Doomed am i to this life of no respect (illustrated books) and soon, i shall die.", + "PADDED_8": "Doomed am i to this life of no respect (illustrated books) and soon, i shall die.", + "EXTRACTED_INIT_BOX": "Doomed am i to this life of no respect (illustrated books) and soon, i shall die.", + "PADDED_4_EXTRACTED": "Doomed am i to this life of no respect (illustrated books) and soon, i shall die.", + "PADDED_8_EXTRACTED": "Doomed am i to this life of no respect (illustrated books) and soon, i shall die.", + "PADDED_8_DILATION_1": "Doomed am i to this life of no respect (illustrated books) and soon, i shall die.", + "PAD_8_FRACT_0_5": "Doomed am i to this life of no respect (illustrated books) and soon, i shall die.", + "PAD_8_FRACT_0_2": "Doomed am i to this life of no respect (illustrated books) and soon, i shall die." + }, + "70": { + "INITIAL_BOX": "Ach! oh well ... at least i have given something back to the motherland, however slight. '", + "DEFAULT": "Ach! oh well ... at least i have given something back to the motherland, however slight. \"", + "DEFAULT_GREY_PAD": "Ach! oh well ... at least i have given something back to the motherland, however slight.", + "PADDED_4": "Ach! 'oh well ... at least i have given something back to the motherland, however slight. \"", + "PADDED_8": "Ach! oh well ... at least i have given something back to the motherland, however slight.", + "EXTRACTED_INIT_BOX": "Oh! well ... at east i have given some hiring back to the northerland, however slight", + "PADDED_4_EXTRACTED": "Oh! oh well ... at east i have given something back to the motherland, however slightly", + "PADDED_8_EXTRACTED": "Ach! 
' on well ... at least i have given something back to the motherland, however slight", + "PADDED_8_DILATION_1": "Acha! oh well ... at least i have given some hing back to the motherland, however slight?", + "PAD_8_FRACT_0_5": "Ach! on well ... at least i have given some hing back to the motherland, however slight", + "PAD_8_FRACT_0_2": "Ach! oh well ... at least i have given some hing back to the motherland, however slight." + }, + "71": { + "INITIAL_BOX": "See? they address \"topics like the holocaust, spirituality, notions of identity, and sex! plus they win pulitzer prizes ... and harvey awards!", + "DEFAULT": "See? they address topics like the holocaust, spirituality, notions of identity, and sex! plus they win pulitzer prizes ...and harvey awards!", + "DEFAULT_GREY_PAD": "See? they address topics like the holocaust, spirituality, notions of identity, and sex! plus they win pulitzer prizes ...and harvey awards!", + "PADDED_4": "See? they address 'topics like the holocaust, spirituality, notions of identity, and sex! plus they win pulitzer prizes ... and harvey awards!", + "PADDED_8": "See? they address topics like the holocaust, spirituality, notions of identity, and sex! plus they win pulitzer prizes ...and harvey awards!", + "EXTRACTED_INIT_BOX": "See? they address topics like the holocaust, spirituality, motions of identity and sex! people win pulitzer prizes...and harvey awards?", + "PADDED_4_EXTRACTED": "Gee? they address topics like the holocaust, spirituality, notions of identity and sex; promotes \"they win pulitzer prizes ...and awards!?\"", + "PADDED_8_EXTRACTED": "Gee? they address topics like the holocaust, spirituality, notions of identity and sex! plus, they win pulitzer prizes...and marvey awards?", + "PADDED_8_DILATION_1": "Geeb? they address topics i like the holocaust, spirituality, notions of identity and sex! plus \" they won pulitzer prizes\" +\" annie awards! %", + "PAD_8_FRACT_0_5": "Oh? 
they address topics like the holocaust, spirituality, nations of identity and sex! plus, they win pulitzer prizes ...and marvey awards?", + "PAD_8_FRACT_0_2": "Oh? they address topics like the holocaust, spirituality, notions of identity, and sex! plus, they win pulitzer prizes ...and marvey awards?" + }, + "72": { + "INITIAL_BOX": "Don't ever bother me again! i 'm trying to get to the top level of my superman video game?", + "DEFAULT": "Don't ever bother me again! i 'm trying to get to the top level of my superman video game?", + "DEFAULT_GREY_PAD": "Don't ever bother me again! i 'm trying to get to the top level of my superman video game?", + "PADDED_4": "Don't ever bother me again! i 'm trying to get to the top level of my superman video game?", + "PADDED_8": "Don't ever bother me again! i 'm trying to get to the top level of my superman video game?", + "EXTRACTED_INIT_BOX": "Don't ever bother me again? i 'm trying to get to the top level of my superman video game!", + "PADDED_4_EXTRACTED": "Don't ever bother me again? i 'm trying to get to the top level of my superman video game.", + "PADDED_8_EXTRACTED": "Don't ever bother me again? i 'm trying to get to the top level of my superman video game.", + "PADDED_8_DILATION_1": "Don't ever bother me again! i 'm trying to get to the top level of my superman video game", + "PAD_8_FRACT_0_5": "Don't ever bother me again? i 'm trying to get to the top level of my superman video game", + "PAD_8_FRACT_0_2": "Don't ever bother me again? i 'm trying to get to the top level of my superman video game" + }, + "73": { + "INITIAL_BOX": "Yay! we won! we won?", + "DEFAULT": "Yay! we won? we won?", + "DEFAULT_GREY_PAD": "Yay! we won! we won?", + "PADDED_4": "Yay! we won! we won?", + "PADDED_8": "Yay! we won! we won?", + "EXTRACTED_INIT_BOX": "Yay! we won? wow?", + "PADDED_4_EXTRACTED": "Yay! we won! wwe how'm?", + "PADDED_8_EXTRACTED": "Yay! we won! wow?", + "PADDED_8_DILATION_1": "Yay! we won? we won?", + "PAD_8_FRACT_0_5": "Yay! 
we won! we won?", + "PAD_8_FRACT_0_2": "Yay! we won! we won?" + }, + "74": { + "INITIAL_BOX": "High?", + "DEFAULT": "High score!", + "DEFAULT_GREY_PAD": "Highlight", + "PADDED_4": "High score", + "PADDED_8": "High score", + "EXTRACTED_INIT_BOX": "Ss", + "PADDED_4_EXTRACTED": "S", + "PADDED_8_EXTRACTED": "Ss", + "PADDED_8_DILATION_1": "S", + "PAD_8_FRACT_0_5": "\"s\"", + "PAD_8_FRACT_0_2": "Ss" + }, + "75": { + "INITIAL_BOX": "Man, i guess i'll just draw another strip about dope...", + "DEFAULT": "Man, i guess i'll just draw another strip about dope...", + "DEFAULT_GREY_PAD": "Man, i guess i'll just draw another strip about dope...", + "PADDED_4": "Man, i guess i'll just draw another strip about dope...", + "PADDED_8": "Man, i guess i'll just draw another strip about dope...", + "EXTRACTED_INIT_BOX": "Main, i guess it just drew another strip about dope", + "PADDED_4_EXTRACTED": "Man, i guess i'm just drunk another strip about dope", + "PADDED_8_EXTRACTED": "Man, i guess i just didn't another strip about dope", + "PADDED_8_DILATION_1": "Man, i guess i'll just draw another strip about dope", + "PAD_8_FRACT_0_5": "Man, i guess i just draw another strip about dope", + "PAD_8_FRACT_0_2": "Man, i guess i'll just draw another strip about dope" + }, + "76": { + "INITIAL_BOX": "Scribbly", + "DEFAULT": "Scribbly", + "DEFAULT_GREY_PAD": "Scribbly", + "PADDED_4": "Scribbly", + "PADDED_8": "Scribbly", + "EXTRACTED_INIT_BOX": "C bbb", + "PADDED_4_EXTRACTED": "C bbb!", + "PADDED_8_EXTRACTED": "C bbb!", + "PADDED_8_DILATION_1": "C bbl", + "PAD_8_FRACT_0_5": "C bbb", + "PAD_8_FRACT_0_2": "C bbl" + }, + "77": { + "INITIAL_BOX": "Saddy", + "DEFAULT": "Saddy", + "DEFAULT_GREY_PAD": "Saddy", + "PADDED_4": "Saddy", + "PADDED_8": "Saddy", + "EXTRACTED_INIT_BOX": "Sir?", + "PADDED_4_EXTRACTED": "Sadd", + "PADDED_8_EXTRACTED": "S \"dd", + "PADDED_8_DILATION_1": "Sadd", + "PAD_8_FRACT_0_5": "S rd", + "PAD_8_FRACT_0_2": "S \"" + }, + "78": { + "INITIAL_BOX": "Sir?", + "DEFAULT": "Sir?", + 
"DEFAULT_GREY_PAD": "Sir?", + "PADDED_4": "Sir?", + "PADDED_8": "Sir?", + "EXTRACTED_INIT_BOX": "I'm sorry?", + "PADDED_4_EXTRACTED": "\"?", + "PADDED_8_EXTRACTED": "What?", + "PADDED_8_DILATION_1": "R?", + "PAD_8_FRACT_0_5": "\"?", + "PAD_8_FRACT_0_2": "\"" + }, + "79": { + "INITIAL_BOX": "\"mom, i want to be a cartoonist.\".", + "DEFAULT": "Mom, i want to be a cartoonist.", + "DEFAULT_GREY_PAD": "Mom, i want to be a cartoonist.", + "PADDED_4": "Mom, i want to be a cartoonist.", + "PADDED_8": "Mom, i want to be a cartoonist.", + "EXTRACTED_INIT_BOX": "To want to now! how?", + "PADDED_4_EXTRACTED": "\" mom! i want to be a cartoonist!", + "PADDED_8_EXTRACTED": "\" mom! i want to be a cartoonist! \"", + "PADDED_8_DILATION_1": "\" mom, i want to be a cartoonist! \"", + "PAD_8_FRACT_0_5": "\" mom, i want to be a cartoonist! \"", + "PAD_8_FRACT_0_2": "\" mom, i want to be a cartoonist! \"" + }, + "80": { + "INITIAL_BOX": "Sir?", + "DEFAULT": "Sir?", + "DEFAULT_GREY_PAD": "Sir?", + "PADDED_4": "Sir?", + "PADDED_8": "Sir?", + "EXTRACTED_INIT_BOX": "Sir?", + "PADDED_4_EXTRACTED": "Sir?", + "PADDED_8_EXTRACTED": "Sir?", + "PADDED_8_DILATION_1": "Sir?", + "PAD_8_FRACT_0_5": "Sir?", + "PAD_8_FRACT_0_2": "Sir?" + }, + "81": { + "INITIAL_BOX": "Sigh *", + "DEFAULT": "Sigh *", + "DEFAULT_GREY_PAD": "Sigh *", + "PADDED_4": "Sigh *", + "PADDED_8": "Sigh *", + "EXTRACTED_INIT_BOX": "Slight.", + "PADDED_4_EXTRACTED": "Sigh!", + "PADDED_8_EXTRACTED": "\" sigh!", + "PADDED_8_DILATION_1": "Sigh*", + "PAD_8_FRACT_0_5": "Slight!", + "PAD_8_FRACT_0_2": "Sigh!" 
+ } + }, + "ronson-031.jpg": { + "1": { + "DEFAULT_GREY_PAD": "Junto al abreviado crecia el vigoroso trono de una gran parra, que trepaba habilmente la tierra...", + "INITIAL_BOX": "Junto al abreviado crecia el vigoroso trono de una gran parra, que trepaba habilmente la tapia ...", + "DEFAULT": "Junto al abreviado crecia el vigoroso trono de una gran parra, que trepaba hasta la cima...", + "PADDED_4": "Junto al abreviado crecia el vigoroso tronco de una gran parra, que trepaba h\u00e1blmente la tapia ...", + "PADDED_8": "Junto al abreviado crecia el vigoroso trono de una gran parra, que trepaba habilmente la tierra ...", + "EXTRACTED_INIT_BOX": "Junto al aprendaredo crec\u00eda e vigroso tronco de una gran parra, que trepar\u00e1 habiliente la tapa...", + "PADDED_4_EXTRACTED": "Junto al abreviador cre\u00eda el vigrosos tronco de una gran parra, que trepar\u00e1 habilmente la tapa...", + "PADDED_8_EXTRACTED": "Junto al abreviador cre\u00eda el vigiloso trono de una gran parra, que trepara habilmente - la tierra ...", + "PADDED_8_DILATION_1": "Junt\u00f3 al abreviado crec\u00eda el vigoroso tronco de una gran parra, que trepaba haciendo la tierra....", + "PAD_8_FRACT_0_5": "Junto al abreviado crecia el vigoroso trono de una gran parra, que trepaba haciendo la tierra ...", + "PAD_8_FRACT_0_2": "Junto al abreviado crecia el vigrosos tronco de una gran parra, que trepaba haciendo la tierra...." + }, + "0": { + "DEFAULT_GREY_PAD": "En una visita reciente a la casa despues de muchos d\u00e9cadas, me sorprend\u00ed al ver el abevadero. lo recordaba mucho m\u00e1s alto.", + "INITIAL_BOX": "En una visita reciente a la casa despues de muchos decadas, me sorprend\u00ed al ver el abrevadero. lo recordaba mucho m\u00e1s alto.", + "DEFAULT": "En una visita reciente a la casa despu\u00e9s de muchos a\u00f1os, me sorprend\u00ed al ver el abrevadero. 
lo recordaba mucho m\u00e1s alto.", + "PADDED_4": "En una visita reciente a la casa despu\u00e9s de muchos a\u00f1os, me sorprend\u00ed al ver el abrevadero. lo recordaba mucho m\u00e1s alto.", + "PADDED_8": "En una visita reciente a la casa despues de muchos d\u00e9cades, me sorprend\u00ed al ver el abuceradero. lo recordaba mucho m\u00e1s alto.", + "EXTRACTED_INIT_BOX": "En una visita reciente a la casa despues de muchos d\u00e9cadas, me sorprend\u00ed al ver el abrevadero. lo regarda mucho m\u00e1s alto.", + "PADDED_4_EXTRACTED": "En una visita reciente a la casa despu\u00e9s de muchas d\u00e9cadas, me sorprend\u00ed al ver el abrevadero. lo recordaba mucho m\u00e1s alto.", + "PADDED_8_EXTRACTED": "En una visita reciente a la casa despu\u00e9s de muchos a\u00f1os, me sorprend\u00ed al ver el airvadero. lo recordaba mucho m\u00e1s alto.", + "PADDED_8_DILATION_1": "En una visita reciente a la casa despu\u00e9s de muchos a\u00f1os, me sorprend\u00ed al ver el abreviado. lo recordaba mucho m\u00e1s alto.", + "PAD_8_FRACT_0_5": "En una visita reciente a la casa despu\u00e9s de muchos a\u00f1os, me sorprend\u00ed al ver el abrevadero. lo recordaba mucho m\u00e1s alto.", + "PAD_8_FRACT_0_2": "En una visita reciente a la casa despu\u00e9s de muchos a\u00f1os, me sorprend\u00ed al ver el libro recordado mucho m\u00e1s alto." 
+ }, + "2": { + "DEFAULT_GREY_PAD": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un ni\u00f1o.", + "INITIAL_BOX": "Supongo que este recurrido tramposo revela la escala que tiene el mundo cuando eres un ni\u00f1o.", + "DEFAULT": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un ni\u00f1o.", + "PADDED_4": "Supongo que este recurrido tramposo revela la escala que tiene el mundo cuando eres un ni\u00f1o.", + "PADDED_8": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un ni\u00f1o.", + "EXTRACTED_INIT_BOX": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un ni\u00f1o.", + "PADDED_4_EXTRACTED": "Supongo que este recuperado tramposo revela la escala que tiene el mundo cuando eres un ni\u00f1o.", + "PADDED_8_EXTRACTED": "Supongo que este recurrido tramposo revela la escuela que tiene el mundo cuando eres un ni\u00f1o.", + "PADDED_8_DILATION_1": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un ni\u00f1o.", + "PAD_8_FRACT_0_5": "Supongo que este recurrido tramposo revela la escala que tiene el mundo cuando eres un ni\u00f1o.", + "PAD_8_FRACT_0_2": "Supongo que este recurrido tramposo revela la escala que tiene el mundo cuando eres un ni\u00f1o." + }, + "3": { + "DEFAULT_GREY_PAD": "Y se extienda hasta el cobertizo de entrada. 
hacia principio del verano lucia sus exuberantes hojas y comenzaba ya a florecer.", + "INITIAL_BOX": "Y se extienda hasta el cobertizo de entrada, haciendo pricipio del verano lucia sus exuberantes flores y comenzaba ya a florecer.", + "DEFAULT": "Y se extienda hasta el cobertizo de entrada, haciendo pruebo del verano lucia sus exuberantes flores y comenza ya a florecer.", + "PADDED_4": "Y se extienda hasta el cobertizo de entrada, haciendo pricipio del verano lucia sus exuberantes flores y comenzaba ya a florecer.", + "PADDED_8": "Y se extendia hasta el cobertizo de entreda. hacia principio del verano lucio sus exuberantes hogars y comenzaba a florecer.", + "EXTRACTED_INIT_BOX": "Y se extienda hasta el cobertizo de entrada. hacia principio del verano lucia sus exuberantes huertos y comenzaba ya a florecer.", + "PADDED_4_EXTRACTED": "Y se extendia hasta el cobertizo de entreda. hacian principio del verano lucila sus exuberantes hores; yo mezenba a ya florecer.", + "PADDED_8_EXTRACTED": "Y se extiendia hasta el cobertizo de entrada. hacia principio del verano lucia sus exuberantes hojas y flores; ya a florecer.", + "PADDED_8_DILATION_1": "Y se extiendia hasta el cobertizo de entrada. hacia principio del verano lucia sus exuberantes hojas y flores; ya a florcer.", + "PAD_8_FRACT_0_5": "Y se extiendia hasta el cobertizo de entrada. hacia principio del verano lucia sus exuberantes hojas y flores; pero al florecer.", + "PAD_8_FRACT_0_2": "Y se extendia hasta el cobertizo de entrada. hacia principio del verano lucia sus exuberantes hores y comenzaba ya a florecer." + }, + "4": { + "DEFAULT_GREY_PAD": "Era entones cuando daba comienzo la cosecha del cereal. la jornada se iniciaba tan pronto despuantaba el dia y se extendia de sol a sol.", + "INITIAL_BOX": "Era entones cuando daba comienzo la cosecha del cereal. la jornada se iniciaba tan pronto despuantela dias y se extendia de sol a sol.", + "DEFAULT": "Era entones cuando daba comienzo la cosecha del cereal. 
la jornada se iniciaba tan pronto despuantaba el dia y se extendia de sol a sol.", + "PADDED_4": "Era entones cuando daba comienzo la cosecha del cereal. la jornada se iniciaba tan pronto despuantela diya y se extendia de sol a sol.", + "PADDED_8": "Era entones cuando daba comienzo la cosecha del cereal. la jornada se iniciaba tan pronto despuntaba el dia y se extendia de sol a sol.", + "EXTRACTED_INIT_BOX": "Era entones cuando daba comenzola la cosecha del cereal. la jornada se iniciaba tan pronto despuntaba el dia y se extendia de sol a sol.", + "PADDED_4_EXTRACTED": "Era entones cuando daba comenzola la cosecha del cereal. la jornada se iniciaba tan pronto despuntaba el dia y se extendia de sol a sol.", + "PADDED_8_EXTRACTED": "Era entones cuando daba comenz\u00f3 la cosecha del cereal. la jornada se iniciaba tan pronto despuntaba el dia y se extend\u00eda de sol a sol.", + "PADDED_8_DILATION_1": "Era entones cuando daba comienzo la cosecha del cereal. la jornada se iniciaba tan pronto despuantaba el dia y se extendia de sol a sol.", + "PAD_8_FRACT_0_5": "Era entones cuando daba comenzola la cosecha del cereal. la jornada se iniciaba tan pronto despuntaba el dia y se extendia de sol a sol.", + "PAD_8_FRACT_0_2": "Era entones cuando daba comenzola la cosecha del cereal..la jornada se iniciaba tan pronto despuntaba el dia y se extendia de sol a sol." 
+ }, + "5": { + "DEFAULT_GREY_PAD": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre miles de dorados; hoz y zocueta en risite, con la \u00fanica compa\u00f1\u00eda de las chicharras.", + "INITIAL_BOX": "Requerdo a aquellos segadores arriendose paso entre meses doradas, hoy y zocueta en este ristra; con una compa\u00f1ia de las chicharras.", + "DEFAULT": "Requerdo a aquellos segadores abri\u00e9ndose paso entre sus doradas hoy y zocueta en este ristre; con una compa\u00f1\u00eda de las chicharras.", + "PADDED_4": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre miles de dorados, hoz y zocueta en este ristre, con una compa\u00f1\u00eda de las chicharras.", + "PADDED_8": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre miles de dorados; hoy y zqueuta en este ristre, con la \u00fanica compa\u00f1\u00eda de las chicharras.", + "EXTRACTED_INIT_BOX": "Requerdo a aquellos segadores arrendosose paso entere meses doradas, hoz y zooqeta en pistre; con la unica compania de las chicarras.", + "PADDED_4_EXTRACTED": "Requerido a aquellos segadores arrendose paso entre sus doradas, hoy zouqueta en este rincon con una compania de las chicharas.", + "PADDED_8_EXTRACTED": "Recuerdo a aquellos segadores arriendosas paso entere mesas doradas; hot zooqeta en rigtine; con una unica compania de las chicarras.", + "PADDED_8_DILATION_1": "Recuerdo a aquellos segadores arriendoso paso entire mes doradas; hoz y zooqeta en risting, con la unica compania de las chicarras.", + "PAD_8_FRACT_0_5": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre miles de dorados, hoy zozquetas en pie, con la \u00fanica compa\u00f1\u00eda de las chicharras.", + "PAD_8_FRACT_0_2": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre miles de dorados; hoy y zouketa en este rinche, con la \u00fanica compa\u00f1\u00eda de las chicharas." 
+ } + } + } + } +} \ No newline at end of file diff --git a/pcleaner/_testbed/experiment/Tesseract.json b/pcleaner/_testbed/experiment/Tesseract.json new file mode 100644 index 00000000..b713fc0b --- /dev/null +++ b/pcleaner/_testbed/experiment/Tesseract.json @@ -0,0 +1,4062 @@ +{ + "ocr_model": "Tesseract", + "runs": { + "Tesseract-crop-post": { + "Strange_Tales_172005.jpg": { + "0": { + "INITIAL_BOX": "Eneonered by great gnarled cypress jrfes, the ancient manor stands alone on the outski) 2 of mew ce eans, kept tidy by a white-haired old man known only as bambs, 3", + "DEFAULT": "Eneowered by great gnarled cypress jrfes, the ancient manor ! alone on the eit of mew rce: eans, kept tipy by a white-haired ao han known only as", + "DEFAULT_GREY_PAD": "Enbowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a white-haired old man known only as", + "PADDED_4": "Enbonered by great gnarled cypress jrfes, the ancient manor stands alone on the eit of mew rce: eans, kept tipy by a white-haired ao lo man known only as", + "PADDED_8": "Enbonered by great gnarled cypress trees, the ancient manor stands alone on the pag hile of new orleans, kept tipy by a white-haired ao lo man known omy as", + "EXTRACTED_INIT_BOX": "Fhbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans. 
kept tipy by a whi te-haire old man known only as bambi] .", + "PADDED_4_EXTRACTED": "Enbonsred by great shale cypress trees, the anci manor stands alone on the [tskirts of new orleans, kept tidy by a whi te- haired old man known only as b8ambl .", + "PADDED_8_EXTRACTED": "Enbonered by great snarled cypress trees, the ancient nmanor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as 8ambli .", + "PADDED_8_DILATION_1": "Outskirts of new orleans, kept tipy by a white-haired old man known only as sams .", + "PAD_8_FRACT_0_5": "Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as b8ambl .", + "PAD_8_FRACT_0_2": "Enbonered by great snarled cypress trees, the ancient manor stands alone on the outskirts of new orleans, kept tipy by a whi te-haired old man known only as b8ambl ." + }, + "1": { + "INITIAL_BOX": "The house and the old man are alike in many ways; tall, prolid, patient, contented always 0 wait until. their. master cones mome ~~", + "DEFAULT": "The house and the old man are alike in many ways; tall, proud, patient, conten tel always to wait until their. master cones home ~~", + "DEFAULT_GREY_PAD": "The house and the old man are alike in many ways; tall, prolid, patient, contented always to wait until their. * master cones home ~-", + "PADDED_4": "The house and the oldman are alike in many ways; tall, proud, patient, contented a ways 0 wait until their. aster comes home ~~ | }", + "PADDED_8": "The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their. master comes home", + "EXTRACTED_INIT_BOX": "Ee house and the old man por alike in many ways; tall, proud, patient, contented always t\u00b0 wait until their. 
master cones home ~~", + "PADDED_4_EXTRACTED": "The house and the old man are alike in many ways; tall, proud, patient, contented always t\u00b0 wait until their. master cones home --", + "PADDED_8_EXTRACTED": "The house and the old man are alike in many ways; tall, proud, patient, contented always t\u00b0 wait until their. master comes home --", + "PADDED_8_DILATION_1": "The house and the old man are alike in many ways, tall, proud, nt, contented live walt gtie their, master comes home -=", + "PAD_8_FRACT_0_5": "The house and the old man are alike in many ways; tall, proud, patient, contented always t\u00b0 wait until. their. master comes home ~~", + "PAD_8_FRACT_0_2": "The house and the old man are alike in many ways; tall, proud, patient, contented always t\u00b0 wait until their. master comes home ~~" + }, + "2": { + "INITIAL_BOX": "\u201cand one in ee would appear.", + "DEFAULT": "And one in ee would appear.", + "DEFAULT_GREY_PAD": "\u201cand one in > need of some help, it would appear .", + "PADDED_4": "F and one in ee would appear.", + "PADDED_8": "7 and one in ee would appear,", + "EXTRACTED_INIT_BOX": "And one in fee would appear.", + "PADDED_4_EXTRACTED": "And one in fee would appear.", + "PADDED_8_EXTRACTED": "And one in fee wolld appear.", + "PADDED_8_DILATION_1": "And one in need of some help, it wolld appear .", + "PAD_8_FRACT_0_5": "And one in eee pe would appear.", + "PAD_8_FRACT_0_2": "And one in eet sve would appear." 
+ }, + "3": { + "INITIAL_BOX": "Re bambli-~ we have a", + "DEFAULT": "Rir guest.", + "DEFAULT_GREY_PAD": "\" bambl-- we have a guest.", + "PADDED_4": "\" bambli-\u2014 we have a gliest.", + "PADDED_8": "Zf mbl == we have a guest.", + "EXTRACTED_INIT_BOX": "\u2014 we have a i=s7t.", + "PADDED_4_EXTRACTED": "Bambli we have a gliest.", + "PADDED_8_EXTRACTED": "Bambl ~~ we have a guest.", + "PADDED_8_DILATION_1": "Bambli ~~ we have a gliest.", + "PAD_8_FRACT_0_5": "Bambl ~~ we have a guest.", + "PAD_8_FRACT_0_2": "Bambl ~~ we have a guest." + }, + "4": { + "INITIAL_BOX": "Tonight, he comes noost slamming open the caken", + "DEFAULT": "=~and tonight, he comes host sane oo", + "DEFAULT_GREY_PAD": "~~and tonight, he comes urgently, slanming open", + "PADDED_4": "P comes slamming open the caken", + "PADDED_8": "Tonight, he comes host slamming open", + "EXTRACTED_INIT_BOX": "~and tonight, he comes urgently, slamming open", + "PADDED_4_EXTRACTED": "=~and tonight, he comes urgently, slamming open", + "PADDED_8_EXTRACTED": "=~and tonight, he comes slamming open urgently,", + "PADDED_8_DILATION_1": "=and tonight, he comes most slamming open the front", + "PAD_8_FRACT_0_5": "=~and tonight, he comes slamming open urgently,", + "PAD_8_FRACT_0_2": "=~and tonight, he comes slamming open urgently," + }, + "5": { + "INITIAL_BOX": "Tell me naster. how may bambli serve 7", + "DEFAULT": "Tell me masts - how may bambli . serve 7 _", + "DEFAULT_GREY_PAD": "Tell me, master: how may bambli serve 7", + "PADDED_4": "Tel oe er-- 5 ow a = 7", + "PADDED_8": "Yy i tell me master - how may bambli serve 7 _,", + "EXTRACTED_INIT_BOX": "Tell me master how may bambli serve 7", + "PADDED_4_EXTRACTED": "Tell me master. how may bambli serve 7", + "PADDED_8_EXTRACTED": "Tell me master... how may bambli serve't", + "PADDED_8_DILATION_1": "Tell me, master: how may bambli serve 7", + "PAD_8_FRACT_0_5": "Tell me master... how may bambli serve'7", + "PAD_8_FRACT_0_2": "Tell me master... 
how may bambli serve'7" + }, + "6": { + "INITIAL_BOX": "\u00a37 \u00bb and perhaps some dry clothes... 7 /", + "DEFAULT": "R warm, bambli-~ and perhaps", + "DEFAULT_GREY_PAD": "Some blankets to keep her. warm, bambli-- and perhaps some dry \\ clothes--7 /.", + "PADDED_4": "We and perhaps c oe /", + "PADDED_8": "Some. blankets to keep he arm , bambli-= and perhaps some dry clothes. 2s", + "EXTRACTED_INIT_BOX": "Some blankets to keep her warm, banbli-- and perhaps. some dry clothes", + "PADDED_4_EXTRACTED": "Warm, bambli-- and perhaps. som", + "PADDED_8_EXTRACTED": "Some blankets to keep her warm, banbli-~ and perhaps. some dry clothes", + "PADDED_8_DILATION_1": "Gone blankets to keep her. warm, bambli-~ and perhaps some dry", + "PAD_8_FRACT_0_5": "Some blankets to keep her. warm, bambli-~ and perhaps. some dry clothes", + "PAD_8_FRACT_0_2": "Some blankets to keep her, warm, bambli-~ and perhaps. some dry clothes" + }, + "7": { + "INITIAL_BOX": "The the old man's fades down the hall as... 7", + "DEFAULT": "The the old mans fades down the hall s.00", + "DEFAULT_GREY_PAD": "As.", + "PADDED_4": "The the old mans fades down the hall sra", + "PADDED_8": "The the old mans fades down the hal. srl see", + "EXTRACTED_INIT_BOX": "The the old man's fades down the hall s.,00", + "PADDED_4_EXTRACTED": "The the old mans fades down the hall sra", + "PADDED_8_EXTRACTED": "The the old mans fades down the hall sire", + "PADDED_8_DILATION_1": "The old man's footsteps the hall as.re", + "PAD_8_FRACT_0_5": "The the old mans fades donn the hall sire", + "PAD_8_FRACT_0_2": "The the old mans fades down the hall sere" + }, + "8": { + "INITIAL_BOX": "How curious the 4 fate. whims of had t not chanced to stroll along the river yl tonight ==", + "DEFAULT": "How curious the a whims of fate . had t not chanced to stroll along the river tonight~~ >", + "DEFAULT_GREY_PAD": "How curious the d 7e . 
whims of fa; had i not chanced to stroll along the river tonight--", + "PADDED_4": "How curious the a whims of fate . - had i not chanced to stroll along the river yl tonight~-", + "PADDED_8": "* how curious the p whims of fate . - had i not chanced . to stroll along _ the river 3 tonight-~", + "EXTRACTED_INIT_BOX": "Hin ef fare\u201d had i not chanced to stroll along the river. tonigmt=~", + "PADDED_4_EXTRACTED": "We sea had i not chanced to stroll along the river. tonight-~", + "PADDED_8_EXTRACTED": "How curious the whims of fate . had i not chanced to stroll along the river tonight-~", + "PADDED_8_DILATION_1": "How curious the whims of fate . had i not chanced to stroll along the r/| ver tonight-~", + "PAD_8_FRACT_0_5": "How curious the whims of fate . had i not chanced to stroll along the river tonight-~", + "PAD_8_FRACT_0_2": "How curious the whims of fate . had i not chanced to stroll along the river tonight-~" + }, + "9": { + "INITIAL_BOX": "Fas oulckly as t ca, master.", + "DEFAULT": "Aulckly \u201cmaster as t can,", + "DEFAULT_GREY_PAD": "Ickl as t can", + "PADDED_4": "Aulckly as t can, \u2018masrer.", + "PADDED_8": "Tiie as t can, \\ master ,", + "EXTRACTED_INIT_BOX": "Aulckly as t can, master,", + "PADDED_4_EXTRACTED": "Alley as t can, master,", + "PADDED_8_EXTRACTED": "Allckry as t can, master.", + "PADDED_8_DILATION_1": "Aulckly as t can,", + "PAD_8_FRACT_0_5": "Aulckry as t can, master.", + "PAD_8_FRACT_0_2": "Aulckry as t can, master." + }, + "10": { + "INITIAL_BOX": "gasp!z everything's w- whirling around me!t can't stand sr rea", + "DEFAULT": "Gasp! everything's w- whi rns atound me!i can't stand we a", + "DEFAULT_GREY_PAD": "5gasp!z everything's | w- whirling around] me!t can't stand ue a", + "PADDED_4": "Fgasp/= everything's whirling around me!i can't stand ea", + "PADDED_8": "Ega5p/% everything s\\ w- whirling around me! t can't stand ue... 
slee", + "EXTRACTED_INIT_BOX": "Fgasp!z everything \u00a7 w- whirling around me!i can't stand ue.", + "PADDED_4_EXTRACTED": "Sgasp/z everything s w- whirling around me!i can't stand ue.", + "PADDED_8_EXTRACTED": "Sgasp/z everything s w- whirling around me!i can't stand up.", + "PADDED_8_DILATION_1": "Gasp! everything s w-whirling around wel] i can't stand", + "PAD_8_FRACT_0_5": "Gasp/z everything s w- whirling around me!i can't stand up.", + "PAD_8_FRACT_0_2": "Gasp/z everything s w- whirling around me!i can't stand up." + }, + "2": { + "INITIAL_BOX": "Clark!i'm falling' \u201chelp! help! \u2014", + "DEFAULT": "Clark!i'm falling! help! help", + "DEFAULT_GREY_PAD": "Ark!i'm falling", + "PADDED_4": "Ie lottie eto clark!i'm falling help! help!", + "PADDED_8": "\\ help! help\u2019", + "EXTRACTED_INIT_BOX": "Clark!i'm falling! help! help!", + "PADDED_4_EXTRACTED": "Clark! i'm falling\u2019 help! help!", + "PADDED_8_EXTRACTED": "Clark! i'm falling\u2019 _ help! help!", + "PADDED_8_DILATION_1": "Clark!i'm falling! . help! help!", + "PAD_8_FRACT_0_5": "Clark! i'm falling\u2019 _ help! help!", + "PAD_8_FRACT_0_2": "Clark! i'm falling\u2019 _ help! help!" + }, + "3": { + "INITIAL_BOX": "I-i'm bs \"passing ohhh", + "DEFAULT": "I-i'm se ou", + "DEFAULT_GREY_PAD": "I-t'm eq \"passing out... ohhh.", + "PADDED_4": "I-i'm yr a ohh hh.", + "PADDED_8": "-i'm passing qut... ohh hh.", + "EXTRACTED_INIT_BOX": "I-i'm passing ohhh?", + "PADDED_4_EXTRACTED": "I-i'm passing ohhhh.", + "PADDED_8_EXTRACTED": "I-i'm passing ohhha.", + "PADDED_8_DILATION_1": "I-i'm passing ohhha.", + "PAD_8_FRACT_0_5": "I-i'm passing ohhhh.", + "PAD_8_FRACT_0_2": "I-i'm passing ohhha." 
+ }, + "4": { + "INITIAL_BOX": "Action comics", + "DEFAULT": "Action comics", + "DEFAULT_GREY_PAD": "Action comics", + "PADDED_4": "Action comics", + "PADDED_8": "Action comics", + "EXTRACTED_INIT_BOX": "Action comics", + "PADDED_4_EXTRACTED": "Action comics", + "PADDED_8_EXTRACTED": "Action comics", + "PADDED_8_DILATION_1": "Action comics", + "PAD_8_FRACT_0_5": "Action comics", + "PAD_8_FRACT_0_2": "Action comics" + }, + "5": { + "INITIAL_BOX": "Then, seconds later...", + "DEFAULT": "Then, seconds later.", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Then, seconds", + "PADDED_8": "", + "EXTRACTED_INIT_BOX": "Then, seconds later..", + "PADDED_4_EXTRACTED": "Then, seconds later.", + "PADDED_8_EXTRACTED": "Then, seconds later.", + "PADDED_8_DILATION_1": "Then, seconds later.", + "PAD_8_FRACT_0_5": "Then, seconds later.", + "PAD_8_FRACT_0_2": "Then, seconds later." + }, + "6": { + "INITIAL_BOX": "Great caesars ghost! { this /s black magic! we've been transportel to the weirdest world, tit ever saw!", + "DEFAULT": "Great caesar's ghost! this /s black magic! we've been transported, to the weirdest world, i ever saw!", + "DEFAULT_GREY_PAD": "\u2018great caesar's ghost! || this /\u00a7 black magic! we've been transportel to the weirdest world, i ever saw.", + "PADDED_4": "Great caesar's ghost! \\ this /\u00a7 black magic! i ever saw!", + "PADDED_8": "I ever saw/", + "EXTRACTED_INIT_BOX": "Great caesar's ghost! this /s black magic! we've been transportel to the weirdest world tit ever saw.", + "PADDED_4_EXTRACTED": "Great caesar's ghost! this as black magic! we've been transported to the weirdest world i ever saw!", + "PADDED_8_EXTRACTED": "Great caesar's ghost! f this /\u00a7 black magic! : we've been transported to the weirdest world i ever saw.", + "PADDED_8_DILATION_1": "Great caesar's ghost! e this /5 black magic! we've been transported to the weirdest world i ever saw/", + "PAD_8_FRACT_0_5": "Great caesar's ghost! f this /\u00a7 black magic! 
: we've been transported to the weirdest world i ever saw.", + "PAD_8_FRACT_0_2": "Great caesar's ghost! f this /\u00a7 black magic! : we've been transported to the weirdest world i ever saw!" + }, + "7": { + "INITIAL_BOX": "...|t certainly isn't our earth, perry. look at the size of those bees.", + "DEFAULT": "...|t certainly isn't our earth, perry look at the size of those bees!", + "DEFAULT_GREY_PAD": "\"...it certainly isn't our | earth, perry look at the| \\s|ze of those bees.", + "PADDED_4": "T certainly isn't our earth, perry! look at the \\s|ze of those bees.", + "PADDED_8": "It certainly isn't our earth, perry! poor a the size of thos!", + "EXTRACTED_INIT_BOX": "...it certainly isnt our earth, perry. look at the size of those bees.", + "PADDED_4_EXTRACTED": "...it certainly isnt our earth, perry. look at the size of those bees.", + "PADDED_8_EXTRACTED": "...it certainly isnt our earth, perry. look at the size of those bees.", + "PADDED_8_DILATION_1": "...|it certainly isnt our earth, perry\u201d look at the size of those bees!", + "PAD_8_FRACT_0_5": "...it certainly isnt our earth, perry! look at the size of those bees.", + "PAD_8_FRACT_0_2": "...it certainly isnt our earth, perry! look at the size of those bees." + }, + "8": { + "INITIAL_BOX": "Watch out, clark!", + "DEFAULT": "Watch out, clark)", + "DEFAULT_GREY_PAD": "Watch out, clark!", + "PADDED_4": "#3 watch out, clark!", + "PADDED_8": "\u201cwatch out\u201d ial ae clark!", + "EXTRACTED_INIT_BOX": "Watch out, clark!", + "PADDED_4_EXTRACTED": "Watch out, clark!", + "PADDED_8_EXTRACTED": "Watch out, clark!", + "PADDED_8_DILATION_1": "Watch out, clark!", + "PAD_8_FRACT_0_5": "Watch out, clark!", + "PAD_8_FRACT_0_2": "Watch out, clark!" 
+ }, + "9": { + "INITIAL_BOX": "Owwww.", + "DEFAULT": "Owwww.,", + "DEFAULT_GREY_PAD": "Owwww.", + "PADDED_4": "Owwww.", + "PADDED_8": "Owwww.", + "EXTRACTED_INIT_BOX": "Oowwwww.", + "PADDED_4_EXTRACTED": "Owwww.,", + "PADDED_8_EXTRACTED": "Owwww.,", + "PADDED_8_DILATION_1": "Owwww.", + "PAD_8_FRACT_0_5": "Owwww.,", + "PAD_8_FRACT_0_2": "Owwww.," + }, + "10": { + "INITIAL_BOX": "Yet the bee's stinger went ws. right through my uniform and penetrated my skin! that means. the fabric of pe ay costume has become dj} satie fued 1,", + "DEFAULT": "Yet the bee's stinger went = right through my uniform ando penetrated my skin! that mean the fabric of my superman costume has become ordinary, cloth! \u00a3", + "DEFAULT_GREY_PAD": "Yet the bee's stinger went ~ right through my uniform and penetrated my skin! that means. the fabric of sera ry costume vis become din a s clots! a", + "PADDED_4": "\"yet the bee's stinger went ~~ right through my uniform and enetrated my skin! that means. the fabric of my superman costume has become ordinary, clot! &", + "PADDED_8": "Yet the bee's stinger went right through my uniform and \\penetrated my skin! that means. the fabric of my superman costume has become ordinar! cloth!", + "EXTRACTED_INIT_BOX": "Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!", + "PADDED_4_EXTRACTED": "Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!", + "PADDED_8_EXTRACTED": "Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!", + "PADDED_8_DILATION_1": "Yet the bee's stinger we right through my tform and penetrated my skin! that means. 
the fabric of my superman othe has become ordinary clot?", + "PAD_8_FRACT_0_5": "Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!", + "PAD_8_FRACT_0_2": "Yet the bee's stinger went right through my uniform and penetrated my skin! that means. the fabric of my superman costume has become ordinary cloth!" + }, + "11": { + "INITIAL_BOX": "Hurry. let's beat it before we get stung, foo = aii", + "DEFAULT": "Hurry. let's beat it before we get stung, tod yi", + "DEFAULT_GREY_PAD": "Hurry. let's ) beat it before] we get stung, k 00! __j", + "PADDED_4": "Hurry! let's beat it b=fore we get stung, \u00a2 tool: puma", + "PADDED_8": "Hurry. let's beat it before we get stung, too!", + "EXTRACTED_INIT_BOX": "Hurry. let's beat it before we get stung, too!", + "PADDED_4_EXTRACTED": "Hurry! let's beat it before we get stung, too!", + "PADDED_8_EXTRACTED": "Hurry! let's beat it before we get stung, too!", + "PADDED_8_DILATION_1": "Hurry! let's beat it before we get stung, too!", + "PAD_8_FRACT_0_5": "Hurry! let's beat it before we get stung, too!", + "PAD_8_FRACT_0_2": "Hurry! let's beat it before we get stung, too!" + }, + "12": { + "INITIAL_BOX": "Ggreat guns!...2g, .pa/n i feel n pain! as superman, i should be > invulnerable! 1 have unbreakabl \u2018skin! under my clark kent clothes, im wearing an woestructisle superman uniform ! =", + "DEFAULT": "Ggreat guns!...2gasp/z...pain i feel n fan! as superman, i should be invulnerable! 1 have unbreakable skin! under my clark kent clothes, i'm wearing an /noestruct/ele superman uniform !", + "DEFAULT_GREY_PAD": "Ggreat guns!...3gasp/=... rain t feel fan! as superman, i should be invulnerable\u00ae 1 have unbreakabli skin! under my clark kent clothes, ia wearing an /noestryct/iele supe iperman uniform !", + "PADDED_4": "\u2018ggreat guns!...3gasp/5... pain i feel pain? as superman, i should be invilnerable | t ne unbreakasle gkin! 
under my clark kent clothes, tih wearing an indestructible superman uniform", + "PADDED_8": "Ggreat guns!...2gasp/z...pain i feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! under my clark kent clothes, i'm wearing an /noestruct/ele $ superman uniform ! &", + "EXTRACTED_INIT_BOX": "Ggreat guns!...2g/ pain t feel pain? as superman, i should be invulnerable! 1 have unbreakabl skin! under my clark kent clothes, i'm wearing an /noestruct/ble superman uniform *", + "PADDED_4_EXTRACTED": "Ggreat guns!...2gasp/:... pain t feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! under my clark kent clothes, i'm wearing an /noestruct/ble superman uniform !", + "PADDED_8_EXTRACTED": "Ggreat guns!...2gasp/:... pain t feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! under my clark kent clothes, i'm wearing an /noestruct/ble superman uniform !", + "PADDED_8_DILATION_1": "Fain! as superman, i should be invulnerable! 1 have unbreakabl skin! under my clark kent clothes, i'm wearing an indestructible superman uniform !", + "PAD_8_FRACT_0_5": "Ggreat guns!...2gasp/:...pain i feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! under my clark kent clothes, i'm wearing an /nodestruct/ible superman uniform !", + "PAD_8_FRACT_0_2": "Ggreat guns!...2gasp/:... pain i feel fan! as superman, i should be invulnerable! 1 have unbreakabl skin! under my clark kent clothes, i'm wearing an indestructible superman uniform !" + }, + "13": { + "INITIAL_BOX": "Abruptly...", + "DEFAULT": "Abruptly...", + "DEFAULT_GREY_PAD": "Abruptly.", + "PADDED_4": "Abruptly...", + "PADDED_8": "", + "EXTRACTED_INIT_BOX": "Abruptly. ..", + "PADDED_4_EXTRACTED": "Abruptly...", + "PADDED_8_EXTRACTED": "Abruptly. ..", + "PADDED_8_DILATION_1": "", + "PAD_8_FRACT_0_5": "Abruptly. ..", + "PAD_8_FRACT_0_2": "Abruptly...." + }, + "14": { + "INITIAL_BOX": "Great caesar's ghost. 
he's spinning a web of g/ant, sr strands --", + "DEFAULT": "Great caesars \u00b0 ghost! he's spinning & web of g/a, silk strands", + "DEFAULT_GREY_PAD": "Great caesar's | ghost! he's spinning k web of giant, silk strands -- as tough as steel! ne]", + "PADDED_4": "Great caesars ghost. he's spinning a web of g/ant, | silk strands as tough as steel!", + "PADDED_8": "Great caesai ghost. he's spinning a web of g/ant, silk strands -- as tough as steel!", + "EXTRACTED_INIT_BOX": "Great caesar's ghost. he's spinning a web of g/ant, silk strands --", + "PADDED_4_EXTRACTED": "Great caesar's ghost! he's spinning a web of g/ant, \"silk strands -- as tough as steel!", + "PADDED_8_EXTRACTED": "Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!", + "PADDED_8_DILATION_1": "Great caesars ghost! he's spinning _ a web of g/ant,", + "PAD_8_FRACT_0_5": "Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!", + "PAD_8_FRACT_0_2": "Great caesar's ghost! he's spinning _ a web of g/ant, \"silk strands -- as tough as steel!" + }, + "15": { + "INITIAL_BOX": "I-i feel the heat of the sun...the pain of the bee-sting ... the heavy weight of my pack! every human discomfort... good grief! i've lost all my super-powers! i've become an ordinary mortal in this world. /", + "DEFAULT": "I-i feel the heat of the sum...the pain of the bee-sting... the heavy weight of my { pack! every human discomfort... good grief! i've lost all my super -powersx ve become an ordinary mortal in this world! j", + "DEFAULT_GREY_PAD": "\\i-t feel the heat of the sun...the pain of the bee-sting... the heavy weight of my & pack! every human discomfort... good grief! i've lost all my super-powers i've become an ordinary mortal in this world. /.", + "PADDED_4": "I-i feel the heat of the sun...the pain of the bee-sting... the heavy weight of my { pack! every human discomfort... good grief! 
i've lost all \\my super-powersx ve become an ordinary mortal in this world.", + "PADDED_8": "I-i feel the heat of the sun...the pain of the bee-sting... the heavy weight of my ! every human discomfort... good grief! i've lost all \\my super-powers! \u2018ve become an ordinary mortal in this world. x", + "EXTRACTED_INIT_BOX": "I-i feel the heat of the sun. the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! t've lost all my super-powers. ve become an ordinary mortal im this world.", + "PADDED_4_EXTRACTED": "I-i feel the heat of the sun., the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! t've lost all my super-powers. ve become an ordinary mortal in this world.", + "PADDED_8_EXTRACTED": "I-t feel the heat of the sun., the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! t've lost all my super-powers. ve become an ordinary mortal in this world.", + "PADDED_8_DILATION_1": "I-i feel the heat of the sun. the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! i've lost all my super-powers. ve ie an ordinary mortal in this world.", + "PAD_8_FRACT_0_5": "I-i feel the heat of the sun. the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! i've lost all my super-powers. ve become an ordinary mortal in this world.", + "PAD_8_FRACT_0_2": "I-i feel the heat of the sun., the pain of the bee-sting... the heavy weight of my pack! every human discomfort... good grief! i've lost all my super-powers. ve become an ordinary mortal in this world." + }, + "16": { + "INITIAL_BOX": "Enormous spider- like creature is going berserk, as if the sight of us excited him (into mad spinning get back! that 2", + "DEFAULT": "Like creature is going berserk, as if the sight of us excited him into mad spinning, get back! that \\ enormous spider-", + "DEFAULT_GREY_PAD": "(get back! 
that enormous spider- like creature 1 \u00a7 going berserk, as if the sight of us excited him into mad spinning]", + "PADDED_4": "(get back! that enormous spider- as if the sight of us excited him [into mad spinning", + "PADDED_8": "As if the sight of us excited him {into mad spinning", + "EXTRACTED_INIT_BOX": "Get back. that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning", + "PADDED_4_EXTRACTED": "Get back! that enormous spider- like creature is going berserk, 25 if the sight of us excited him into mad spinning", + "PADDED_8_EXTRACTED": "Get back! that enormous spider- like creature is going berserk, 25 if the sight of us excited him into mad spinning g [", + "PADDED_8_DILATION_1": "Get back! that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning\u2019 g [", + "PAD_8_FRACT_0_5": "Get back! that enormous spider- like creature is going berserk, 25 if the sight of us excited him into mad spinning g [", + "PAD_8_FRACT_0_2": "Get back! that enormous spider- like creature is going berserk, as if the sight of us excited him into mad spinning gs [" + } + }, + "Adolf_Cap_01_008.jpg": { + "0": { + "INITIAL_BOX": "Ya le dije\u201d a mi padre que lo en- un aguse- ro en un \u00e1rbol.", + "DEFAULT": "Ya le dije\" a mi padre - que lo en- en un aguje-* \u00e1rbol. ro en un", + "DEFAULT_GREY_PAD": "Ya le dije: a mi padre: \u00a1que lo en- un aguje- ro en un", + "PADDED_4": "Ya le dije\u201d a mi padre. que lo en- un aguje-* ro en un \u00e1rbol.", + "PADDED_8": "Ya le dije = a mi padre - que lo en- * en un aguie- * ro en un \u00e1rbol. 
|", + "EXTRACTED_INIT_BOX": "Ya le dije a mi padre que lo en- un aguse= ro en un \u00e1rbol.", + "PADDED_4_EXTRACTED": "Ya le dije a mi padre que lo en- un aguse= ro en un \u00e1rbol.", + "PADDED_8_EXTRACTED": "Ya le dije am padre que lo en- en un aguse= ro en un arbol.", + "PADDED_8_DILATION_1": "Ya le dije ami padre que lo en- en un aguse- ro en un arbol.", + "PAD_8_FRACT_0_5": "Ya le dije am padre que lo en- en un aguse- ro en un \u00e1rbol.", + "PAD_8_FRACT_0_2": "Ya le dije am padre que lo en- en un aguse- ro en un arbol." + }, + "1": { + "INITIAL_BOX": "Iriera de agu\u00ed\u00ed", + "DEFAULT": "Iriera de aqu\u00ed/", + "DEFAULT_GREY_PAD": "Iriera e aqu\u00ed", + "PADDED_4": "Piera de aqu\u00ed", + "PADDED_8": "Iriera > de au", + "EXTRACTED_INIT_BOX": "Ipiera de aqu\u00ed", + "PADDED_4_EXTRACTED": "Ipiera de aqu\u00ed", + "PADDED_8_EXTRACTED": "Ipiera de aqui", + "PADDED_8_DILATION_1": "Iriera de aqu\u00ed/", + "PAD_8_FRACT_0_5": "Iriera de aqu\u00ed", + "PAD_8_FRACT_0_2": "Ipiera de aqu\u00ed" + }, + "2": { + "INITIAL_BOX": "Te he pre- guntado que qui\u00e9n escribi\u00f3 esto.", + "DEFAULT": "Te he pre- guntado que qui\u00e9n escribi\u00f3 esto.", + "DEFAULT_GREY_PAD": "Te he pre- guntado| \u00a1que qui\u00e1 escribi\u00f3 sto.", + "PADDED_4": "Te he pre- guntado que qui\u00e9n escribi\u00f3 esto.", + "PADDED_8": "Te he pre- guntado que qui\u00e9n escribi\u00f3 esto.", + "EXTRACTED_INIT_BOX": "Te he pre- guntado que qui\u00e9n escribi\u00f3 esto.", + "PADDED_4_EXTRACTED": "Te he pre- guntado que qui\u00e9n escribi\u00f3 esto.", + "PADDED_8_EXTRACTED": "Te he pre- guntado que qui\u00e9n escribi\u00f3 esto.", + "PADDED_8_DILATION_1": "Te he pre- guntado que qui\u00e9n escribi\u00f3 esto.", + "PAD_8_FRACT_0_5": "Te he pre- guntado que qui\u00e9n escribi\u00f3 esto.", + "PAD_8_FRACT_0_2": "Te he pre- guntado que qui\u00e9n escribi\u00f3 esto." 
+ }, + "3": { + "INITIAL_BOX": "1no lo sl no quiero hablar m\u00e1s", + "DEFAULT": "1no lo s\u00e9l no quiero hablar m\u00e1s ello.", + "DEFAULT_GREY_PAD": "\u00a1no lo sel| \u00a1no quiero! hablar ello.", + "PADDED_4": "1no lo s\u00e9l no quiero hablar m\u00e1s ello.", + "PADDED_8": "1no lo s\u00e9l no quiero hablar m\u00e1s ello.", + "EXTRACTED_INIT_BOX": "Ino lo s\u00e1 \u00f1o quiero hablar m\u00e1s", + "PADDED_4_EXTRACTED": "No lo s\u00e9l no quiero hablar m\u00e1s ello.", + "PADDED_8_EXTRACTED": "No lo s\u00e9l no quiero hablar m\u00e1s ello.", + "PADDED_8_DILATION_1": "No lo s\u00e9l no quiero hablar m\u00e1s ello.", + "PAD_8_FRACT_0_5": "\u00a1no lo s\u00e9l no quiero hablar m\u00e1s ello.", + "PAD_8_FRACT_0_2": "\u00a1no lo s\u00e9l no quiero hablar m\u00e1s ello." + }, + "4": { + "INITIAL_BOX": "Adolf, lo que dice ese papel.", + "DEFAULT": "Adolf, lo que dice ese papel", + "DEFAULT_GREY_PAD": "'adolf, lo que es completa mente falso.", + "PADDED_4": "Adolf, lo que dice ese papel es completa chad e falso. demagogia peligrosa. f", + "PADDED_8": "Adolf, lo que dice ese papel es completa mente falso.", + "EXTRACTED_INIT_BOX": "Adolf, lo que dice ese papel.", + "PADDED_4_EXTRACTED": "Adolf, lo que dice ese papel es completa mente falso. es demagogia peligrosa.", + "PADDED_8_EXTRACTED": "Adolf, lo que dice ese papel es completa mente falso. es demagogia peligrosa.", + "PADDED_8_DILATION_1": "Adolf, lo que dice ese papel es completa mente falso, peligrosa.", + "PAD_8_FRACT_0_5": "Adolf, lo que dice ese papel es completa mente falso. es demagogia peligrosa.", + "PAD_8_FRACT_0_2": "Adolf, lo que dice ese papel es completa- mente falso. es demagogia peligrosa." 
+ }, + "5": { + "INITIAL_BOX": "Entonces, \u00bfpor qu\u00e9 le preo- cupa", + "DEFAULT": "Entonces, \u00bfpor qu\u00e9 le preo- cupa tanto?", + "DEFAULT_GREY_PAD": "\u00bfpor qu\u00e9 ccuppa", + "PADDED_4": "Entonces, \u00bfpor qu\u00e9 le preo- cupa tanto?", + "PADDED_8": "Entonces, \u00bfpor qu\u00e9 le preo- cupa tanto?", + "EXTRACTED_INIT_BOX": "Entonces, \u00bfpor qu\u00e9 le preo- cupa", + "PADDED_4_EXTRACTED": "Entonces, \u00bfpor qu\u00e9 le preo- cupa tanto?", + "PADDED_8_EXTRACTED": "Entonces, \u00bfpor qu\u00e9 le preo- cupa tanto?", + "PADDED_8_DILATION_1": "Entonces, \u00bfpor qu\u00e9 le preo- cupa tanto?", + "PAD_8_FRACT_0_5": "Entonces, \u00bfpor qu\u00e9 le preo- cupa tanto?", + "PAD_8_FRACT_0_2": "Entonces, \u00bfpor qu\u00e9 le preo- cupa tanto?" + }, + "6": { + "INITIAL_BOX": "El consulado no permite la circulaci\u00f3n de ese tpo de", + "DEFAULT": "El consulado no permite la circulaci\u00f3n de ese tpo de", + "DEFAULT_GREY_PAD": "El consulado no permite la \u00a1circulaci\u00f3n del ese tipo de", + "PADDED_4": "El consulado no permite la circulaci\u00f3n de ese tpo de", + "PADDED_8": "El consulado no permite la circulaci\u00f3n de ese tpo de", + "EXTRACTED_INIT_BOX": "El consulado no permite la circulaci\u00f3n de ese tipo de", + "PADDED_4_EXTRACTED": "El consulado no permite la circulaci\u00f3n de ese tipo de", + "PADDED_8_EXTRACTED": "El consulado no permite la circulaci\u00f3n de ese tipo de", + "PADDED_8_DILATION_1": "El consulado no permite la circulaci\u00f3n de ese tipo de", + "PAD_8_FRACT_0_5": "El consulado no permite la circulaci\u00f3n de ese tpo de", + "PAD_8_FRACT_0_2": "El consulado no permite la circulaci\u00f3n de ese tpo de" + }, + "7": { + "INITIAL_BOX": "Wo s\u00e9 nada!", + "DEFAULT": "Wo s\u00e9 nada!", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Wo s\u00e9", + "PADDED_8": "Es ae", + "EXTRACTED_INIT_BOX": "Lo s\u00e9 wo s\u00e9,", + "PADDED_4_EXTRACTED": "Wo s\u00e9", + "PADDED_8_EXTRACTED": "Lo s\u00e9 ne", + 
"PADDED_8_DILATION_1": "Es ne", + "PAD_8_FRACT_0_5": "Lo s\u00e9 ne", + "PAD_8_FRACT_0_2": "Lo s\u00e9 ne" + }, + "8": { + "INITIAL_BOX": "Tengo - que slen- \u00bfentien- des?", + "DEFAULT": "Tengo 2 que slen- ciarlos, \u00bfentien-", + "DEFAULT_GREY_PAD": "Tengo \u00a1que salen: ciarlos, \u00bfentien-", + "PADDED_4": "Tengo 2 que silen- ciarlos, \u00bfentien- |", + "PADDED_8": "Tengo que salen: ciarlos, \u00bfentien- y des7", + "EXTRACTED_INIT_BOX": "Tengo que slen- \u00bfentien- des?", + "PADDED_4_EXTRACTED": "Tengo que slen- \u00bfentien- des?", + "PADDED_8_EXTRACTED": "Tengo que slen- \u00bfentien- des7", + "PADDED_8_DILATION_1": "Tengo que slen- \u00bfentien- dest", + "PAD_8_FRACT_0_5": "Tengo que slen- \u00bfentien- des7", + "PAD_8_FRACT_0_2": "Tengo que slen- \u00bfentien- des7" + }, + "9": { + "INITIAL_BOX": "Ahora,", + "DEFAULT": "Ahora,", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Ahora, habla/", + "PADDED_8": "Pe", + "EXTRACTED_INIT_BOX": "Ahora,", + "PADDED_4_EXTRACTED": "Ahora,", + "PADDED_8_EXTRACTED": "Ahora, wabla/", + "PADDED_8_DILATION_1": "Ahora, wabla/", + "PAD_8_FRACT_0_5": "Ahora, wabla/", + "PAD_8_FRACT_0_2": "Ahora, wabla/" + }, + "10": { + "INITIAL_BOX": "\u00bfest\u00e1s encu- briendo a al- guien?", + "DEFAULT": "\u00bfestas encu- briendo a al- guient", + "DEFAULT_GREY_PAD": "\u00bfestas encu- briendo; a al- guient.", + "PADDED_4": "\u00bfestas encu- briendo a al- guient", + "PADDED_8": "\u00bfestas encu- briendo a al- guient", + "EXTRACTED_INIT_BOX": "\u00bfestas briendo encu- a al- guien?", + "PADDED_4_EXTRACTED": "\u00bfestas encu- briendo a al- guient", + "PADDED_8_EXTRACTED": "\u00bfestas encu- briendo a al- guient", + "PADDED_8_DILATION_1": "\u00bfestas encuu- briendo a al- guient", + "PAD_8_FRACT_0_5": "\u00bfestas encu- briendo a al- guient", + "PAD_8_FRACT_0_2": "\u00bfestas encu- briendo a al- guient" + } + }, + "Barnaby_v1-028.png": { + "0": { + "INITIAL_BOX": "Barnaby, | should like to bestow a boon upon this household in return 
for its hospitality. . . . shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? . . .", + "DEFAULT": "Barnaby, | should like to bestow a boon upon this household in return for its hospitality. . . . shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? . . .", + "DEFAULT_GREY_PAD": "Barnaby, | should like to bestow a boon upon this household in return for its hospitality. . . . shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? . . .", + "PADDED_4": "Barnaby, | should like to bestow a boon upon this household in return for its hospitality. . . . shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? . . .", + "PADDED_8": "Barnaby, | should like to bestow a boon upon this household in return for its hospitality. . . . shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? . . .", + "EXTRACTED_INIT_BOX": "Barnaby, | should like to bestow a boon upon this household in return for its hospitality. . . . shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? . . .", + "PADDED_4_EXTRACTED": "Barnaby, | should like to bestow a boon upon this household in return for its hospitality. . . . shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? . . .", + "PADDED_8_EXTRACTED": "Barnaby, | should like to bestow a boon upon this household in return for its hospitality. . . . shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? . . .", + "PADDED_8_DILATION_1": "Barnaby, | should like to bestow a boon upon this household in return for its hospitality. . . . shall i rid the locality of werewolves? 
charm the cattle and the crops against the ravages of evil spirits? . . .", + "PAD_8_FRACT_0_5": "Barnaby, | should like to bestow a boon upon this household in return for its hospitality. . . . shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? . . .", + "PAD_8_FRACT_0_2": "Barnaby, | should like to bestow a boon upon this household in return for its hospitality. . . . shall i rid the locality of werewolves? charm the cattle and the crops against the ravages of evil spirits? . . ." + }, + "1": { + "INITIAL_BOX": "We haven't any cattle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all.", + "DEFAULT": "We haven't any cattle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all.", + "DEFAULT_GREY_PAD": "We haven't any cattle and pop's victory garden is growing fine. and i don\u2019t think we\u2019ve had much trouble with werewolves at all.", + "PADDED_4": "We haven't any cattle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all.", + "PADDED_8": "We haven't any cattle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all.", + "EXTRACTED_INIT_BOX": "We haven\u2019t any cattle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all.", + "PADDED_4_EXTRACTED": "We haven\u2019t any cattle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all.", + "PADDED_8_EXTRACTED": "We haven\u2019t any cattle and pop's victory garden is growing fine. and | don't think we've had much trouble with werewolves at all.", + "PADDED_8_DILATION_1": "We haven't any cattle and pop's victory garden is growing fine. 
and i don't think we've had much trouble with werewolves at all.", + "PAD_8_FRACT_0_5": "We haven\u2019t any cattle and pop's victory garden is growing fine. and i don\u2019t think we\u2019ve had much trouble with werewolves at all.", + "PAD_8_FRACT_0_2": "We haven\u2019t any cattle and pop's victory garden is growing fine. and i don't think we've had much trouble with werewolves at all." + }, + "2": { + "INITIAL_BOX": "Then perhaps i can drive out snakes? or witch a well? . .", + "DEFAULT": "Then perhaps i can drive out snakes? or witch a well? .", + "DEFAULT_GREY_PAD": "Then perhaps i can drive out snakes? or witch a well? , .", + "PADDED_4": "Then perhaps i can drive out snakes? or witch a well? .", + "PADDED_8": "Then perhaps i can drive out snakes? or witch a well? .", + "EXTRACTED_INIT_BOX": "Then perhaps | can drive out snakes? or witch a well? . .", + "PADDED_4_EXTRACTED": "Then perhaps | can drive out snakes? or witch a well? .", + "PADDED_8_EXTRACTED": "Then perhaps | can drive out snakes? or witch a well? . .", + "PADDED_8_DILATION_1": "Then perhaps i can drive out snakes? or witch a well? . .", + "PAD_8_FRACT_0_5": "Then perhaps | can drive out snakes? or witch a well? , .", + "PAD_8_FRACT_0_2": "Then perhaps | can drive out snakes? or witch a well? , ." 
+ }, + "3": { + "INITIAL_BOX": "Where the end of this divining rod turns to the earth we'll find water.", + "DEFAULT": "Where the end of this divining rod turns to the earth we'll find water.", + "DEFAULT_GREY_PAD": "Where the end of this divining rod turns to the earth we'll find water.", + "PADDED_4": "Where the end of this divining rod turns to the earth we'll find water.", + "PADDED_8": "Where the end of this divining rod turns to the earth we'll find water.", + "EXTRACTED_INIT_BOX": "Where the end of this divining rod turns to the earth we'll find water.", + "PADDED_4_EXTRACTED": "Where the end of this divining rod turns to the earth we'll find water.", + "PADDED_8_EXTRACTED": "Where the end of this divining rod turns to the earth we'll find water.", + "PADDED_8_DILATION_1": "Where the end of this divining rod turns to the earth we'll find water.", + "PAD_8_FRACT_0_5": "Where the end of this divining rod turns to the earth we'll find water.", + "PAD_8_FRACT_0_2": "Where the end of this divining rod turns to the earth we'll find water." + }, + "4": { + "INITIAL_BOX": "Gosh. don\u2019t go to a lot of trouble, mr. o'malley.", + "DEFAULT": "Gosh. don\u2019t go to a lot of trouble, mr. o'malley.", + "DEFAULT_GREY_PAD": "Gosh. don\u2019t go to a lot of trouble, mr. o'malley.", + "PADDED_4": "Gosh. don\u2019t go to a lot of trouble, mr. o'malley.", + "PADDED_8": "Gosh. don\u2019t go to a lot of trouble, mr. o'malley.", + "EXTRACTED_INIT_BOX": "Gosh. don\u2019t go to a lot of trouble, mr. o'malley.", + "PADDED_4_EXTRACTED": "Gosh. don\u2019t go to a lot of trouble, mr. o'malley.", + "PADDED_8_EXTRACTED": "Gosh. don\u2019t go to a lot of trouble, mr. o'malley.", + "PADDED_8_DILATION_1": "Gosh. don\u2019t go to a lot of trouble, mr. o'malley.", + "PAD_8_FRACT_0_5": "Gosh. don\u2019t go to a lot of trouble, mr. o'malley.", + "PAD_8_FRACT_0_2": "Gosh. don\u2019t go to a lot of trouble, mr. o'malley." 
+ }, + "5": { + "INITIAL_BOX": "When you grow to manhood and inherit this estate you'll thank your fairy godfather for this well right in your cellar, m\u2019lad.", + "DEFAULT": "When you grow to manhood and inherit this estate you'll thank your fairy godfather for this well right in your cellar, m\u2019lad.", + "DEFAULT_GREY_PAD": "When you grow to manhood and inherit this estate you'll thank your fairy godfather for this well right in your cellar, m\u2019lad.", + "PADDED_4": "When you grow to manhood and inherit this estate you'll thank your fairy godfather for this well right in your cellar, m\u2019lad.", + "PADDED_8": "When you grow to manhood and inherit this estate you'll thank your fairy godfather for this well right in your cellar, m\u2019lad.", + "EXTRACTED_INIT_BOX": "When you grow to manhood and inherit this estate you'll thank your fairy godfather for this well right in your cellar, m\u2019lad.", + "PADDED_4_EXTRACTED": "When you grow to manhood and inherit this estate you'll thank your fairy godfather for this well right in your cellar, m\u2019lad.", + "PADDED_8_EXTRACTED": "When you grow to manhood and inherit this estate you'll thank your fairy godfather for this well right in your cellar, m\u2019lad.", + "PADDED_8_DILATION_1": "When you grow to manhood and inherit this estate you'll thank your fairy godfather for this well right in your cellar, m\u2019lad.", + "PAD_8_FRACT_0_5": "When you grow to manhood and inherit this estate you'll thank your fairy godfather for this well right in your cellar, m\u2019lad.", + "PAD_8_FRACT_0_2": "When you grow to manhood and inherit this estate you'll thank your fairy godfather for this well right in your cellar, m\u2019lad." + }, + "6": { + "INITIAL_BOX": "Ah, that's it! | shall bestow the blessing of a never-failing water supply upon this plot of land... fetch me a forked stick, m\u2019boy.", + "DEFAULT": "Ah, that's it! | shall bestow the blessing of a never-failing water supply upon this plot of land... 
fetch me a forked stick, m\u2019boy.", + "DEFAULT_GREY_PAD": "Ah, that's it! | shall bestow the blessing of a never-failing water supply upon this plot of land... fetch me a forked stick, m\u2019boy.", + "PADDED_4": "Ah, that's it! | shall bestow the blessing of a never-failing water supply upon this plot of land... fetch me a forked stick, m\u2019boy.", + "PADDED_8": "Ah, that's it! | shall bestow the blessing of a never-failing water supply upon this plot of land... fetch me a forked stick, m\u2019boy.", + "EXTRACTED_INIT_BOX": "Ah, that's it! | shall bestow the blessing of a never-failing water supply upon this plot of land... fetch me a forked stick, m\u2019boy.", + "PADDED_4_EXTRACTED": "Ah, that's it! | shall bestow the blessing of a never-failing water supply upon this plot of land... fetch me a forked stick, m\u2019boy.", + "PADDED_8_EXTRACTED": "Ah, that's it! | shall bestow the blessing of a never-failing water supply upon this plot of land... fetch me a forked stick, m\u2019boy.", + "PADDED_8_DILATION_1": "Ah, that's it! | shall bestow the blessing of a never-failing water supply upon this plot of land... fetch me a forked stick, m\u2019boy.", + "PAD_8_FRACT_0_5": "Ah, that's it! | shall bestow the blessing of a never-failing water supply upon this plot of land... fetch me a forked stick, m\u2019boy.", + "PAD_8_FRACT_0_2": "Ah, that's it! | shall bestow the blessing of a never-failing water supply upon this plot of land... fetch me a forked stick, m\u2019boy." + }, + "7": { + "INITIAL_BOX": "But we've got city water, mr. o'malley.", + "DEFAULT": "But we've got city water, mr. o'malley.", + "DEFAULT_GREY_PAD": "But we've got city water, mr. o'malley.", + "PADDED_4": "But we've got city water, mr. o'malley.", + "PADDED_8": "But we've got city water, mr. o'malley.", + "EXTRACTED_INIT_BOX": "But we've got city water, mr. o'malley.", + "PADDED_4_EXTRACTED": "But we've got city water, mr. o'malley.", + "PADDED_8_EXTRACTED": "But we've got city water, mr. 
o'malley.", + "PADDED_8_DILATION_1": "But we've got city water, mr. o'malley.", + "PAD_8_FRACT_0_5": "But we've got city water, mr. o'malley.", + "PAD_8_FRACT_0_2": "But we've got city water, mr. o'malley." + }, + "8": { + "INITIAL_BOX": "Er... is there any beer on ice, m\u2019boy?", + "DEFAULT": "Er... is there any beer on ice, m\u2019boy?", + "DEFAULT_GREY_PAD": "Er... is there any beer on ice, m\u2019bo", + "PADDED_4": "Er... is there any beer on ice, m\u2019boy?", + "PADDED_8": "Er... is there any beer on ice, m'boy?", + "EXTRACTED_INIT_BOX": "Er... is there any beer on ice, m'boy?", + "PADDED_4_EXTRACTED": "Er... is there any beer on ice, m\u2019boy?", + "PADDED_8_EXTRACTED": "Er... is there any beer on ice, m\u2019boy?", + "PADDED_8_DILATION_1": "Er... is there any beer on ice, m'boy?", + "PAD_8_FRACT_0_5": "Er... is there any beer on ice, m\u2019boy?", + "PAD_8_FRACT_0_2": "Er... is there any beer on ice, m'boy?" + }, + "9": { + "INITIAL_BOX": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth, . . makes me thirsty merely to describe it . . .", + "DEFAULT": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth, . . makes me thirsty merely to describe it . . .", + "DEFAULT_GREY_PAD": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth. ., makes me thirsty merely to describe it . . .", + "PADDED_4": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth, . . makes me thirsty merely to describe it . . .", + "PADDED_8": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth, . . makes me thirsty merely to describe it . . .", + "EXTRACTED_INIT_BOX": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth, .. makes me thirsty merely to describe it . . 
.", + "PADDED_4_EXTRACTED": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth, .. makes me thirsty merely to describe it . . .", + "PADDED_8_EXTRACTED": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth, .. makes me thirsty merely to describe it . . .", + "PADDED_8_DILATION_1": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth, . . makes me thirsty merely to describe it . . .", + "PAD_8_FRACT_0_5": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth, . . makes me thirsty merely to describe it . . .", + "PAD_8_FRACT_0_2": "Not to be compared in taste with the clear, cool, beautiful nectar of the natural earth, , . makes me thirsty merely to describe it . . ." + }, + "10": { + "INITIAL_BOX": "- rocket {oh (4 con", + "DEFAULT": "C rockety jor con", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Crockety jonson", + "PADDED_8": "Ec rocket] jor nson", + "EXTRACTED_INIT_BOX": ". rocket {oh (4 con", + "PADDED_4_EXTRACTED": "C rocket jonson", + "PADDED_8_EXTRACTED": "Crocket jornmson", + "PADDED_8_DILATION_1": "Crocket jor nso", + "PAD_8_FRACT_0_5": "Crocket jonmgon", + "PAD_8_FRACT_0_2": "Crocket jornmson" + }, + "11": { + "INITIAL_BOX": "We can really get along all right with the city water supply . . .", + "DEFAULT": "We can really get along all right with the city water supply . . .", + "DEFAULT_GREY_PAD": "We can really get along all right with the city water supply . . .", + "PADDED_4": "We can really get along all right with the city water supply . . .", + "PADDED_8": "We can really get along all right with the city water supply . . .", + "EXTRACTED_INIT_BOX": "We can really get along all right with the city water supply . . .", + "PADDED_4_EXTRACTED": "We can really get along all right with the city water supply . . .", + "PADDED_8_EXTRACTED": "We can really get along all right with the city water supply . . 
.", + "PADDED_8_DILATION_1": "We can really get along all right with the city water supply . . .", + "PAD_8_FRACT_0_5": "We can really get along all right with the city water supply . . .", + "PAD_8_FRACT_0_2": "We can really get along all right with the city water supply . . ." + }, + "12": { + "INITIAL_BOX": "Hush, m\u2019boy. this is a time of conservation.", + "DEFAULT": "Hush, m\u2019boy. this is a time of conservation.", + "DEFAULT_GREY_PAD": "Hush, m\u2019boy. this is a time of conservation.", + "PADDED_4": "Hush, m\u2019boy. this is a time of conservation.", + "PADDED_8": "Hush, m\u2019boy. this is a time of conservation.", + "EXTRACTED_INIT_BOX": "Hush, m\u2019boy. this is a time of conservation.", + "PADDED_4_EXTRACTED": "Hush, m\u2019boy. this is a time of conservation.", + "PADDED_8_EXTRACTED": "Hush, m\u2019boy. this is a time of conservation.", + "PADDED_8_DILATION_1": "Hush, m\u2019boy. this is a time of conservation.", + "PAD_8_FRACT_0_5": "Hush, m\u2019boy. this is a time of conservation.", + "PAD_8_FRACT_0_2": "Hush, m\u2019boy. this is a time of conservation." + }, + "13": { + "INITIAL_BOX": "We're being patriotic\u2014", + "DEFAULT": "We're being patriotic\u2014", + "DEFAULT_GREY_PAD": "We're being patriotic\u2014", + "PADDED_4": "We're being patriotic\u2014", + "PADDED_8": "We're being patriotic\u2014", + "EXTRACTED_INIT_BOX": "We're being patriotic\u2014", + "PADDED_4_EXTRACTED": "We're being patriotic\u2014", + "PADDED_8_EXTRACTED": "We're being patriotic\u2014", + "PADDED_8_DILATION_1": "We're being patriotic\u2014", + "PAD_8_FRACT_0_5": "We're being patriotic\u2014", + "PAD_8_FRACT_0_2": "We're being patriotic\u2014" + }, + "14": { + "INITIAL_BOX": "Ah! here's the place! bring the pickaxe.", + "DEFAULT": "Ah! here's the place! bring the pickaxe.", + "DEFAULT_GREY_PAD": "Ah! here's the place! bring the pickaxe.", + "PADDED_4": "Ah! here's the place! bring the pickaxe.", + "PADDED_8": "Ah! here's the place! 
bring the pickaxe.", + "EXTRACTED_INIT_BOX": "Ah! here's the place! bring the pickaxe.", + "PADDED_4_EXTRACTED": "Ah! here's the place! bring the pickaxe.", + "PADDED_8_EXTRACTED": "Ah! here's the place! bring the pickaxe.", + "PADDED_8_DILATION_1": "Ah! here's the place! bring the pickaxe.", + "PAD_8_FRACT_0_5": "Ah! here's the place! bring the pickaxe.", + "PAD_8_FRACT_0_2": "Ah! here's the place! bring the pickaxe." + }, + "15": { + "INITIAL_BOX": "Crockett jorunnsonn", + "DEFAULT": "Crockett jourunnsonn", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Crockett jonson", + "PADDED_8": "A crockett jonunnson", + "EXTRACTED_INIT_BOX": "Crockett jorunsonn", + "PADDED_4_EXTRACTED": "Crockett jonson", + "PADDED_8_EXTRACTED": "Crockett jonunnsonn", + "PADDED_8_DILATION_1": "Crockett jonunnsonn", + "PAD_8_FRACT_0_5": "Crockett jonson", + "PAD_8_FRACT_0_2": "Crockett jonunson" + } + }, + "Cannon-292.jpg": { + "0": { + "INITIAL_BOX": "This is it. the v-2.7 it will", + "DEFAULT": "This is it. the v-2.7 , it will \u201d win the world for us.\u201d", + "DEFAULT_GREY_PAD": "This is it.", + "PADDED_4": "Pos bo eae this is it. the v-2.7 , it will win the world for us.\u201d", + "PADDED_8": "Re this is it. the v-2.7 it will win the world for us.", + "EXTRACTED_INIT_BOX": "This is it. the v-4.7 it will win the world | for us.", + "PADDED_4_EXTRACTED": "This is it. the v-2.7 it will win the world for us.\u201d", + "PADDED_8_EXTRACTED": "This is it. the v-2.7 it will win the world for us.", + "PADDED_8_DILATION_1": "This is it. the v-2.7 it will win the world for us.", + "PAD_8_FRACT_0_5": "This is it. the v-2.7 it will win the world for us.", + "PAD_8_FRACT_0_2": "This is it. the v-2.7 it will win the world for us." 
+ }, + "1": { + "DEFAULT": "When that nuclear war- head explodes over moscow, they will not stop to ask who fired it / they will launch an allout attack on the (nited aa states.\u201d", + "INITIAL_BOX": "When that nuclear war: head explodes over moscow, they will not stop to ask who fired it / they will launch an all-out attack on the ynv/7tec a states 7", + "DEFAULT_GREY_PAD": "When that nuclear war: head explodes over moscow, they will not stop to ask who fired it / they will launch an all-out attack on the united states.\u201d", + "PADDED_4": "When that nuclear war- head explodes over moscow, they will not stop to ask who fired it / they will launch an all-out attack on the wiree ds states.\u201d", + "PADDED_8": "When that nuclear war] head explodes over moscow, they will not stop to ask who fired it / they will launch an all-out attack on the ynv/7tec states.\u201d", + "EXTRACTED_INIT_BOX": "When that nuclear war: head explodes over moscow, they will not stop to ask who fired it / they will launch an allout attack on the lite states 7", + "PADDED_4_EXTRACTED": "When that nuclear war- head explodes over moscow, they will not stop to ask who fired it / they will launch an allout attack on the lite states.\u201d", + "PADDED_8_EXTRACTED": "When that nuclear war- head explodes over moscow, they will not stop to ask who fired it / they will launch an allout attack on the lite states.\u201d", + "PADDED_8_DILATION_1": "When that nuclear war- head explodes over moscow, they will not stop to ask who fired it / they will launch an allout attack on the lw/ted states.\u201d", + "PAD_8_FRACT_0_5": "When that nuclear war- head explodes over moscow, they will not stop to ask who fired it / they will launch an all-out attack on the wv/7ec states.\u201d", + "PAD_8_FRACT_0_2": "When that nuclear war- head explodes over moscow, they will not stop to ask who fired it / they will launch an allout attack on the lw/ted states.\u201d" + }, + "2": { + "INITIAL_BOX": "'\u2014. 
and bo74 major powers will w/pe each other out.\u201d then we will move in and the world will be at our feet 4 oer tag.\u201d der tag., morgen o\u00a3 ganze | welt 7 has i 4 has", + "DEFAULT": "\u2019. and bo74 major ~n powers will w/pe each other out.\u201d then we will move in and the world will be at our feet 4 oer tag, der morgen tag.\u201d die ganze | welt 7 has a has", + "DEFAULT_GREY_PAD": "~. and o74 major powers will w/pe ach other out.\u201d the we will move in and the world will be at our feet oer tag. \u2019 der tag. morgen die ganze zene; 7 has", + "PADDED_4": "\u2014. and zo74 major powers will w/pe each other out./ then we will move in and the world will be at our peel derhis, \u2019 der morgen die 25, welt; id aa haz a. 1", + "PADDED_8": "\u2014 and o74 major powers will w725 each other out.\u201d then we will move in and the world will be at our feel oer tac der / morge/ ga n die wze welt? ha.\u201d >, aahal", + "EXTRACTED_INIT_BOX": "And zo74 major powers will w/pe each other out.\u201d then we will move in and the world will be at our feelers \u2019 der morgen die ganze fay shas", + "PADDED_4_EXTRACTED": "And zo74 major powers will w/pe each other out.\u201d then we will move in and the world will be at our feel oer tag.\u201d der morgen die ganze zener shas", + "PADDED_8_EXTRACTED": "And zo74 major powers will w/pe each other out.\u201d then we will move in and the world will be at our feel oer tag.\u201d der morgen die ganze zener shas", + "PADDED_8_DILATION_1": "And zo74 major powers will w/pe each other out.\u201d then we will move in and the world will be at our a tac der morgen die ganze zene 7 has", + "PAD_8_FRACT_0_5": "\u00ab. 
and o74 major powers will w/pe each other out,\u201d then we will move in and the world will be at our feet der tag, \u2019 der morgen tag.\u201d die ganze zenelt; 7 has", + "PAD_8_FRACT_0_2": "\u00ab= and zo74 major powers will w/pe each other out.\u201d then we will move in and the world will be at our gp \u2019 der morgen die ganze ay 7 has" + }, + "3": { + "INITIAL_BOX": "He's crazy. butit just cove j", + "DEFAULT": "He's crazy. but it just couto ji happen... }j", + "DEFAULT_GREY_PAD": "He's craz.", + "PADDED_4": "He's crazy... but it just coo j 5 happen...", + "PADDED_8": "He's crazy... butit just coup j happen", + "EXTRACTED_INIT_BOX": "He's crazy... butit just couo happen...", + "PADDED_4_EXTRACTED": "He's crazy... butit just couo happen...", + "PADDED_8_EXTRACTED": "He's crazy... butit just couo happen...", + "PADDED_8_DILATION_1": "He's crazy... butit just couo happen...", + "PAD_8_FRACT_0_5": "He's crazy... but it just cowo happen...", + "PAD_8_FRACT_0_2": "He's crazy... butit just couo happen..." + }, + "4": { + "INITIAL_BOX": "Me , cannon... , it will \u00ab begin to show you your duties", + "DEFAULT": ") some, cannon... > i will begin to show you your duties!", + "DEFAULT_GREY_PAD": "Come , cannon. i will begin to show you our duties!", + "PADDED_4": ") come, cannon... i will 4 begin to show you your duties!", + "PADDED_8": "Come, cannon... > i will begin to show you your duties!", + "EXTRACTED_INIT_BOX": "Show you your duties", + "PADDED_4_EXTRACTED": "Come , cannon... i will begin to show you your duties", + "PADDED_8_EXTRACTED": "Come , cannon... i will begin to show you your duties", + "PADDED_8_DILATION_1": "Come , cannon... i will begin to show you your duties", + "PAD_8_FRACT_0_5": "Come , cannon... i will begin to show you your duties", + "PAD_8_FRACT_0_2": "Come , cannon... 
i will begin to show you your duties" + }, + "5": { + "INITIAL_BOX": "How soon 2", + "DEFAULT": "How soon fi", + "DEFAULT_GREY_PAD": "How soon 2", + "PADDED_4": "How soon 2", + "PADDED_8": "How soon", + "EXTRACTED_INIT_BOX": "How soon 4", + "PADDED_4_EXTRACTED": "How soon 2", + "PADDED_8_EXTRACTED": "How soon zz", + "PADDED_8_DILATION_1": "How soon 2", + "PAD_8_FRACT_0_5": "How soon 2", + "PAD_8_FRACT_0_2": "How soon zz" + }, + "6": { + "INITIAL_BOX": "For days, cannon waits and watches, learning the layout of the castle.", + "DEFAULT": "For days, cannon waits and watches, learning the layout of the castle.", + "DEFAULT_GREY_PAD": "For days, cannon waits and watches, learning the layout of the castle.", + "PADDED_4": "For days, cannon waits and watches, learning the layout of the castle.", + "PADDED_8": "For days, cannon waits and watches, learning the layout of the castle...", + "EXTRACTED_INIT_BOX": "For days, cannon waits and watches, learning the layout of the castle.", + "PADDED_4_EXTRACTED": "For days, cannon waits and watches, learning the layout of the castle.", + "PADDED_8_EXTRACTED": "For days, cannon waits and watches, learning the layout of the castle...", + "PADDED_8_DILATION_1": "For days, cannon waits and watches, learning the layout of the castle...", + "PAD_8_FRACT_0_5": "For days, cannon waits and watches, learning the layout of the castle...", + "PAD_8_FRACT_0_2": "For days, cannon waits and watches, learning the layout of the castle..." + }, + "7": { + "INITIAL_BOX": "Final count- down ina days now! i", + "DEFAULT": "Final count- down ina days now? /", + "DEFAULT_GREY_PAD": "Final count-| down ina few days now.", + "PADDED_4": "\u201cfinal ina", + "PADDED_8": "Final sn , ina few days now! 
/", + "EXTRACTED_INIT_BOX": "Final count- down ina days now/", + "PADDED_4_EXTRACTED": "Final count- down ina days now!", + "PADDED_8_EXTRACTED": "Final count- down ina days now!", + "PADDED_8_DILATION_1": "Final count- down ina days now!", + "PAD_8_FRACT_0_5": "Final count- down ina few days now!", + "PAD_8_FRACT_0_2": "Final count- down ina days now!" + }, + "8": { + "INITIAL_BOX": "One rocket z how can that be z", + "DEFAULT": "28 one rocket z how can that be z", + "DEFAULT_GREY_PAD": "Rocket z how can that be z|", + "PADDED_4": "Yo one rocket z how can that be z", + "PADDED_8": "Rocket z how can that be 7", + "EXTRACTED_INIT_BOX": "One rocket 7 how can that be z", + "PADDED_4_EXTRACTED": "One rocket 7 how can that be 7", + "PADDED_8_EXTRACTED": "One rocket 7 how can that be 7", + "PADDED_8_DILATION_1": "One rocket z how can that be 7", + "PAD_8_FRACT_0_5": "One rocket z how can that be 7", + "PAD_8_FRACT_0_2": "One rocket z how can that be 7" + }, + "9": { + "INITIAL_BOX": "My hands are | healing nicely. can handle a gun again ..", + "DEFAULT": "My hands are y healing nicely. can handle a gun again..", + "DEFAULT_GREY_PAD": "My hands are healing nicely. can handle a un again", + "PADDED_4": "My hands are healing nicely. can handle a gun again..", + "PADDED_8": "My hands are healing nicely. can handle a gun again... /", + "EXTRACTED_INIT_BOX": "My hands are healing nicely. can handle a gun again ..", + "PADDED_4_EXTRACTED": "My hands are healing nicely. can handle a gun again.. |", + "PADDED_8_EXTRACTED": "My hands are healing nicely. can handle a gun again.. /", + "PADDED_8_DILATION_1": "My hands are healing nicely. can handle a gun again.. /", + "PAD_8_FRACT_0_5": "My hands are healing nicely. can handle a gun again.. /", + "PAD_8_FRACT_0_2": "My hands are healing nicely. can handle a gun again... /" + }, + "10": { + "INITIAL_BOX": "= \u00a5 what's as 7s | fd y 4 foe pin. some one doesn't trust me / |", + "DEFAULT": "Ey what's as this 7 a _ no firing pin. 
some: one doesn't trust me.\u201d", + "DEFAULT_GREY_PAD": "What's this 7 firing pinal. some one doesn't trust me./", + "PADDED_4": "Trust me./", + "PADDED_8": "E what's y. this 7 no firing pinal. some one doesn't trust me./", + "EXTRACTED_INIT_BOX": "One doesn't trust me 7", + "PADDED_4_EXTRACTED": "E what's yo this zt a _ no firing pinal. some one doesn't trust me ./", + "PADDED_8_EXTRACTED": "E what's yo this? a hno firing pinal. some one doesn't trust me ./", + "PADDED_8_DILATION_1": "Ee what's yo this t a no firing pinal. some one doesn't trust me ./", + "PAD_8_FRACT_0_5": "E what's nw this 7 one doesn\" t trust me./", + "PAD_8_FRACT_0_2": "E what's yo this? a _ hno firing pin. some one doesn't trust me.\u201d" + }, + "11": { + "INITIAL_BOX": "And then one day, as the mad dictator reviews his army ...", + "DEFAULT": "And then one day, as the mad dictator reviews his army ...", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "And then one day, as the mad dictator reviews his army cc", + "PADDED_8": "And then one day, as the mad dictator reviews his army...", + "EXTRACTED_INIT_BOX": "And then one day, as the mad dictator reviews his army ...", + "PADDED_4_EXTRACTED": "And then one day, as the mad dictator reviews his army ..c", + "PADDED_8_EXTRACTED": "And then one day, as the mad dictator reviews his army...", + "PADDED_8_DILATION_1": "And then one day, as the mad dictator reviews his army...", + "PAD_8_FRACT_0_5": "And then one day, as the mad dictator reviews his army...", + "PAD_8_FRACT_0_2": "And then one day, as the mad dictator reviews his army..." 
+ }, + "12": { + "INITIAL_BOX": "A stirring sight, isn't it, herr cannon z soon they , will be united with their aryan brothers allover \\ the world\u2019 /", + "DEFAULT": "A stirring sight, isn't it, herr cannon 7 soon they j will be united with their aryan brothers allover \u00abthe world\u2019 j", + "DEFAULT_GREY_PAD": "A stirring sight, isn't it, herr cannon z soon thi will be united with their aryan brothers allover the world", + "PADDED_4": "A stirring sight, isn't it, herr cannon z soon they will be united 7 with their aryan brothers allover \\ the world\u2019 /", + "PADDED_8": "A stirring sight, isn't it, herr cannon z soon they will be united with the ir aryan brothers all over the world, j", + "EXTRACTED_INIT_BOX": "A stirring sight, isn't it, herr cannon z soon they will be united with their aryan brothers allover the world\u201d", + "PADDED_4_EXTRACTED": "A stirring sight, isn't it, herr cannon 2 soon they will be united with their aryan brothers allover the world\u201d", + "PADDED_8_EXTRACTED": "A stirring sight, isn't it, herr cannon 2 soon they will be united with their aryan brothers allover the world.\u201d", + "PADDED_8_DILATION_1": "A stirring sight, isn't it, herr cannon 2 soon they will be united with their aryan brothers allover the world.\u201d", + "PAD_8_FRACT_0_5": "A stirring sight, isn't it, herr cannon z soon they will be united with their aryan brothers allover the world.\u201d", + "PAD_8_FRACT_0_2": "A stirring sight, isn't it, herr cannon 2 soon they will be united with their aryan brothers allover the world.\u201d" + }, + "13": { + "INITIAL_BOX": "Yes. 1 can hardly > wait, fuhrer/", + "DEFAULT": "Yes. 1 can hardly > wait, fuhrer", + "DEFAULT_GREY_PAD": "Yes, 1 can hardly ait, fuhrer /]", + "PADDED_4": "In \\y fuhrer uy", + "PADDED_8": "Yes. 1 can hardly wait, fuhrer", + "EXTRACTED_INIT_BOX": "Yes. 
1 can hardly wait, funrer !", + "PADDED_4_EXTRACTED": "1 can hardly wait, my fuhrer /", + "PADDED_8_EXTRACTED": "1 can hardly wait, my fuhrer /", + "PADDED_8_DILATION_1": "1 can hardly wait, my fuhrer /", + "PAD_8_FRACT_0_5": "1 can hardly wait, my fuhrer/", + "PAD_8_FRACT_0_2": "1 can hardly wait, my fuhrer/" + }, + "14": { + "INITIAL_BOX": "I'd \u00b0 better move fast/", + "DEFAULT": "Io better move fasst/", + "DEFAULT_GREY_PAD": "Id fast/", + "PADDED_4": "\u201d1'd better move fast/", + "PADDED_8": "217) better move fast.", + "EXTRACTED_INIT_BOX": "Id better move fast\u2019", + "PADDED_4_EXTRACTED": "Id better move fast/", + "PADDED_8_EXTRACTED": "Id better move fast.", + "PADDED_8_DILATION_1": "Id better move fast.", + "PAD_8_FRACT_0_5": "I'd better move fast,", + "PAD_8_FRACT_0_2": "I'd better move fast." + }, + "15": { + "INITIAL_BOX": "Presently the leader's attention becomes entirely fixed on the display...", + "DEFAULT": "Presently the leader's attention becomes entirely fixed on the display...", + "DEFAULT_GREY_PAD": "Presently the leader\" attention becomes entirely fixed on the display...", + "PADDED_4": "Presently the leader's attention becomes entirely fixed on the display.", + "PADDED_8": "Presently the leader's attention becomes entirely fixed on the display.", + "EXTRACTED_INIT_BOX": "Presently the leader's attention becomes entirely fixed on the display...", + "PADDED_4_EXTRACTED": "Presently the leader's attention becomes entirely fixed on the display...", + "PADDED_8_EXTRACTED": "Presently the leader's attention becomes entirely fixed on the display...", + "PADDED_8_DILATION_1": "Presently the leader's attention becomes entirely fixed on the display...", + "PAD_8_FRACT_0_5": "Presently the leader's attention becomes entirely fixed on the display...", + "PAD_8_FRACT_0_2": "Presently the leader's attention becomes entirely fixed on the display..." 
+ }, + "16": { + "INITIAL_BOX": "Now.\u201d", + "DEFAULT": "Now.\u201d", + "DEFAULT_GREY_PAD": "Now.", + "PADDED_4": "Now.\u201d", + "PADDED_8": "Now.\u201d j", + "EXTRACTED_INIT_BOX": "Now", + "PADDED_4_EXTRACTED": "Wow", + "PADDED_8_EXTRACTED": "Wow", + "PADDED_8_DILATION_1": "Wow", + "PAD_8_FRACT_0_5": "Wow", + "PAD_8_FRACT_0_2": "Wow" + }, + "17": { + "INITIAL_BOX": "Quickly and quietly, cannon makes his way to the radio room ...", + "DEFAULT": "Quickly and quietly, cannon makes his way to the radio room...", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Quickly and quietly, cannon makes his way to the radio room...", + "PADDED_8": "Quickly and quietly, cannon makes his way to the radio room", + "EXTRACTED_INIT_BOX": "Quickly and quietly, cannon makes his way to the radio room \u00ab..", + "PADDED_4_EXTRACTED": "Quickly and quietly, cannon makes his way to the radio room...", + "PADDED_8_EXTRACTED": "Quickly and quietly, cannon makes his way to the radio room.", + "PADDED_8_DILATION_1": "Quickly and quietly, cannon makes his way to the radio room.", + "PAD_8_FRACT_0_5": "Quickly and quietly, cannon makes his way to the radio room.", + "PAD_8_FRACT_0_2": "Quickly and quietly , cannon makes his way to the radio room ww." + }, + "18": { + "INITIAL_BOX": "Cannon at co-ordinates k-10! and jz 2z.", + "DEFAULT": "\" aannon at cannon at co-ordinates k-iol and jizz... 4", + "DEFAULT_GREY_PAD": "Cannon at o-ordinates k-101 and jz 2...", + "PADDED_4": "Cannon at [co-ordinates k-101 and jz 2... 4", + "PADDED_8": "Cannon at [co-ordinates k-101 and jz 2...", + "EXTRACTED_INIT_BOX": "Cannon at co-ordinates k-101 and jz 2...", + "PADDED_4_EXTRACTED": "Cannon at co-ordinates k-101 and jz 2...", + "PADDED_8_EXTRACTED": "Cannon at co-ordinates k-101 and jz2...", + "PADDED_8_DILATION_1": "Cannon at co-ordinates k-101 and jz2z...", + "PAD_8_FRACT_0_5": "Cannon at co-ordinates k-101 and jz2...", + "PAD_8_FRACT_0_2": "Cannon at co-ordinates k-101 and jz 2..." 
+ }, + "19": { + "INITIAL_BOX": "Quickly he sets the trans mitter to his secret frequency --.", + "DEFAULT": "Quickly he sets the trans- mitter to his secret frequency ---", + "DEFAULT_GREY_PAD": "Quickly he sets the trans mitter to his secret frequency", + "PADDED_4": "Quickly he sets the trans- mitter to his secret frequency .--", + "PADDED_8": "Quickly he sets the trans- mitter to his secret frequency", + "EXTRACTED_INIT_BOX": "Quickly he sets the trans: mitter to his secret frequency -...", + "PADDED_4_EXTRACTED": "Quickly he sets the trans- mitter to his secret frequency --.", + "PADDED_8_EXTRACTED": "Quickly he sets the trans - mitter to his secret frequency --.", + "PADDED_8_DILATION_1": "Quickly he sets the trans- mitter to his secret frequency .-.", + "PAD_8_FRACT_0_5": "Quickly he sets the trans - mitter to his secret frequency -..", + "PAD_8_FRACT_0_2": "Quickly he sets the trans- mitter to his secret frequency .-." + }, + "20": { + "INITIAL_BOX": "Situation desperate - it's reo plan one.\u201d t repeat...", + "DEFAULT": "Situation desperate ~ t's reo plan one.\u201d i repeat...", + "DEFAULT_GREY_PAD": "Situation", + "PADDED_4": "\"situation | situation desperate ~ it's red plan one.\u201d \u2018i repeat..", + "PADDED_8": "Situation \u00a5 desperate ~ t's reo plan one.\u201d i repeat.", + "EXTRACTED_INIT_BOX": "Situation desperate - it's reo plan one.\u201d t repeat...", + "PADDED_4_EXTRACTED": "Situation desperate ~ i repeat...", + "PADDED_8_EXTRACTED": "Situation desperate ~ it's reo plan one.\u201d i repeat...", + "PADDED_8_DILATION_1": "Situation desperate ~ t's reo plan one.\u201d i repeat...", + "PAD_8_FRACT_0_5": "Situation desperate ~ it's reo plan one.\u201d i repeat...", + "PAD_8_FRACT_0_2": "Situation desperate ~ t's reo plan one.\u201d i repeat..." + }, + "21": { + "INITIAL_BOX": "It's cannon? = a spy/", + "DEFAULT": "Canwon\u2019 . a spy./ /", + "DEFAULT_GREY_PAD": "Cannon. a spy.", + "PADDED_4": "It! 
canon\u2019 a spy.", + "PADDED_8": "Canon\u2019 a spy. /", + "EXTRACTED_INIT_BOX": "Ts cannon: he /s a spy /", + "PADDED_4_EXTRACTED": "It's / cannon. /s a spy./", + "PADDED_8_EXTRACTED": "Its canon he /s c%", + "PADDED_8_DILATION_1": "Its canon he /s c2%", + "PAD_8_FRACT_0_5": "It's y cannon he /s a spy./", + "PAD_8_FRACT_0_2": "Its cannon he /s a spy!" + }, + "22": { + "INITIAL_BOX": "And at that moment", + "DEFAULT": "And at that moment ...", + "DEFAULT_GREY_PAD": "And at that moment.", + "PADDED_4": "And at that moment", + "PADDED_8": "And at that moment", + "EXTRACTED_INIT_BOX": "And at that moment ..", + "PADDED_4_EXTRACTED": "And at that moment ..", + "PADDED_8_EXTRACTED": "And at that moment...", + "PADDED_8_DILATION_1": "And at that moment...", + "PAD_8_FRACT_0_5": "And at that moment...", + "PAD_8_FRACT_0_2": "And at that moment ..." + }, + "23": { + "INITIAL_BOX": "Don't move or you die right now. traitor /", + "DEFAULT": "Vem malt aas\u201d don't move or you die right now, traitor |", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Vm aat aai don't move or you die right \u201cnow, traitor _", + "PADDED_8": "Ydown't move or you die right now, traitor 4", + "EXTRACTED_INIT_BOX": "Dont move or you die right now. traitor", + "PADDED_4_EXTRACTED": "Dont move or you die right now, traitor\u201d", + "PADDED_8_EXTRACTED": "Dont move or you die right now, traitor", + "PADDED_8_DILATION_1": "Dont move or you die right now, traitor", + "PAD_8_FRACT_0_5": "Don't move or you die right now, traitor", + "PAD_8_FRACT_0_2": "Don't move or you die right now, traitor" + }, + "24": { + "INITIAL_BOX": "- search him. 
otto.", + "DEFAULT": "Search him, otto /", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Search him, otto", + "PADDED_8": "Search \u201chim, otto /", + "EXTRACTED_INIT_BOX": "Search him, otto", + "PADDED_4_EXTRACTED": "Search him, otto", + "PADDED_8_EXTRACTED": "Search him, otto /", + "PADDED_8_DILATION_1": "Search him, otto /", + "PAD_8_FRACT_0_5": "Search him, otto /", + "PAD_8_FRACT_0_2": "Search him, otto /" + }, + "25": { + "INITIAL_BOX": "Tense as a coiled spring, cannon waits as they disarm him.", + "DEFAULT": "Tense as a coiled spring, cannon waits as they disarm him...", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Tense as a coiled spring, cannon waits as they disarm him...", + "PADDED_8": "Tense as a coiled spring, cannon waits as they disarm him", + "EXTRACTED_INIT_BOX": "Tense as a coiled spring, cannon waits as they disarm him...", + "PADDED_4_EXTRACTED": "Tense as a coiled spring, cannon waits as they disarm him...", + "PADDED_8_EXTRACTED": "Tense as a coiled spring, cannon waits as they disarm him...", + "PADDED_8_DILATION_1": "Tense as a coiled spring, cannon waits as they disarm him...", + "PAD_8_FRACT_0_5": "Tense as a coiled spring, cannon waits as they disarm him..c", + "PAD_8_FRACT_0_2": "Tense as a coiled spring, cannon waits as they disarm him..." + }, + "26": { + "INITIAL_BOX": "The fuhrer will be very interested.", + "DEFAULT": "The fuhrer will be very interested.", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Lue creed the fuhrer will be very . interested.", + "PADDED_8": "Ltr the fuhrer will be jery \\ interested", + "EXTRACTED_INIT_BOX": "The fuhrer will be very interested.", + "PADDED_4_EXTRACTED": "The fuhrer will be very interested.", + "PADDED_8_EXTRACTED": "The fuhrer will be very interested.", + "PADDED_8_DILATION_1": "The fuhrer will be very interested.", + "PAD_8_FRACT_0_5": "The fuhrer will be very interested.", + "PAD_8_FRACT_0_2": "The fuhrer will be very interested." 
+ }, + "27": { + "INITIAL_BOX": "Then \u2014", + "DEFAULT": "Then \u2014.", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Then \u2014", + "PADDED_8": "Then", + "EXTRACTED_INIT_BOX": "Then \u2014", + "PADDED_4_EXTRACTED": "Then \u2014", + "PADDED_8_EXTRACTED": "Then \u2014", + "PADDED_8_DILATION_1": "Then \u2014", + "PAD_8_FRACT_0_5": "Then \u2014", + "PAD_8_FRACT_0_2": "Then \u2014" + }, + "28": { + "INITIAL_BOX": "What-./z this transmitter is still open 7 4", + "DEFAULT": "What-.7z this transmitter 1s still open.\u201d a", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "What-.7z this transmitter 1s still open.\u201d", + "PADDED_8": "What-.7z this transmitter 1s , stillopen.\u201d", + "EXTRACTED_INIT_BOX": "What-./z this transmitter is still open 7", + "PADDED_4_EXTRACTED": "What-.7z this transmitter is still open.\u201d", + "PADDED_8_EXTRACTED": "What-.7z this transmitter is still open.\u201d", + "PADDED_8_DILATION_1": "What-.7z this transmitter 1s still open.\u201d", + "PAD_8_FRACT_0_5": "What-7z this transmitter 1s still open.\u201d", + "PAD_8_FRACT_0_2": "What-/z this transmitter 1s still open.\u201d" + }, + "29": { + "INITIAL_BOX": "The momentary distrac - tion |s enough ./ a blur of motion, and...", + "DEFAULT": "The momentary distrac - tion |s enough \u00a9 a blur of motion, and...", + "DEFAULT_GREY_PAD": "The momentary distrac - tion |s enough \u00a9 a blur of motion, and", + "PADDED_4": "The momentary distrac - tion |s enough \u00a9 a blur of motion, and...", + "PADDED_8": "The momentary distrac - tion |s enough \u00a9 a blur of motion, and", + "EXTRACTED_INIT_BOX": "The momentary distrac - tion |s enough ./ a blur of motion, and...", + "PADDED_4_EXTRACTED": "The momentary distrac - tion is enough \u00a9 a blur of motion, and...", + "PADDED_8_EXTRACTED": "The momentary distrac - tion is enough \u00a9 a blur of motion, and...", + "PADDED_8_DILATION_1": "The momentary distrac - tion |s enough \u00a9 a blur of motion, and...", + "PAD_8_FRACT_0_5": "The momentary distrac 
- tion is enough / a blur of motion, and...", + "PAD_8_FRACT_0_2": "The momentary distrac~ tion is enough / a blur of motion, and..." + } + }, + "FOX_CHILLINTALES_T17_012.jpg": { + "1": { + "DEFAULT": "At last the door swung open to reveal a cadaverous old man who peered out into the darkness at me...", + "INITIAL_BOX": "At last the door swung open to reveal a cadaverous old man who peered out into the darkness at me...", + "DEFAULT_GREY_PAD": "Sw en ti en a door nu in fl a rl av ef us ol man ih at he df (n ss ee", + "PADDED_4": "At last the door swung open to reveal a cadaverous old man who peered out into the darkness at me...", + "PADDED_8": "T last the door swung open to reveal our cadaverous old man who peered t a", + "EXTRACTED_INIT_BOX": "At last the door swung open to reveal a cadaverous old man who peered out into the darkness at me.", + "PADDED_4_EXTRACTED": "At last the door swung open to reveal a cadaverous old man who peered out into the darkness at me.", + "PADDED_8_EXTRACTED": "At last the door swung open to reveal a cadaverous old man who peered out into the darkness at me.", + "PADDED_8_DILATION_1": "At last the door swung open to reveal a cadaverous old man who peered out into the darkness at me.", + "PAD_8_FRACT_0_5": "At last the door swung open to reveal a cadaverous old man who peered out into the darkness at me.", + "PAD_8_FRACT_0_2": "At last the door swung open to reveal a cadaverous old man who peered out into the darkness at me." + }, + "0": { + "INITIAL_BOX": "Who are you? what do you want? \u2018yao", + "DEFAULT": "Who are you? what do you want? \u2018yom", + "DEFAULT_GREY_PAD": "Who are you? what do you want? yum", + "PADDED_4": "Who are you? what do you want? yau", + "PADDED_8": "Who are you? what do you want?", + "EXTRACTED_INIT_BOX": "Who are you? what do you want?", + "PADDED_4_EXTRACTED": "Who are you? what do you want?", + "PADDED_8_EXTRACTED": "Who are you? what do you want?", + "PADDED_8_DILATION_1": "Who are you? 
what do you want?", + "PAD_8_FRACT_0_5": "Who are you? what do you want?", + "PAD_8_FRACT_0_2": "Who are you? what do you want?" + }, + "2": { + "INITIAL_BOX": "I want to see professor blutress. the agency sent me.", + "DEFAULT": "I want to see ; professor blutress. the agency sent me.", + "DEFAULT_GREY_PAD": "I want to see professor blutress. the agency sent me.", + "PADDED_4": "| i want to see professor . blutress. the agency sent me.", + "PADDED_8": ") i want to see ~ professor - blutress. the agency sent me.", + "EXTRACTED_INIT_BOX": "1 want to see professor blutress. the agency sent me.", + "PADDED_4_EXTRACTED": "1 want to see professor blutress. the agency sent me.", + "PADDED_8_EXTRACTED": "1 want to see professor blutress. the agency sent me.", + "PADDED_8_DILATION_1": "I want to see professor blutress. the agency sent me.", + "PAD_8_FRACT_0_5": "1 want to see professor blutress. the agency sent me.", + "PAD_8_FRACT_0_2": "I want to see professor blutress. the agency sent me." + }, + "3": { + "INITIAL_BOX": "Oh, yes. you must be the secretary i sent for. i'm professor blutress rr come in. come in.", + "DEFAULT": "Oh,yes. you must be the secretary i sent for. i'm professor blutress-\u2014 rr come in. come in. <3)", + "DEFAULT_GREY_PAD": "Oh, yes. you must be the secretary i sent for. i'm professor blutress=-\u2014 come in. come in. j", + "PADDED_4": "Oh, yes. you must be the secretary i sent for. i'm professor blutress=-~\u2014 rr come in. come in. za", + "PADDED_8": "Oh,yes. you must be the secretary 1 sent for. i'm professor blutress=-~\u2014 by come in. come in. od", + "EXTRACTED_INIT_BOX": "Oh, yes. you must 8e the secretary 1 sent for, i'm professor blutress-~~\u2014 gome in, come in.", + "PADDED_4_EXTRACTED": "Oh, yes. you must be the secretary 1 sent for, i'm professor blutress-~~\u2014 come in. come in.", + "PADDED_8_EXTRACTED": "Oh, yes. you must be the secretary 1 sent for, i'm professor blutress-~~\u2014 come in. 
come in.", + "PADDED_8_DILATION_1": "Oh,yes. you must be the secretary 1 sent for. i'm professor blutress=-\u2014 come in. come in.", + "PAD_8_FRACT_0_5": "Oh, yes. you must be the secretary 1 sent for, i'm professor blutress-~\u2014 come in. come in.", + "PAD_8_FRACT_0_2": "Oh, yes. you must be the secretary 1 sent for, i'm professor blutress~~- come in. come in." + }, + "4": { + "INITIAL_BOX": "\u00a38 we entered the pro- fessor's study an unearthly. scream came from the dark recesses of the house.", + "DEFAULT": "As we entered the pro- fessor's study an unearthly. scream came from the dark recesses of the house...", + "DEFAULT_GREY_PAD": "We red 0- or's y an earthly. scream me from e dark esses of the house.", + "PADDED_4": "As we entered the pro- fessor's study an unearthly. scream ame from the dark recesses of the house...", + "PADDED_8": "As we entered the pro- fessor's study an unearthly, scream came from the dark | recesses of the house...", + "EXTRACTED_INIT_BOX": "Re rom e ark ecesses of e house.", + "PADDED_4_EXTRACTED": "Re rom e ark ecesses of e house.", + "PADDED_8_EXTRACTED": "Re rom e ark ecesses of e house.", + "PADDED_8_DILATION_1": "Re rom e ark ecesses of e house.", + "PAD_8_FRACT_0_5": "Re rom e ark ecesses of e house.", + "PAD_8_FRACT_0_2": "Re rom e ark ecesses of e house." + }, + "5": { + "INITIAL_BOX": "Good lord! what was that? it sounded\" like a scream!", + "DEFAULT": "Good lord! what was that? it sounded\u2122 like a scream!", + "DEFAULT_GREY_PAD": "Good lord! what was that? it sounded\u2122 like a scream!", + "PADDED_4": "Good lord! what was that? it sounded\u2122 like a scream!", + "PADDED_8": "Tr tw a yttyy ttt 00d lord! what was that? it sounded\" k ream! like a scream", + "EXTRACTED_INIT_BOX": "Good lord! what was that? it sounded\" like a scream!", + "PADDED_4_EXTRACTED": "Good lord! what was that? it sounded like a scream!", + "PADDED_8_EXTRACTED": "Good lord! what was that? 
it sounded like a scream!", + "PADDED_8_DILATION_1": "Good lord! what was that? it sounded like a scream", + "PAD_8_FRACT_0_5": "Good lord! what was that? it sounded like a scream!", + "PAD_8_FRACT_0_2": "Good lord! what was that? it sounded like a scream!" + }, + "6": { + "INITIAL_BOX": "A scream? i heard nothing, mr. howe.", + "DEFAULT": "\"sh scream? i heard | nothing, mr. howe.", + "DEFAULT_GREY_PAD": "A | creas i heard nothing, mr. howe.", + "PADDED_4": "A scream? i heard nothing, mr. howe.", + "PADDED_8": "A scream? i heard nothing, mr. howe.", + "EXTRACTED_INIT_BOX": "A scream? i heard nothing, mr. howe.", + "PADDED_4_EXTRACTED": "A scream? i heard nothing, mr. howe.", + "PADDED_8_EXTRACTED": "A scream? i heard nothing, mr. howe.", + "PADDED_8_DILATION_1": "A scream? i heard nothing, mr. howe.", + "PAD_8_FRACT_0_5": "A scream? i heard nothing, mr. howe.", + "PAD_8_FRACT_0_2": "A scream? i heard nothing, mr. howe." + }, + "7": { + "INITIAL_BOX": "1 could have sworn -- perhaps it was just my imagination.", + "DEFAULT": "1 could have sworn-- \u2018perhaps it was just my imagination.", + "DEFAULT_GREY_PAD": "1 could have sworn -- \u2018perhaps it was just my imagination. t(", + "PADDED_4": "1 could\u2019 have sworn-- perhaps it was just my imagination.", + "PADDED_8": "1 could have sworn-- perhaps it was just my imagination.", + "EXTRACTED_INIT_BOX": "1 could: have sworn-- perhaps it was just my imagination. ti", + "PADDED_4_EXTRACTED": "1 could have sworn-- perhaps it was just my imagination. tc", + "PADDED_8_EXTRACTED": "1 could\" have sworn-- perhaps it was just my imagination. 0", + "PADDED_8_DILATION_1": "I could: have sworn-- perhaps it was just my imagination. to", + "PAD_8_FRACT_0_5": "1 could\" have sworn-- perhaps it was just my imagination. 0", + "PAD_8_FRACT_0_2": "1 could\" have sworn-- perhaps it was just my imagination. 0" + }, + "8": { + "INITIAL_BOX": "You are tired. st let me take you 0 your room. 
we'll discuss your duties in the morning...come...", + "DEFAULT": "In you are tired. st j/let me take you 0 your room. we'll discuss your duties in the morning. ..come... |", + "DEFAULT_GREY_PAD": "- you are tired. st let me take you to your room. we'll discuss your duties in the morning...come...", + "PADDED_4": "In you are tired. st let me take you 0 your room. we'll discuss your duties in the morning. ..come...", + "PADDED_8": "Rn == you are tired. st j=/let me take you 0 your room. we'll discuss your duties in the morning. ..come...", + "EXTRACTED_INIT_BOX": "N-\u2014 you are tired. st let me take you to your room. we'll discuss your duties in the morning. ..come...", + "PADDED_4_EXTRACTED": "In -- you are tired. st let me take you to your room. we'll discuss your duties in the morning. ..come...", + "PADDED_8_EXTRACTED": "Rn = \u2014 you are tired. st let me take you to your room. we'll discuss your duties in the morning. . . come...", + "PADDED_8_DILATION_1": "Rn == you are tired. st let me take you to your room. we'll discuss your duties in the morning. . .come...", + "PAD_8_FRACT_0_5": "Rn == you are tired. st let me take you to your room. we'll discuss your duties in the morning. . . come...", + "PAD_8_FRACT_0_2": "Rn == you are tired. st let me take you to your room. we'll discuss your duties in the morning me..." + }, + "9": { + "INITIAL_BOX": "1 he next morning 1 awoke early and started downstairs. on the way down i passed a heavy barred door.", + "DEFAULT": "1 he next morning i awoke early and started downstairs. on the way down 1 passed a heavy barred door...", + "DEFAULT_GREY_PAD": "The next morning i awoke early and started downstairs. on the way down i passed a heavy barred door ...", + "PADDED_4": "He ne ng il awoke early and started nstal \u201con the way down i passed a eavy barred do!", + "PADDED_8": "He next morning il awoke early and started ownstairs. 
on the way down i passed a heavy barred door ...", + "EXTRACTED_INIT_BOX": "Morning e early and started downstairs. on the way down 1 passed a heavy barred doc", + "PADDED_4_EXTRACTED": "Morning e early and started downstairs. on the way down 1 passed a heavy barred doo", + "PADDED_8_EXTRACTED": "Morning e early and started downstairs. on the way down 1 passed a heavy barred doo", + "PADDED_8_DILATION_1": "Morning e early and started downstairs. on the way down i passed a heavy barred doo", + "PAD_8_FRACT_0_5": "Morning e early and started downstairs. on the way down 1 passed a heavy 8arred doo", + "PAD_8_FRACT_0_2": "Morning e early and started downstairs, on the way down 1 passed a heavy 8arred doo" + }, + "10": { + "INITIAL_BOX": "Uek/ it's awful...", + "DEFAULT": "Ugh! it's awful...", + "DEFAULT_GREY_PAD": "Ugh! it's awful...", + "PADDED_4": "Ugh! it's awful...", + "PADDED_8": "Ugh! it's awful...", + "EXTRACTED_INIT_BOX": "Uek/ t's awful...", + "PADDED_4_EXTRACTED": "Ueh? it's awful...", + "PADDED_8_EXTRACTED": "Ueh? it's awful...", + "PADDED_8_DILATION_1": "Ueh! it's awful...", + "PAD_8_FRACT_0_5": "Ueh?! it's awful...", + "PAD_8_FRACT_0_2": "Ueh? it's awful..." + }, + "11": { + "INITIAL_BOX": "That's,strange/ i | wonder what's behind it? it's thick enough to be a vault. \u201c&", + "DEFAULT": "That's.strange/ i j wonder what's behind it? it's thick enough to be a vault. \u201c4", + "DEFAULT_GREY_PAD": "That's.strangez i 1 wonder what's behind it? it's thick enough to be a vault. 4", + "PADDED_4": "That's.strange/ i} wonder what's behind it? it's thick enough to be a vault. \u201chd", + "PADDED_8": "That's,strange / tl wonder what's behind it? it's thick enough to be a vault.", + "EXTRACTED_INIT_BOX": "That's..strange / tl wonder what's behind it? it's thick enough to be a vault.", + "PADDED_4_EXTRACTED": "That's..strange / tl wonder what's behind it? it's thick enough to be a vault.", + "PADDED_8_EXTRACTED": "That's..strange / tl wonder what's behind it? 
it's thick enough to be a vault.", + "PADDED_8_DILATION_1": "That's.strange 7 i wonder what's behind it? it's thick enough to be a vault.", + "PAD_8_FRACT_0_5": "That's..strange / tl wonder what's behind itz it's thick enough to be a vault.", + "PAD_8_FRACT_0_2": "That's..strange / tl wonder what's behind itz it's thick enough to be a vault." + }, + "12": { + "INITIAL_BOX": "Supbenty i became aware of a frightful odor that seemed to come from behind the door. the nauseating stench of decayed, rotting flesh...", + "DEFAULT": "Subpenly i became aware of a frightful odor that | seemed to come from behind the | door. the | nauseating stench of decayed, rotting flesh...", + "DEFAULT_GREY_PAD": "Subbenly dthe nauseating stench of decaye| rotti flesh", + "PADDED_4": "A frightful ddor that seemed to come from behind the door. the nauseating stench of decayed, rotting flesh...", + "PADDED_8": "A frightful odor that seemed to | come from behind the door. the nauseating stench of decayed, rotting flesh...", + "EXTRACTED_INIT_BOX": "Supoenwy i became aware of a frightful odor that seemed to come from behind the door. the nauseating stench of decayed, rotting flesh...", + "PADDED_4_EXTRACTED": "Supoenwy i became aware of a frightful odor that seemed to come from behind the door. the nauseating stench of decayed, rotting flesh...", + "PADDED_8_EXTRACTED": "Supoenwy i became aware of a frightful odor that seemed to come from behind the door. the nauseating stench of decayed, rotting flesh...", + "PADDED_8_DILATION_1": "Suopeniy aware of a frightful odor that seemed to come from bewind the door. the nauseating stench of decayed, rotting flesh...", + "PAD_8_FRACT_0_5": "Suppenwy i became aware of a frightful odor that seemed to come from behind the door. the nauseating stench of decayed, rotting flesh...", + "PAD_8_FRACT_0_2": "Suppenwy i became aware of a frightful odor that seemed to come from behind the door. the nauseating stench of decayed, rotting flesh..." 
+ } + }, + "Galactus_12.jpg": { + "1": { + "DEFAULT": "Vy nova is ny here, o master. i have heard your call and. stand ready to po your h bidding. a", + "INITIAL_BOX": "/'_nova is here, o master. i have heard your call and. sas \\ bidding.", + "DEFAULT_GREY_PAD": "Nova is here, o master. i have heard your call and. stand ready to| 70 your bidding.", + "PADDED_4": "Nova is here, \u00a9 maste! i have heard your call and. stand ready to po your bh 3(dding.", + "PADDED_8": "Nova is here, o master. i have heard your call and. stand ready to ww your wi", + "EXTRACTED_INIT_BOX": "Nova is here, o master. i have heard your call and. stand ready to po your bidding.", + "PADDED_4_EXTRACTED": "Nova 1s here, o master. i have heard your call and. stand ready to po your bidding.", + "PADDED_8_EXTRACTED": "Nova 1s here, o master. i have heard your call and. stand ready to po your bidding.", + "PADDED_8_DILATION_1": "Nova is here, o master. 70 your bidding.", + "PAD_8_FRACT_0_5": "Nova 1s here, o master. i have heard your call and. stand ready to 70 your bidding.", + "PAD_8_FRACT_0_2": "Nova 1s here, o master. i have heard your call and. stand ready to 70 your bidding." + }, + "0": { + "INITIAL_BOX": "Anz he /5 galactus", + "DEFAULT": "Anp he [5 galactus.", + "DEFAULT_GREY_PAD": "Ano he 2 | gacacrus|", + "PADDED_4": "V2 he /: | gacactus.", + "PADDED_8": "And he [5 galactus.", + "EXTRACTED_INIT_BOX": "Ani he 15 galactus", + "PADDED_4_EXTRACTED": "Ant he [3 galactus", + "PADDED_8_EXTRACTED": "Anp he [5 galactus", + "PADDED_8_DILATION_1": "And he 15 galacrus", + "PAD_8_FRACT_0_5": "And he [5 galactus", + "PAD_8_FRACT_0_2": "And he 15 gacacrus" + }, + "2": { + "INITIAL_BOX": "Devourer of worlds | harbinger of ultimate doom! hi\u00a7 manlike for roy i ancire fa", + "DEFAULT": "Evourer of worlds | harbing! tea ree soom mncie foxit 15 only in her eyes. pum", + "DEFAULT_GREY_PAD": "Devourer of worlds ! harbinger of ultimate doom! i$ manlike form 5 only in her eyes. 
pom", + "PADDED_4": "Devourer of worlds | harbinger of ultimate doom! 15 manlike form 15 only in her eye: -", + "PADDED_8": "Devourer of worlds ! harbinger of ultimate doom! 41 manlike form 75 only in her eyes.", + "EXTRACTED_INIT_BOX": "Pevourer of worlds ' harbinger of ultimate pom 41s manlike form 75 only in her eyes.", + "PADDED_4_EXTRACTED": "Pevourer of worlds * harbinger of lltimate doom! hi3 manlike form 75 only in her eyes.", + "PADDED_8_EXTRACTED": "Pevourer of worlds | harbinger of lltimate doom! hi3 manlike form 15 only in her eyes.", + "PADDED_8_DILATION_1": "Devourer of worlds | harbinger of ultimate doom ! his manlike form 15 only in her eyes.", + "PAD_8_FRACT_0_5": "Devourer of worlds | harbinger of ultimate doom! wiz manlike form 75 only in her eyes.", + "PAD_8_FRACT_0_2": "Devourer of worlds | harbinger of lltimate doom his manlike form 5 only in her eyes." + }, + "3": { + "INITIAL_BOX": "Cor his true shape 15 unguessed and unguess- able. he isa living r= ab ct 2 air ror. [", + "DEFAULT": "For his true shape 15 unguessed and ungles: able. he 15 a living. force of nature.", + "DEFAULT_GREY_PAD": "Rue shape 5 unslessel and uncles 754 living zowce of narure [tt]", + "PADDED_4": "Shape 15 | unglessed and ungless~ able. he 5 a living ae de nar. [tt", + "PADDED_8": "For his true shape 15 unguessed and ungless able. he 54 living i force of nature.", + "EXTRACTED_INIT_BOX": "For his true shape [5 unglessed and unguess* able. he 15.1 livag. force of nature.", + "PADDED_4_EXTRACTED": "For his true shape [5 unglessed and unguess~ able. he 51 livag. force of natur!", + "PADDED_8_EXTRACTED": "For h1s, true shape [5 unglessed and unguess~ able. he 51 livag. force of natur", + "PADDED_8_DILATION_1": "For his, true shape [5 unguessed and unguess* able. he 15 living. force of nature.", + "PAD_8_FRACT_0_5": "For his, true shape 5 unglessed and unguess* able. he 151 liviag. force of natur", + "PAD_8_FRACT_0_2": "For his, true shape 15 unglessed and unguess* able. 
he 15 living. force of natur" + }, + "4": { + "INITIAL_BOX": "Then heed well my words, most loyal of ny heralds. 4", + "DEFAULT": "\" then heed _ \\ well my words, most loyal of \u201cmy heralds, 4", + "DEFAULT_GREY_PAD": "Then heed well my words, most loyal of my heralds.", + "PADDED_4": "7 then heed well my words, most loyal of my heralds, a", + "PADDED_8": "Then heed well my words, most loyal of \\ my heralds.", + "EXTRACTED_INIT_BOX": "Then heed well my words most loyal of \u2018my heralds.", + "PADDED_4_EXTRACTED": "Then heed well my words, most loyal of my heralds,", + "PADDED_8_EXTRACTED": "Then heed well my words, most loyal of my heralds,", + "PADDED_8_DILATION_1": "Then heed well my words, most loyal of my heralds,", + "PAD_8_FRACT_0_5": "Then heed well my words, most loyal of my heralds,", + "PAD_8_FRACT_0_2": "Then heed well my words, most loyal of my heralds," + }, + "5": { + "INITIAL_BOX": "Y the shatterep galaxy from which you have returned is not unique! the long range scanners with: in this ship detect a vast procession of such destruction: reaching haleway across the universe.", + "DEFAULT": "P_ the shattered galaxy from which you have returned 15 not unique! the long range scanners with- in this ship detect a vast procession of such destruction: reaching haleway across the universe.", + "DEFAULT_GREY_PAD": "The shattered galaxy from which you have returned 15 not unique! the long range scanners with in this ship detect a vast procession of \u2018such destruction: l reaching halfway across the \u201cuniverse.", + "PADDED_4": "Y. the shattered galaxy from which you have returned 15 not unique! the long range scanners with- in this ship detect a vast procession of such destruction: reaching haleway across the i \u201cuniverse.", + "PADDED_8": "E shattered galaxy from which voliss you have returned 15 not unique the long range scanners with- in this ship detect a vast procession of such destruction: reaching halfway. 
across the i universe", + "EXTRACTED_INIT_BOX": "The shattered galaxy from which you have returned 1s not unique the long range_scanners with in th \u00a7 ship detect a vast procession of \u2018such destruction: reaching halfway across the universe...", + "PADDED_4_EXTRACTED": "The shattered galaxy from which you have returned 15 not unique the long range scanners with- in th \u00a7 ship detect a vast procession of \u2018such destruction: reaching halfway across the universe.", + "PADDED_8_EXTRACTED": "The shattered galaxy from which you have returned 15 not unique the long range_scanners with- in th \u00a7 ship detect a vast procession of \u2018such destruction: reaching halfway across the universe.", + "PADDED_8_DILATION_1": "The shattered galaxy from which you have returned 16 not unique! the long range scanners with- in th \u00a7 ship petect a vast procession of such destruction: reaching haleway across the universe.", + "PAD_8_FRACT_0_5": "The shattered galaxy from which you have returned 15 not unique the long range_scanners with- in th \u00a7 ship detect a vast procession of \u2018such destruction: reaching halfway across the universe.", + "PAD_8_FRACT_0_2": "The shattered galaxy from which you have returned 15 not unique the long range_scanners with- in th \u00a7 ship detect a vast procession of such destruction: reaching halfway across the universe." 
+ }, + "6": { + "INITIAL_BOX": "The last galactus story con", + "DEFAULT": "The last galactus story con", + "DEFAULT_GREY_PAD": "The last galactus story conl", + "PADDED_4": "The last galactus story con", + "PADDED_8": "The last galactus story cont", + "EXTRACTED_INIT_BOX": "The ast galactus story con", + "PADDED_4_EXTRACTED": "The last galactus story con", + "PADDED_8_EXTRACTED": "The last galactus story cont", + "PADDED_8_DILATION_1": "The last galactus story cont", + "PAD_8_FRACT_0_5": "The last galactus story cont", + "PAD_8_FRACT_0_2": "The last galactus story cont" + }, + "7": { + "INITIAL_BOX": "It begins an near te point of universal. srigin, and envs at sra ptefarts rl fone", + "DEFAULT": "It begins, quite near the point of universal. origin, and ends at sour long since departed former a\" home.", + "DEFAULT_GREY_PAD": "It begins, quite near the, point of universal. origin, and ends a sour long since departed former ome. /", + "PADDED_4": "It begins y quite near the point of universal. origin, and ends at sour long since | departed former home,", + "PADDED_8": "It begins \\ quite near the point of universal. origin, and ends at sour long since departed former home.", + "EXTRACTED_INIT_BOX": "It begins quite near the point of universal origin, and ends at sour long since departed former home", + "PADDED_4_EXTRACTED": "It begins quite near the point of universal origin, and enus at sour long since departed former home", + "PADDED_8_EXTRACTED": "It begins quite near the point of universal origin, and enus at sour long since departed former home", + "PADDED_8_DILATION_1": "It begins, quite near the, point of universal. 
origin) and ends at your long since departed former home", + "PAD_8_FRACT_0_5": "It begins quite near the point of universal origin, and ends at sour long since departed former home", + "PAD_8_FRACT_0_2": "It begins quite near the point of universal origin, and ends at sour long since departed former home" + }, + "8": { + "INITIAL_BOX": "| the galaxy men once called the milky aay?", + "DEFAULT": "\u201d the galaxy _ men once called the milky to 4", + "DEFAULT_GREY_PAD": "The galaxy. en once called) the milky 77374", + "PADDED_4": "P the galaxy men once called the milky i all", + "PADDED_8": "Y the galaxy. men once called the milky way?", + "EXTRACTED_INIT_BOX": "The galaxy men once called the milky way\u201d", + "PADDED_4_EXTRACTED": "The galaxy men once called the milky nay?", + "PADDED_8_EXTRACTED": "The galaxy men once called the milky way?", + "PADDED_8_DILATION_1": "The galaxy. men once called the milky way?", + "PAD_8_FRACT_0_5": "The galaxy. men once called the milky way?", + "PAD_8_FRACT_0_2": "The galaxy. men once called the milky way?" 
+ }, + "9": { + "INITIAL_BOX": "Es next issu", + "DEFAULT": "Ies next issu", + "DEFAULT_GREY_PAD": "Es next 55", + "PADDED_4": "Jes next issu|", + "PADDED_8": "Ues next issue", + "EXTRACTED_INIT_BOX": "Es next i1s5lu", + "PADDED_4_EXTRACTED": "Jes next is5u", + "PADDED_8_EXTRACTED": "Ues next is5u", + "PADDED_8_DILATION_1": "Ues next issu", + "PAD_8_FRACT_0_5": "Ues next is5u", + "PAD_8_FRACT_0_2": "Ues next issu" + } + }, + "INOUE_KYOUMEN_002.png": { + "1": { + "DEFAULT": "\u4f8d\u3063\u3066!", + "INITIAL_BOX": "\u306d\u3048\u3049 \u7afa\u3063\u3066!", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "\u4f8d\u3063\u3066!", + "PADDED_8": "\u5f85\u3063\u3066!", + "EXTRACTED_INIT_BOX": "\u306d\u3048\u3049 \u7afa\u3063\u3066!", + "PADDED_4_EXTRACTED": "\u4eba\u7afa\u3063\u3066!", + "PADDED_8_EXTRACTED": "\u4f8d\u3063\u3066!", + "PADDED_8_DILATION_1": "\u4f8d\u3063\u3066!", + "PAD_8_FRACT_0_5": "\u7afa\u3063\u3066!", + "PAD_8_FRACT_0_2": "\u4f8d\u3063\u3066!" + }, + "0": { + "INITIAL_BOX": "", + "DEFAULT": "\u7483\u3005\u4f9d!", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "\u7483\u3005\u4f9d!", + "PADDED_8": "\u7483\u3005\u4f9d!", + "EXTRACTED_INIT_BOX": "", + "PADDED_4_EXTRACTED": "\u7483\u3005\u4f9d!", + "PADDED_8_EXTRACTED": "\u7483\u3005\u4f9d!", + "PADDED_8_DILATION_1": "\u7483\u3005\u4f9d!", + "PAD_8_FRACT_0_5": "\u7483\u3005\u4f9d!", + "PAD_8_FRACT_0_2": "\u7483\u3005\u4f9d!" + }, + "2": { + "INITIAL_BOX": "", + "DEFAULT": "\u6591\u3005\u4f9d\u3063\u3066\u3070!", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "\u7483\u3005\u4f9d\u3063\u3066\u3070!", + "PADDED_8": "\u7409\u3005\u4f9d\u3063\u3066\u3070!", + "EXTRACTED_INIT_BOX": "", + "PADDED_4_EXTRACTED": "\u7409\u3005\u4f9d\u3063\u3066\u3070!", + "PADDED_8_EXTRACTED": "\u7409\u3005\u4f9d\u3063\u3066\u3070!", + "PADDED_8_DILATION_1": "\u7409\u3005\u4f9d\u3063\u3066\u3070!", + "PAD_8_FRACT_0_5": "\u7389\u3005\u4f9d\u3063\u3066\u3070!", + "PAD_8_FRACT_0_2": "\u7409\u3005\u4f9d\u3063\u3066\u3070!" 
+ }, + "3": { + "INITIAL_BOX": "\u7267 \u540c\u300c \u4e86\u7389\u3005\u4f9d", + "DEFAULT": "\u82ad \u9593 \u5dfe \u516d", + "DEFAULT_GREY_PAD": "|\u6539\u8036\u7d20\u3005\u4f9d|", + "PADDED_4": "\u7267\u8036\u7483\u3005\u4f9d", + "PADDED_8": "\u82ad \u764c \u5d17 - \u3044 )", + "EXTRACTED_INIT_BOX": "\u7267 \u540c\u300c \u7483\u3005\u4f9d", + "PADDED_4_EXTRACTED": "\u7267\u8036\u7483\u3005\u4f9d", + "PADDED_8_EXTRACTED": "\u64ec\u4e5f\u7389\u3005\u4f9d", + "PADDED_8_DILATION_1": "\u64ec\u601d\u6591\u3005\u4f9d", + "PAD_8_FRACT_0_5": "\u7af6 \u3044 )", + "PAD_8_FRACT_0_2": "\u64ec\u6065\u6591\u3005\u4f9d" + }, + "4": { + "INITIAL_BOX": "", + "DEFAULT": "\u6e5b \u8cdc \u978d \u5e9c \u758f", + "DEFAULT_GREY_PAD": "\u8fc2 \u4e92 \u56e0", + "PADDED_4": "\u66d9", + "PADDED_8": "\u8449 \u8ca1 \u8ecb \u3044 - \u8a69", + "EXTRACTED_INIT_BOX": "", + "PADDED_4_EXTRACTED": "\u7531", + "PADDED_8_EXTRACTED": "\u7267\u597d\u697c\u3005\u4f9d", + "PADDED_8_DILATION_1": "\u7267\u8036\u697c\u3005\u4f9d", + "PAD_8_FRACT_0_5": "\u7267\u4e86\u5c0b\u697c\u3005\u4f9d", + "PAD_8_FRACT_0_2": "\u7267\u4e86\u5c0b\u697c\u3005\u4f9d" + }, + "5": { + "INITIAL_BOX": "\u8457", + "DEFAULT": "\u4e09 9", + "DEFAULT_GREY_PAD": "\u307a\u2471", + "PADDED_4": "\u3042", + "PADDED_8": "- 9", + "EXTRACTED_INIT_BOX": "\u304a", + "PADDED_4_EXTRACTED": "\u5de7", + "PADDED_8_EXTRACTED": ") 9", + "PADDED_8_DILATION_1": ") \u30fb9", + "PAD_8_FRACT_0_5": ") 9", + "PAD_8_FRACT_0_2": ") 9" + }, + "6": { + "INITIAL_BOX": "-]", + "DEFAULT": "\u3008\u246am", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "\u8070\u2048", + "PADDED_8": ") \u3008\u300f", + "EXTRACTED_INIT_BOX": ")", + "PADDED_4_EXTRACTED": ") \u3008\u3078\u246am", + "PADDED_8_EXTRACTED": ") \u3008m", + "PADDED_8_DILATION_1": ") \u3008m", + "PAD_8_FRACT_0_5": ") \u3008m\u300fm", + "PAD_8_FRACT_0_2": ") \u3008m" + }, + "7": { + "INITIAL_BOX": "\u4e00\u8247\u306b\u6620\u3089\u308d\u3046\u3063\u3066 \u8a00\u3063\u3066\u308b\u306e\u306b 
\u4f55\u3067\u5148\u306b\u884c\u3063\u3061\u3085\u3046\u306e?", + "DEFAULT": "\u4e00\u7dd2\u306b\u5e30\u3089\u308d\u3046\u3063\u3066 \u8a00\u3063\u3066\u308b\u306e\u306b \u4f55\u3067\u5148\u306b\u884c\u3063\u3061\u3085\u3083\u3046\u306e?", + "DEFAULT_GREY_PAD": "\u4e00\u507d\u306b\u5e30\u3089\u3046\u3063\u3066 \u8a00\u3063\u3066\u308b\u306e\u306b \u4f55\u3067\u5148\u306b\u884c\u3063\u3061\u3085\u3045\u306e-", + "PADDED_4": "\u4e00\u7dd2\u306b\u5e30\u3089\u308d\u3046\u3063\u3066 \u8a00\u3063\u3066\u308b\u306e\u306b \u4f55\u3067\u5148\u306b\u884c\u3063\u3061\u3085\u3046\u306e?", + "PADDED_8": "\u4e00\u7dd2\u306b\u5e30\u3089\u308d\u3046\u3063\u3066 \u8a00\u3063\u3066\u308b\u306e\u306b \u4f55\u3067\u5148\u306b\u884c\u3063\u3061\u3085\u3046\u306e?", + "EXTRACTED_INIT_BOX": "\u4e00\u707d\u306b\u5cf0\u3089\u3046\u3063\u3066 \u8a00\u3063\u3066\u308b\u306e\u306b \u4f55\u3067\u5148\u306b\u884c\u3063\u3061\u3085\u3083\u3046\u306e?", + "PADDED_4_EXTRACTED": "\u4e00\u7e6a\u306b\u5e33\u3089\u3046\u3063\u3066 \u8a00\u3063\u3066\u308b\u306e\u306b \u4f55\u3067\u5148\u306b\u884c\u3063\u3061\u3083\u3085\u3046\u306e?", + "PADDED_8_EXTRACTED": "\u4e00\u7e6a\u306b\u5e33\u3089\u3046\u3063\u3066 \u8a00\u3063\u3066\u308b\u306e\u306b \u4f55\u3067\u5148\u306b\u884c\u3063\u3061\u3083\u3085\u3046\u306e?", + "PADDED_8_DILATION_1": "\u4e00\u7dd2\u306b\u5e30\u3089\u3046\u3063\u3066 \u8a00\u3063\u3066\u308b\u306e\u306b \u4f55\u3067\u5148\u306b\u884c\u3063\u3061\u3085\u3046\u306e?", + "PAD_8_FRACT_0_5": "\u4e00\u79cb\u306b\u65cf\u3089\u3046\u3063\u3066 \u8a00\u3063\u3066\u308b\u306e\u306b \u4f55\u3067\u5148\u306b\u884c\u3063\u3061\u3083\u3085\u3083\u3046\u306e?", + "PAD_8_FRACT_0_2": "\u4e00\u7dd2\u306b\u5e30\u3089\u3046\u3063\u3066 \u8a00\u3063\u3066\u308b\u306e\u306b \u5e81\u3067\u5148\u306b\u884c\u3063\u3061\u3085\u3083\u3046\u306e?" 
+ }, + "8": { + "INITIAL_BOX": "\u5c71 \u7b46 e \u4e86 \u51e7 u \u3044 > \u3085 \u30f0 \u3044 \u305f \u3085 \u3077 > w \u305b s8n", + "DEFAULT": "\u3060\u3063\u3066\u30a2\u30f3\u30bf \u652f\u5ea6\u3059\u308b\u306e \u5fc5\u3044\u3093\u3060\u3082\u3093", + "DEFAULT_GREY_PAD": "\u3054 \u4e86 \u5237 \u9ab8 \u534a \u3005 \u30f0 \u3044 \u305f \u3081 \u304b \u3089 | w \u305b s8n", + "PADDED_4": "\u3060\u3063\u3066\u30a2\u30f3\u30bf \u652f\u5ea6\u3059\u308b\u306e \u5fc5\u3044\u3093\u3060\u3082\u3093", + "PADDED_8": "\u3060\u3063\u3066\u30a2\u30f3\u30bf \u652f\u5ea6\u3059\u308b\u306e \u6027\u3044\u3093\u3060\u3082\u3093", + "EXTRACTED_INIT_BOX": "\u5d29 \u7fcc \u3054 \u4e86 \u3057 > \u3083 \u30c4 \u30f0 \u3044 \u305f \u3085 \u3044 \u309d \u3067 \u305b sn", + "PADDED_4_EXTRACTED": "\u3060\u3063\u3066\u30a2\u30f3\u30bf \u652f\u5ea6\u3059\u308b\u306e \u5fc5\u3044\u3093\u3060\u3082\u3093", + "PADDED_8_EXTRACTED": "\u3060\u3063\u3066\u30a2\u30f3\u30bf \u652f\u5ea6\u3059\u308b\u306e \u8b02\u3044\u3093\u3060\u3082\u3093", + "PADDED_8_DILATION_1": "\u3060\u3063\u3066\u30a2\u30f3\u30bf \u8b00\u5ea6\u3059\u308b\u306e \u9000\u3044\u3093\u3060\u3082\u3093", + "PAD_8_FRACT_0_5": "\u3060\u3063\u3066\u30a2\u30f3\u30bf \u8f09\u5ea6\u3059\u308b\u306e \u9000\u3044\u3093\u3060\u3082\u3093", + "PAD_8_FRACT_0_2": "\u3060\u3063\u3066\u30a2\u30f3\u30bf \u5409\u5ea6\u3059\u308b\u306e \u5c5e\u3044\u3093\u3060\u3082\u3093" + } + }, + "manga_0033.jpg": { + "1": { + "DEFAULT": "M \u3086 _ \u307f \u3081 \u306b \u30c8 - \u5a18 \u66fc 9 \u5982 \u5d50", + "INITIAL_BOX": "\u307f \u307f \u306b \u30c8 - ] \u30cb ] \u3053 \u300f \u4e16 \u3012", + "DEFAULT_GREY_PAD": "\u30eb _ \u30db \u306f \u307f \u306b \u3001", + "PADDED_4": "\u306b\u3044[ \u597d\u7f778\u98c4\u7531", + "PADDED_8": "\u306e\u306c_\u3088\u30c9\u307f\u3081 \u306b\u30c8- \u5373\u87ba9\u904e\u5d50", + "EXTRACTED_INIT_BOX": "\u307f \u30c3 \u306b \u3001 \u30c9 - ] \u30cb ] \u3053 \u300f \u4e16 \u3012", + "PADDED_4_EXTRACTED": "\u306b\u3044[. 
\u30b4\u30e0\u30df\u30cb\u30e5\u30ef\u203c", + "PADDED_8_EXTRACTED": "\u3043_ \u30c8\u30db\u30c9\u30c3 \u306b\u3001\u30c9- \u5b50\u87ba9\u904e\u5d50", + "PADDED_8_DILATION_1": "W\u30eb_\u30db\u30c9\u3085\u307f \u306b\u30c8\u30c9- \u5b50\u87ba9\u904e\u5d50", + "PAD_8_FRACT_0_5": "W\u3085_\u30db\u3085\u30c3 \u306b\u3001 -", + "PAD_8_FRACT_0_2": "M_\u307f\u307f \u8944\u4e0a\u8b02\u9084\u300d \u5373\u66d9b\u904e\u5d50" + }, + "0": { + "INITIAL_BOX": "\u6ce1\u308c\u306f\u3001 \u30de} \u4e0ap \u71b1\u306b\u3046\u304c\u3055\u308c\u305f\u7af6\u304b \u30b5\u30c8\u30eb\u30e2\u30ce [ \u307f\u306e\u3046\u308a\u2669\u3089\u306a \u300e\u9b5a\u6790\u8001\u306b\u306e\u9b91\u8a2a\u304b", + "DEFAULT": "\u7d42\u308c\u306f\u3001\u30fe= \u6cb3 \u71b1\u306b\u3046\u304c\u304b\u3055\u308c\u305f\u5922\u304b \u30b5\u30c8\u30eb\u30e2\u30ce [ \u307f\u306e\u3046\u308a\u2669\u3089\u306a \u300f\u9b91\u6790\u82b1\u300d\u306e\u614b\u8a18\u304b", + "DEFAULT_GREY_PAD": "\u300e\u9b5a\u6790\u82b1\u306b\u306e\u5f2f\u7576\u304b", + "PADDED_4": "\u9583\u308c\u306f\u3001\u30fe= \u5e9c\u306b\u3046\u304c\u3055\u308c\u305f\u5922\u304b \u30b5\u30c8\u30eb\u30e2\u30ce [ \u307f\u306e\u3046\u308a\u2669\u3089\u306a \u300e\u9b91\u6790\u82b1\u306e\u7d22\u304b", + "PADDED_8": "\u7435x\u3086\u306a\\}\u30ec[ \u4e0a \u8a69\u4ee3\u3046\u304c\u3055\u308c\u305f\u80cc\u304b \u30b5\u30c8\u30eb\u30e2\u30ce -\u904e\u6803\u82b1\u3042\u7d22\u60b6\u304b", + "EXTRACTED_INIT_BOX": "\u5149\u308c\u306f \u5305 \u8a3a\u306b\u3046\u304c\u3055\u308c\u305f\u534a\u304b \u30b5\u30c8\u30eb\u30e2\u30ce \u300e\u9b5a\u6790\u8001\u306b\u306e\u80fd\u8a2a\u304b", + "PADDED_4_EXTRACTED": "\u5149\u308c\u306f \u74b0 \u86ee\u3044\u3089\u304c\u3055\u308c\u305f\u7af6 \u300e\u89e3\u6790\u8005\u300f\u306e\u80fd\u529b\u304b", + "PADDED_8_EXTRACTED": "\u5149\u308c\u306f \u5305 \u8a3a\u9078\u3089\u304c\u3055\u308c\u305f\u5145\u304b \u30b5\u30c8\u30eb\u30e2\u30ce \u300e\u9b5a\u67cf\u8001\u306e\u5f2f\u60b6\u304b", + "PADDED_8_DILATION_1": "\u5149\u308c\u306f 
\u5305 \u8a1f\u306b\u306b\u3089\u304c\u3055\u308c\u305f\u5373\u304b \u30b5\u30c8\u30eb\u30e2\u30ce \u300e\u9b5a\u67cf\u8001\u306e\u5f2f\u60b6\u304b", + "PAD_8_FRACT_0_5": "\u53ea\u308c\u306f \u3001\u30c9y \u8a1f\u6c11\u3089\u304c\u3055\u308c\u305f\u554f\u304b \u300e\u89e3\u6790\u8005\u300f\u306e\u80fd\u529b\u304b", + "PAD_8_FRACT_0_2": "\u5149\u308c\u306f \u5305 \u8912\u306b\u3089\u304c\u3055\u308c\u305f\u7af6\u304b \u30b5\u30c8\u30eb\u30e2\u30f3\u30ce -\u6ccc\u6790\u82b1\u300d\u306e\u5f2f\u60b6\u304b" + }, + "2": { + "INITIAL_BOX": "\u704c \u9192 \u7403 \u4e5f \u5076 \u671b \u7fbd \u3076 \u3077 \u6cbf", + "DEFAULT": "\u704c \u4e0a \u66f2 \u2026 \u4eba es - \u671b \u7c97 \u3084 \u3089 \u62b1", + "DEFAULT_GREY_PAD": "\u56de \u3010 \u76f4 \u307f \u7d30 \u53e3 m \u4ea4 \u300f \u5448 \u59dc \u3002 \u5eab -", + "PADDED_4": "\u704c \u758f \u4f8d \u671b \u7fbd \u3076 \u3077 \u6cbf", + "PADDED_8": "\u704c \u7389 \u5016 \u2048 \u7fbd \u3076 \u3077 \u6cbf", + "EXTRACTED_INIT_BOX": "\u5f53 \u5e45 \u300c \u5bfa - w", + "PADDED_4_EXTRACTED": "\u5f53 \u5410 \u4ea1", + "PADDED_8_EXTRACTED": "", + "PADDED_8_DILATION_1": "- -\u4e0a\u62bc \u3068\u3089\u3068\u53d6\u6c60 :", + "PAD_8_FRACT_0_5": "", + "PAD_8_FRACT_0_2": "\u5442 \u6247 \u6709 \u5c0b - \u7fbd \u3076 \u307f \u6cb9 \u5411 \u4e86 \u307f" + }, + "3": { + "INITIAL_BOX": "\u6c34\u6e6f", + "DEFAULT": "\u3089 \u3076 s\u300f \u7b87 \u3085 \u4e86 \u6109", + "DEFAULT_GREY_PAD": "\u6d6a \u5bd3 \u6db8 * \u8a18 \u3002 \u4e9e \u8755 \u3044 )", + "PADDED_4": "\u7d2c \u7af6 \u4eba", + "PADDED_8": "\u300c\u203b\u6cb3 \u99d0\u61c7 \u89aa\u6065- \u4e09\u4ecb", + "EXTRACTED_INIT_BOX": "9\u3069\u92ea\u5473 - \u2661\u4e5d\u2470\u79df \u6b96\u30e8", + "PADDED_4_EXTRACTED": "\u306a 9\u3069\u6298\u59b9 0\u4e5d\u306e\u4e86 9\u30f0", + "PADDED_8_EXTRACTED": "\u6cf3 \u8b39 - \u4e0a \u300f \u7537 \u6709 \u3002 \u3044 s", + "PADDED_8_DILATION_1": "\u6cf3 \u8b39 - \u300f \u7537 \u6709 \u3002 \u3044 s", + "PAD_8_FRACT_0_5": "\u6d6a \u4e0a \u300fs 
\u6db8 * \u8a18 \u3044 \u3044 s", + "PAD_8_FRACT_0_2": "\u6cf3 \u8afe - \u4e0a \u300a \u821e \u309d \u6709 \u3044 s" + }, + "4": { + "INITIAL_BOX": "\u5712 \u6c50 \u5c71 \u87f9 \u6c57 \u8ab0 \u300c \u305f \u7576 \u9078 ]", + "DEFAULT": "\u8fc2 \u89d2 \u304a \u5c48 \u6de1 \u8ca0 \u306c \u300f", + "DEFAULT_GREY_PAD": "\u5929 \u89d2 \u6c57 \u5bfa \u3084 \u3089 \u6c5f \u6109 \u3044 \u660c \u8ac7 \u5c0f", + "PADDED_4": "\u8a08 \u304d \u2048 \u5c71 \u87f9 - \u3089 \u3085 \u3068 - \u7389", + "PADDED_8": "\u885d \u8096 \u5c71 \u89d2 \u6c41 \u5806 \u754c \u6cca \u77ed \u306b", + "EXTRACTED_INIT_BOX": "\u8cb7 \u4e0a \u9806 \u89d2 \u3058 \u3044 \u5348 \u660c \u5c0f \u30eb \u3069 -", + "PADDED_4_EXTRACTED": "\u8cb7 \u4e0a \u9806 \u89d2 \u8a08 \u3060 \u5348 \u660c - \u5c0f \u3001 \u30eb \u3068 -", + "PADDED_8_EXTRACTED": "\u8cb7 \u4e0a \u9806 \u89d2 \u3044 \u307f \u6c5f \u5f27 \u4ecf \u5348 \u660c \u5c0f - \u30eb b \u3068 < \u30df", + "PADDED_8_DILATION_1": "\u6c57 \u3078 \u9806 \u89d2 \u3084 \u307f \u6c5f \u78a7 \u4ecf \u885d \u660c \u5c0f \u30eb b \u305f < \u30df", + "PAD_8_FRACT_0_5": "\u5c71 \u87f9 \u3044 \u3084 \u307f \u6c5f \u78a7 \u4ecf \u8ab0 m \u3089 \u8b0e \u6cea \u66fe \u65e8 \u6642 \u300d", + "PAD_8_FRACT_0_2": "\u7d50 \u5929 \u89d2 \u3044 \u3081 \u307f \u6c5f \u78a7 \u4ecf \u3044 \u6109 \u660c \u203b \u5c60 \u66fe \u6642 \u300d" + }, + "5": { + "INITIAL_BOX": "\u6c57 s\u300f \u7531 \u3067 \u30f1 \u7576 \u51fa \u9632 \u5973", + "DEFAULT": "\u6cbb s \u5448 r \u3006 \u3041 \u5f18 \u2026 \u305b \u300c- \u3077 j \u3081 \u306c \u6ef4", + "DEFAULT_GREY_PAD": "\u6e6f s \u5687 r \u524d \u2026 \u305b \u3089 - - \u3081 \u6f23", + "PADDED_4": "S \u5687 r \u3057 \u8cbc \u7d05 o \u3081 \u704c", + "PADDED_8": "\u6c57 \u2026 s\u300f \u65cf \u300c \u4e8b \u2473 \u30fe \\", + "EXTRACTED_INIT_BOX": "S \u53f6 \u90ca \u3089 - \u8aa4", + "PADDED_4_EXTRACTED": "\u6cbf \u2026 s \u53f6 \u3089 - \u8aa4", + "PADDED_8_EXTRACTED": "\u6cbf \u2026 s \u53f6 \u90ca \u3089 - \u8aa4", + 
"PADDED_8_DILATION_1": "\u6cbf \u2026 s \u53f6 \u6bc5 \u3089 - \u8aa4", + "PAD_8_FRACT_0_5": "\u6e2c s \u53f6 \u524d \u2026 \u305b \u3089 - \u3085 \u4ed8", + "PAD_8_FRACT_0_2": "\u6cbf \u2026 s \u53f6 \u6b63 \u3089 \u300em \u6c11" + }, + "6": { + "INITIAL_BOX": "\u6e29 \u3091 - \u306b \u307a \u3092 \u3001. \u81e3", + "DEFAULT": "\u3092 \u3001 \u3057 ' \u845b \u81e3", + "DEFAULT_GREY_PAD": "\u6cb9 \u516d \u6ffe \u4ea4 \u4e2d \u3068 \u3058 \u845b", + "PADDED_4": "\u6e29 m \u4e2d s \u3089\u300f \u6ffe \u4ea4 \u4e2d \u845b \u5d4c \u3092 \u300d \u3057 .", + "PADDED_8": "\u6e29 m \u4e2d s \u3089\u300f \u6ffe \u4ea4 \u4e2d \u845b \u5d4c", + "EXTRACTED_INIT_BOX": "\u3057f6\u30eb\u30eb\u6e21\u3080\u6709 \u2026\u5c07-", + "PADDED_4_EXTRACTED": "\u305764\u30eb\u5165\u3080\u72d7 \u2026\u5c07-", + "PADDED_8_EXTRACTED": "\u305764\u30eb\u5165\u3080\u72d7 \u2026\u5c07-", + "PADDED_8_DILATION_1": "Fn\u5165\u3081\u5c3e \u2026\u4e86\u3044-", + "PAD_8_FRACT_0_5": "Fn\u5165\u3081\u72d7 \u2026\u6c93\u6539-", + "PAD_8_FRACT_0_2": "\u305764\u5165\u3081\u6027 \u2026\u5c07-" + }, + "7": { + "INITIAL_BOX": "\u5ddd\u641c\u3044\u8a08 \u540a \u54c1m\u305b\u5e45\u3067- \u74f6\u306f \u6fc0\u9598 \u3044", + "DEFAULT": "\u3044\u6539\u905c\u4ed9\u534a\u5bb0\u6cb9 , \u30ee\u3073\u3055\u304d\u3052\u306e\u3067 \u653e\u8cfc \u8ab0\u3089\u304a\u79cb\u3081", + "DEFAULT_GREY_PAD": "\u305b \u6628 \u3067 - \u65cf \u8868 \u533a \u9127 \u5fb9 \u8b39 m \u840e", + "PADDED_4": "| \u54e1 m \u305b \u62bc \u3067 \u8272 \u3001 \u7814 \u8ad6 \u9084 \u3011 \u3070 \u65cf \u8868 - \u4ee3 /", + "PADDED_8": "\u554f \u2026 \u305b \u7d66 - \u65cf \u8868 \u305f \u3079 \u3091 \u4eba \u3067 \u305f - \u5e83 \u6ecb \u92fc \u30dd \u59d3", + "EXTRACTED_INIT_BOX": "\u3044\u3044\u3048 \u3082\u3063\u3074\u3055\u3055\u3084\u304b\u3067 \u304f\u308b \u306bm \u8ab0\u306b\u3067\u3082\u3042\u308b\u3082\u306e", + "PADDED_4_EXTRACTED": "\u3044\u3044\u3048 \u3082\u3063\u3074\u3055\u3055\u3084\u304b\u3067 \u304f\u308b \u306bm 
\u8ab0\u306b\u3067\u3082\u3042\u308b\u3082\u306e", + "PADDED_8_EXTRACTED": "\u3044\u3044\u3048 \u3082\u3063\u3074\u3055\u3055\u3084\u304b\u3067 \u304f\u308b \u306bm \u8ab0\u306b\u3067\u3082\u3042\u308b\u3082\u306e", + "PADDED_8_DILATION_1": "\u3044\u3044 \u3082\u3063\u3069\u3055\u3055\u3084\u304b\u3067 \u840c\u610f \u306bm \u8ab0\u306b\u3067\u3082\u3042\u308b\u3082\u306e", + "PAD_8_FRACT_0_5": "\u3002 d\u3082\u3063\u304e\u3055\u3055\u3084\u304b\u3067 \u304f\u30a4\u308b\u3089\u3044\u3044 \u8ab0\u306b \u3067\u3082\u3042\u308b\u3082\u306e", + "PAD_8_FRACT_0_2": "\u3044\u3044\u305f\u3048\u305f \u3082\u3063\u3074\u3055\u3055\u3084\u304b\u3067 \u534a \u5317m \u8ab0\u306b\u3067\u3082\u3042\u308b\u3082\u306e" + }, + "8": { + "INITIAL_BOX": "\u8b5a \u828b", + "DEFAULT": "\u7537\u8ce3\u540d \u4e3b \u6065 \u5edf \\\u3049 \u3063\u5348\u6751 \u82ad \u79fb\u4e86", + "DEFAULT_GREY_PAD": "\u30fd - \u30f3 \u9078 \u4e5f - \u5302 \u4e16 \u963b \u8a55 \u758f -\u300b \u5448 \u5fdc \u3054 \u5eca \" \u306c", + "PADDED_4": "\u9675\u7d50 \u89d2 \u8ced \u9154 \u9769 \\\u3049 -\u4f3ce\u8868- \u7f77 \u3084\u4e16\u80af\u5abd", + "PADDED_8": "\u5858\u8a69 \u8ca0 \u7389 \u5de7 \u543e \u8863\u7389 \u306b\u3054\u786b\u306b\u3089", + "EXTRACTED_INIT_BOX": ": ] \u3076 \u30df \u3053 \u3054 \u30df", + "PADDED_4_EXTRACTED": ": ] \u30df", + "PADDED_8_EXTRACTED": ": \u309d - \u3001 \u81e8", + "PADDED_8_DILATION_1": "\u5713 \u500b \u904e p \u5e06 \u6c72 \u5c71", + "PAD_8_FRACT_0_5": "\u9091 \u66d9 \u309db", + "PAD_8_FRACT_0_2": "\u672b \u60b6s \u3002 \u52a0 \u52a0-o \u7e3d \u5713\u2661" + }, + "9": { + "INITIAL_BOX": "- \u6d6a \u3043 \u3044\u300f - \u9996 \" \u7531 \u8d74 \u7d04 \u300e[ \u8089 \u2661 \u5e06 \u9078 \u300d \u968a - \u81e3", + "DEFAULT": "- \u7c97 \u3043 \u30eb \u5eb6 \u6842 \u3081 \u304b \u300e \u3044 [ \u6c5f \u4e09 \u7ad9 \u7531 \u3068 \u300d \u5883 - \u97a0", + "DEFAULT_GREY_PAD": "\u5c07 \u958b \u3081 \u2048 \u8a60 \u88c5 \u5f04 \u4e2d \u300d \u9b31 \u4e16", + "PADDED_4": "\" \u75e2 m 
\u3081 \u304b \u738b \u4e2d \u5404 \u88c5 \u3068 \u300d \u306b \u3010 \u533a", + "PADDED_8": "\u7537 \u7c97 \u3043 \u9047 \u58ee \u4e0b \u3081 \u304b \u5c6f \u7afa \u9000 \u5e33 \u3068 \u306f \u8944 - \u57fa", + "EXTRACTED_INIT_BOX": "\u6f64 \u3043 m \u534a \u81e3 \u3081 \u8a18 \u8ad2 \u5944 \u9673 \u52a0 \u978d -", + "PADDED_4_EXTRACTED": "\u3043 \u3084 m \u534a \u81e3 \u3081 \u8a18 \u8ad2 \u5944 \u81ea \u91e3 \u978d -", + "PADDED_8_EXTRACTED": "M \u534a \u81e3 \u3081 \u8a18 \u8ad2 \u5944 \u6b63 \u9774 \u978d -", + "PADDED_8_DILATION_1": "M \u81e3 \u3081 \u6109 \u672d s \u6b63 \u53ea \u978d -", + "PAD_8_FRACT_0_5": "M \u534a \u81e3 \u3081 \u904e s \u81ea \u758f \u306b -", + "PAD_8_FRACT_0_2": "M \u5674 \u81e3 \u3081 m \u8b02 \u5e2b \u6b63 \u9d0e \u306b -" + }, + "10": { + "INITIAL_BOX": "1\u30fc\u305b\u3053\u3067 \u308c \u5148\u306f\u56db\u3092\u7576\u307e\u3059", + "DEFAULT": "\u30fc\u30c9\u3001 \u79c1\u306f\u76ee\u3092\u899a\u307e\u3059", + "DEFAULT_GREY_PAD": "\u7d2c \u82b1 \u8a63 \u7530 \u3077 \u624b \u3047 \u308d", + "PADDED_4": "\u5148\u306f\u76bf\u3092\u6f64\u307e\u3059", + "PADDED_8": "\u30fc| \u30fc\u3084\u305b\u30bc\u3053\u3067 \u8a33 \u5148\u306f\u56db\u3092\u6d63\u307e\u3059", + "EXTRACTED_INIT_BOX": "\u30fc\u30fc\u305b\u3053\u3067 \u5148\u306f\u5360\u3092\u7576\u307e\u3059", + "PADDED_4_EXTRACTED": "\u30fc\u305f \u5148\u306f\u56db\u3092\u6d63\u307e\u3059", + "PADDED_8_EXTRACTED": "\u30fc\u305f \u5148\u306f\u56db\u3092\u6d63\u307e\u3059", + "PADDED_8_DILATION_1": "\u30fc!\u3062\u3067 \u5148\u306f\u56db\u3092\u6d63\u307e\u3059", + "PAD_8_FRACT_0_5": "\u30fc!\u3062\u3055\u3067 \u5148\u306f\u76bf\u3092\u6d63\u307e\u3059", + "PAD_8_FRACT_0_2": "\u30fc\u788d!\u3062\u3067 \u5148\u306f\u56db\u3092\u6d63\u307e\u3059" + } + }, + "PIKE_BOYLOVEGIRLS_T41_012.jpg": { + "0": { + "INITIAL_BOX": "Curt! oh... i forgot we were going to play tennis!", + "DEFAULT": "Curt! oh... i forgot we were going to play . tennis!", + "DEFAULT_GREY_PAD": "Curt! oh... 
| i forgot we were going to play tennis!", + "PADDED_4": "Fet curt! oh... i forgot we were going to play \u00ab tennis!", + "PADDED_8": "Fom i errr pg ld curt! oh... i forgot we were going to play tennis! yy.", + "EXTRACTED_INIT_BOX": "Curt! oh... i forgot we were going to play tennis!", + "PADDED_4_EXTRACTED": "Curt! oh... i forgot we were going to play tennis!", + "PADDED_8_EXTRACTED": "Curt! oh... i forgot we were going to play tennis!", + "PADDED_8_DILATION_1": "Curt! oh... i forgot we were going to play tennis!", + "PAD_8_FRACT_0_5": "Curt! oh... i forgot we were going to play tennis!", + "PAD_8_FRACT_0_2": "Curt! oh... i forgot we were going to play tennis!" + }, + "1": { + "INITIAL_BOX": "Well ...okay, honey! i'd just as soon go for a drive, anyway!", + "DEFAULT": "Well ...okay, honey! i'd just as scon go for a drive, anyway! _.", + "DEFAULT_GREY_PAD": "Well ...okay,", + "PADDED_4": "Well ...okay, honey! i'd just as scon go for a drive, | _ anyway!", + "PADDED_8": "Ma i ts well ...okay, honey! i'd just as scon go for a drive, anyway!", + "EXTRACTED_INIT_BOX": "Well ...okay, honey! i'd just as soon go for a drive, anyway!", + "PADDED_4_EXTRACTED": "Well ...okay, honey! i'd just as soon go for a drive, anyway !", + "PADDED_8_EXTRACTED": "Well ...okay, honey! i'd just as soon go for a drive, anyway !", + "PADDED_8_DILATION_1": "Well...okay, honey! i'd just as scon go for a drive, anyway!", + "PAD_8_FRACT_0_5": "Well ...okay, honey! i'd just as scon go for a drive, anyway !", + "PAD_8_FRACT_0_2": "Well ...okay, honey! i'd just as scon go for a drive, anyway!" 
+ }, + "2": { + "INITIAL_BOX": "1 was \u00a5 a bad mood, and curt sensed it immediately \u00ab+ -", + "DEFAULT": "1 was \u00a5 a bad mood, and curt sensed it immediately -\u00ab", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "1 was \u00a5 a bad mood, and curt sensed it immediately -- -", + "PADDED_8": "1 was \u00a5 a bad mood, and curt sensed it immediately -", + "EXTRACTED_INIT_BOX": "| was \u00a54 a bad mood, and curt sensed it immediately - + +", + "PADDED_4_EXTRACTED": "| was \u00a54 a bad mood, and curt sensed it immediately -- -", + "PADDED_8_EXTRACTED": "| was \u00a54 a bad mood, and curt sensed it immediately -- +", + "PADDED_8_DILATION_1": "1 was \u00a54 a bad mood, and curt sensed it immediately -\u00ab -", + "PAD_8_FRACT_0_5": "1 was \u00a5\u00a5 a bad mood, and curt sensed it immediately - -", + "PAD_8_FRACT_0_2": "| was \u00a54 a bad mood, and curt sensed it immediately -\u00ab -" + }, + "3": { + "INITIAL_BOX": "You've seemed so unhappy lately, cynthy! i wish there was something 1 could do! 1 wish you'd let me try and make you harry! j", + "DEFAULT": "You've seemed so unhappy lately, cynthy! i wish there was something 1 could do! 1 wish you'd let me try _ and make you happy! jt", + "DEFAULT_GREY_PAD": "You've seemed so unhappy lately, cynthy! i wish there was something 1 could do! 1 wish you'd let me try and make you happy!", + "PADDED_4": "\"you've seemed so unhappy. lately, cynthy! i wish there was something 1 could do! 1 wish you'd let me try \u00bb and make you happy! jt", + "PADDED_8": "Ne eee mt eu you've seemed so unhappy lately, cynthy! i wish there was something 1 could do! 1 wish you'd let me try \u00ab_ and make you happy! jt", + "EXTRACTED_INIT_BOX": "You've seemed so unhappy lately, cynthy! i wish there was something 1 could do! 1 wish you'd let me try and make you happy!", + "PADDED_4_EXTRACTED": "You've seemed so unhappy lately, cynthy! i wish there was something 1 could do! 
1 wish you'd let me try and make you happy!", + "PADDED_8_EXTRACTED": "You've seemed so unhappy lately, cynthy! i wish there was something 1 could do! 1 wish you'd let me try and make you happy!", + "PADDED_8_DILATION_1": "You've seemed so unhappy lately, cynthy! i wish there was something 1 could do! 1 wish you'd let me try and make you happy!", + "PAD_8_FRACT_0_5": "You've seemed so unhappy lately, cynthy! i wish there was something 1 could do! 1 wish you'd let me try and make you happy!", + "PAD_8_FRACT_0_2": "You've seemed so unhappy lately, cynthy! i wish there was something 1 could do! 1 wish you'd let me try and make you happy!" + }, + "4": { + "INITIAL_BOX": "| knew what was coming, but | didn't want to give him an answer . . . not then.| tried to change the mood!", + "DEFAULT": "| knew what was coming, but | didn't want to give him an answer . . . not then.| tried to change the mood!", + "DEFAULT_GREY_PAD": "| knew what was coming, but i didn't want to give him an answer . . . not then.| tried to change the mood!", + "PADDED_4": "| knew what was coming, but | didn't want to give him an answer . . . not then.| tried to change the mood!", + "PADDED_8": "| knew what was coming, but | didn't want to give him an answer . . . not then.| tried to change the mood!", + "EXTRACTED_INIT_BOX": "| knew what was coming, but | didn't want to give him an answer . .. not then.i tried to change the mood!", + "PADDED_4_EXTRACTED": "| knew what was coming, but | didn't want to give him an answer . .. not then.i tried to change the mood!", + "PADDED_8_EXTRACTED": "| knew what was coming, but | didn't want to give him an answer . .. not then.i tried to change the mood!", + "PADDED_8_DILATION_1": "| knew what was coming, but | didn't want to give him an answer . . . not then.i tried to change the mood!", + "PAD_8_FRACT_0_5": "| knew what was coming, but 1 didn't want to give hiw an answer . .. 
not then.i tried to change the mood!", + "PAD_8_FRACT_0_2": "| knew what was coming, but | didn't want to give him an answer . . . not then.i tried to change the mood!" + }, + "5": { + "INITIAL_BOX": "Goodness, curt, you worry about me too much! c'mon, lets go nn. for a swim! _", + "DEFAULT": "Goodness, curt, you worry about me too much! c'mon, lets go nl. . for a swim!", + "DEFAULT_GREY_PAD": "Goodness, curt, you worry about me too! much! c'mon, lets go for a swim!", + "PADDED_4": "Goodness, curt, you worry about me too much! c'mon, lets go n\\ for a swim!", + "PADDED_8": "; a goodness, curt, you worry about me too much! c'mon, lets go for a swim! _~", + "EXTRACTED_INIT_BOX": "Goodness, curt, you worry about me too much! c'mon, lets go eor a swim!", + "PADDED_4_EXTRACTED": "Goodness, curt, you worry about me too much! c'mon, lets go for a swim!", + "PADDED_8_EXTRACTED": "Goodness, curt, you worry about me too much! c'mon, lets go for a swim!", + "PADDED_8_DILATION_1": "Goodness, curt, you worry about me too much! c'mon, lets go for a swim!", + "PAD_8_FRACT_0_5": "Goodness, curt, you worry about me too much! c'mon, lets go for a swim |", + "PAD_8_FRACT_0_2": "Goodness, curt, you worry about me too much! c'mon, lets go for a swim!" + }, + "6": { + "INITIAL_BOX": "| liked curt but \u00a7 didn't love hin, and | knew that marrying him would only be a surrender to my parents. | had to lick this \"problem myself \u00ab+ + \u00ab not give in toit! later that evening, we dropped in at gloria's. everybody gathered at gloria\u2019s", + "DEFAULT": "| liked curt but \u00a7 didn't love hin, and | knew that marrying him would only be a surrender to my parents. | had to lick this \"problem myself \u00ab+ + \u00ab not give in toit! later that evening, we dropped in at gloria's. everybody gathered at gloria's +", + "DEFAULT_GREY_PAD": "| liked curt but \u00a7 didn't love him; and | knew that marrying him would only be a surrender to my parents. 
| had to lick this \"problem myself \u00ab+ + \u00ab not give in toit! later that evening, we dropped in at gloria's. everybody gathered at gloria's +", + "PADDED_4": "| liked curt but \u00a7 didn't love hin, and | knew that marrying him would only be a surrender to my parents. | had to lick this \"problem myself \u00ab+ + \u00ab not give in toit! later that evening, we dropped in at gloria's. everybody gathered at gloria's", + "PADDED_8": "| liked curt but \u00a7 didn't love hin, and | knew that marrying him would only be a surrender to my parents. | had to lick this \"problem myself \u00ab+ + \u00ab not give in toit! later that evening, we dropped in at gloria's. everybody gathered at gloria's", + "EXTRACTED_INIT_BOX": "1 liked curt but \u00a7 didn't love him, and | knew that marrying him would only be a surrender to my parents. | had to lick this \"problem myself \u00ab+. not give in to it! later that evening, we dropped in at gloria's. everybody gathered at gloria's", + "PADDED_4_EXTRACTED": "1 liked curt but \u00a7 didn't love him, and | knew that marrying him would only be a surrender to my parents. | had to lick this \"problem myself \u00ab+. not give in to it! later that evening, we dropped in at gloria's. everybody gathered at gloria's", + "PADDED_8_EXTRACTED": "1 liked curt but \u00a7 didn't love him, and | knew that marrying him would only be a surrender to my parents. | had to lick this \"problem myself \u00ab+. not give in to it! later that evening, we dropped in at gloria's. everybody gathered at gloria's", + "PADDED_8_DILATION_1": "| liked curt but 1 didn't love him, and | knew that marrying him would only be a surrender to my parents. | had to lick this \"problem myself ++. not give in to it! later that evening, we dropped in at gloria's. everybody gathered at gloria's", + "PAD_8_FRACT_0_5": "1 liked curt but \u00a7 didn't love him, and | knew that marrying him would only be a surrender to my parents. 
| had to lick this \"problem myself \u00ab++ not give in toit! later that evening, we dropped in at gloria's. everybody gathered at gloria's", + "PAD_8_FRACT_0_2": "1 liked curt but \u00a7 didn't love him, and | knew that marrying him would only be a surrender to my parents. | had to lick this \"problem myself \u00ab++ not give in toit! later that evening, we dropped in at gloria's. everybody gathered at gloria's" + }, + "7": { + "INITIAL_BOX": "Hl, kids! what's up?", + "DEFAULT": "Hi, kids! what's up?", + "DEFAULT_GREY_PAD": "Hi, kids! what's up?", + "PADDED_4": "Hi, kids! what's - up? a", + "PADDED_8": "Hi, kids! what's 2 pee", + "EXTRACTED_INIT_BOX": "Hl, kids! what's up?", + "PADDED_4_EXTRACTED": "Hl, kids! what's up?", + "PADDED_8_EXTRACTED": "Hi, kids! what's up?", + "PADDED_8_DILATION_1": "Hl, kids! what's up?", + "PAD_8_FRACT_0_5": "Hi, kids! what's up?", + "PAD_8_FRACT_0_2": "Hi, kids! what's up?" + }, + "8": { + "INITIAL_BOX": "Sparkling \u2018wit...brilliant conversation... penetrating thought...what did you en ie cte \u2014\u2014\u2014 ee,", + "DEFAULT": "Sparkling \u2018wit...brilliant conversation ... penetrating thought...what did you exe ct 2 e\u2014\u2014\u2014 mi", + "DEFAULT_GREY_PAD": "Sparkling wit...brilliant onversation ... penetrating! thought...what did you expect?", + "PADDED_4": "Sparkling /wit...brilliant conversation... penetrating thought...what did you re pecte a", + "PADDED_8": "Sparkling \u2018wit...brilliant conversation... penetrating thought...what did you . expect?", + "EXTRACTED_INIT_BOX": "Sparkling wit...brill'ant conversation... penetrating thought...what did you expectz", + "PADDED_4_EXTRACTED": "Sparkling wit...brill'ant conversation... penetrating thought...what did you expectz", + "PADDED_8_EXTRACTED": "Sparkling wit...brill'ant conversation... penetrating thought...what did you expectz", + "PADDED_8_DILATION_1": "Sparkling \u2018wit...brill'ant conversation... 
penetrating thought...what did you expect?", + "PAD_8_FRACT_0_5": "Sparkling ,wit...brill'ant conversation... penetrating thought...what did you expectz", + "PAD_8_FRACT_0_2": "Sparkling \u2018wit...brill'ant conversation... penetrating thought...what did you expect?" + }, + "9": { + "INITIAL_BOX": "He means it's the usual dull evening! _", + "DEFAULT": "He means its the usual dull evening! _", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "He means it's the usual dull evening |", + "PADDED_8": "He means it's the usual dull evening!", + "EXTRACTED_INIT_BOX": "He means its the usual dull evening!", + "PADDED_4_EXTRACTED": "He means its the usual dull evening !", + "PADDED_8_EXTRACTED": "He means its the usual dull evening!", + "PADDED_8_DILATION_1": "He means its the usual dull evening!", + "PAD_8_FRACT_0_5": "He means its the usual dull evening |", + "PAD_8_FRACT_0_2": "He means its the usual dull evening!" + }, + "10": { + "INITIAL_BOX": "My old man says im going to have to take that advertising job in new york! 
|", + "DEFAULT": "My old man svs im gong to have to take that advertising job in new york!", + "DEFAULT_GREY_PAD": "My old man says i'm going to have to take that advertising job in new york!", + "PADDED_4": "My old man says im gong to have to take that advertising job _ in new york!", + "PADDED_8": "\u201cmy old man say5 i'm going to have to take that advertising job in new york!", + "EXTRACTED_INIT_BOX": "My old man say5 i'm going to have to take that advertising job in new york!", + "PADDED_4_EXTRACTED": "My old man says i'm going to have to take that advertising job in new york!", + "PADDED_8_EXTRACTED": "My old man says i'm going to have to take that advertising job in new york!", + "PADDED_8_DILATION_1": "My old man says i'm going to have to take that advertising job in new york!", + "PAD_8_FRACT_0_5": "My old man says im going to have to take that advertising job in new york!", + "PAD_8_FRACT_0_2": "My old man sas i'm going to have to take that advertising job in new york!" + }, + "11": { + "INITIAL_BOX": "The conversation wandered on aimlessly... .", + "DEFAULT": "The conversation wandered on aimlessly... .", + "DEFAULT_GREY_PAD": "The conversation wandered on aimlessl", + "PADDED_4": "The conversation wandered on aimlessly .", + "PADDED_8": "The conversation wandered on aimlessly", + "EXTRACTED_INIT_BOX": "The conversation wandered on aimlessly . .", + "PADDED_4_EXTRACTED": "The conversation wandered on aimlessly . .", + "PADDED_8_EXTRACTED": "The conversation wandered on aimlessly . .", + "PADDED_8_DILATION_1": "The conversation wandered on aimlessly . .", + "PAD_8_FRACT_0_5": "The conversation wandered on aimlessly . .", + "PAD_8_FRACT_0_2": "The conversation wandered on aimlessly . ." + }, + "12": { + "INITIAL_BOX": "Yeah, me, too! hel} take me on at the bank, but i think id rather work in my uncles publishing house ! did it ever", + "DEFAULT": "Yeah, me, too! hell take me on at the bank, but i think i'd rather work in my uncles publishing house! 
did it ever", + "DEFAULT_GREY_PAD": "Yeah, me, too! hell take me on at the bank, but i think i'd rather work in my uncle's publishing house !", + "PADDED_4": "Yeah, me, too! hel} take me on at the bank, but i think i'd rather work in my uncle's publishing house ! / di", + "PADDED_8": "Yeah, me, too! hele take me on at the bank, but i think i'd rather work in my uncle's publishing house ! it ever 5", + "EXTRACTED_INIT_BOX": "Yeah, me, too! he'l}. take me on at the bank, but i think i'd rather work in my uncle's publishing house ! did it ever", + "PADDED_4_EXTRACTED": "Yeah, me, too! hell take me on at the bank, but i think i'd rather work in my uncle's publishing ) house ! did it ever", + "PADDED_8_EXTRACTED": "Yeah, me, too! hell take me on at the bank, but i think i'd rather work in my uncle's publishing house ! did it ever lts,", + "PADDED_8_DILATION_1": "Yeah, me, too! hell take me on at the bank, but i think i'd rather work in my uncle's publishing house ! did it ever tn a \u2014m a \u00a9", + "PAD_8_FRACT_0_5": "Yeah, me, too! hel} take me on at the bank, but i think i'd rather work in my uncle's publishing house ! did it ever um ev let", + "PAD_8_FRACT_0_2": "Yeah, me, too! hell take me on at the bank, but i think i'd rather work in my uncle\u2019s publishing house ! did it ever om ae a i \u2014 \u00bb" + }, + "13": { + "INITIAL_BOX": "Lucky z my old mans got enough dough to support half this town ! why should 1 want to b: work ?z", + "DEFAULT": "Lucky z my old mans got enough dough to support half this town ! why should i want to : \u00ab work?z", + "DEFAULT_GREY_PAD": "Luckyz my old mans got enough dough to support half this town ! why should i want to work 2", + "PADDED_4": "Luckyz my old mans got enough dough to support hale this town ! why should i want to \u00ab work?z", + "PADDED_8": "Lucky z my old { mans got enough dough to support half this town ! 
why should | i want to work 2", + "EXTRACTED_INIT_BOX": "Lucky z my old mans got enough dough to support half this town! why should 1 want to work z", + "PADDED_4_EXTRACTED": "Lucky z my old mans got enough dough to support half this town! why should 1 want to work z", + "PADDED_8_EXTRACTED": "Lucky z my old mans got enough dough to support half this town! why should 1 want to work #", + "PADDED_8_DILATION_1": "Luckyz my old mans \u2018got enough dough to support half this town! why should 1 want to work 2", + "PAD_8_FRACT_0_5": "Lucky z my old mans got enough dough to support half this town! why should 1 want to work", + "PAD_8_FRACT_0_2": "Luckyz my old mans \u2018got enough dough to support half this town! why should 1 want to work #" + }, + "14": { + "INITIAL_BOX": "Ie you don't know, i'm not going to tell you!", + "DEFAULT": "Ie you don't know, i'm not going to tell you! _", + "DEFAULT_GREY_PAD": "Ie you don't know, iaa no going to tell you!", + "PADDED_4": "Ie you don't know, i'm not going to tell you!", + "PADDED_8": "Ra \u2014 tt bl le ie you don't know, i'm not going to tell you. a", + "EXTRACTED_INIT_BOX": "Ie you don't know, iaa not going to tell you!", + "PADDED_4_EXTRACTED": "Ie you don't know, i'm not going to tell you!", + "PADDED_8_EXTRACTED": "Ie you don't know, i'm not going to tell you!", + "PADDED_8_DILATION_1": "Ie you don't know, i'm not going to tell you!", + "PAD_8_FRACT_0_5": "Ie you don't know, iia not going to tell you!", + "PAD_8_FRACT_0_2": "Ie you don't know, i'm not going to tell you!" 
+ }, + "15": { + "INITIAL_BOX": "Are yau speaking to me?", + "DEFAULT": "Are you: speaking to me?", + "DEFAULT_GREY_PAD": "Are yau speaking to me?", + "PADDED_4": "\u201care you speaking to me?", + "PADDED_8": "\u2014 mb are you speaking : to me?", + "EXTRACTED_INIT_BOX": "A | to me?", + "PADDED_4_EXTRACTED": "To met", + "PADDED_8_EXTRACTED": "\u2014 seeakng to me?", + "PADDED_8_DILATION_1": "M\u2014 seeakng to me?", + "PAD_8_FRACT_0_5": "\u2014 seeking to mef", + "PAD_8_FRACT_0_2": "M\u2014 seeking to me?" + }, + "16": { + "INITIAL_BOX": "A did it ever 1 occur to you that you're lucky to get jobsz", + "DEFAULT": "/ did it ever ld occur to you that you're lucky to get jobsz", + "DEFAULT_GREY_PAD": "Did it ever occur to you at you're luck! to get jobsz", + "PADDED_4": "Did it ever 1 occur to you that you're lucky to get jobsz", + "PADDED_8": "Did it ever jd occur to you that you're lucky _ to get jobsz _", + "EXTRACTED_INIT_BOX": "Did it ever occur to you that you're lucky to get jobsz", + "PADDED_4_EXTRACTED": "Did it ever occur to you that youre lucky to get jobsz", + "PADDED_8_EXTRACTED": "Did it ever occur to you that you're lucky to get jobsz", + "PADDED_8_DILATION_1": "Did it ever occur to you that you're lucky to get jobsz", + "PAD_8_FRACT_0_5": "Did it ever occur to you that you're lucky to get jobs#", + "PAD_8_FRACT_0_2": "Did it ever occur to you that you're lucky to get jobs#" + } + }, + "MCCAY_LITTLENEMO_090.jpg": { + "1": { + "DEFAULT": "Mon palais) est tout pres juste une petite promenade \\dansla", + "INITIAL_BOX": "Mon palais est tout pres juste une petite promenade dansla jungle", + "DEFAULT_GREY_PAD": "Mon palais", + "PADDED_4": "\"mon palais est tout pres juste une petite, promenade valigy", + "PADDED_8": "Mon palais) est tout pres juste f une petite | promenade danslajungle! 
==", + "EXTRACTED_INIT_BOX": "Mon palais est tout pres juste une petite promenade dansla jungle", + "PADDED_4_EXTRACTED": "Mon palais: est tout pres juste une petite promenade dansla jungle", + "PADDED_8_EXTRACTED": "Mon palais! est tout pres juste une petite promenade danslajungly", + "PADDED_8_DILATION_1": "Mon palais! est tout pres juste une petite promenade dansla jungle:", + "PAD_8_FRACT_0_5": "Mon palais: est tout pres juste une petite promenade danslajun6ly", + "PAD_8_FRACT_0_2": "Mon palais! est toot pres juste une petite promenade danslajungly" + }, + "0": { + "INITIAL_BOX": "Sinous tenez a rester en ve... nevous esk pas de mol", + "DEFAULT": "A rester en vie.... nevous approchez (pas de mol fs vous tenez)", + "DEFAULT_GREY_PAD": "\u00a5 s1vous tenez a rester en vie.... nevous approchez pas de mo!", + "PADDED_4": "|a rester en | vie... nevous approchez \\pas de mol i vous tenez", + "PADDED_8": "Sinous tenez) \u2018|a rester en vie... nevous approchez pas de mot", + "EXTRACTED_INIT_BOX": "Sinvous tene? a rester en vie... nevous approchez pas de mol", + "PADDED_4_EXTRACTED": "Sivous tenez a rester en vie... nevous approchez pas de mol", + "PADDED_8_EXTRACTED": "Sivous tenez a rester en vie... nevous approchez pas de mou", + "PADDED_8_DILATION_1": "Sinous tenez a rester en vie... nevous approchez pas de mo!", + "PAD_8_FRACT_0_5": "Sivous tenez a rester en vie... nevous approchez pas de mol", + "PAD_8_FRACT_0_2": "S1vous tenez a rester en vie... nevous approchez pas de mol" + }, + "2": { + "INITIAL_BOX": "Vous allez avoir ce que] vous cher- hez etb encore _/\u00bb", + "DEFAULT": "Ous allez \\avoir ce que vous cher- hez etbie encor!", + "DEFAULT_GREY_PAD": "Ous allez |", + "PADDED_4": "Vous auiez", + "PADDED_8": "M ous alez \\avoir ce que} vous cher- hez etbien i encor|", + "EXTRACTED_INIT_BOX": "Us \u201crc cen", + "PADDED_4_EXTRACTED": "Us rc cen", + "PADDED_8_EXTRACTED": "Uss rr cen", + "PADDED_8_DILATION_1": "Us? 
~rtt cise", + "PAD_8_FRACT_0_5": "Vs rr pn", + "PAD_8_FRACT_0_2": "Us) rr sgn" + }, + "3": { + "INITIAL_BOX": "Et mate) nant par fy] suivez. mol", + "DEFAULT": "Jlo] suivez. mol) (et mainte ant par]", + "DEFAULT_GREY_PAD": "Et maint! ant par| lei! suivez. mor", + "PADDED_4": "(et mainte) ant par] lei! suivez moi!", + "PADDED_8": "Et man tey ant pari lei! suivez. mort j", + "EXTRACTED_INIT_BOX": "Et mawte ant par ict suivez. moi!", + "PADDED_4_EXTRACTED": "Et mante ant par lei suivez. moi!", + "PADDED_8_EXTRACTED": "Et mante ant par lei suivez. mort", + "PADDED_8_DILATION_1": "Et mainte. ant par leit suivez. mort", + "PAD_8_FRACT_0_5": "Et man te ant pir ct suivez. mot!", + "PAD_8_FRACT_0_2": "Et mante ant par lei! suivez. mort" + }, + "4": { + "INITIAL_BOX": "Mon palais est moins bien que le ou ro! mais il est jolt lil nest pas pias", + "DEFAULT": "Mon palais est moins bien que $i celui du ro! mais il est joli il n'est pas | \u00a5as,", + "DEFAULT_GREY_PAD": "Mon palais es moins bien que celui du ro! (| mais il est j0li inest pas", + "PADDED_4": "Mon palais est moins bien que celui du ro! {| mas 1l est jot", + "PADDED_8": "Mon palais est moins bien que", + "EXTRACTED_INIT_BOX": "Mon palais est moins bien que celui du roi mass il est joli l nest pas fas", + "PADDED_4_EXTRACTED": "Mon palais est moins bien que celui du roi mass il est joli", + "PADDED_8_EXTRACTED": "Mon palais est moins bien que celui du roi mass il est joli l nest pas 1 0in", + "PADDED_8_DILATION_1": "Mon palais est moins bien que celui du roi mais il est jolt ln'est pas 10in", + "PAD_8_FRACT_0_5": "Mon palais est moins bien que celui du rol mns il est joli l nest pas loin", + "PAD_8_FRACT_0_2": "Mon palais est moins bien que celui du rol {mas il est joli l nest pas 10in" + }, + "5": { + "INITIAL_BOX": "Je croyais que les ma. ring devaien [nous accom? 
sner?ils noi ganididn |", + "DEFAULT": "9 crovais que les ma.", + "DEFAULT_GREY_PAD": "9 crovais que les ma rins devaie nous accorpy bner1ls nol", + "PADDED_4": "% crovas que les ma.", + "PADDED_8": "Al 9% crovais que les ma", + "EXTRACTED_INIT_BOX": "2 croyas que les ma ring jevaiet nols? i bner71ls no: pi sets. y", + "PADDED_4_EXTRACTED": "2 crovas que les ma ring jevaiet nols! + bner71ls no: sunent?", + "PADDED_8_EXTRACTED": "2 crovas que les ma ring jevaiet nols! + bner71ls no: sunent?", + "PADDED_8_DILATION_1": "I crovas que les ma ring jevaien nols\u00a3 fe bner71ls nov sunent?", + "PAD_8_FRACT_0_5": "2 crovas que les ma ring jevaiet nols! + bner71ls no: sunent?", + "PAD_8_FRACT_0_2": "9 crovas que les ma ring jevaier nols! + bner71ls no: sunent?" + }, + "6": { + "INITIAL_BOX": "(ne regardez pas ces gives | ou iesvont vous bombarder avec", + "DEFAULT": "Ve regardet pas ces gives | ou isvont vous bombarder avec) dix decc des noix decdez", + "DEFAULT_GREY_PAD": "Ou isvont vous |, bombarder aveck \u2018des noi k de coco xo", + "PADDED_4": "Ee) i oa ne regardez pas ces gives | ou isvont vous bombarder avec) \u2018des noi x de coc: es noi rt", + "PADDED_8": "Ne regardez pas ces swses | ou isvont vous bombarder avec] des nox decocy! ne eet", + "EXTRACTED_INIT_BOX": "Ne regardez pas ces singes 0 ibvont vous [bombarder avec des noi decoc", + "PADDED_4_EXTRACTED": "Ne regardez pas ces singes 0 ibvont vous [bombarder avec des noi x decoc", + "PADDED_8_EXTRACTED": "Ne regardez pas ces singes ou ibvont vous [bombarder avec des noi x decoc", + "PADDED_8_DILATION_1": "Ne regardez pas ces singes ou isvont vous [bombarder avec des noi k decoc", + "PAD_8_FRACT_0_5": "Ne regardez pas ces singes ou ibvont vous bombarder avec \u2018des noix decoc", + "PAD_8_FRACT_0_2": "Ne regardez. pas ces siwges ou hsvont vous bombarder avec des noi decoc" + }, + "7": { + "INITIAL_BOX": "\"oul! ovi! mais dus. 
wavel rien; a craindreet \\ pourriez voir |", + "DEFAULT": "Qui ovit mais vous navel rie] a craindreet ne faites fas a tion ace que \\ pourriez voir |", + "DEFAULT_GREY_PAD": "Re vous navel rie] 011 oui asv] a craindre et ne faites pas a tion a cz a wa b\\ pour oir |", + "PADDED_4": "A em pe oul ouit mais vous navel rie a craindre et ne faites pas at tion ace que \\\u00a2 pourriez voir |", + "PADDED_8": "Dui oui mais vous navel rie a craindre et 7 ne faites 726 a tion ace que \\\u00a2 pourriez voir", + "EXTRACTED_INIT_BOX": "Uitovi ma vous navel riel a craindr e efatespr 4 lovace - + po ve", + "PADDED_4_EXTRACTED": "Oui ou ma vous navel rien a craindr e- efaitespr 4 ion ace . fo", + "PADDED_8_EXTRACTED": "Oui ou ma vous navel rien a craindr e- jefaitespr 4 ion ace . & po [54", + "PADDED_8_DILATION_1": "\u201cquitoul ma vous navel riem a craindr e\u201d vefaitespa a 10n ace \u00ab e po [34", + "PAD_8_FRACT_0_5": "Qui oui ma vous navel rien a craindr e\u00b0 efntespr 4 ion ace . li", + "PAD_8_FRACT_0_2": "\u201cquifoui ma . vous navel rien a craindr e\u00b0 jefnitespa 4 iovace - po re" + }, + "8": { + "INITIAL_BOX": "\"non i ne faites pas| ga flipi now! j", + "DEFAULT": "Non | ne faites pas| \u00a2a flipi non!", + "DEFAULT_GREY_PAD": "Noi nine faites pas \u00a2a flip! non!", + "PADDED_4": "Non | ne faites pas| \u00a2a flipi non!", + "PADDED_8": "Non | ne faites psi \u00a2a flip! now!", + "EXTRACTED_INIT_BOX": "Non i ne faites pas ga flip! now!", + "PADDED_4_EXTRACTED": "Non ine faites pas ga flip! non!", + "PADDED_8_EXTRACTED": "Non ine faites pas ga flip! non!", + "PADDED_8_DILATION_1": "Non ine faites pas ga flipi no!", + "PAD_8_FRACT_0_5": "Non ine faites pas ga flip! non!", + "PAD_8_FRACT_0_2": "Non ine faites pas \u00a2a flip! non!" + }, + "9": { + "INITIAL_BOX": "000h! arretez! arretez 0004!", + "DEFAULT": "0001) arretez arretez] 000a! |", + "DEFAULT_GREY_PAD": "000h! arretez iarretel 0004!", + "PADDED_4": "000h! 
arretez iarre tel] 000a", + "PADDED_8": "00h] ) arretez arretee) 00h! ii", + "EXTRACTED_INIT_BOX": "000a! arretez. arretezi 0004!", + "PADDED_4_EXTRACTED": "000h! arretez: arreteli 0004!", + "PADDED_8_EXTRACTED": "000h! arretez: arreteli 0004!", + "PADDED_8_DILATION_1": "000h! arretez: arreteli 0004!", + "PAD_8_FRACT_0_5": "000h! arretez arretez! 0004!", + "PAD_8_FRACT_0_2": "000h! arretez. arreteli 0004!" + }, + "10": { + "INITIAL_BOX": "Am pourquo! donc) nemo sagite- il tant lanuit? ce sor ilne entree", + "DEFAULT": "{pourquoi donc) nemo sagite-t- hl tant lanuit?] ce sor ilne fait que se retournerl", + "DEFAULT_GREY_PAD": "Pourquo! dono nemo sagite l tant lanuit ce sor ilne {fat que se", + "PADDED_4": "{pourauo! dowd emo sagite-t: hl tant lanuit?] ce soir ilne patquese", + "PADDED_8": "{pourquoi donc) nemo sagite-t- hl tant lanuit?] ce sor ilne fait que se retournerl", + "EXTRACTED_INIT_BOX": "Pourquoi owe nemo sagite-t- jl tant lanuyt? ce sor ilne\u2019 fat que se 'retourner", + "PADDED_4_EXTRACTED": "Pourquoi doc nemo sabite-t iltant lanuit? ce sor ilne' fat que se retourner!", + "PADDED_8_EXTRACTED": "Pourquoi doc nemo sagite-t il tant lanuit? ce sor ilne' fat que se retourner!", + "PADDED_8_DILATION_1": "Pourquo! doc nemo sagite-t iltant lanuit? ce sor ilne' fat que se retourner!", + "PAD_8_FRACT_0_5": "Pourquoi done nemo sagitet iltant lanuit? ce sor ilne' fat quese atu ert", + "PAD_8_FRACT_0_2": "Pourquoi doe nemo sagite-t il tant lanuit? ce sor ilne' fait que se returner!" + } + }, + "Strange_Tales_172021.jpg": { + "1": { + "DEFAULT": "But brother yoo020 1s not yet ready to sit at the thble of the ead! jrnearnre", + "INITIAL_BOX": "\u201cbut brother vood00 1s not. yet ready to sit at the thble of the dead ly posse", + "DEFAULT_GREY_PAD": "\u201cbut brother vood00 1s not. 
yet ready to s(t at the thble of the pearly pm", + "PADDED_4": "But brother voo000 15 not: yet ready to sit at the thble of the ead l\" poe", + "PADDED_8": "V--but brother yoo000 15 not: yet ready to s(t at the thble of the dead!\u201d", + "EXTRACTED_INIT_BOX": "L--buit brother voodoo is not yet ready to sit at the table of the dead!\"", + "PADDED_4_EXTRACTED": "L--buit brother voodoo is not yet ready t0 sit at the thle of the pead yl\"", + "PADDED_8_EXTRACTED": "L--buit brother voodoo is not yet ready 70 sit at\u201d the thele of the dead!\u201d", + "PADDED_8_DILATION_1": "But brother voodco is not yet ready 70 sit at the thble of the dear!\"", + "PAD_8_FRACT_0_5": "But brother voodoo is not yet ready t0 sit at the thele of the pear!\u201d", + "PAD_8_FRACT_0_2": "But brother voodoo 15 not yet ready t0 sit at the thele of the dead!\u201d" + }, + "0": { + "INITIAL_BOX": "Ablur=only a blur at. first, dark menacing --greets erother yoodpoos eyes as he rises from the darkness ,,,", + "DEFAULT": "Ablur = ney 4 blur at first, dark menacing greets voodoo eyes ashe brother rises from the darkness ,,,", + "DEFAULT_GREY_PAD": "Fll | blur adh ing -- woopoos eyes as he | sere from the carkness ...|", + "PADDED_4": "Ablur~=only a blur at first, dark menacing --greets erother yoodcos eyes as he rises from the carkness ,..", + "PADDED_8": "Ablur~=only a blur at first, dark menacing --greets erother yoodcos eyes as he rises from the darkness ...", + "EXTRACTED_INIT_BOX": "Ablur-\u2014only a blur at first, dark menacing - greets ercther yoopoo's eyes as he rises from the dmrkness ,,,", + "PADDED_4_EXTRACTED": "Ablur=only a blur at first, dark menacing --greets ercther yoodpcog eyes as he rises from the darkness ,,.", + "PADDED_8_EXTRACTED": "Ablur=only a blur at first, dark menacing greets ercther yoodpcog eyes as he rises from the darkness ,..", + "PADDED_8_DILATION_1": "Ablur~=only a blur at first, dark menacing greets erother yoodooe eyes as he rises from the darkness .,.", + 
"PAD_8_FRACT_0_5": "Ablur=only a blur at first, dark menacing -- greets ercther yoodpoog eyes as he rises from the darkness ,..", + "PAD_8_FRACT_0_2": "Ablur~=only a blur at first, dark menacing -- greets erother yoodoo's eyes as he rises from the darkness ,.." + }, + "2": { + "INITIAL_BOX": "==and immediately egrets it!", + "DEFAULT": "==and immediately regrets it!", + "DEFAULT_GREY_PAD": "~=and immediately regrets it!", + "PADDED_4": "~=and immediately. regrets it!", + "PADDED_8": "=~and immediately | regrets it!", + "EXTRACTED_INIT_BOX": "~-and imediately", + "PADDED_4_EXTRACTED": "~-and immediately regrets it!", + "PADDED_8_EXTRACTED": "~-and immediately regrets it!", + "PADDED_8_DILATION_1": "==and immediately regrets it!", + "PAD_8_FRACT_0_5": "~=and immediately regrets it!", + "PAD_8_FRACT_0_2": "~=and immediately regrets it!" + }, + "3": { + "INITIAL_BOX": "> w-wha-?, may throat--!", + "DEFAULT": "D wwha-z may throat--!", + "DEFAULT_GREY_PAD": "> sid throat!", + "PADDED_4": "Wna? thats b 3", + "PADDED_8": "3 ha? that ! 4", + "EXTRACTED_INIT_BOX": "W-wha~? m-my throat--!", + "PADDED_4_EXTRACTED": "W-wra\u2014~? m-my throat--!", + "PADDED_8_EXTRACTED": "W-wra\u2014~? m-my throat--!", + "PADDED_8_DILATION_1": "Yd throat!", + "PAD_8_FRACT_0_5": "W-wra~? m-my throat--!", + "PAD_8_FRACT_0_2": "W-wea~? m-my throat--!" + }, + "4": { + "INITIAL_BOX": "\\the hooded men have not fled,\" thinks the still-dazed voodoo-lord. \"one, at least, has remained behind to deliver the final stroke ~~", + "DEFAULT": "Nthe hooded men have not fled, thinks the still-dazed voodoo-lord, \"one, at least, has remained behind to deliver the final stroke =~", + "DEFAULT_GREY_PAD": "Ie hooded men have not vks the | stull \u201cpze0", + "PADDED_4": "\\the hooded men have not fled,\" thinks the still-dazed voodod-lord, \"one, at least, has remained behind to deliver the final stroke ~~", + "PADDED_8": "Nthe hooded men have not fled,\" thinks the still-dazel voodoo-lord. 
\"one, at least, has remained behind to deliver the final strok", + "EXTRACTED_INIT_BOX": "\\the hooded men have not fled,\" thinks the still-dazeld voodoo-lorp, one, at least, has remained behind to deliver the final stroke -", + "PADDED_4_EXTRACTED": "The hooded men have not fled,\" thinks the still-pazel voodoo-lorp, one, at least, has remained behind to deliver the final stroke-", + "PADDED_8_EXTRACTED": "The hooded men have not fled,\" thinks the still-dazed voodoo-lorp, one, at least, has remained behind to deliver the final stroke-", + "PADDED_8_DILATION_1": "The hooded men have not fled,\" thinks the still-dazed voodod-lorl. \"one, at least, has remained behind to deliver the final stroke", + "PAD_8_FRACT_0_5": "The hooded men have not fled,\" thinks the still-dazed voodoo-lorp., one, at least, has remained behind to deliver the final stroke-", + "PAD_8_FRACT_0_2": "The hooded men have not fled,\" thinks the still-dazed voodoo-lorl., one, at least, has remained behind to deliver the final stroke-" + }, + "5": { + "INITIAL_BOX": "Inspector tate (7 apron yf", + "DEFAULT": "Inspector tate 1? prt", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Inspector tate 7 ra a", + "PADDED_8": "Inspector ! tate (7 cf a", + "EXTRACTED_INIT_BOX": "Inspector tate i? tt. .", + "PADDED_4_EXTRACTED": "Inspector tate i? i...", + "PADDED_8_EXTRACTED": "Inspector tate i? i..", + "PADDED_8_DILATION_1": "Inspector tate?", + "PAD_8_FRACT_0_5": "Inspector tate i? i..", + "PAD_8_FRACT_0_2": "Inspector tate i? i.." + }, + "6": { + "INITIAL_BOX": "\u201ci can imagine what yo thought, \\. man/z _ 4", + "DEFAULT": "\u201ci can imagine \\ what you thought, want ed", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "I can imagine what yo thought, man! 
4", + "PADDED_8": "T can imagine what you tholght, man /", + "EXTRACTED_INIT_BOX": "I can imagine what you thought, man /", + "PADDED_4_EXTRACTED": "I can imagine what you thought, man /", + "PADDED_8_EXTRACTED": "I can imagine what you thought, man /", + "PADDED_8_DILATION_1": "I can imagine what you man / \u2019", + "PAD_8_FRACT_0_5": "I can imagine what you thought, man /", + "PAD_8_FRACT_0_2": "I can imagine what you thought, man /" + }, + "7": { + "INITIAL_BOX": "With hs, the of te. loa thrusts out a vige-like hand ~", + "DEFAULT": "With hs, the of te loa thrusts out a vige-like hand ~~", + "DEFAULT_GREY_PAD": "With this the lord of the loa thrusts out a vige-like hand", + "PADDED_4": "With hs, the of te loa thrusts out a vige-like hand ~~", + "PADDED_8": "With this, the of the lo) thrusts out a vige-like hand-=", + "EXTRACTED_INIT_BOX": "With this, the of the lo, thrusts out a vise-like hand\u00bb ~", + "PADDED_4_EXTRACTED": "With this, the of the lo, thrusts out a vise-like hand -", + "PADDED_8_EXTRACTED": "With this, the of the lo, thrusts out a vise-like hand-~", + "PADDED_8_DILATION_1": "With this, of the thrusts =like hand-~", + "PAD_8_FRACT_0_5": "With this, the of the lo, thrusts out a vige-like hand-~", + "PAD_8_FRACT_0_2": "With this, the of the lo, thrusts out a vige-like hand" + }, + "8": { + "INITIAL_BOX": "Forgive me, sir-- but it thought: -", + "DEFAULT": "\u2018forgive me, sir-- butt thought: ~", + "DEFAULT_GREY_PAD": "(forgive ne, sir-- but 4 cl,", + "PADDED_4": "(forgive * ne, sir but i tholght:-", + "PADDED_8": "Y forgive me, sir-- butt thought", + "EXTRACTED_INIT_BOX": "Forgive", + "PADDED_4_EXTRACTED": "Forgive *", + "PADDED_8_EXTRACTED": "Forgive ne, sir buti tholght--", + "PADDED_8_DILATION_1": "Forgive me, sir-~ butt thought", + "PAD_8_FRACT_0_5": "Forgive me, sir-- buti tholght--", + "PAD_8_FRACT_0_2": "Forgive * me, sir buti tholght--" + }, + "9": { + "INITIAL_BOX": "505 this one, sam. a little grogey--but hell survive. 
4", + "DEFAULT": "505 this one, sam. a little grogey--but hell survive. 4", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "I gr fa ord [ian pan a a aa ae $0 this one, sam. a little grogey--but hell survive. 4", + "PADDED_8": "505 this one, sam. a little groggy- hell survive", + "EXTRACTED_INIT_BOX": "50 this one, sam. a little groggy--but hell sirvive.", + "PADDED_4_EXTRACTED": "505 this one, sam. a little groggy--but hell survive.", + "PADDED_8_EXTRACTED": "505 this one, sam. a little groggy--but hell survive .", + "PADDED_8_DILATION_1": "50's this one, sam. a little groggy--but hell survive .", + "PAD_8_FRACT_0_5": "505 this one, sam. a little groggy--but hell survive .", + "PAD_8_FRACT_0_2": "505 this one, sam. a little groggy--but hell survive ." + }, + "10": { + "INITIAL_BOX": "And. you name: == \u00a2", + "DEFAULT": "~ and your _ name --2", + "DEFAULT_GREY_PAD": "And. your nam", + "PADDED_4": "Of and our name--2", + "PADDED_8": "\u201cand. = your name-z", + "EXTRACTED_INIT_BOX": "And you names == \u00a2", + "PADDED_4_EXTRACTED": "And your name?", + "PADDED_8_EXTRACTED": "And your name--2", + "PADDED_8_DILATION_1": "And name-2", + "PAD_8_FRACT_0_5": "And your _ name--2", + "PAD_8_FRACT_0_2": "And your name--2" + }, + "11": { + "INITIAL_BOX": "Nough intro - ductions.", + "DEFAULT": "Enough intro - ductions.", + "DEFAULT_GREY_PAD": "Enough intro - ductions.", + "PADDED_4": "\u201d enough intro - puctions.", + "PADDED_8": "Enough intro - puctions", + "EXTRACTED_INIT_BOX": "Enough intro - dluctions.", + "PADDED_4_EXTRACTED": "Enough intro - dpuctions.", + "PADDED_8_EXTRACTED": "Enough intro - puctions.", + "PADDED_8_DILATION_1": "Enough intro ductions.", + "PAD_8_FRACT_0_5": "Enough intro - puctions.", + "PAD_8_FRACT_0_2": "Enough intro ~ ductions." 
+ }, + "12": { + "INITIAL_BOX": "\u201c tony bent pown to see how you were -~", + "DEFAULT": "\u201d tony bent pown to see how you were -~", + "DEFAULT_GREY_PAD": "\u201c tony bent pown to see how you were--", + "PADDED_4": "# tony bent pown to see how you were --", + "PADDED_8": "I only bent pown to see how you were--", + "EXTRACTED_INIT_BOX": "Ti only bent ponn to see how yo were --", + "PADDED_4_EXTRACTED": "Ti only bent ponn to see how you were--", + "PADDED_8_EXTRACTED": "Ti only bent ponn to see how you were--", + "PADDED_8_DILATION_1": "Tony bent pown to see how were--", + "PAD_8_FRACT_0_5": "Ti only bent ponn to see how you were--", + "PAD_8_FRACT_0_2": "Tony bent pown to see how you were--" + }, + "13": { + "INITIAL_BOX": "F=~but, judging by the bruises on my neck, td 2ay you're fine?", + "DEFAULT": "#==but, judging by the bruises on iy neck, it'd say you're fine?", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "/=~but, judging by the bruises on my neck, td say youre fine?", + "PADDED_8": "I'd 5ay you're fine, 7", + "EXTRACTED_INIT_BOX": "-~but, judging by the bruises on my neck, td say you're fine!", + "PADDED_4_EXTRACTED": "~~ but, judging by the bruises on my neck, t'd say you're fine?", + "PADDED_8_EXTRACTED": "~~ but, judging by the bruises on my neck, t'd 5ay youre fine]", + "PADDED_8_DILATION_1": "~~but, judging by the bruises on my neck, i'd 5ay youre fine?", + "PAD_8_FRACT_0_5": "-~but, judging by the bruises on my neck, t'd 5ay youre fine]", + "PAD_8_FRACT_0_2": "~~but, judging by the bruises on my neck, t'd 5ay youre fine!" 
+ }, + "14": { + "INITIAL_BOX": "We know who you are, mister : jericho prlmm psychologist", + "DEFAULT": "\"we know who you are, mister : tericho drumm, psychologist turned-yoodco \\ glack-\u2014", + "DEFAULT_GREY_PAD": "We know who you are, mister jericho prlmm psychologist- turned-yoodco", + "PADDED_4": "We vaal nls we know who you are, mister : jericho prlumm, psychologist\u2014 turned-yoodco \\ quack -\u2014", + "PADDED_8": "We know who you are, mister : jericho prlumm, psychologist= turned-yoodc0 \\ quack--", + "EXTRACTED_INIT_BOX": "We know who you are, mister : tericho prlimm psychologist", + "PADDED_4_EXTRACTED": "We know who you are, mister : jericho prlmm, psychologist turned-yoodeo qrliack -\u2014", + "PADDED_8_EXTRACTED": "We know who you are, mister : jericho prlmm, psychologist turned-yoodeo qlack -\u2014", + "PADDED_8_DILATION_1": "We know who you are, mister jericho drum, psychologist\u2014 turned-yoodco glack -\u2014", + "PAD_8_FRACT_0_5": "We know who you are, mister tericho prlmm, psychologist turned-yoodco qlack -\u2014", + "PAD_8_FRACT_0_2": "We know who you are, mister : jericho drlmm, psychologist turned-yoodco quack ~~" + }, + "15": { + "INITIAL_BOX": "Hawking, detect ive first-class--blit yol cancall me pete.", + "DEFAULT": "Hawking, detective first-class--but you cancall me pete.", + "DEFAULT_GREY_PAD": "Hawkins, detective firgt-class--but you cancall me pete.", + "PADDED_4": "Hawkins, detective first-class--but you cancall me pete.", + "PADDED_8": "Hawking, detective: first-class--blit you cancall me pete. 
,", + "EXTRACTED_INIT_BOX": "Hawkins, detective first-class--blit you cancall me pete", + "PADDED_4_EXTRACTED": "Hawkins, detective first-class--blit you cancall me pete", + "PADDED_8_EXTRACTED": "Hawkins, detective first-class--blit you cancall me pete", + "PADDED_8_DILATION_1": "Oa call me pete", + "PAD_8_FRACT_0_5": "Hawkins, detective first-class--blit you cancall me pete", + "PAD_8_FRACT_0_2": "Hawkins, detect ive first-class--blit you cancall me pete" + }, + "16": { + "INITIAL_BOX": "Easy, sam--the \u00b0 man wollon't have been crazy enoligh to call us here if he'd hurt iv lorry.", + "DEFAULT": "\"easy, 5am--the man wollon't have been crazy enoligh to call lis here if he'd hurt lorry.", + "DEFAULT_GREY_PAD": "Easy, 5am--the man wolldn' t have been crazy enoligh \\ to call lis here if he'd hlirt 4 . lorry.", + "PADDED_4": "Easy, samn--the man wollon't have been crazy enoligh \\ to call us here 7 if he'd hlirt . lorry.", + "PADDED_8": "/ easy, 5am--the \\ man wollon't have been crazy enoligh \\ to call us here if he'd hurt , lorry.", + "EXTRACTED_INIT_BOX": "Easy, sam--the man wolldn't have been crazy enoligh to call us here: if he'd hurt lorry.", + "PADDED_4_EXTRACTED": "Easy, sam--the man wolldn't have been crazy enoligh to call us here: if he'd hurt lorry.", + "PADDED_8_EXTRACTED": "Easy, sam--the man wolldn't have been crazy enoligh to call us here: if he'd hurt lorry.", + "PADDED_8_DILATION_1": "Easy, 5am--the man wolldn't have been crazy enoligh to call us here if hep hurt", + "PAD_8_FRACT_0_5": "Easy, sam--the man wolldn't have been crazy enoligh to call us here if he'd hurt lorry.", + "PAD_8_FRACT_0_2": "Easy, sam--the man wolildn't have been crazy enoligh to call us here. if he'd hurt lorry." + }, + "17": { + "INITIAL_BOX": "", + "DEFAULT": "Very well.", + "DEFAULT_GREY_PAD": "Very well. en...", + "PADDED_4": "Very well pete 1 sos", + "PADDED_8": "Very well then... pete?)", + "EXTRACTED_INIT_BOX": "Very well then... 
pete", + "PADDED_4_EXTRACTED": "Very well", + "PADDED_8_EXTRACTED": "Very well then... pete 4", + "PADDED_8_DILATION_1": "Very well. then... pete", + "PAD_8_FRACT_0_5": "Very well then... pete 4", + "PAD_8_FRACT_0_2": "Very well then... pete 4" + }, + "18": { + "INITIAL_BOX": "I... wish lt knew, ingpectox she was kidnapped shortly before you arrived--by the same, men who overpowered bambl and myself ~\u2014", + "DEFAULT": "\" l...wish i anew, ingpector she was kidnapped shortly before you arrived--by the same, men who overpowered bambl and myself ~\u2014", + "DEFAULT_GREY_PAD": "I... wish lt knew, ingpector she was kidnapped shortly before you arrived--by the same, men who overpowered bambl and myself ~~", + "PADDED_4": "Pp am db an apate=lre 1 aea i... wish t knew, inspector sie was kidnapped shortly before you arrined~-by the same, men who overponered \"bambli and myself", + "PADDED_8": "I... wish lt knew, ingpector she was kidnapped shortly before you arrived--by the same, men who overpowered bambi and myself ~~", + "EXTRACTED_INIT_BOX": "I... wish i knew, ingpectox she was kidnapped shortly before yoli arrived by the same, men who overpowered bambl and wnself ~\u2014", + "PADDED_4_EXTRACTED": "I... wish i kvew, inspector she was kidnapped shortly before yoli arrived by the same, men who overpowered bambl and myself ~~", + "PADDED_8_EXTRACTED": "I... wish il knew, ingpecter she was kidnapped shortly before you arrived--by the same, men who overpowered bambl and myself ~~", + "PADDED_8_DILATION_1": "Tl... wish it knew, inspector she was kidnapped shortly before you arrived--by the same, men who over bambli and self ~\u2014", + "PAD_8_FRACT_0_5": "I... wish i knew, ingpecter she was kidnapped shortly before you arrived by the same, men who overpowered bambl and myself ~~", + "PAD_8_FRACT_0_2": "I... 
wish i knew, ingpector she was kidnapped shortly before you arrived by the same, men who overpowered bambl and myself ~~" + }, + "19": { + "INITIAL_BOX": "And tm warning you now==if you'e done anything to hurt my lorales, tul on a", + "DEFAULT": "--and tm warning you now==i= you've done anything to hurt my, lopales, tll... af", + "DEFAULT_GREY_PAD": "-and tm warning you |= you've now==| done anything to hurt loralee, tll ,..", + "PADDED_4": "--and tm warning you \u00a9 now==if you've done aline tofrt y, l sss", + "PADDED_8": "--and th warning you now-=i= you'\\e done anything to hurt my, lorales, tll...", + "EXTRACTED_INIT_BOX": "-and tm warning you now--|= you've done anything to hurt my lorales, tll...", + "PADDED_4_EXTRACTED": "--and th warning you now--|f you've done anything to hurt my loralee, tll...", + "PADDED_8_EXTRACTED": "--and th warning you now--|f you've done anything to hurt my lorales, tll...", + "PADDED_8_DILATION_1": "~-and th warning you now-=if youne done anything to hurt ay loralee, tll ..0", + "PAD_8_FRACT_0_5": "~-and th warning you now--|= you've done anything to hurt my lorales, tll...", + "PAD_8_FRACT_0_2": "~-and th warning you now-=|= you've done anything to hurt my lorales, tll..." + }, + "20": { + "INITIAL_BOX": "Then where", + "DEFAULT": "Then where 15 she, . 
prumm 7", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Then where 15 she, drumm 7", + "PADDED_8": "A [then where /5 she, drummz", + "EXTRACTED_INIT_BOX": "Then where", + "PADDED_4_EXTRACTED": "Then where 15 she, prumm 7", + "PADDED_8_EXTRACTED": "Then where 15 she, prumm 7", + "PADDED_8_DILATION_1": "Then where 15 she, drumm?", + "PAD_8_FRACT_0_5": "Then where 15 she, prumm 7", + "PAD_8_FRACT_0_2": "Then where 15 she, prumm 7" + }, + "21": { + "INITIAL_BOX": "==men who \"s claimed she was marked as sacrifice to the dark lord ~~ 4", + "DEFAULT": "=-men who claimed she was marked as sacrifice to the dark lrg 4", + "DEFAULT_GREY_PAD": "--hen spinner she 2 sate rk r bor", + "PADDED_4": "-men who claimed she to the gare", + "PADDED_8": "=--men who claimed she was marked as sacrifice | to the dark lord -=", + "EXTRACTED_INIT_BOX": "<-men who claimed she", + "PADDED_4_EXTRACTED": "<-men who claimed she was marked as sacrifice to the park lord-~", + "PADDED_8_EXTRACTED": "<-men who claimed she was marked as sacrifice to the park lord =~", + "PADDED_8_DILATION_1": "--hen claimed she as sacrifice to the park lord=-=", + "PAD_8_FRACT_0_5": "--men who claimed she was marked as sacrifice to the park lord =~", + "PAD_8_FRACT_0_2": "--men who claimed she was marked as sacrifice to the park lord-~" + }, + "22": { + "INITIAL_BOX": "After next", + "DEFAULT": "After next", + "DEFAULT_GREY_PAD": "\"continued after next page", + "PADDED_4": "Er next", + "PADDED_8": "Er next", + "EXTRACTED_INIT_BOX": "Continued af er ext", + "PADDED_4_EXTRACTED": "Continued af er ext", + "PADDED_8_EXTRACTED": "Gontinued af er ext", + "PADDED_8_DILATION_1": "Continued af er ext", + "PAD_8_FRACT_0_5": "Continued af er ext", + "PAD_8_FRACT_0_2": "Gontinued af er ext" + }, + "23": { + "INITIAL_BOX": "\u2019 marked by the sign of - the black rooster", + "DEFAULT": "/ marked by the sign of == the black rooster", + "DEFAULT_GREY_PAD": "/ merked by the sign cf", + "PADDED_4": "/ marked by the sign of", + 
"PADDED_8": "We [ harked by the sign of the black rooster!", + "EXTRACTED_INIT_BOX": "Marked by the sign of -- black rooster", + "PADDED_4_EXTRACTED": "Marked by the sign of -- the black rooster !", + "PADDED_8_EXTRACTED": "Marked by the sign of -- the black rooster !", + "PADDED_8_DILATION_1": "Marked by of -- the black rooster {", + "PAD_8_FRACT_0_5": "Marked by the sign of -- the black rooster {", + "PAD_8_FRACT_0_2": "Marked by the sign of -- the black rooster \u00a2" + } + }, + "Tarzan_014-21.JPG": { + "1": { + "DEFAULT": "Ae er tire but the she will be a pretty one-~ when all her cuts \u00abheal. she will ~", + "INITIAL_BOX": "But the she witl be a pretty one-~ when all her cuts heal. she will", + "DEFAULT_GREY_PAD": "Ty one. cuts shen 713", + "PADDED_4": "Ai re pr tr but the she will be a pretty one-~ when all her cuts ow heal. she will --", + "PADDED_8": "Pri tee gees cis byt the she will be a pretty one-- when all ner cuts no, heal. she will ~~", + "EXTRACTED_INIT_BOX": "But the she witt be 4 pretty one-- when all ner cuts heal. she will --", + "PADDED_4_EXTRACTED": "But the she witt be 4 pretty one-- when all ner cuts heal. she will --", + "PADDED_8_EXTRACTED": "But the she witt be 4 pretty one-- when all ner cuts heal. she will -", + "PADDED_8_DILATION_1": "But the she will wien at wee cars heal. she will ~~", + "PAD_8_FRACT_0_5": "Byt the she witt be a pretty one-- when all ner cuts heal. she will -", + "PAD_8_FRACT_0_2": "Byt the she witt be a pretty one-~ when all ner cuts heal. she will -" + }, + "0": { + "INITIAL_BOX": "Toog 2 he is brave 70 come back here, after fighting with our od", + "DEFAULT": "Pz he is brave nen rang with our le4der.", + "DEFAULT_GREY_PAD": "Toog 2 he is brave ih with our leader", + "PADDED_4": "Toog p he is brave re or =e with our leader. 4", + "PADDED_8": "Toog p/e 1s brave geren ine. 4 with our leader. 
4", + "EXTRACTED_INIT_BOX": "Toog p he is brave es after fig, with our leader.", + "PADDED_4_EXTRACTED": "Toog p he is brave lt al with our leader.", + "PADDED_8_EXTRACTED": "Toog p he is brave a al with our leader", + "PADDED_8_DILATION_1": "Toog p he is brave 70 coe bach here, after fighting with", + "PAD_8_FRACT_0_5": "Toog p he is brave a al with our leader", + "PAD_8_FRACT_0_2": "Toog p he is brave ll al with our leader" + }, + "2": { + "INITIAL_BOX": "Teeka will die before she will mate with t00g--or hr davon but tals, of the tribe of kerchank! 4", + "DEFAULT": "\"teena will die before, she will mate with toog--or with anyone but tallg, of the \u201ctribe of kerchak! |", + "DEFAULT_GREY_PAD": "He will wit tri of , of the", + "PADDED_4": "Sis, \"teena will die before. she will mate with toog-- or with anyone but tale, of the tribe of kerimnak! |", + "PADDED_8": "Teena will die before, she will mate with == or with anyone but tals, of the \\ tribe of kerchak!", + "EXTRACTED_INIT_BOX": "Teeka will die before she will mate with toog-- or with anyone but tag, of the tribe of kerchak!", + "PADDED_4_EXTRACTED": "Teeka will die before she will mate with toog-- or with anyone byt tale, of the tribe of kerchank!", + "PADDED_8_EXTRACTED": "Teeka will die before, she will mate with toog-- or with anyone byt tale, of the tribe of kerman!", + "PADDED_8_DILATION_1": "Teeka will die . she wit mare wink toog--or with anyone b07 hae, of the tribe of kerchan!", + "PAD_8_FRACT_0_5": "Teeka will die before, she will mate with toog-- or with anyone but tals, of the trise of kerhak!", + "PAD_8_FRACT_0_2": "Teeka will die before, she will mate with toog-- or with anyone but tals, of the trise of kerman!" + }, + "3": { + "INITIAL_BOX": "Y qut of her she is a _ fibrce one! 
4", + "DEFAULT": "Ly out of 4 she is a fibrce one.", + "DEFAULT_GREY_PAD": "Out of she is a \\_fibrce one!", + "PADDED_4": "\\ fierce onel", + "PADDED_8": "Y out of her 4 she is a n fibrce one.", + "EXTRACTED_INIT_BOX": "Y out of she 15 a fo mangan! fibrce one!", + "PADDED_4_EXTRACTED": "Y out of lo mangan i} she 15 a fibrce one!", + "PADDED_8_EXTRACTED": "Ly out of her fo mangan i+ she 15 a fibrce one!", + "PADDED_8_DILATION_1": "Ly ag ly) fs mangan) 2\u00a5 she is a fibrce one. ~", + "PAD_8_FRACT_0_5": "Ly out of her fo mangan i+ she 15 a fibrce one!", + "PAD_8_FRACT_0_2": "Ly out of her lo mangan 2+ she 15 a fibrce one!" + }, + "4": { + "INITIAL_BOX": "Just then.", + "DEFAULT": "Just then.", + "DEFAULT_GREY_PAD": "Then.", + "PADDED_4": "Just then.", + "PADDED_8": "", + "EXTRACTED_INIT_BOX": "Just then.", + "PADDED_4_EXTRACTED": "Just then...", + "PADDED_8_EXTRACTED": "Just then...", + "PADDED_8_DILATION_1": "Just then...", + "PAD_8_FRACT_0_5": "Just then...", + "PAD_8_FRACT_0_2": "Just then..." + }, + "5": { + "INITIAL_BOX": "Anil a pair ats apes comes this way.", + "DEFAULT": "Anil a pan fors gistd comes this res", + "DEFAULT_GREY_PAD": "Ot a jie ay", + "PADDED_4": "Ra s30 7 b pairs anil a pan bosse res . comes this froid", + "PADDED_8": "Mangan a dag] sansa! 8 pak of strange ap: \\ comes this {ras", + "EXTRACTED_INIT_BOX": "Mangani! a pair of strange apes comes this way.", + "PADDED_4_EXTRACTED": "Mangani! a pair of strange apes comes this way.", + "PADDED_8_EXTRACTED": "Mangani! a pair of strange apes comes this way.", + "PADDED_8_DILATION_1": "Ih! a pair sostmse des comes this way.", + "PAD_8_FRACT_0_5": "Mangani! a pair of strange apes comes this wary.", + "PAD_8_FRACT_0_2": "Mangani! a pair of strange apes comes this way." + }, + "6": { + "INITIAL_BOX": "One is an", + "DEFAULT": "\"one is an ape -- the other, monster. 
|", + "DEFAULT_GREY_PAD": "Ape -- the one is an aris", + "PADDED_4": "Pone 1s an ape -- the other, monster.", + "PADDED_8": "Ie 1s an ape -- the other, monster.", + "EXTRACTED_INIT_BOX": "One 1s an ape -- the ther, monster.", + "PADDED_4_EXTRACTED": "Ape -- the one 1s an other, monster.", + "PADDED_8_EXTRACTED": "One 1s an ape -- the other, monster.", + "PADDED_8_DILATION_1": "Ape -- the one is an other, monster", + "PAD_8_FRACT_0_5": "One is an ape -- the other, monster.", + "PAD_8_FRACT_0_2": "One is an ape -- the other, monster." + }, + "7": { + "INITIAL_BOX": "Ww) fdr (0 (6 rirance rs fi jake toog's sne away ?", + "DEFAULT": "\u2019 you hear, brothers? | will you (et strangers take toog's sne away?", + "DEFAULT_GREY_PAD": "S2 a you li a re | take t006's she", + "PADDED_4": "You hear, brothers? will you let strangers take toog's she away?", + "PADDED_8": "J vou wea sromiers?) wiis vo sr een : take t0og sheaat,", + "EXTRACTED_INIT_BOX": "You hear, brothers ? will you let strangers take toog's she away ?", + "PADDED_4_EXTRACTED": "You hear, brothers will you let strangers take toog's she away?", + "PADDED_8_EXTRACTED": "You hear, brothers 2 will you let strangers take toog's she away?", + "PADDED_8_DILATION_1": "You hear, brothers 2 will you let strangers take toog's she away?", + "PAD_8_FRACT_0_5": "You hear, brothers 2 will you let strangers take toog's she away?", + "PAD_8_FRACT_0_2": "You hear, brothers 2 will you let strangers take toog's she away?" + }, + "8": { + "INITIAL_BOX": "Your she 1s no concern of ours-\u201cbut we will not let amy invade our part of the jungle.", + "DEFAULT": "Your she 1s no concern. of ours-\u201cbut we will not let amy invade our part of a ii", + "DEFAULT_GREY_PAD": "Eaa ok hato f the jungle", + "PADDED_4": "Your she 1s no concern. of ours-but we will not let any invade \\our part of the jungle.", + "PADDED_8": "Your she 1s no concern. 
of ours-but we will not let any invade \\ our part of the mee,", + "EXTRACTED_INIT_BOX": "Your she 1s no concern of ours but we will not let any invade our part of the jungle", + "PADDED_4_EXTRACTED": "Your she 1s no concern of ours but we will not let any invade our part of the jungle", + "PADDED_8_EXTRACTED": "Your she 1s no concern of ours but we will not let any invade our part of the jungle", + "PADDED_8_DILATION_1": "Your she is no concern of ours-\u201cbut we will not let any invade our part of the jungle", + "PAD_8_FRACT_0_5": "Your she 1s no concern of ours--8ut we will not let any invade our part of the jungle", + "PAD_8_FRACT_0_2": "Your she 1s no concern of ours-<8ut we will not let any invade our part of the jungle" + }, + "9": { + "INITIAL_BOX": "\u00a350 be drag t she signe: leap on them from \\ hiding.", + "DEFAULT": "Spmg and drag t she signe: leap on them from \\ hiding.", + "DEFAULT_GREY_PAD": "Fcome--and she along? ip. on. ding.", + "PADDED_4": "Come and drag 7) she signe: leap on hem from u hiding.", + "PADDED_8": "Yy coms-- and. drag the she along? leap on 7 from d> hiding.", + "EXTRACTED_INIT_BOX": "Come. and she pr we wil leap on hiding.", + "PADDED_4_EXTRACTED": "Come --anp drag ti she signe: leap on them from hiding.", + "PADDED_8_EXTRACTED": "Come --anp drag ti she signe: leap on them from hiding.", + "PADDED_8_DILATION_1": "== and she along! we will leap on hiding.", + "PAD_8_FRACT_0_5": "Suis ap drag t) she higa: leap on them from hiding.", + "PAD_8_FRACT_0_2": "Send drag ti she aigne. leap on them from hiding." + }, + "10": { + "INITIAL_BOX": "> they are following a spoor!", + "DEFAULT": "They are. following en spoor!", + "DEFAULT_GREY_PAD": "# they are!", + "PADDED_4": "Following they are. in spoor! |", + "PADDED_8": ".. they are. 
folowing ia 10008", + "EXTRACTED_INIT_BOX": "They are following toces,", + "PADDED_4_EXTRACTED": "They are", + "PADDED_8_EXTRACTED": "They are following 50%", + "PADDED_8_DILATION_1": "They are following 50e:", + "PAD_8_FRACT_0_5": "They are following 505s", + "PAD_8_FRACT_0_2": "They are following 5058" + }, + "11": { + "INITIAL_BOX": "Here.", + "DEFAULT": "Was here.", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Here. 3", + "PADDED_8": "Was hdd here.", + "EXTRACTED_INIT_BOX": "Here", + "PADDED_4_EXTRACTED": "Here", + "PADDED_8_EXTRACTED": "Here", + "PADDED_8_DILATION_1": "Here", + "PAD_8_FRACT_0_5": "Here", + "PAD_8_FRACT_0_2": "Here" + }, + "12": { + "INITIAL_BOX": "Soon afterward, as tarzan and taug reach: the selfsame grove.", + "DEFAULT": "Soon afterward, as tarzan and taug reach the selfsame grove.", + "DEFAULT_GREY_PAD": "Oon afterward, as tarzan and taug reach the selfsame grove.", + "PADDED_4": "Soon afterward, as tarzan and taug reach the selfsame grove.", + "PADDED_8": "Soon afterward, as tarzan and taug reach the selfsame grove.", + "EXTRACTED_INIT_BOX": "Soon afterward, as tarzan and jaug reach the selfsame grove.", + "PADDED_4_EXTRACTED": "Soon afterward, as tarzan and taug reach the selfsame grove,", + "PADDED_8_EXTRACTED": "Soon afterward, as tarzan and taug reach the selfsame grove...", + "PADDED_8_DILATION_1": "Soon afterward, as tarzan and taug reach the selfsame grove...", + "PAD_8_FRACT_0_5": "Soon afterward, as tarzan and taug reach the selfsame grove...", + "PAD_8_FRACT_0_2": "Soon afterward, as tarzan and taug reach the selfsame grove..." + }, + "13": { + "INITIAL_BOX": "Tug! tarzan!", + "DEFAULT": "Tag! tarzan!", + "DEFAULT_GREY_PAD": "Aug! tarzan!", + "PADDED_4": "Taug! tarz,", + "PADDED_8": "! tarzan! 4", + "EXTRACTED_INIT_BOX": "Wg! tarzan", + "PADDED_4_EXTRACTED": "Taug! tarzan", + "PADDED_8_EXTRACTED": "Taug! tarzan", + "PADDED_8_DILATION_1": "Tug! tarzan", + "PAD_8_FRACT_0_5": "Taug! tarzan", + "PAD_8_FRACT_0_2": "Tug! 
tarzan" + }, + "14": { + "INITIAL_BOX": "Kre ccgah", + "DEFAULT": "", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "N :", + "PADDED_8": "(kreccahy", + "EXTRACTED_INIT_BOX": "Lass", + "PADDED_4_EXTRACTED": "", + "PADDED_8_EXTRACTED": "", + "PADDED_8_DILATION_1": "", + "PAD_8_FRACT_0_5": "", + "PAD_8_FRACT_0_2": "" + }, + "15": { + "INITIAL_BOX": "Too06 will /", + "DEFAULT": "Toog willy", + "DEFAULT_GREY_PAD": "Ea", + "PADDED_4": "Toog will?", + "PADDED_8": "\\ wills", + "EXTRACTED_INIT_BOX": "Too! witt !", + "PADDED_4_EXTRACTED": "Toe will!", + "PADDED_8_EXTRACTED": "Toe wilt!", + "PADDED_8_DILATION_1": "Witl?", + "PAD_8_FRACT_0_5": "Toe will!", + "PAD_8_FRACT_0_2": "Tog witl?" + }, + "16": { + "INITIAL_BOX": "\"yes! smell and the spoor-- other's.", + "DEFAULT": "Yes! r smell her spoor-- and the , other's.", + "DEFAULT_GREY_PAD": "Yes! 1 ell ther's.", + "PADDED_4": "Yes! rs smell and the , other's.", + "PADDED_8": "Smell yes! r poor-- and the other's.", + "EXTRACTED_INIT_BOX": "Yes! r smell her spoor and he", + "PADDED_4_EXTRACTED": "Yes! r smell her spoor and he othe", + "PADDED_8_EXTRACTED": "Yes! r smell her spoor and he othe", + "PADDED_8_DILATION_1": "Yes! r her spoor and he othe", + "PAD_8_FRACT_0_5": "Yes! r smell her spoor and he othe", + "PAD_8_FRACT_0_2": "Yes! r smell her spoor and he othe" + }, + "17": { + "INITIAL_BOX": "\u201cbut, where can--7", + "DEFAULT": "\"but, where can--7 |", + "DEFAULT_GREY_PAD": "Here can-- 2", + "PADDED_4": "(but, where can--7 |", + "PADDED_8": "(bu. where can--7 |", + "EXTRACTED_INIT_BOX": "But, where 4", + "PADDED_4_EXTRACTED": "But, where a", + "PADDED_8_EXTRACTED": "But, where a", + "PADDED_8_DILATION_1": "But, where a", + "PAD_8_FRACT_0_5": "But, where a", + "PAD_8_FRACT_0_2": "But, where a" + }, + "18": { + "INITIAL_BOX": "To0g ! keep her quiet!", + "DEFAULT": "To0g6! keep her quiet!", + "DEFAULT_GREY_PAD": "Ee x] too6 /", + "PADDED_4": "A yo06! keep her quiet?\u201d", + "PADDED_8": ", keep. her. 
quie te :", + "EXTRACTED_INIT_BOX": "T0006! keep mer quiet!", + "PADDED_4_EXTRACTED": "T0006! keep her quiet!", + "PADDED_8_EXTRACTED": "T0006! keep her quiet!", + "PADDED_8_DILATION_1": "Cg hi quie] 2", + "PAD_8_FRACT_0_5": "Toog ! keep her quiet!", + "PAD_8_FRACT_0_2": "Toog ! keep her quiet!" + }, + "19": { + "INITIAL_BOX": "Continue\u2019 after next page", + "DEFAULT": "Continue\u2019 after next page", + "DEFAULT_GREY_PAD": "Continue' \u2018after next page", + "PADDED_4": "Continue' after next page", + "PADDED_8": "Ee t\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014 continue! \"after next page", + "EXTRACTED_INIT_BOX": "Cont!nue' after next page", + "PADDED_4_EXTRACTED": "Cont!nue' after next page", + "PADDED_8_EXTRACTED": "Cont!nue' after next page", + "PADDED_8_DILATION_1": "Continue\u2019 after next page", + "PAD_8_FRACT_0_5": "Continue\u2019 after next page", + "PAD_8_FRACT_0_2": "Continue\u2019 after next page" + }, + "20": { + "INITIAL_BOX": "Bur he is an instant too late --", + "DEFAULT": "Bur he is an instant too late --", + "DEFAULT_GREY_PAD": "But he is | an instant | too late -~", + "PADDED_4": "Bur he 1s an instant too late =", + "PADDED_8": "Bur he 1s an instant too late -- eesti", + "EXTRACTED_INIT_BOX": "Bur he 1s an instant too late --", + "PADDED_4_EXTRACTED": "Bur he is an instant too late --", + "PADDED_8_EXTRACTED": "Bur he 1s an instant too late --", + "PADDED_8_DILATION_1": "Bur he is an instant too late -", + "PAD_8_FRACT_0_5": "Bur he 1s an instant too late --", + "PAD_8_FRACT_0_2": "Bur he 1s an instant too late --" + } + }, + "Transformers_-_Unicron_000-004.jpg": { + "1": { + "DEFAULT": "Platitudes are useless, prime, we have to hit that | thing\u2014now.", + "INITIAL_BOX": "| platitudes are useless, prime, we have to hit that thing\u2014now.", + "DEFAULT_GREY_PAD": "Latitudes ape useless, prime, we have to hit that thing\u2014now.", + "PADDED_4": "Platitudes 3 ape useless, prime, we have to ait that | thing now.", + "PADDED_8": "Y 
latitudes ape useless, prime, we have to hit that \\ thing\u2014now. 4", + "EXTRACTED_INIT_BOX": "Platitudes are useless, prime. we have to hit that thing \u2014now.", + "PADDED_4_EXTRACTED": "Platitudes ape useless, prime, we have to hit that thing\u2014now.", + "PADDED_8_EXTRACTED": "Latitudes. ape useless, prime, we have to hit that thing\u2014now.", + "PADDED_8_DILATION_1": "Latitudes. ape useless, prime, we have to hit that thing\u2014now.", + "PAD_8_FRACT_0_5": "Platitudes to hit that thing\u2014now.", + "PAD_8_FRACT_0_2": "Platitudes. are useless, prime, we have to hit that thing\u2014now." + }, + "0": { + "INITIAL_BOX": "Ra with the light of hope, there is always \"a chance for ac etoryy \u201d", + "DEFAULT": "With the light of hop: there is always a chance for \" \u201cictopy.", + "DEFAULT_GREY_PAD": "With the light of hope, there is always a chance for victory.", + "PADDED_4": "With the light of hope, there is always a chance for \\ victory. 4", + "PADDED_8": "With the a chance for victory.", + "EXTRACTED_INIT_BOX": "With the light of hope, there is always a chance for victory.", + "PADDED_4_EXTRACTED": "Wel ere there is always \u2018a chance for victory.", + "PADDED_8_EXTRACTED": "Py there is always \u2018a chance for victory.", + "PADDED_8_DILATION_1": "With the light of hope, there is always a chance for victory.", + "PAD_8_FRACT_0_5": "Py there 1s always \u2018a chance for victory.", + "PAD_8_FRACT_0_2": "El ore there is always \"a chance for victory." + }, + "2": { + "INITIAL_BOX": "Pyra magna. torchbearer.", + "DEFAULT": "Pyra magna. torchbearer.", + "DEFAULT_GREY_PAD": "Pyra magna. torchbearer.", + "PADDED_4": "Pyra magna. torchbearer.", + "PADDED_8": "R\u2014 pyra magna. torchbearer.", + "EXTRACTED_INIT_BOX": "Pyra magna. torchbearer.", + "PADDED_4_EXTRACTED": "Pyra magna. torchbearer.", + "PADDED_8_EXTRACTED": "Pyra magna. torchbearer.", + "PADDED_8_DILATION_1": "Pyra magna. torchbearer.", + "PAD_8_FRACT_0_5": "Pyra magna. 
torchbearer.", + "PAD_8_FRACT_0_2": "Pyra magna. torchbearer." + }, + "3": { + "INITIAL_BOX": "You know i've never run from a fight, pyra.", + "DEFAULT": "You know i've nevez run from wavy", + "DEFAULT_GREY_PAD": "You know i've never run from a fight, pypa.", + "PADDED_4": "You know tve never run from a 'fight, pyra.", + "PADDED_8": "Pou know ive 3 never run from a fight, pyra.", + "EXTRACTED_INIT_BOX": "You know ive never run from \"a fight, pyra.", + "PADDED_4_EXTRACTED": "You \u00a3now tve never run from a fight, pyra.", + "PADDED_8_EXTRACTED": "You \u00a3now tve never run from a fight, pyra.", + "PADDED_8_DILATION_1": "You know tve never run from a fight, pyra.", + "PAD_8_FRACT_0_5": "You know ive never run from a fight, pyra.", + "PAD_8_FRACT_0_2": "You know tve never run from a fight, pyra." + }, + "4": { + "INITIAL_BOX": "But even if this was winnable \u2014 and it clearly is not\u2014ask joursele..", + "DEFAULT": "R but even if this. hs winnable\u2014and it clearly is ot\u2014ask \\ yourself... /", + "DEFAULT_GREY_PAD": "But even if this was. winnable\u2014 and) it clearly is ry", + "PADDED_4": "P but even \"\\ piles winnable\u2014and fags [ot \u2014ask \\ it,", + "PADDED_8": "But even if this was winnable\u2014and it clearly is not\u2014ask yourself.", + "EXTRACTED_INIT_BOX": "But even if this was. winnable \u2014 and it clearly is not aske foubsele", + "PADDED_4_EXTRACTED": "But even if this was. winnable\u2014 and it clearly is not aske yourself.", + "PADDED_8_EXTRACTED": "But even if this was. winnable\u2014 and it clearly is not aske yourself.", + "PADDED_8_DILATION_1": "But even if this was. winnable\u2014and it clearly is not as yourself.", + "PAD_8_FRACT_0_5": "But even if this was. winnable\u2014and it clearly 1s not ask", + "PAD_8_FRACT_0_2": "But even if this was. winnable\u2014and it clearly 1s not ask yourself." 
+ }, + "5": { + "INITIAL_BOX": "Did we me here to end lives or save them?", + "DEFAULT": "7 ..did we come here to end lives or save them? 4", + "DEFAULT_GREY_PAD": "Did we come here. \u00a9 end wes) them?", + "PADDED_4": "Did we come here to end lives or save them? 4", + "PADDED_8": "(em to end lives eee", + "EXTRACTED_INIT_BOX": "Did we to end lives oo save them?", + "PADDED_4_EXTRACTED": "Did we cone here to end lives oo save them?", + "PADDED_8_EXTRACTED": "Did we cone here to end lives oo save them?", + "PADDED_8_DILATION_1": "Did we come here to end lives or save them?", + "PAD_8_FRACT_0_5": "Did we come here to end lives or save them?", + "PAD_8_FRACT_0_2": "Did we come here to end lives or save them?" + }, + "6": { + "INITIAL_BOX": "/ thats where \u2122\\ 7 come in. this doohcreyil lrtate a world spanning shale bride ade bry hime enc cut a there.", + "DEFAULT": "That's where leer hs oobaae ll hreare wrb she bee weee be ne sing ctt h, beng ov", + "DEFAULT_GREY_PAD": "| that's where j", + "PADDED_4": "That's where z come in. this doohickey'll create a wobld-spannin' space bridge\u2014 and get every living being outta there.", + "PADDED_8": "That's where z come in. this doohickey'll create a wobld-spannin' space bridge\u2014 and get every living being outta there.", + "EXTRACTED_INIT_BOX": "That's where z come in. this doohickey ll create a wobld-spannin' space bridge and get every lving being outta there", + "PADDED_4_EXTRACTED": "That's where z come in. this doohickey'll create a wobld-spannin' space bridge and get every lving being outta there", + "PADDED_8_EXTRACTED": "That's where z come in. this doohickey'll create a wobld-spannin' space bridge and get every lving being outta there", + "PADDED_8_DILATION_1": "That's where z come in. this doohickey'll create a wobld-spannin'", + "PAD_8_FRACT_0_5": "That's where z come in. this doohickey'll create amoblespannin' space bridge adee re me being outta there", + "PAD_8_FRACT_0_2": "That's where z come in. 
this doohickey'll create a wobld-spannin'" + }, + "7": { + "INITIAL_BOX": "Ve pont now what this anomaly /8, or why it's doing this\u2014let alone how to stop something bigger than a planet", + "DEFAULT": "We don't n now what this anomaly /8, or why it's doing this\u2014let alone how to stop something bigger than a planet.", + "DEFAULT_GREY_PAD": "We don't now what this anomaly /8, or why it's doing this\u2014let alone", + "PADDED_4": "We don't. now what this anomaly /8, or why it's doing this\u2014let alone how to stop something bigger than a planet.", + "PADDED_8": "Y we don't ~n now what this anomaly /8, or why [t's doing. this\u2014let alone how to stop something bigger than a planet.", + "EXTRACTED_INIT_BOX": "We don't why t's doing this\u2014let alone how to stop something bigger than a planet", + "PADDED_4_EXTRACTED": "We don't now what this anomaly /8, or why t's doing this\u2014let alone how to stop something bigger than a planet.", + "PADDED_8_EXTRACTED": "We don't. why t's doing this\u2014let alone how to stop something bigger than a planet.", + "PADDED_8_DILATION_1": "We don't why t's doing this\u2014let alone how to stop something bigger than a planet.", + "PAD_8_FRACT_0_5": "We don't. this\u2014let alone how to stop something bigger than a planet.", + "PAD_8_FRACT_0_2": "We don't why ts doing this\u2014let alone how to stop something bigger than a planet." + }, + "8": { + "INITIAL_BOX": "The anomaly. is here because cur world called it.", + "DEFAULT": "| the anomaly is here because cur world called it. 4", + "DEFAULT_GREY_PAD": "The anomaly is heee because] our wobld called |", + "PADDED_4": "The anomaly is hebe because our wobld \\ called it. 4", + "PADDED_8": "P re anomaly 3 is hebe because", + "EXTRACTED_INIT_BOX": "The anomaly. is hebe because our wobld called it.", + "PADDED_4_EXTRACTED": "The anomaly. 
1s hebe because our wobld called it.", + "PADDED_8_EXTRACTED": "The anomaly 1s hebe because our wobld called it.", + "PADDED_8_DILATION_1": "The anomaly is hebe because our world called it.", + "PAD_8_FRACT_0_5": "The anomaly is hee because our world called it.", + "PAD_8_FRACT_0_2": "The anomaly 18 heee because our world called it." + }, + "9": { + "INITIAL_BOX": "7 twill not \u00a9 let another. civilization die", + "DEFAULT": "7 twill not let another civilization die because of cybertron's ce actions.", + "DEFAULT_GREY_PAD": "I will not", + "PADDED_4": "I will not let another civilization die because of cybertron's \\ actiong.", + "PADDED_8": "I will not let another. civilization die", + "EXTRACTED_INIT_BOX": "I will not let another civilization die because of ybertron\" actions.", + "PADDED_4_EXTRACTED": "I will not let another civilization dig", + "PADDED_8_EXTRACTED": "I will not let another civilization dig", + "PADDED_8_DILATION_1": "I will not let another. civilization die because of ybertron\" c actions.", + "PAD_8_FRACT_0_5": "I will not let another civilization die because of cybertron's actions.", + "PAD_8_FRACT_0_2": "I will not let another civilization die because of vbertron\" c actions." + }, + "10": { + "INITIAL_BOX": "I appreciate the aid optimus prime...", + "DEFAULT": "T appreciate the aid, \u00a9 optimus prime...", + "DEFAULT_GREY_PAD": "I appreciate the add) optimus prime.", + "PADDED_4": "I appreciate the aid, \"optimus prime... |", + "PADDED_8": "I frm ie ote kr i appreciate the aid, optimus prive..", + "EXTRACTED_INIT_BOX": "I appreciate the aid optimus prime...", + "PADDED_4_EXTRACTED": "I appreciate the aid, optimus prime.", + "PADDED_8_EXTRACTED": "I appreciate the aid, optimus prime.", + "PADDED_8_DILATION_1": "I appreciate the aid, optimus prime...", + "PAD_8_FRACT_0_5": "I appreciate the aid, optimus prime...", + "PAD_8_FRACT_0_2": "T appreciate the aid, optimus prime." 
+ }, + "11": { + "INITIAL_BOX": "Y' we just need about thirty of these suckees placed at strategic, uh, \"places.", + "DEFAULT": "Y we just need about thirty of these suckers", + "DEFAULT_GREY_PAD": "We just need about thirty of these suckers| placed at strategic, uh,", + "PADDED_4": "We just nls rs on i ees eve bh stesecl", + "PADDED_8": "We just need about thirty of these suckers placed at strategic, uh, place:", + "EXTRACTED_INIT_BOX": "We just need about thirty of these suckees placed at strategic, uh, places.", + "PADDED_4_EXTRACTED": "We just need about thirty of these suckers placed at strategic, uh, places.", + "PADDED_8_EXTRACTED": "We just need about thirty of these suckers placed at strategic, uh, places.", + "PADDED_8_DILATION_1": "We just need about thirty of these suckers placed at strategic, uh, places.", + "PAD_8_FRACT_0_5": "We just need about these suckers placed at strategic, uh,", + "PAD_8_FRACT_0_2": "We just need about thirty of these suckers placed at strategic, uh, places." + }, + "12": { + "INITIAL_BOX": "On loma, wk cai wes:", + "DEFAULT": "On loma, wk rar wes:", + "DEFAULT_GREY_PAD": "Wk 2a]", + "PADDED_4": "On loma, wk rar wes:", + "PADDED_8": "Wes:", + "EXTRACTED_INIT_BOX": "2 wk zr wi 5", + "PADDED_4_EXTRACTED": "Pp wk zr wi 5", + "PADDED_8_EXTRACTED": "Pp wk zr wi 5", + "PADDED_8_DILATION_1": "On loma, wk rar wes:", + "PAD_8_FRACT_0_5": "Oma, qin bled", + "PAD_8_FRACT_0_2": "Reg" + }, + "13": { + "INITIAL_BOX": "Wheeljack. mad scientist.", + "DEFAULT": "Wheeljack. mad scientist.", + "DEFAULT_GREY_PAD": "Wheeljack. mad scientis", + "PADDED_4": "Wheeljack. mad scientist.", + "PADDED_8": "Wheeljack. mad scientist", + "EXTRACTED_INIT_BOX": "Wheeljack. mad scientist.", + "PADDED_4_EXTRACTED": "Wheeljack. mad scientist.", + "PADDED_8_EXTRACTED": "Wheeljack. mad scientist.", + "PADDED_8_DILATION_1": "Wheeljack. mad scientist.", + "PAD_8_FRACT_0_5": "Wheeljack. mad scientist.", + "PAD_8_FRACT_0_2": "Wheeljack, mad scientist." 
+ }, + "14": { + "INITIAL_BOX": "Yet surely you die not believe the solstar order. would be without its own defences.", + "DEFAULT": "7 yet surely you die not believe the solstar order would be without ts. \"own defenges.", + "DEFAULT_GREY_PAD": "Yet surely you di not believe the solstar order. would be without it: on defenzes.", + "PADDED_4": "Yet surely you die not believe the solstar order. would be without its. \\ own defenses.", + "PADDED_8": "Yet surely you 4 die not believe the solstar order. would be without its. own defenses.", + "EXTRACTED_INIT_BOX": "Yet surely you dib not believe the solstar order, would be without 175 own defences.", + "PADDED_4_EXTRACTED": "Yet surely you, dib not believe the solstar order, would be without its. own defenges.", + "PADDED_8_EXTRACTED": "Yet surely you dib not believe the solstar order, would be without its. own defenses.", + "PADDED_8_DILATION_1": "Yet surely you die not believe the solstar order. would be without its. own defenses.", + "PAD_8_FRACT_0_5": "Yet surely you own defenses.", + "PAD_8_FRACT_0_2": "Yet surely you, dib not believe the solstar order. would be without 178 own defenses." + }, + "15": { + "INITIAL_BOX": "Rom was the", + "DEFAULT": "Rom was the warriors the cosmos ras ever known.", + "DEFAULT_GREY_PAD": "Rom was the fibst of the", + "PADDED_4": "Rom was the warriors the cosmos ras ever known.", + "PADDED_8": "| 2op was the warriors the | cosmos has. ever known.", + "EXTRACTED_INIT_BOX": "Rom was the", + "PADDED_4_EXTRACTED": "Rom was the warriors the cosmos ras ever known.", + "PADDED_8_EXTRACTED": "Rom was the warriors the cosmos ras ever known...", + "PADDED_8_DILATION_1": "Rom was the warriors the cosmos has ever known...", + "PAD_8_FRACT_0_5": "Rom was the", + "PAD_8_FRACT_0_2": "Rom was the warriors the cosmos has ever known..." + } + }, + "Transformers_-_Unicron_000-016.jpg": { + "1": { + "DEFAULT": "Planet called lv-217 some years ago. 
4" + } + }, + "WARE_ACME_024.jpg": { + "1": { + "DEFAULT": "Almost done...", + "INITIAL_BOX": "Almost done.", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Almost done...", + "PADDED_8": "Almost done...", + "EXTRACTED_INIT_BOX": "Almost done.", + "PADDED_4_EXTRACTED": "Almost done...", + "PADDED_8_EXTRACTED": "Almost done...", + "PADDED_8_DILATION_1": "Almost done...", + "PAD_8_FRACT_0_5": "Almost done...", + "PAD_8_FRACT_0_2": "Almost done..." + }, + "0": { + "INITIAL_BOX": "134 & olyph", + "DEFAULT": "8 glyph", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Lil & glyph", + "PADDED_8": "Sle & glyph", + "EXTRACTED_INIT_BOX": "Ie if a i ja j] wy by; \u00ae uy", + "PADDED_4_EXTRACTED": "== es ? \u2014 \u201c", + "PADDED_8_EXTRACTED": "Ie oly wh", + "PADDED_8_DILATION_1": "\u00a7lyph", + "PAD_8_FRACT_0_5": "Glyph", + "PAD_8_FRACT_0_2": "Glyph" + }, + "2": { + "INITIAL_BOX": "Oh hey, 1--", + "DEFAULT": "Oh hey, 1--", + "DEFAULT_GREY_PAD": "Oh", + "PADDED_4": "Oh hey, 1--", + "PADDED_8": "Oh hey, 1--", + "EXTRACTED_INIT_BOX": "Oh hey, 1", + "PADDED_4_EXTRACTED": "Oh hey, t", + "PADDED_8_EXTRACTED": "Oh hey, 1", + "PADDED_8_DILATION_1": "Oh hey, 1", + "PAD_8_FRACT_0_5": "Oh hey, 1", + "PAD_8_FRACT_0_2": "Oh hey, 1" + }, + "3": { + "INITIAL_BOX": "Pinnacle of", + "DEFAULT": "Dinnacie of", + "DEFAULT_GREY_PAD": "Dinn3cie +", + "PADDED_4": "Pinnacle o", + "PADDED_8": "Pinnacle of", + "EXTRACTED_INIT_BOX": "", + "PADDED_4_EXTRACTED": "", + "PADDED_8_EXTRACTED": "Os", + "PADDED_8_DILATION_1": "Dinnacle \u00ab", + "PAD_8_FRACT_0_5": "Pinnacle \u00abf", + "PAD_8_FRACT_0_2": "Dinnacle \u00ab" + }, + "4": { + "INITIAL_BOX": "Arr \u2014\u2014te er ren man oh man... that sure was one heckuva orgy last night... whew/", + "DEFAULT": "Re eas p.t. man oh man... that sure was one heckuva orgy last night... whew/", + "DEFAULT_GREY_PAD": "Man oh man... that sure was one heckuva orgy last night... whew/", + "PADDED_4": "Aree ane dn \u2014\u2014 man oh man... 
that sure was one heckuva orgy last night... whew/", + "PADDED_8": "J se en es man oh man... that sure was one heckuva orgy last night... whew/", + "EXTRACTED_INIT_BOX": "Man oh man... that sure was one heckuva orgy last night... whew/", + "PADDED_4_EXTRACTED": "Man oh man... that sure was one heckuva orgy last night... whew/", + "PADDED_8_EXTRACTED": "Man oh man... that sure was one heckuva orgy last night... whew/", + "PADDED_8_DILATION_1": "Man oh man... that sure was one heckuva orgy last night... whew/", + "PAD_8_FRACT_0_5": "Man oh man... that sure was one heckuva orgy last night... whew/", + "PAD_8_FRACT_0_2": "Man oh man... that sure was one heckuva orgy last night... whew/" + }, + "5": { + "INITIAL_BOX": "I bl ai yu | 3", + "DEFAULT": "= 3 \u00a3\u00a3)", + "DEFAULT_GREY_PAD": "Sumer", + "PADDED_4": "I ba n | | bb p a | we come. ros", + "PADDED_8": "Gumer|", + "EXTRACTED_INIT_BOX": "Ma. oy pos", + "PADDED_4_EXTRACTED": "Hl 1 er 2 oy r [pons", + "PADDED_8_EXTRACTED": "My <& s =i", + "PADDED_8_DILATION_1": "1) fa suter", + "PAD_8_FRACT_0_5": "\" 5", + "PAD_8_FRACT_0_2": "Qumep frid" + }, + "6": { + "INITIAL_BOX": "- oh. 
ic ooe i j =5 a", + "DEFAULT": "4 ce iy in 1 = e nd", + "DEFAULT_GREY_PAD": "Bh, those jews", + "PADDED_4": "Ud )s", + "PADDED_8": "W hry) es fe 4 s", + "EXTRACTED_INIT_BOX": "sight", + "DEFAULT_GREY_PAD": "N a sigh", + "PADDED_4": "2sight", + "PADDED_8": "3 sigh?", + "EXTRACTED_INIT_BOX": ">sigh?", + "PADDED_4_EXTRACTED": "Ssigh?", + "PADDED_8_EXTRACTED": "Ssight", + "PADDED_8_DILATION_1": "Ssight", + "PAD_8_FRACT_0_5": "Sight", + "PAD_8_FRACT_0_2": "Ssight" + } + }, + "ronson-031.jpg": { + "5": { + "INITIAL_BOX": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre mieses doradas, hoz y zoqueta en ristre, con la \u00fanica compa\u00f1\u00eda de las chicharras.", + "DEFAULT": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre meses doradas, hoz y zoqueta en ristre, con la \u00fanica compa\u00f1\u00eda de las chicharras.", + "DEFAULT_GREY_PAD": "Recuerdo a aquellos segadores abriendose paso entre mieses doradas, hoz y zoqueta en ristre, con la \u00fanica compa\u00f1\u00eda de las chicharras.", + "PADDED_4": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre meses doradas, hoz y zoqueta en ristre, con la \u00fanica compa\u00f1\u00eda de las chicharras.", + "PADDED_8": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre meses doradas, hoz y zoqueta en ristre, con la \u00fanica compa\u00f1\u00eda de las chicharras.", + "EXTRACTED_INIT_BOX": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre mieses doradas, hoz y zoqueta en ristre, con la \u00fanica compa\u00f1\u00eda de las chicharras.", + "PADDED_4_EXTRACTED": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre mieses doradas, hoz y zoqueta en ristre, con la \u00fanica compa\u00f1\u00eda de las chicharras.", + "PADDED_8_EXTRACTED": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre mieses doradas, hoz y zoqueta en ristre, con la \u00fanica compa\u00f1\u00eda de las chicharras.", + "PADDED_8_DILATION_1": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre mieses 
doradas, hoz y zoqueta en ristre, con la \u00fanica compa\u00f1\u00eda de las chicharras.", + "PAD_8_FRACT_0_5": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre mieses doradas; hoz y zoqueta en ristre, con la \u00fanica compa\u00f1\u00eda de las chicharras.", + "PAD_8_FRACT_0_2": "Recuerdo a aquellos segadores abri\u00e9ndose paso entre mieses doradas; hoz y zoqueta en ristre, con la \u00fanica compa\u00f1\u00eda de las chicharras." + }, + "0": { + "INITIAL_BOX": "En una visita reciente ala casa despu\u00e9s de muchas decadas, me sorprend\u00ed al ver el abrevadero. lo recordaba mucho m\u00e1s alto.", + "DEFAULT": "En una visita reciente ala casa despu\u00e9s de muchas decadas, me sorprendi al ver el abrevadero. lo recordaba mucho m\u00e1s alto.", + "DEFAULT_GREY_PAD": "En una visita reciente ala casa despu\u00e9s de uchas d\u00e9cadas, me sorprend\u00ed al ver el abrevadero. lo recordaba mucho m\u00e1s alto.", + "PADDED_4": "En una visita reciente ala casa despu\u00e9s de muchas decadas, me sorprendi al ver el abrevadero. lo recordaba mucho m\u00e1s alto.", + "PADDED_8": "En una visita reciente ala casa despu\u00e9s de muchas decadas, me sorprendi al ver el abrevadero. lo recordaba mucho m\u00e1s alto.", + "EXTRACTED_INIT_BOX": "En una visita reciente ala casa despu\u00e9s de muchas decadas, me sorprendi al ver el abrevadero. lo recordaba mucho m\u00e1s alto.", + "PADDED_4_EXTRACTED": "En una visita reciente ala casa despu\u00e9s de muchas decadas, me sorprendi al ver el abrevadero. lo recordaba mucho m\u00e1s alto.", + "PADDED_8_EXTRACTED": "En una visita reciente ala casa despu\u00e9s de muchas decadas, me sorprendi al ver el abrevadero. lo recordaba mucho m\u00e1s alto.", + "PADDED_8_DILATION_1": "En una visita reciente ala casa despu\u00e9s de muchas decadas, me sorprendi al ver el abrevadero. lo recordaba mucho m\u00e1s alto.", + "PAD_8_FRACT_0_5": "En una visita reciente ala casa despu\u00e9s de muchas decadas, me sorprendi al ver el abrevadero. 
lo recordaba mucho m\u00e1s alto.", + "PAD_8_FRACT_0_2": "En una visita reciente ala casa despu\u00e9s de muchas decadas, me sorprendi al ver el abrevadero. lo recordaba mucho m\u00e1s alto." + }, + "1": { + "INITIAL_BOX": "Junto al abrevadero crec\u00eda el vigoroso troncc de una gran parra, que trepaba h\u00e1bilmente la tapia...", + "DEFAULT": "Junto al abrevadero crec\u00eda el vigoroso tronco de una gran parra, que trepaba h\u00e1bilmente la tapia...", + "DEFAULT_GREY_PAD": "Unto al abrevadero crec\u00eda el vigoroso tronco de una gran parra, que trepaba h\u00e1bilmente la tapia...", + "PADDED_4": "Junto al abrevadero crec\u00eda el vigoroso tronco de una gran parra, que trepaba h\u00e1bilmente la tapia.,", + "PADDED_8": "Junto al abrevadero crec\u00eda el vigoroso tronco de una gran parra, que trepaba h\u00e1bilmente la tapia.,", + "EXTRACTED_INIT_BOX": "Junto al abrevadero crec\u00eda el vigoroso troncc de una gran parra, que trepaba h\u00e1bilmente la tapia...", + "PADDED_4_EXTRACTED": "Junto al abrevadero crec\u00eda el vigoroso tronco de una gran parra, que trepaba h\u00e1bilmente la tapia...", + "PADDED_8_EXTRACTED": "Junto al abrevadero crec\u00eda el vigoroso tronco de una gran parra, que trepaba h\u00e1bilmente la tapia...", + "PADDED_8_DILATION_1": "Junto al abrevadero crec\u00eda el vigoroso tronco de una gran parra, que trepaba h\u00e1bilmente la tapia..,", + "PAD_8_FRACT_0_5": "Junto al abrevadero crec\u00eda el vigoroso tronco de una gran parra, que trepaba h\u00e1bilmente la tapia...", + "PAD_8_FRACT_0_2": "Junto al abrevadero crec\u00eda el vigoroso tronco de una gran parra, que trepaba h\u00e1bilmente la tapia..." 
+ }, + "2": { + "INITIAL_BOX": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un nino.", + "DEFAULT": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un nino.", + "DEFAULT_GREY_PAD": "", + "PADDED_4": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un nino.", + "PADDED_8": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un nino.", + "EXTRACTED_INIT_BOX": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un nino.", + "PADDED_4_EXTRACTED": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un nino.", + "PADDED_8_EXTRACTED": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un nino.", + "PADDED_8_DILATION_1": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un nino.", + "PAD_8_FRACT_0_5": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un nino.", + "PAD_8_FRACT_0_2": "Supongo que este recuerdo tramposo revela la escala que tiene el mundo cuando eres un nino." + }, + "3": { + "INITIAL_BOX": "Ys es mete -rtizo \u00f1 tra mt ele ade e a 3", + "DEFAULT": "Ps ae ie nac estrenos", + "DEFAULT_GREY_PAD": "Y se/extend\u00eda hasta el \u00a1cobertizo de entrada: hacia: principio. del verano luc\u00eda, sus exuberantes hojas y/comenzaba ya a/florecer..", + "PADDED_4": "Mate ln y ,cober z 'trada a com ao le a , h jas y/comenzaba, ya an y", + "PADDED_8": "Js ee amastae cobertizo e as a la e rada. 70 \u00a1com za! y ya > ec 4 y a", + "EXTRACTED_INIT_BOX": "Y se extend\u00eda hasta el cobertizo de entrada. hacia principio del verano luc\u00eda sus exuberantes hojas y comenzaba ya a florecer.", + "PADDED_4_EXTRACTED": "Y se extend\u00eda hasta el cobertizo de entrada. 
hacia principio del verano luc\u00eda sus exuberantes hojas y comenzaba ya a florecer.", + "PADDED_8_EXTRACTED": "Y se extend\u00eda hasta el cobertizo de entrada. hacia principio del verano luc\u00eda sus exuberantes hojas y comenzaba ya a florecer.", + "PADDED_8_DILATION_1": "Y se extend\u00eda hasta el cobertizo de entrada. hacia principio del verano luc\u00eda sus exuberantes hojas y comenzaba ya a florecer.", + "PAD_8_FRACT_0_5": "Y se extend\u00eda hasta el cobertizo de entrada. hacia principio del verano luc\u00eda sus exuberantes hojas y comenzaba ya a florecer.", + "PAD_8_FRACT_0_2": "Y se extend\u00eda hasta el cobertizo de entrada. hacia principio del verano luc\u00eda sus exuberantes hojas y comenzaba ya a florecer." + }, + "4": { + "INITIAL_BOX": "Era entonces cuando daba comienzo la cosecha del cereal. la jornada se iniciaba tan pronto despuntaba el dia y se extend\u00eda de sol a sol.", + "DEFAULT": "Era entonces cuando daba comienzo la cosecha del cereal. la jornada se iniciaba tan pronto despuntaba el d\u00eda y se extend\u00eda de sol a sol.", + "DEFAULT_GREY_PAD": "Era entonces cuando daba comienzo la cosecha del cereal. la jornada se iniciaba tan pronto despuntaba el d\u00eda y se extend\u00eda de sol a sol,", + "PADDED_4": "Era entonces cuando daba comienzo la cosecha del cereal. la jornada se iniciaba tan pronto despuntaba el d\u00eda y se extend\u00eda de sol a sol.", + "PADDED_8": "Era entonces cuando daba comienzo la cosecha del cereal. la jornada se iniciaba tan pronto despuntaba el d\u00eda y se extend\u00eda de sol a sol.", + "EXTRACTED_INIT_BOX": "El e e! fs, y \u00a1a h 4 pb e e a 5 2 mi e) 2 js lo a ni ny to a, ) y vd a sn e. 
y \u00f1 e > di y e o la", + "PADDED_4_EXTRACTED": "A e e \u201c4 7 ane 25 n \u00a1 es ji in ye mi in ma \u00bb a e ss e) un 3 lo a, a y n gia pa do a \u00ed s nn \u00a1y pe n la y e", + "PADDED_8_EXTRACTED": "A 20 fs, el e n nd y 3 \u00bf4 e gd dol y o y un ys \u00a1e y 308 a 2 \u00ed en =n y pe da \u00bf5", + "PADDED_8_DILATION_1": "Era entonces cuando daba comenzo la cosecha del cereal. la jornada se iniciaba tan pronto despuntara el, d\u00eda y se extend\u00eda de sol a sol.", + "PAD_8_FRACT_0_5": "Ndo n pr e in la )nces e e nu y del cer a, jo nada se in \u00a1ab\u00e1 in ej ar n dr =, un pr int \u00fa o espui antas el dia y se dl la sa fe a de k sel", + "PAD_8_FRACT_0_2": "Era entonces cuando daba comienzo la cosecha del cereal. la jornada se iniciaba tan pronto despuntasa el d\u00eda y se extend\u00eda de sol a sol." + } + } + }, + "Tesseract-crop": {} + } +} \ No newline at end of file diff --git a/pcleaner/_testbed/experiment/cache/.gitkeep b/pcleaner/_testbed/experiment/cache/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/pcleaner/_testbed/experiment/source/.gitkeep b/pcleaner/_testbed/experiment/source/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/pcleaner/_testbed/nbs/experiments.ipynb b/pcleaner/_testbed/nbs/experiments.ipynb new file mode 100644 index 00000000..bde864b5 --- /dev/null +++ b/pcleaner/_testbed/nbs/experiments.ipynb @@ -0,0 +1,7640 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp testbed/experiments" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "notebookRunGroups": { + "groupValue": "1" + } + }, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from __future__ import annotations\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# %reload_ext autoreload\n", + "# %autoreload 0\n" + ] + }, + { 
+ "cell_type": "markdown", + "metadata": {}, + "source": [ + "# install (Colab)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# try: \n", + "# import fastcore as FC\n", + "# except ImportError: \n", + "# !pip install -q fastcore\n", + "# try:\n", + "# import rich\n", + "# except ImportError:\n", + "# !pip install -q rich\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip uninstall Pillow" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install \"pillow<10.1.0,>=8.3.2\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note: we're using the `testbed` branch of PanelCleaner.**" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install -q git+https://github.com/civvic/PanelCleaner.git@testbed-colab" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PanelCleaner Testbed\n", + "> A PanelCleaner space to explore, play, collaborate and experiment with ML/DL techniques.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prologue" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "notebookRunGroups": { + "groupValue": "1" + } + }, + "outputs": [], + "source": [ + "#| export\n", + "import contextlib\n", + "import dataclasses\n", + "import datetime\n", + "import difflib\n", + "import functools\n", + "import json\n", + "import os\n", + "import shutil\n", + "import tempfile\n", + "from collections import defaultdict\n", + "from enum import Enum\n", + "from pathlib import Path\n", + "from typing import Any\n", + "from typing import Callable\n", + "from typing import cast\n", + "from typing import Mapping\n", + "from typing import TypeAlias\n", + "\n", + "import fastcore.all as FC\n", + "import 
ipywidgets as W\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import pcleaner.config as cfg\n", + "import pcleaner.ctd_interface as ctm\n", + "import pcleaner.image_ops as ops\n", + "import pcleaner.ocr.ocr as ocr\n", + "import pcleaner.structures as st\n", + "import torch\n", + "import traitlets as T\n", + "from IPython.display import clear_output\n", + "from IPython.display import display\n", + "from IPython.display import HTML\n", + "from loguru import logger\n", + "from pcleaner.ocr.ocr_tesseract import TesseractOcr\n", + "from PIL import Image\n", + "from PIL import ImageFilter\n", + "from rich.console import Console\n", + "from tqdm.notebook import tqdm\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "notebookRunGroups": { + "groupValue": "1" + } + }, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from pcleaner._testbed.testbed.helpers import *\n", + "from pcleaner._testbed.testbed.ocr_metric import *\n", + "from pcleaner._testbed.testbed.visor import ContextVisor\n", + "from pcleaner._testbed.testbed.visor import Spinner\n", + "import pcleaner._testbed.testbed.web_server as web_server\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "notebookRunGroups": { + "groupValue": "1" + } + }, + "outputs": [], + "source": [ + "import copy\n", + "import re\n", + "from contextlib import ExitStack\n", + "from typing import Protocol\n", + "\n", + "import fastcore.xtras # patch `Path` with some utils (like `ls()` to list folder contents)\n", + "import pcleaner.cli_utils as cli\n", + "import pcleaner.preprocessor as pp\n", + "import pytesseract\n", + "import rich\n", + "from fastcore.test import * # type: ignore\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Helpers" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "notebookRunGroups": { + "groupValue": "1" + } + }, + "outputs": [], + 
"source": [ + "# pretty print by default\n", + "# %load_ext rich" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "notebookRunGroups": { + "groupValue": "1" + } + }, + "outputs": [], + "source": [ + "#| exporti\n", + "console = Console(width=104, tab_size=4, force_jupyter=True)\n", + "cprint = console.print\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## USE_PIL" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The experiments can generate hundreds of images, and maintaining the **PIL** images in memory is not efficient. All the generated images are cached and visualized on demand through a URL pointing to the local cache. This approach prevents the kernel from being overloaded with **PIL** images, with the front-end responsible for fetching the image and the backend web server (not the kernel) for serving the image in another process. This method is quick and efficient. As an added bonus, the saved notebook remains lean and fit; it doesn't store the Base64 versions of all the output cell images.\n", + "\n", + "Unfortunately, this approach does not work as is in **Colab**. Google Colab runs on an older Ubuntu 18.04 VM, so all the usual networking challenges with Docker, or whatever VMs Google is using, apply. Google also goes to great lengths to avoid exposing its internal architecture. We have two options:\n", + "- Let the Jupyter kernel serve the images itself, which is slow and memory-consuming.\n", + "- Use a tunnel to map localhost (server) to whatever IP and port the front-end (the browser you're currently using) is running on. We can use **ngrok** for this, but *ngrok* is a commercial service that has been abused and now requires confirmation the first time the tunnel connects, which can be inconvenient for the user. 
It also requires the user to open a free account and obtain an auth token.\n", + "\n", + "You choose.\n", + "\n", + "If the notebook is running in Colab and ngrok has been successfully installed and the tunnel has been created, the default setting is USE_PIL=False. You can set the environment variable USE_PIL=True to force the use of PIL images, but note that in certain circumstances, Colab will complain because the free tiers are usually memory constrained.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "notebookRunGroups": { + "groupValue": "1" + } + }, + "outputs": [], + "source": [ + "#| exporti\n", + "\n", + "os.environ['USE_PIL'] = 'False'\n", + "os.environ['USE_TUNNEL'] = 'False'\n", + "SERVER = None\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tesseract setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get current version of Tesseract" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[\n",
+       "    'tesseract 5.3.4',\n",
+       "    ' leptonica-1.84.1',\n",
+       "    '  libgif 5.2.1 : libjpeg 8d (libjpeg-turbo 3.0.0) : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : \n",
+       "libwebp 1.4.0 : libopenjp2 2.5.2',\n",
+       "    ' Found NEON',\n",
+       "    ' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6',\n",
+       "    ' Found libcurl/8.6.0 SecureTransport (LibreSSL/3.3.6) zlib/1.2.12 nghttp2/1.61.0'\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\n", + " \u001b[32m'tesseract 5.3.4'\u001b[0m,\n", + " \u001b[32m' leptonica-1.84.1'\u001b[0m,\n", + " \u001b[32m' libgif 5.2.1 : libjpeg 8d \u001b[0m\u001b[32m(\u001b[0m\u001b[32mlibjpeg-turbo 3.0.0\u001b[0m\u001b[32m)\u001b[0m\u001b[32m : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : \u001b[0m\n", + "\u001b[32mlibwebp 1.4.0 : libopenjp2 2.5.2'\u001b[0m,\n", + " \u001b[32m' Found NEON'\u001b[0m,\n", + " \u001b[32m' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6'\u001b[0m,\n", + " \u001b[32m' Found libcurl/8.6.0 SecureTransport \u001b[0m\u001b[32m(\u001b[0m\u001b[32mLibreSSL/3.3.6\u001b[0m\u001b[32m)\u001b[0m\u001b[32m zlib/1.2.12 nghttp2/1.61.0'\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "out = !tesseract --version # type: ignore\n", + "cprint(out)\n", + "if 'tesseract 5.' not in out[0]:\n", + " if 'tesseractd 4.' in out[0]:\n", + " cprint('Old Tesseract 4.x is installed. 
You should uninstall it and install Tesseract 5.x')\n", + " else:\n", + " cprint('You should install Tesseract 5.x')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **NOTE: in the cells below, when you encounter lines starting with the exclamation mark `!` (`bang`), uncomment them if you want to execute the shell commands**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Remove Tesseract installation\n", + "> If you have the old 4.x version, you should consider removing the installation with the following commands.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Mac (TBD)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Windows (TBD)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Ubuntu" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# !sudo apt-get remove tesseract-ocr\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tesseract installation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Mac (TBD)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Windows (TBD)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Ubuntu" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The **5.x** release series is available in [another PPA](https://launchpad.net/~alex-p/+archive/ubuntu/tesseract-ocr5) for Ubuntu **18.04**, **20.04**, and **22.04**.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# !sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "refresh system package cache in case you’re still running old Ubuntu 18.04" + ] + }, + { + "cell_type": "code", + 
"execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# !sudo apt update" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "install the software engine" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# !sudo apt install -y tesseract-ocr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "and check version:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[\n",
+       "    'tesseract 5.3.4',\n",
+       "    ' leptonica-1.84.1',\n",
+       "    '  libgif 5.2.1 : libjpeg 8d (libjpeg-turbo 3.0.0) : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : \n",
+       "libwebp 1.4.0 : libopenjp2 2.5.2',\n",
+       "    ' Found NEON',\n",
+       "    ' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6',\n",
+       "    ' Found libcurl/8.6.0 SecureTransport (LibreSSL/3.3.6) zlib/1.2.12 nghttp2/1.61.0'\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\n", + " \u001b[32m'tesseract 5.3.4'\u001b[0m,\n", + " \u001b[32m' leptonica-1.84.1'\u001b[0m,\n", + " \u001b[32m' libgif 5.2.1 : libjpeg 8d \u001b[0m\u001b[32m(\u001b[0m\u001b[32mlibjpeg-turbo 3.0.0\u001b[0m\u001b[32m)\u001b[0m\u001b[32m : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : \u001b[0m\n", + "\u001b[32mlibwebp 1.4.0 : libopenjp2 2.5.2'\u001b[0m,\n", + " \u001b[32m' Found NEON'\u001b[0m,\n", + " \u001b[32m' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6'\u001b[0m,\n", + " \u001b[32m' Found libcurl/8.6.0 SecureTransport \u001b[0m\u001b[32m(\u001b[0m\u001b[32mLibreSSL/3.3.6\u001b[0m\u001b[32m)\u001b[0m\u001b[32m zlib/1.2.12 nghttp2/1.61.0'\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "out = !tesseract --version # type: ignore\n", + "cprint(out)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install Tesseract languages" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
tessdata path: /opt/homebrew/share/tessdata\n",
+       "
\n" + ], + "text/plain": [ + "tessdata path: \u001b[35m/opt/homebrew/share/\u001b[0m\u001b[95mtessdata\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Installed languages:\n",
+       "[\n",
+       "    'afr, amh, ara, asm, aze, aze_cyrl, bel, ben, bod, bos, bre, bul, cat, ceb, ces',\n",
+       "    'chi_sim, chi_sim_vert, chi_tra, chi_tra_vert, chr, cos, cym, dan, deu, div, dzo, ell, eng, enm, \n",
+       "epo',\n",
+       "    'equ, est, eus, fao, fas, fil, fin, fra, frk, frm, fry, gla, gle, glg, grc',\n",
+       "    'guj, hat, heb, hin, hrv, hun, hye, iku, ind, isl, ita, ita_old, jav, jpn, jpn_vert',\n",
+       "    'kan, kat, kat_old, kaz, khm, kir, kmr, kor, kor_vert, lao, lat, lav, lit, ltz, mal',\n",
+       "    'mar, mkd, mlt, mon, mri, msa, mya, nep, nld, nor, oci, ori, osd, pan, pol',\n",
+       "    'por, pus, que, ron, rus, san, script/Arabic, script/Armenian, script/Bengali, \n",
+       "script/Canadian_Aboriginal, script/Cherokee, script/Cyrillic, script/Devanagari, script/Ethiopic, \n",
+       "script/Fraktur',\n",
+       "    'script/Georgian, script/Greek, script/Gujarati, script/Gurmukhi, script/HanS, script/HanS_vert, \n",
+       "script/HanT, script/HanT_vert, script/Hangul, script/Hangul_vert, script/Hebrew, script/Japanese, \n",
+       "script/Japanese_vert, script/Kannada, script/Khmer',\n",
+       "    'script/Lao, script/Latin, script/Malayalam, script/Myanmar, script/Oriya, script/Sinhala, \n",
+       "script/Syriac, script/Tamil, script/Telugu, script/Thaana, script/Thai, script/Tibetan, \n",
+       "script/Vietnamese, sin, slk',\n",
+       "    'slv, snd, snum, spa, spa_old, sqi, srp, srp_latn, sun, swa, swe, syr, tam, tat, tel',\n",
+       "    'tgk, tha, tir, ton, tur, uig, ukr, urd, uzb, uzb_cyrl, vie, yid, yor'\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "Installed languages:\n", + "\u001b[1m[\u001b[0m\n", + " \u001b[32m'afr, amh, ara, asm, aze, aze_cyrl, bel, ben, bod, bos, bre, bul, cat, ceb, ces'\u001b[0m,\n", + " \u001b[32m'chi_sim, chi_sim_vert, chi_tra, chi_tra_vert, chr, cos, cym, dan, deu, div, dzo, ell, eng, enm, \u001b[0m\n", + "\u001b[32mepo'\u001b[0m,\n", + " \u001b[32m'equ, est, eus, fao, fas, fil, fin, fra, frk, frm, fry, gla, gle, glg, grc'\u001b[0m,\n", + " \u001b[32m'guj, hat, heb, hin, hrv, hun, hye, iku, ind, isl, ita, ita_old, jav, jpn, jpn_vert'\u001b[0m,\n", + " \u001b[32m'kan, kat, kat_old, kaz, khm, kir, kmr, kor, kor_vert, lao, lat, lav, lit, ltz, mal'\u001b[0m,\n", + " \u001b[32m'mar, mkd, mlt, mon, mri, msa, mya, nep, nld, nor, oci, ori, osd, pan, pol'\u001b[0m,\n", + " \u001b[32m'por, pus, que, ron, rus, san, script/Arabic, script/Armenian, script/Bengali, \u001b[0m\n", + "\u001b[32mscript/Canadian_Aboriginal, script/Cherokee, script/Cyrillic, script/Devanagari, script/Ethiopic, \u001b[0m\n", + "\u001b[32mscript/Fraktur'\u001b[0m,\n", + " \u001b[32m'script/Georgian, script/Greek, script/Gujarati, script/Gurmukhi, script/HanS, script/HanS_vert, \u001b[0m\n", + "\u001b[32mscript/HanT, script/HanT_vert, script/Hangul, script/Hangul_vert, script/Hebrew, script/Japanese, \u001b[0m\n", + "\u001b[32mscript/Japanese_vert, script/Kannada, script/Khmer'\u001b[0m,\n", + " \u001b[32m'script/Lao, script/Latin, script/Malayalam, script/Myanmar, script/Oriya, script/Sinhala, \u001b[0m\n", + "\u001b[32mscript/Syriac, script/Tamil, script/Telugu, script/Thaana, script/Thai, script/Tibetan, \u001b[0m\n", + "\u001b[32mscript/Vietnamese, sin, slk'\u001b[0m,\n", + " \u001b[32m'slv, snd, snum, spa, spa_old, sqi, srp, srp_latn, sun, swa, swe, syr, tam, tat, tel'\u001b[0m,\n", + " \u001b[32m'tgk, tha, tir, ton, tur, uig, ukr, urd, uzb, uzb_cyrl, vie, yid, yor'\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + 
"out = !tesseract --list-langs # type: ignore\n", + "tessdata = Path(out[0].split('\"')[1])\n", + "cprint(f\"tessdata path: {tessdata}\")\n", + "cprint(\"Installed languages:\", [', '.join(sub) for sub in [out[i:i + 15] for i in range(1, len(out), 15)]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Install **best** languages and **jpn_vert** Tesseract lang\n", + "> Much better results than default langs and `jpn` language model.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Download from [tessdata_best](https://github.com/tesseract-ocr/tessdata_best). \n", + "Download from [here](https://groups.google.com/g/tesseract-ocr/c/FwjSZzoVgeg/m/u-zyFYQiBgAJ) a model trained for vertical Japanese text as found in manga.\n", + "\n", + "See [here](https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html) the language codes.\n", + "\n", + "> Note: I haven't played much with `jpn` or `jpn_vert`, `manga-ocr` is surely a much better fit, but it can be educational to compare." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Uncomment and execute to download the best language models:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/osd.traineddata\n", + "# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/eng.traineddata\n", + "# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/jpn.traineddata\n", + "\n", + "# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/jpn_vert.traineddata\n", + "# or\n", + "# !wget -O jpn_vert.traineddata https://github.com/zodiac3539/jpn_vert/blob/master/jpn_ver5.traineddata\n", + "\n", + "# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/spa.traineddata\n", + "# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/fra.traineddata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copy downloaded models to tessdata folder (double check that `tessdata` variable points to the right folder):\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
tessdata path: /opt/homebrew/share/tessdata\n",
+       "
\n" + ], + "text/plain": [ + "tessdata path: \u001b[35m/opt/homebrew/share/\u001b[0m\u001b[95mtessdata\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cprint(f\"tessdata path: {tessdata}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# !sudo mv *.traineddata $tessdata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "and remove the downloaded models:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "# !rm *.traineddata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check installed languages\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[\n",
+       "    Path('/opt/homebrew/share/tessdata/spa.traineddata'),\n",
+       "    Path('/opt/homebrew/share/tessdata/eng.traineddata'),\n",
+       "    Path('/opt/homebrew/share/tessdata/jpn_vert.traineddata'),\n",
+       "    Path('/opt/homebrew/share/tessdata/spa_old.traineddata'),\n",
+       "    Path('/opt/homebrew/share/tessdata/fra.traineddata'),\n",
+       "    Path('/opt/homebrew/share/tessdata/jpn.traineddata')\n",
+       "]\n",
+       "
def accuracy_ocr_naive(text, ground_truth):
    """
    Position-by-position character accuracy of an OCR result.

    Characters are compared pairwise up to the length of the shorter string and
    the number of matches is divided by the length of `text`, so ground-truth
    characters beyond the end of `text` are not penalized.

    :param text: The OCR-generated text.
    :param ground_truth: The ground truth text.
    :return: A float in [0, 1]. An empty `text` scores 1.0 against an empty
             ground truth and 0.0 otherwise (the same convention as
             `difflib.SequenceMatcher.ratio`).
    """
    if not text:
        # Nothing was recognized: avoid dividing by zero. That is only "correct"
        # when nothing was expected either.
        return 0.0 if ground_truth else 1.0
    matches = sum(1 for a, b in zip(text, ground_truth) if a == b)
    return matches / len(text)
def dilate_by_fractional_pixel(image, dilation_fraction, filter_base_size=3):
    """
    Dilates an image by a specified fractional pixel amount.

    The image is converted to grayscale, enlarged by ``int(1 / dilation_fraction)``,
    max-filtered at the enlarged size and resized back to its original size.

    :param image: A PIL Image object (1-bit mode).
    :param dilation_fraction: The desired fractional pixel amount for dilation (e.g., 0.2).
                              Must satisfy ``0 < dilation_fraction <= 1``.
    :param filter_base_size: The base size of the dilation filter to apply on the scaled image.
                             This size is adjusted based on the scaling factor to achieve the
                             desired dilation effect.
    :return: A PIL Image object after dilation, in grayscale ("L") mode and with the
             size of the input image.
    :raises ValueError: If `dilation_fraction` is not in the interval (0, 1].
    """
    # Validate first: 0 would divide by zero and anything above 1 would give a
    # scale factor of 0, i.e. a resize to an empty image.
    if not 0 < dilation_fraction <= 1:
        raise ValueError(
            f"dilation_fraction must be in the interval (0, 1], got {dilation_fraction!r}")

    from PIL.Image import Resampling

    # Calculate the scale factor based on the desired dilation fraction
    scale_factor = int(1 / dilation_fraction)

    # Adjust the filter size based on the scale factor
    # This ensures the dilation effect is proportional to the desired fraction
    # NOTE(review): with the default base size this yields a 1x1 kernel (no dilation)
    # for larger fractions such as 0.5 -- confirm the `// 5` calibration.
    filter_size = max(1, filter_base_size * scale_factor // 5)
    if filter_size % 2 == 0:
        # Rank filters need a kernel with a centre pixel; an even size is rejected,
        # so round up to the next odd size.
        filter_size += 1

    # Convert the image to grayscale for more nuanced intermediate values
    image_gray = image.convert("L")

    # Resize the image to a larger size using bicubic interpolation
    larger_size = (int(image.width * scale_factor), int(image.height * scale_factor))
    image_resized = image_gray.resize(larger_size, Resampling.BICUBIC)

    # Apply the dilation filter to the resized image
    dilated_image = image_resized.filter(ImageFilter.MaxFilter(filter_size))

    # Resize the image back to its original size using bicubic interpolation
    return dilated_image.resize(image.size, Resampling.BICUBIC)
class CropMethod(Enum):
    """Box cropping methods; each member's value is its human-readable label."""
    INITIAL_BOX = 'Initial box'
    DEFAULT = 'Default'
    DEFAULT_GREY_PAD = 'Default, grey pad'
    PADDED_4 = 'Padded 4px'
    PADDED_8 = 'Padded 8px'
    EXTRACTED_INIT_BOX = 'Extracted, init box'
    PADDED_4_EXTRACTED = 'Padded 4, extracted'
    PADDED_8_EXTRACTED = 'Padded 8, extracted'
    PADDED_8_DILATION_1 = 'Padded 8, dilation 1'
    PAD_8_FRACT_0_5 = 'Pad 8, fract. 0.5'
    PAD_8_FRACT_0_2 = 'Pad 8, fract. 0.2'

    @classmethod
    def __display_names__(cls):
        """Return a dict mapping every label to its member, in declaration order."""
        return {member.value: member for member in cls}
class ExperimentSubject:
    """
    A subject of an experiment, identified by its index in an `ExperimentContext`.

    Instances are cached by the context: constructing a subject for an index that
    already has one returns the cached instance instead of creating a new one.
    """
    exp: ExperimentContext
    idx: SubjIdT

    def setup(self, exp: ExperimentContext, idx: Any, *args, **kwargs):
        "Bind the subject to `exp` and store the normalized index. Returns `self`."
        self.exp = exp
        # String form keeps the alias a typing-only reference.
        self.idx = cast("SubjIdT", exp.normalize_idx(idx))
        return self

    def __new__(cls, exp: ExperimentContext, idx: Any, *args, **kwargs):
        """
        Return the subject cached in `exp` for `idx`, creating and registering it if needed.

        :param exp: The context that owns and caches the subject.
        :param idx: Any subject specification accepted by `exp.normalize_idx`.
        :raises ValueError: If `idx` does not designate a subject of `exp`.
        """
        nidx = exp.normalize_idx(idx)
        if nidx is None:
            # Fail here with the documented error instead of letting `None` reach
            # the range comparison in `setup_subject_context` (a TypeError).
            raise ValueError(f"Can't create new subject with idx: {idx}: out of range")
        self = exp.subject_context(nidx)
        if self is None:
            self = super().__new__(cls)
            self = exp.setup_subject_context(nidx, self, *args, **kwargs)
        if self is None:
            raise ValueError(f"Can't create new subject with idx: {idx}: out of range")
        return self
def path_from_idx(self, idx: SubjSpecT):
    """Relative path to the subject with the given index.

    :param idx: Any subject specification accepted by `normalize_idx`.
    :return: The subject's entry of `self._paths` as a `Path`.
    :raises ValueError: If `idx` does not designate a subject, or if the
                        subject's path does not exist.
    """
    _idx = self.normalize_idx(idx)
    if _idx is None:
        # Report what the caller asked for; `_idx` is always None on this branch.
        raise ValueError(f"{idx} not found in context.")
    path = Path(self._paths[_idx])
    # NOTE(review): `resolve()` interprets a relative path against the current
    # working directory, not against the experiment root -- confirm this is intended.
    if not path.resolve().exists():
        raise ValueError(f"{path} not found in context.")
    return path
def empty_cache_warning(self,
        idx: SubjIdT | None=None, *, warn: bool=True, out: W.Output | None=None):
    """Ask for confirmation, then empty the whole cache or the cache of subject `idx`.

    In a notebook (`FC.IN_NOTEBOOK`) a label with Confirm/Cancel buttons is displayed
    inside `out` and the cache is emptied when Confirm is clicked. Outside a notebook
    the cache is emptied right away.

    :param idx: Subject whose cache is emptied; `None` empties the whole cache.
    :param warn: Currently unused.
    :param out: Output widget to render into; a new `W.Output` is created when omitted.
    :return: The output widget that was used.
    """
    # Only bound to a widget box in the notebook branch below. The callbacks must
    # also work when there is no box (headless confirmation).
    confirmation_box = None

    def close_widgets():
        if confirmation_box is None:
            return
        for widget in confirmation_box.children:
            widget.close()

    def on_confirm_clicked(b):
        try:
            self.empty_cache(idx)
            print("Cache cleared successfully.")
        except Exception as e:
            # Best effort: report the failure instead of raising into the widget callback.
            print(f"Failed to clear cache: {e}")
        finally:
            close_widgets()

    def on_cancel_clicked(b):
        print("Cache clear cancelled.")
        close_widgets()

    if out is None:
        out = W.Output()
    cache_name = '' if idx is None else f" of '{self.subject_cache_dir(idx).name}'"
    text = f"Are you sure you want to clear the cache{cache_name}? This action cannot be undone."
    with out:
        if FC.IN_NOTEBOOK:
            confirm_button = W.Button(description="Confirm")
            cancel_button = W.Button(description="Cancel")
            confirm_button.on_click(on_confirm_clicked)
            cancel_button.on_click(on_cancel_clicked)
            label = W.Label(text, style={'font_size': '1.25em', 'font_weight': 'bold'})
            confirmation_box = W.VBox([label, W.HBox([confirm_button, cancel_button])])
            display(confirmation_box)
        else:
            on_confirm_clicked(None)
    return out
Note: doesn't change _dirty status, intended for use in testing.\n", + " self._subjects.clear()\n", + " self._subject_cache_dir.cache_clear()\n", + "\n", + " def cleanup_model(self):\n", + " pass\n", + "\n", + " def __init__(self, name: str, paths: list[Path], root: Path | None = None, run_name: str = 'run1'):\n", + " self.name = name\n", + " self._root = root or type(self).EXP_DIR\n", + " self._paths = paths # relative paths wrt self._root\n", + " self._subjects: dict[SubjIdT, ExperimentSubject] = {}\n", + " self._exp_runs: dict[str, ExperimentRun] = {}\n", + " self._results = {}\n", + " # default run\n", + " ExperimentRun(self, run_name)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`ExperimentSubject`s are created and cached in the `ExperimentContext`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "exp = ExperimentContext('test', [Path('b'), Path('c')], Path('../a'))\n", + "subj = exp.subject_context(5)\n", + "test_eq(subj, None)\n", + "\n", + "# subject hasn't been created yet\n", + "_ = exp.subject_context(1)\n", + "test_is(_, None)\n", + "\n", + "# Instantiate subject\n", + "subj1 = ExperimentSubject(exp, 1)\n", + "_ = exp.subject_context(1)\n", + "test_eq(_ is not None, True)\n", + "test_is(_, subj1)\n", + "test_is(subj1, ExperimentSubject(exp, 1))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can't create `ExperimentSubject`s beyond the `ExperimentContext` domain."
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "test_fail(lambda:ExperimentSubject(exp, 2), 'out of range')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All paths are relative to the root of the experiment.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "test_eq(exp.final('d/image.jpg'), Path('../a/d/image.jpg'))\n", + "test_eq(exp.final(Path('d/image.jpg')), Path('../a/d/image.jpg'))\n", + "p = Path('..').resolve()/'a/d/image.jpg'\n", + "test_eq(exp.final(p), Path('../a/d/image.jpg'))\n", + "test_eq(exp.final('../a/d/image.jpg'), Path('../a/../a/d/image.jpg'))\n", + "test_eq(exp.final('/other/a/d/image.jpg'), Path('_not_found_/image.jpg'))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`ExperimentRun`s are created and cached in the `ExperimentContext`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "run = exp.experiment_run('test')\n", + "test_eq(run, None)\n", + "\n", + "test_eq(exp.run_names, ['run1'])\n", + "\n", + "# Instantiate run\n", + "run = ExperimentRun(exp, 'test')\n", + "_ = exp.experiment_run('test')\n", + "test_eq(_ is not None, True)\n", + "test_is(_, run)\n", + "test_is(run, ExperimentRun(exp, 'test'))\n", + "\n", + "test_eq(exp.run_names, ['run1', 'test'])\n", + "\n", + "run2 = ExperimentRun(exp, 'test2')\n", + "\n", + "test_eq(exp.run_names, ['run1', 'test', 'test2'])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ImageContext\n", + "> A utility class to maintain image state for an `OCRExperimentContext`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "ImgIdT = SubjIdT\n", + "ImgSpecT: TypeAlias = ImgIdT | str | 
Path\n", + "\n", + "\n", + "class ImageContext(ExperimentSubject):\n", + " \"\"\"\n", + " A utility class to maintain image state for a ExperimentContext.\n", + "\n", + " Attributes:\n", + " json_data (dict): JSON data loaded from cached files.\n", + " page_data (st.PageData): PanelClaner page data.\n", + " base_image (Image.Image): The base image loaded from the cache.\n", + " mask (Image.Image): The mask image used for text detection.\n", + " gts (list[str]): Ground truth data for the text in the images.\n", + " mask_dilated1 (Image.Image): Image mask dilated by 1 pixel.\n", + " mask_dilated05 (Image.Image): Image mask dilated by 0.5 pixels.\n", + " mask_dilated02 (Image.Image): Image mask dilated by 0.2 pixels.\n", + "\n", + " \"\"\"\n", + " exp: ExperimentContext\n", + " idx: ImgIdT\n", + " base_image: Image.Image\n", + " mask: Image.Image\n", + " json_data: dict | None\n", + " page_data: st.PageData\n", + " _page_lang: str\n", + " _gts: list[str]\n", + " _mask_dilated1: Image.Image | None\n", + " _mask_dilated05: Image.Image | None\n", + " _mask_dilated02: Image.Image | None\n", + " \n", + " def to_dict(self):\n", + " return {\n", + " 'image_idx': self.idx,\n", + " 'page_lang': self.page_lang,\n", + " }\n", + " \n", + " @property\n", + " def image_idx(self): return self.idx\n", + " @property\n", + " def cache_dir(self): \n", + " \"Relative path of the cache directory for this image.\"\n", + " return self.exp.subject_cache_dir(self.idx)\n", + " \n", + " @property\n", + " def image_info(self): \n", + " img = self.base_image\n", + " w, h = img.size\n", + " print_size_in = size(w, h, 'in', 300)\n", + " print_size_cm = size(w, h, 'cm', 300)\n", + " required_dpi = dpi(w, h, 'Modern Age')\n", + " return (w, h), print_size_in, print_size_cm, required_dpi\n", + "\n", + " @property\n", + " def original_image_path(self): return Path(self.page_data.original_path)\n", + " @property\n", + " def image_path(self): return Path(self.page_data.image_path)\n", + " @property\n", + 
def setup_page_lang(self, page_lang: str | None = None):
    """Set the page language and keep the page's JSON metadata file in sync.

    The metadata file sits next to the original image, with a `.json` suffix.
    If it already stores a language and `page_lang` is `None` or equal to it,
    the stored value is used and the file is left untouched. Otherwise
    `page_lang` (or 'English' when `None`) is stored and the file is rewritten.

    :param page_lang: Language name to set, or `None` to use the stored/default one.
    """
    path = self.exp.final(self.page_data.original_path).with_suffix('.json')
    # Read and write through pathlib so the file handle is always closed and
    # the encoding does not depend on the platform default.
    metadata = json.loads(path.read_text(encoding='utf-8')) if path.exists() else {}
    if 'lang' in metadata and (page_lang == metadata['lang'] or page_lang is None):
        self._page_lang = metadata['lang']
        return
    self._page_lang = metadata['lang'] = page_lang or 'English'
    path.write_text(json.dumps(metadata, indent=2), encoding='utf-8')
@dataclasses.dataclass
class ResultOCR(Result):
    """
    OCR result for one text box of an image.

    Attributes:
        subject_ctx: The image context the box belongs to.
        block_idx: Index of the box; also used to look up its ground truth.
        ocr: The recognized text, or None if OCR has not produced text yet.
        image: The cropped box image. When omitted it is loaded from the crop
            cache if a cached file exists.
        description: Keyword-only tag that becomes part of the cache file name.
    """
    subject_ctx: ImageContext
    block_idx: int
    ocr: str | None = None
    image: Image.Image | None = None
    description: str = dataclasses.field(default='', kw_only=True)

    def __post_init__(self):
        self._acc = None
        if self.image is None:
            # Reuse a previously cached crop when there is one.
            cache_path = self.image_ctx.exp.final(self.cache_path())
            if cache_path.exists():
                self.image = Image.open(cache_path)

    @property
    def image_ctx(self):
        "Alias of `subject_ctx`."
        return self.subject_ctx

    @property
    def acc(self):
        "Similarity of `ocr` to the box ground truth; the last stored value (initially None) while `ocr` is None."
        if self.ocr is not None:
            self._acc = accuracy_ocr_difflib(self.ocr, self.image_ctx.gts[self.block_idx])
        return self._acc

    @property
    def suffix(self):
        "File-name suffix built from the box index and the description."
        return f"{self.block_idx}_{self.description}"

    def cache_path(self, suffix: str | None = None):
        "Path of the cached crop inside the image cache directory, with an optional extra `suffix`."
        img_ctx = self.image_ctx
        suffix = self.suffix + (('_'+suffix) if suffix else '')
        img_name = img_ctx.image_path.stem
        return img_ctx.cache_dir / '.crop' / f"{img_name}_{suffix}.png"

    def cache_image(self, image: Image.Image | None = None, suffix: str | None = None):
        """Save `image` to the crop cache unless the file already exists.

        Without `image`, the result's own image is saved, but only when no extra
        `suffix` is given. Returns the cache path (not resolved by the experiment).
        """
        image = image or (self.image if not suffix else None)
        box_image_path = self.cache_path(suffix)
        final_path = self.image_ctx.exp.final(box_image_path)
        if image and not final_path.exists():
            final_path.parent.mkdir(parents=True, exist_ok=True)
            image.save(final_path)
        return box_image_path

    def to_dict(self):
        "Plain dict of the fields, with the context and image entries set to None."
        # Shallow field copy: `dataclasses.asdict` would deep-copy the whole
        # subject context (images included) only for it to be discarded.
        d = {f.name: getattr(self, f.name) for f in dataclasses.fields(self)}
        # 'image_ctx', 'page_data' and 'gts' are not fields of this class; they are
        # kept as None entries so the produced keys stay a superset of the old ones.
        d['subject_ctx'] = d['image_ctx'] = d['image'] = d['page_data'] = d['gts'] = None
        return d

    # @classmethod
    # def from_dict(cls, d: dict, page_data: st.PageData, gts: list[str]):
    #     return cls(**(d | {'page_data':page_data, 'gts':gts}))

    def __repr__(self):
        acc = self.acc
        # No accuracy exists before OCR has run; `None` cannot take a float format.
        acc_text = 'n/a' if acc is None else f"{acc:.2f}"
        return f"{type(self).__name__}#block {self.block_idx:02}: {acc_text}||{self.ocr}"

    def display(self):
        "Display the result as HTML, through `_default_visor_type` when that attribute is set."
        visor = getattr(self, '_default_visor_type', None)
        display(HTML(
            visor(self).as_html() if visor else f"{self}"
        ))

    def _ipython_display_(self): self.display()
class OCRModel(Enum):
    """Identifiers of the OCR models: Tesseract and Idefics."""
    TESSERACT = 0
    IDEFICS = 1

    @staticmethod
    def __display_names__() -> dict[str, OCRModel]:
        """Return each model's display label mapped to its member, in declaration order."""
        return {
            "Tesseract": OCRModel.TESSERACT,
            "Idefics": OCRModel.IDEFICS,
        }
@classmethod
def get_config(cls) -> cfg.Config:
    """Load the PanelCleaner config and adapt its current profile for OCR experiments.

    The preprocessor section of the current profile is modified so that every
    box gets OCRed and reported. If the config cannot provide a text-detector
    model path, the models are downloaded into the config's model cache dir
    and registered as the default torch / cv2 model paths.
    """
    conf = cfg.load_config()

    unlimited = 10**10
    overrides = {
        # Make sure OCR is enabled.
        "ocr_enabled": True,
        # Infinite max size, so no boxes are skipped in the OCR process.
        "ocr_max_size": unlimited,
        # Infinite sus box min size, so all boxes with "unknown" language are skipped.
        "suspicious_box_min_size": unlimited,
        # Blacklist pattern matching everything, so all text gets reported in the analytics.
        "ocr_blacklist_pattern": ".*",
    }
    preprocessor = conf.current_profile.preprocessor
    for option, value in overrides.items():
        setattr(preprocessor, option, value)

    # Load models if needed
    has_gpu = torch.cuda.is_available() or torch.backends.mps.is_available()
    if conf.get_model_path(has_gpu) is not None:
        return conf

    # don't mess with normal PanelCleaner, download models directly
    import pcleaner.model_downloader as md
    cache_dir = conf.get_model_cache_dir()
    conf.default_torch_model_path = md.download_torch_model(cache_dir)
    conf.default_cv2_model_path = md.download_cv2_model(cache_dir)
    return conf
@contextlib.contextmanager
def running(self, value: bool):
    """Temporarily set ``self._running`` to *value* for the duration of a ``with`` block.

    The value that was in place before entering is restored on exit, whether
    the body finishes normally or raises.
    """
    previous = self._running
    self._running = value
    try:
        yield
    finally:
        # Without the finally clause an exception raised inside the with-body
        # would skip the restore and leave the flag stuck at *value*.
        self._running = previous
def path_from_idx(self, image_idx: ImgSpecT, cached: bool = False):
    """Return the (relative) path to a subject image.

    Args:
        image_idx: Image specifier, resolved through ``self.normalize_idx``.
        cached: When true, return the ``image_path`` recorded in the page data
            (loading the page data if needed) instead of the entry in
            ``self.image_paths``.

    Raises:
        ValueError: If the specifier cannot be resolved, or if the resolved
            path does not exist once passed through ``self.final``.
    """
    _idx = self.normalize_idx(image_idx)
    if _idx is None:
        # Report what the caller passed: _idx is always None on this branch,
        # so interpolating it would only ever print "None not found".
        raise ValueError(f"{image_idx} not found in context.")
    if cached:
        page_data = self.page_data(_idx)  # load if needed
        path = Path(page_data.image_path)
    else:
        path = self.image_paths[_idx]
    if not self.final(path).exists():
        raise ValueError(f"{path} not found in context.")
    return path
result_cls(img_ctx, int(box_idx), None, None, description=f\"{method.value}\")\n", + " if ocr is not None:\n", + " result.ocr = ocr\n", + " return result\n", + " \n", + " def result(self, \n", + " run_name: str,\n", + " image_idx: ImgIdT, box_idx: BoxIdT, method: CropMethod, \n", + " ocr: bool=True, \n", + " rebuild: bool=False) -> ResultOCR | None:\n", + " _result = self._results[run_name][image_idx][box_idx].get(method)\n", + " if not rebuild and _result is not None:\n", + " return _result\n", + " \n", + " result: ResultOCR = self._result_from(image_idx, box_idx, method)\n", + " image, cropped_image, cropped_mask = result.image, None, None\n", + " img_ctx = ImageContext(self, image_idx)\n", + " base_image = img_ctx.base_image\n", + " box = img_ctx.boxes[box_idx]\n", + " if image is None and method in _IMAGE_METHODS:\n", + " image = crop_by_image(\n", + " method, box, base_image, self.config.current_profile.preprocessor)\n", + "\n", + " if image is None and method in _EXTRACTED_METHODS:\n", + " mask = img_ctx.mask\n", + " cropped_image_path = result.cache_image(cropped_image, \"cropped\")\n", + " cropped_mask_path = result.cache_image(cropped_mask, \"mask\")\n", + " if not cropped_image_path.exists() or not cropped_mask_path.exists():\n", + " image, cropped_image, cropped_mask = crop_by_extracted(\n", + " method, box, base_image, mask, \n", + " cropped_image_path, cropped_mask_path, img_ctx.dilated())\n", + " \n", + " assert image is not None\n", + " if result.image is None:\n", + " result.image = image\n", + " result.cache_image()\n", + " if cropped_image is not None:\n", + " result.cache_image(cropped_image, \"cropped\")\n", + " if cropped_mask is not None:\n", + " result.cache_image(cropped_mask, \"mask\")\n", + " \n", + " exp_run = OCRExperimentRun(self, run_name)\n", + " if ocr:\n", + " exp_run.before_result(result)\n", + " result = self.ocr_box(result, img_ctx.page_lang)\n", + " exp_run.after_result(result)\n", + " 
def _reset_results_(self,
        run_name: str | None = None,
        image_idx: int | None = None,
        box_idx: int | None = None,
        method: CropMethod | None = None):
    """Drop stored results, optionally narrowed by run, image, box and crop method.

    Each argument left as ``None`` means "all" at that level. With every
    argument ``None`` the whole store is rebuilt via ``self._reset_results()``.
    When *method* is given only that method's entry is removed from each
    selected box; otherwise the selected boxes are removed entirely. Boxes,
    images and runs that end up empty are pruned from the store.
    """
    if run_name is None and image_idx is None and box_idx is None and method is None:
        self._reset_results()
        return
    results = self._results

    def _selected(nodes, key):
        # None selects a snapshot of every key (safe to delete while looping);
        # a concrete key selects itself only if present. Membership tests do
        # not trigger defaultdict auto-creation.
        if key is None:
            return tuple(nodes)
        return (key,) if key in nodes else ()

    # Loop variables deliberately differ from the parameter names: reusing
    # `box_idx` as the loop variable used to overwrite the filter, so every
    # image after the first was reset for a single box only.
    for run in _selected(results, run_name):
        img_nodes = results[run]
        for img in _selected(img_nodes, image_idx):
            box_nodes = img_nodes[img]
            for box in _selected(box_nodes, box_idx):
                if method is None:
                    del box_nodes[box]
                    continue
                methods = box_nodes[box]
                methods.pop(method, None)
                if not methods:
                    del box_nodes[box]
            if not box_nodes:
                del img_nodes[img]
        if not img_nodes:
            del results[run]
def to_json(self):
    """Write ``self.to_dict()`` as indented JSON to ``<self._root>/<self.name>.json``.

    Returns:
        The path of the written file.
    """
    # Build the payload before opening the target, so a failure in to_dict()
    # cannot leave a truncated file behind.
    data = self.to_dict()
    fp = self._root / f'{self.name}.json'
    with open(fp, 'w') as f:
        json.dump(data, f, indent=2)
    return fp
@classmethod
def from_json(cls, root_dir: Path, name: str, config: cfg.Config | None = None):
    """Build a context from ``<root_dir>/<name>.json``.

    The file must provide an ``ocr_model`` entry and a ``runs`` mapping; each
    run is fed to ``_load_run_results``. Any error while reading or parsing
    the file is logged and re-raised. When *config* is falsy, the config is
    obtained from ``cls.get_config()``.
    """
    source = root_dir / f'{name}.json'
    try:
        with open(source, 'r') as fh:
            payload = json.load(fh)
    except Exception as e:
        logger.error(f"Error loading {source}: {e}")
        raise e

    if not config:
        config = cls.get_config()
    # NOTE(review): the constructor runs with its default `load=True`, and
    # `__init__` then calls `_from_json()` as well; confirm that loading a
    # second time in the loop below is intended.
    ctx = cls(payload['ocr_model'], root_dir, config=config)
    for run_name, run_results in payload['runs'].items():
        ctx._load_run_results(run_name, run_results)
    return ctx
@FC.patch_to(ImageContext)
def setup(self, exp: OCRExperimentContext, image_idx: ImgSpecT, page_lang: str | None = None):
    """Prepare an image context for OCR experiments.

    Runs the parent ``setup``, clears the cached dilated masks, pulls the
    page's JSON and page data from *exp*, sets up the page language, opens the
    mask and base images through ``exp.final`` and sets up the ground truth.
    """
    # Name the class explicitly. This function is defined outside the class
    # body, so zero-argument super() is unavailable, and super(type(self), self)
    # resolves relative to the runtime class: for any subclass of ImageContext
    # it would dispatch back to this same method and recurse forever.
    super(ImageContext, self).setup(exp, image_idx)
    self._mask_dilated1 = self._mask_dilated05 = self._mask_dilated02 = None
    self.json_data, self.page_data = exp._load_page_data(self.idx)
    self.setup_page_lang(page_lang)
    self.mask = Image.open(exp.final(self.mask_path))
    self.base_image = Image.open(exp.final(self.image_path))
    self.setup_ground_truth()
"test_eq(rr, {'Tesseract': {0: {0: {CM.INITIAL_BOX: 'a'}}}})\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SimpleResultVisor\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class SimpleResultVisor:\n", + " ctx: ResultOCR\n", + "\n", + " @classmethod\n", + " def diff_tagged(cls, result: ResultOCR):\n", + " _, html = get_text_diffs_html(result.image_ctx.gts[result.block_idx], result.ocr, False)\n", + " return f\"{html}\"\n", + "\n", + " def as_html(self):\n", + " result = self.ctx\n", + " DI = cast(OCRExperimentContext, result.image_ctx.exp).DI\n", + " if isinstance(result, ResultOCRExtracted):\n", + " return self.as_html_extracted()\n", + " has_ocr = result.ocr is not None\n", + " acc_html = ''\n", + " if has_ocr:\n", + " acc_html = f\"
{result.acc:.2f}\"\n", + " html_str1, html_str2 = get_text_diffs_html(result.image_ctx.gts[result.block_idx], result.ocr)\n", + " box_image_path = result.cache_image()\n", + " html1 = get_columns_html([[DI(box_image_path)], [(result.ocr or '') + acc_html]])\n", + " if has_ocr:\n", + " html2 = f\"
{html_str1}
{html_str2}
\"\n", + " else:\n", + " html2 = f\"
{html_str1}
\"\n", + " return html1 + '\\n
\\n' + html2\n", + "\n", + " def as_html_extracted(self):\n", + " result = self.ctx\n", + " has_ocr = result.ocr is not None\n", + " DI = cast(OCRExperimentContext, result.image_ctx.exp).DI\n", + " html_str1, html_str2 = get_text_diffs_html(result.image_ctx.gts[result.block_idx], result.ocr)\n", + " if has_ocr:\n", + " diff_html = f\"
{html_str1}
{html_str2}
\"\n", + " else:\n", + " diff_html = f\"
{html_str1}
\"\n", + " cropped_image_path = result.cache_image(None, \"cropped\")\n", + " cropped_mask_path = result.cache_image(None, \"mask\")\n", + " result_path = result.cache_image()\n", + " return '\\n
\\n'.join([\n", + " get_image_grid_html([\n", + " DI(cropped_image_path), DI(cropped_mask_path), DI(result_path)], 1, 3), \n", + " acc_as_html(result.acc) if has_ocr else '', \n", + " diff_html\n", + " ])\n", + " \n", + " def display(self): display(HTML(self.as_html()))\n", + " def _ipython_display_(self): self.display()\n", + "\n", + " def __init__(self, ctx: ResultOCR):\n", + " self.ctx = ctx\n", + "\n", + "\n", + "ResultOCR._default_visor_type = SimpleResultVisor # type: ignore\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ExperimentContext.EXP_DIR\n", + "> Directory where the source images resides (`source/`) and the resulting images will be cached (`cache/`).\n", + "\n", + "By default, is set to `../experiment/`, and that's the value used by any `ExperimentContext` if not explicitly set on `__init__`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "EXP_DIR = ExperimentContext.EXP_DIR\n", + "test_eq(EXP_DIR, Path('../experiment'))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CONTEXT\n", + "> `CONTEXT` is an `ExperimentContext` object that contains the configuration and the list of image paths.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## PanelCleaner Configuration\n", + "> Adapt `PanelCleaner` `Config` current config to this notebook.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get PanelCleaner config the usual way or through `OCRExperimentContext`" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "# CONFIG = cfg.load_config()\n", + "\n", + "# profile = CONFIG.current_profile\n", + "# preprocessor_conf = profile.preprocessor\n", + "# # Modify the profile to OCR all boxes.\n", + "# # Make sure 
OCR is enabled.\n", + "# preprocessor_conf.ocr_enabled = True\n", + "# # Make sure the max size is infinite, so no boxes are skipped in the OCR process.\n", + "# preprocessor_conf.ocr_max_size = 10**10\n", + "# # Make sure the sus box min size is infinite, so all boxes with \"unknown\" language are skipped.\n", + "# preprocessor_conf.suspicious_box_min_size = 10**10\n", + "# # Set the OCR blacklist pattern to match everything, so all text gets reported in the analytics.\n", + "# preprocessor_conf.ocr_blacklist_pattern = \".*\"\n", + "\n", + "# gpu = torch.cuda.is_available() or torch.backends.mps.is_available()\n", + "# model_path = CONFIG.get_model_path(gpu)\n", + "\n", + "# if model_path is None:\n", + "# # don't mess with normal PanelCleaner, download models directly\n", + "# import pcleaner.model_downloader as md\n", + "# model_dir = CONFIG.get_model_cache_dir()\n", + "# CONFIG.default_torch_model_path = md.download_torch_model(model_dir)\n", + "# CONFIG.default_cv2_model_path = md.download_cv2_model(model_dir)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Creates the `OCRExperimentContext` object we'll use to manage experiments.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "CONTEXT = OCRExperimentContext('Tesseract')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current Configuration:\n", + "\n", + "Locale: System default\n", + "Default Profile: Built-in\n", + "Saved Profiles:\n", + "- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n", + "- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n", + "\n", + "Profile Editor: cursor\n", + "Cache Directory: System default\n", + "Default Torch Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n", + "Default CV2 
Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n", + "GUI Theme: System default\n", + "\n", + "--------------------\n", + "\n", + "Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n", + "System default cache directory: /Users/vic/Library/Caches/pcleaner\n" + ] + }, + { + "data": { + "text/html": [ + "
 config cache_dir: None\n",
+       "       model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n",
+       "           device: 'mps'\n",
+       "
\n" + ], + "text/plain": [ + " config cache_dir: \u001b[3;35mNone\u001b[0m\n", + " model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n", + " device: \u001b[32m'mps'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
        force_PIL: False\n",
+       "       use_tunnel: False\n",
+       "       server_url: \n",
+       "   experiment dir: ../experiment\n",
+       "       source_dir: ../experiment/source\n",
+       "        cache_dir: ../experiment/cache\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + " force_PIL: \u001b[3;91mFalse\u001b[0m\n", + " use_tunnel: \u001b[3;91mFalse\u001b[0m\n", + " server_url: \n", + " experiment dir: ..\u001b[35m/\u001b[0m\u001b[95mexperiment\u001b[0m\n", + " source_dir: ..\u001b[35m/experiment/\u001b[0m\u001b[95msource\u001b[0m\n", + " cache_dir: ..\u001b[35m/experiment/\u001b[0m\u001b[95mcache\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "test_eq(CONTEXT.config.current_profile.preprocessor.ocr_enabled, True)\n", + "test_eq(CONTEXT.config.current_profile.preprocessor.ocr_max_size, 10**10)\n", + "test_eq(CONTEXT.config.current_profile.preprocessor.suspicious_box_min_size, 10**10)\n", + "test_eq(CONTEXT.config.current_profile.preprocessor.ocr_blacklist_pattern, \".*\")\n", + "\n", + "test_eq(CONTEXT.root_dir, EXP_DIR)\n", + "test_eq(CONTEXT.cache_dir, Path(ExperimentContext.CACHE_DIR_NAME))\n", + "test_eq(CONTEXT.source_dir, Path(ExperimentContext.SOURCE_DIR_NAME))\n", + "\n", + "CONTEXT.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test images\n", + "> `IMAGE_PATHS` is a list of image file paths that are used as input for testing the OCR methods." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copy your images to the source directory or use the standard set:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['00: Action_Comics_1960-01-00_(262).JPG',\n", + " '01: Adolf_Cap_01_008.jpg',\n", + " '02: Barnaby_v1-028.png',\n", + " '03: Barnaby_v1-029.png',\n", + " '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n", + " '05: Cannon-292.jpg',\n", + " '06: Contrato_con_Dios_028.jpg',\n", + " '07: Erase_una_vez_en_Francia_02_88.jpg',\n", + " '08: FOX_CHILLINTALES_T17_012.jpg',\n", + " '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n", + " '10: Galactus_12.jpg',\n", + " '11: INOUE_KYOUMEN_002.png',\n", + " '12: MCCALL_ROBINHOOD_T31_010.jpg',\n", + " '13: MCCAY_LITTLENEMO_090.jpg',\n", + " '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n", + " '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n", + " '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n", + " '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n", + " '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n", + " '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n", + " '20: Strange_Tales_172005.jpg',\n", + " '21: Strange_Tales_172021.jpg',\n", + " '22: Tarzan_014-21.JPG',\n", + " '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n", + " '24: Transformers_-_Unicron_000-004.jpg',\n", + " '25: Transformers_-_Unicron_000-016.jpg',\n", + " '26: WARE_ACME_024.jpg',\n", + " '27: Yoko_Tsuno_T01_1972-10.jpg',\n", + " '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n", + " '29: manga_0033.jpg',\n", + " '30: ronson-031.jpg',\n", + " '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "IMAGE_PATHS = CONTEXT.image_paths\n", + "\n", + 
class RunSelector(ContextVisor):
    """Visor exposing a dropdown to pick one experiment run by name."""

    def __init__(self,
            exp_ctx: OCRExperimentContext,
            run_name: str | None=None,
            run_names: list[str] | None = None,
            **kwargs
        ):
        # Fall back to the runs known by the context when no explicit list is given.
        self.run_names: list[str] = run_names or exp_ctx.run_names
        # The first available run is the initial selection unless one is requested.
        initial = run_name or self.run_names[0]
        super().__init__(exp_ctx, {'run_name': initial})

    def setup_controls(self):
        """Create the controls: a single dropdown listing the run names."""
        dropdown = W.Dropdown(
            options=self.run_names,
            value=self.values['run_name'],
            layout={'width': 'fit-content'},
            style={'description_width': 'initial'})
        return {'run_name': dropdown}

    def setup_ui(self):
        """Lay out the parent's component UIs followed by the run dropdown group."""
        group = W.HBox([self.controls['run_name']])
        group.add_class('model_grp')
        return W.HBox([*super().comps_ui(), group])
clear_output()\n", + " \n", + " def setup_style(self):\n", + " self.out.add_class('message_visor-yXy')\n", + " return super().setup_style()\n", + "\n", + " def setup_controls(self):\n", + " # w = W.Label(value=self.values['msg'] if self.values['msg'] != self.EMPTY else None, \n", + " # layout={'width': 'fit-content'})\n", + " return {}#{'msg': w}\n", + "\n", + " def __init__(self, \n", + " msg: str | None = None,\n", + " **kwargs\n", + " ):\n", + " super().__init__(None, \n", + " {'msg': msg}, \n", + " **kwargs)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cleanupwidgets('message_visor')\n", + "\n", + "message_visor = MessageVisor('Hello, Earth-616!')\n", + "message_visor\n" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "message_visor.update(msg='Hello, mirror universe!')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "message_visor.update(msg=MessageVisor.EMPTY)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "message_visor.update(msg=\"seeya'!\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "cleanupwidgets('message_visor')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Status visor\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class StatusVisor(ContextVisor):\n", + " ctx: OCRExperimentContext\n", + "\n", + " @property\n", + " def save_button(self) -> W.Button: return self.controls['save'] # type: ignore\n", + " 
@property\n", + " def reset_button(self) -> W.Button: return self.controls['reset'] # type: ignore\n", + " \n", + " def setup_controls(self):\n", + " style={'font_size': '1em', 'font_weight': 'bold'}\n", + " sw = W.Button(description = 'save', style=style,\n", + " layout={'width': '4em'},\n", + " )\n", + " rw = W.Button(description = 'reset', style=style,\n", + " layout={'width': '5em', 'margin': '0px 0px 0px 3em'},\n", + " )\n", + " return {'save': sw, 'reset': rw}\n", + "\n", + " def setup_ux(self):\n", + " source = (self.ctx, '_dirty')\n", + " target = (self.save_button, 'disabled')\n", + " T.dlink(source, target, lambda x: not x)\n", + " target = (self.save_button.style, 'button_color')\n", + " T.dlink(source, target, lambda x: 'lightblue' if x else None)\n", + " \n", + "\n", + " def __init__(self, exp_ctx: OCRExperimentContext, **kwargs):\n", + " super().__init__(exp_ctx, {}, **kwargs)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bd2189871d114efda2d8289cff17f061", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HBox(children=(Button(description='save', disabled=True, layout=Layout(width='4em'), style=Butt…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cleanupwidgets('status_visor')\n", + "\n", + "CONTEXT._dirty = False\n", + "status_visor = StatusVisor(CONTEXT)\n", + "status_visor\n" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
Where the end of this divining rod turns to the earth we'll find water.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "result = CONTEXT.result(RUN_NAME, 2, 3, CropMethod.INITIAL_BOX, ocr=False)\n", + "result\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "CONTEXT._dirty = False\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## OCRModelSelector\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class OCRModelSelector(ContextVisor):\n", + " ctx: OCRExperimentContext\n", + " \n", + " def setup_controls(self):\n", + " options = self.models\n", + " w = W.Dropdown(\n", + " options=options, \n", + " value=self.values['model'],\n", + " layout={'width': 'fit-content'},\n", + " style={'description_width': 'initial'})\n", + " return {'model': w}\n", + "\n", + " def setup_ui(self):\n", + " ctls = self.controls\n", + " model_grp = W.HBox([ctls['model']])\n", + " model_grp.add_class('model_grp')\n", + " ui = W.HBox([*super().comps_ui(), model_grp])\n", + " return ui\n", + "\n", + " def __init__(self, \n", + " exp_ctx: OCRExperimentContext,\n", + " ocr_model: OCRModel | None=OCRModel.TESSERACT,\n", + " ocr_models: dict[str, OCRModel] | None = None,\n", + " out: W.Output | None = None\n", + " ):\n", + " self.models: dict[str, OCRModel] = ocr_models or OCRModel.__display_names__()\n", + " super().__init__(exp_ctx, \n", + " {'model': ocr_model or OCRModel.TESSERACT}, \n", + " out=out or self.out)#, ctxs=[exp_visor])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "372113f76b744e858ab64c6e2eb4e750", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + 
"VBox(children=(HBox(children=(HBox(children=(Dropdown(layout=Layout(width='fit-content'), options={'Tesseract'…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "model_selector = OCRModelSelector(CONTEXT)\n", + "model_selector\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ContentSelector" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class DisplayOptions(Enum):\n", + " BOXES = 0\n", + " IMAGE = 1\n", + " MASK = 2\n", + " IMAGE_MASK = 3\n", + " PAGE_DATA = 4\n", + " GROUND_TRUTH = 5\n", + " ALL = 6\n", + " RESULTS = 7\n", + " BEST_RESULTS = 8\n", + " ACCURACY = 9\n", + " DATAFRAME = 10\n", + " CONFIG = 11\n", + "\n", + " @staticmethod\n", + " def __display_names__():\n", + " return dict(\n", + " zip(\"Boxes, Image, Mask, Image & Mask, Page data, Ground truth, Image All, Results, \"\n", + " \"Best results, Accuracy, Dataframe, Config\".split(', '), \n", + " DisplayOptions))\n", + "\n", + "\n", + "class ContentSelector(ContextVisor):\n", + " ctx: OCRExperimentContext\n", + "\n", + " def image_info(self, image_ctx: ImageContext): \n", + " img = image_ctx.base_image\n", + " (w, h), print_size_in, print_size_cm, required_dpi = image_ctx.image_info\n", + " format = PRINT_FORMATS['Modern Age']\n", + " cprint( f\"{'Width x Height':>30}: {w} x {h} pixels\\n\"\n", + " f\"{'PIL Info DPI':>30}: {repr(img.info.get('dpi', None))}\\n\"\n", + " f\"{'Print Size 300 DPI':>30}: {print_size_in[0]:.3f} x {print_size_in[1]:.3f} in\"\n", + " f\" / {print_size_cm[0]:.3f} x {print_size_cm[1]:.3f} cm\\n\"\n", + " f\"Required DPI Modern Age format: {required_dpi:.3f} dpi \"\n", + " f\"({format[0]:.3f} x {format[1]:.3f} in)\")\n", + "\n", + "\n", + " def display_content(self, image_ctx: ImageContext, display_option: DisplayOptions):\n", + " DI = self.ctx.DI\n", + " if 
display_option in (DisplayOptions.ALL, DisplayOptions.PAGE_DATA):\n", + " self.image_info(image_ctx)\n", + " RenderJSON(image_ctx.json_data, 350, 2).display()\n", + " if display_option in (DisplayOptions.ALL, DisplayOptions.GROUND_TRUTH):\n", + " cprint(image_ctx.gts)\n", + " if display_option == DisplayOptions.CONFIG:\n", + " self.ctx.show()\n", + " if display_option == DisplayOptions.IMAGE:\n", + " display_image_grid([DI(image_ctx.image_path)], 1, 1)\n", + " if display_option == DisplayOptions.MASK:\n", + " display_image_grid([DI(image_ctx.mask_path)], 1, 1)\n", + " if display_option in (DisplayOptions.ALL, DisplayOptions.IMAGE_MASK):\n", + " display_image_grid([DI(image_ctx.image_path), DI(image_ctx.mask_path)], 1, 2)\n", + " if display_option in (DisplayOptions.ALL, DisplayOptions.BOXES):\n", + " display_image_grid([DI(image_ctx.image_boxes_path)], 1, 1)\n", + "\n", + "\n", + " def setup_controls(self):\n", + " options = self.display_options or {**DisplayOptions.__display_names__()}\n", + " display_option_wdgt = W.Dropdown(\n", + " options=options, \n", + " value=self.values['display_option'],\n", + " layout={'width': '120px'},\n", + " style={'description_width': 'initial'})\n", + " return {'display_option': display_option_wdgt}\n", + "\n", + "\n", + " def setup_ui(self):\n", + " ctls = self.controls\n", + " display_option_grp = W.HBox([ctls['display_option']])\n", + " display_option_grp.add_class('display_option_grp')\n", + " comps = self.comps_ui()\n", + " ui = W.HBox([*comps, display_option_grp])\n", + " return ui\n", + "\n", + "\n", + " def __init__(self, \n", + " exp_ctx: OCRExperimentContext,\n", + " display_option: DisplayOptions | None=DisplayOptions.BOXES,\n", + " display_options: Mapping[str, DisplayOptions] | None = None,\n", + " **kwargs\n", + " ):\n", + " self.display_options = display_options\n", + " super().__init__(exp_ctx, \n", + " {'display_option': display_option or DisplayOptions.BOXES}, \n", + " **kwargs)#, ctxs=[exp_visor])\n" + ] + }, + { 
+ "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "379b9649230a41a1bb6f8bf778aeb332", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HBox(children=(HBox(children=(Dropdown(index=2, layout=Layout(width='120px'), options={'Boxes':…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "content_selector = ContentSelector(CONTEXT, DisplayOptions.MASK)\n", + "content_selector\n" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "content_selector.update(display_option=DisplayOptions.MASK)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ImageSelector" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class ImageSelector(ContextVisor):\n", + " ctx: OCRExperimentContext\n", + "\n", + " @property\n", + " def image_ctx(self):\n", + " return ImageContext(self.ctx, self.values['image_idx'])\n", + " \n", + " def setup_controls(self):\n", + " paths = self.ctx.image_paths\n", + " w = W.Dropdown(\n", + " options={_.stem:i for i,_ in enumerate(paths)}, \n", + " value=self.values['image_idx'],\n", + " # layout={'width': 'fit-content'},\n", + " layout={'width': '25em'},\n", + " style={'description_width': 'initial'})\n", + " return {'image_idx': w}\n", + "\n", + " def update(self, image_idx: ImgSpecT | None = None, **kwargs):\n", + " if image_idx is None: return\n", + " idx = self.ctx.normalize_idx(image_idx)\n", + " if idx is None: return\n", + " super().update(image_idx=idx, **kwargs)\n", + "\n", + "\n", + " def __init__(self, \n", + " ctx: OCRExperimentContext, /, \n", + " image_idx: ImgSpecT = 0, \n", + " **kwargs):\n", + " idx = ctx.normalize_idx(image_idx)\n", + " assert idx 
is not None, f\"Image {image_idx} not found in experiment context\"\n", + " super().__init__(ctx, {'image_idx': idx}, **kwargs)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9cc6381da8e646a1b118578ad328e52a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HBox(children=(Dropdown(index=2, layout=Layout(width='25em'), options={'Action_Comics_1960-01-0…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cleanupwidgets('image_selector')\n", + "\n", + "image_selector = ImageSelector(CONTEXT, 2)\n", + "image_selector\n" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "image_selector.update(13)\n", + "test_eq(image_selector.values['image_idx'], 13)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## OCRContextVisor" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "#| export\n", + "class OCRContextVisor(ContextVisor):\n", + " ctx: OCRExperimentContext\n", + " \n", + " def update_output(self, /, image_idx: ImgIdT, **kwargs):\n", + " ctx = self.ctx\n", + " img_path = ctx.path_from_idx(image_idx, cached=ctx.force_PIL)\n", + " img = ctx.display_image(img_path)\n", + " display_image_grid([img], 1, 1)\n", + "\n", + " def update(self, image_idx: ImgSpecT | None = None, **kwargs):\n", + " if image_idx is None: return\n", + " idx = self.ctx.normalize_idx(image_idx)\n", + " if idx is None: return\n", + " super().update(image_idx=idx, **kwargs)\n", + " \n", + " def __init__(self, \n", + " ctx: OCRExperimentContext, /, \n", + " image_idx: ImgSpecT = 0, *, \n", + " out: W.Output | None=None):\n", + " super().__init__(ctx, {}, out, \n", + " ctxs={'image_idx': ImageSelector(ctx, 
image_idx, out=self.out)})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bbf5ee92ee4944018eca90195350d5a4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(VBox(children=(VBox(children=(Dropdown(index=2, layout=Layout(width='fit-content'), options={'A…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cleanupwidgets('ctx_visor')\n", + "\n", + "# ContextVisor(CONTEXT)\n", + "# ContextVisor(CONTEXT).display(3)\n", + "ctx_visor = OCRContextVisor(CONTEXT, 2)\n", + "ctx_visor\n" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "ctx_visor.update('Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Base image\n", + "> Change `BASE_IMAGE_IDX` to select a different base image to use in the examples below." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "BASE_IMAGE_IDX: ImgIdT = cast(ImgIdT, CONTEXT.normalize_idx(\"Strange_Tales_172005.jpg\"))\n", + "\n", + "assert BASE_IMAGE_IDX is not None\n", + "img_path = CONTEXT.final(CONTEXT.image_paths[BASE_IMAGE_IDX])\n", + "assert img_path.exists()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Empty cache\n", + "> Clear the image cache used profusely throughout the examples below." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "You will be warned before the cache is emptied." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [], + "source": [ + "# CONTEXT.empty_cache_warning()" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "# CONTEXT.empty_cache_warning(30)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ImageContext of base image\n", + "> Creates the `ImageContext` for the base image.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If `PanelCleaner` page data is already cached, it is loaded from the cache.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [], + "source": [ + "CONTEXT._reset_()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "page_lang = 'English'\n", + "# page_lang = 'Japanese'\n", + "# page_lang = 'Spanish'\n", + "# page_lang = 'French'\n", + "\n", + "IMAGE_CONTEXT = ImageContext(CONTEXT, BASE_IMAGE_IDX, page_lang=page_lang)\n", + "test_eq(IMAGE_CONTEXT.page_data is not None, True)\n", + "# cprint(IMAGE_CONTEXT.page_data.boxes)\n", + "RenderJSON(IMAGE_CONTEXT.json_data, 360, 2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [], + "source": [ + "test_is(IMAGE_CONTEXT, ImageContext(CONTEXT, BASE_IMAGE_IDX))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Visualize image" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Strange_Tales_172005.jpg - 1275x1888 px: 4.25x6.29\" @ 188.32 dpi
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display_image_grid([\n", + " CONTEXT.DI(IMAGE_CONTEXT.image_path), \n", + " CONTEXT.DI(IMAGE_CONTEXT.mask_path)\n", + " ], 1, 2, caption=IMAGE_CONTEXT.image_name_rich)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "boxes_path = IMAGE_CONTEXT.image_boxes_path\n", + "display_image_grid([CONTEXT.DI(boxes_path)], 1, 1)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image browser" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ImageContextVisor" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "notebookRunGroups": { + "groupValue": "2" + } + }, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class ImageContextVisor(ContextVisor):\n", + " ctx: ImageContext\n", + " # control_names: list[str] = ['display_option']\n", + "\n", + " _css = \"\"\"\n", + " .display_option_grp {\n", + " background-color: lightblue;\n", + " }\n", + " \"\"\"\n", + "\n", + " def image_info(self): \n", + " content_selector = cast(ContentSelector, self.comp('display_option'))\n", + " content_selector.image_info(self.ctx)\n", + "\n", + " def update_output(self, \n", + " display_option: DisplayOptions | None = None,\n", + " image_idx: ImgIdT | None = None,\n", + " **kwargs):\n", + " content_selector = cast(ContentSelector, self.comp('display_option'))\n", + " if image_idx is not None and image_idx != self.ctx.image_idx:\n", + " ctx = ImageContext(self.ctx.exp, image_idx)\n", + " assert ctx is not None\n", + " self.ctx = ctx\n", + " display_option = content_selector.values['display_option']\n", + " if display_option is None:\n", + " return\n", + " content_selector.display_content(self.ctx, display_option)\n", + " \n", + " def update(self, \n", + " display_option: DisplayOptions | None=None, \n", + " image_idx: ImgSpecT | None=None,\n", + " **kwargs):\n", + " if image_idx is not None:\n", + " if (idx := self.ctx.exp.normalize_idx(image_idx)) is not None:\n", + " kwargs['image_idx'] = idx\n", + " super().update(display_option=display_option, **kwargs)\n", + "\n", + " def __init__(self, \n", + " 
exp_ctx: OCRExperimentContext,\n", + " img_idx: ImgSpecT | ImageContext,\n", + " display_option: DisplayOptions=DisplayOptions.BOXES,\n", + " display_options: Mapping[str, DisplayOptions] | None = None,\n", + " out: W.Output | None = None\n", + " ):\n", + " if isinstance(img_idx, ImageContext):\n", + " ctx = img_idx\n", + " else:\n", + " assert exp_ctx is not None, \"exp_ctx must be provided if img_idx is not an ImageContext\"\n", + " ctx = ImageContext(exp_ctx, img_idx)\n", + " assert ctx is not None, f\"Image {img_idx} not found in experiment context\"\n", + " if display_options is None:\n", + " display_options = {k: v for k, v in DisplayOptions.__display_names__().items() \n", + " if k not in ('Results', 'Accuracy', 'Best results', 'Dataframe', 'Config')}\n", + " out = out or self.out\n", + " content_selector = ContentSelector(exp_ctx, \n", + " display_option=display_option, display_options=display_options, out=out)\n", + " image_selector = ImageSelector(exp_ctx, ctx.image_idx, out=out)\n", + " super().__init__(ctx, {}, out=out, \n", + " ctxs={'image_idx': image_selector, 'display_option': content_selector})\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Easier to use `ImageContextVisor` to display interactively images, masks, and associated page data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [], + "source": [ + "CONTEXT._reset_()" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8bfbbd7344224964994f913744b75bdc", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HTML(value=\"\"\n", + " self._style = W.HTML(stl)\n", + " else:\n", + " self._style = ''\n", + " return self._style\n", + " def setup_style(self):\n", + " collate = [_.setup_style() for _ in self.comps.values()]\n", + " if self._css: collate.append(self._css)\n", + 
" return '\\n'.join([_ for _ in collate if _])\n", + " \n", + " def update_output(self, **kwargs): \n", + " cprint(kwargs)\n", + " \n", + " def setup_controls(self) -> dict[str, CtlT]:\n", + " widgets = [W.interactive.widget_from_abbrev(v) for k, v in self.values.items()]\n", + " widgets = {k:W.fixed(v) if w is None else w for (k, v), w in zip(self.values.items(), widgets)}\n", + " # return {k: W.Label(value=str(k)) for k,w in self.values.items()}\n", + " return widgets\n", + " \n", + " def hide(self): \n", + " if (w := self.w) is not NO_UI: \n", + " w.layout.visibility = 'hidden'\n", + " def show(self): \n", + " if (w := self.w) is not NO_UI: \n", + " w.layout.visibility = 'visible'\n", + "\n", + " def comps_ui(self):\n", + " comps = []\n", + " if self._inited: \n", + " for visor in self.comps.values():\n", + " if (visor_ui := visor._setup_ui()) is not None:\n", + " comps.append(visor_ui)\n", + " return comps\n", + "\n", + "\n", + " def _setup_ui(self):\n", + " if not self._inited: return\n", + " w = self.setup_ui()\n", + " if w is not None:\n", + " w.add_class('context-visor')\n", + " w.add_class(str(id(self)))\n", + " return w\n", + " \n", + " def setup_ui(self) -> W.DOMWidget | None:\n", + " \"\"\"Get the container widget for this comp.\n", + " This method should be the only one called when the comp is nested inside other comp.\n", + " \"\"\"\n", + " uis = [*self.comps_ui(), *self.controls.values()]\n", + " return self._ui_cls(uis) if uis else None\n", + "\n", + " def setup_display(self): \n", + " \"Generates one time ui\"\n", + " if not self._inited: return\n", + " if self._w is None:\n", + " self._w = self._setup_ui()\n", + " \n", + "\n", + " def _output(self, **kwargs):\n", + " # group keys by comp\n", + " collator = defaultdict(dict)\n", + " for k,v in kwargs.items():\n", + " if (comp := self.handler(k)) is not None:\n", + " collator[comp][k] = v\n", + " else:\n", + " # keys w/out control assigned, considered internal state\n", + " collator[self][k] = v\n", 
+ " # group comps by output\n", + " outs = defaultdict(list)\n", + " for comp, kw in collator.items():\n", + " outs[comp.out].append((comp, kw))\n", + " for out, g in outs.items():\n", + " show_inline_matplotlib_plots()\n", + " with out:\n", + " clear_output(wait=True)\n", + " for comp, kw in g:\n", + " comp.update_output(**kw)\n", + " show_inline_matplotlib_plots()\n", + "\n", + " def _observe(self, change):\n", + " control_name = self._ctl2name[change['owner']]\n", + " kwargs = {control_name: change['new']}\n", + " updated = self._update(**kwargs)\n", + " self._output(**updated)\n", + " def setup_ux(self): pass\n", + " def _setup_ux(self): \n", + " for visor in self.comps.values():\n", + " visor._setup_ux()\n", + " self.setup_ux()\n", + " def interactive_output(self):\n", + " controls = self.all_controls\n", + " all_values = self._all_values()\n", + " for k,w in controls.items():\n", + " if k in all_values:\n", + " w.observe(self._observe, 'value')\n", + " \n", + " def display(self, **kwargs): \n", + " if not self._inited: return\n", + " if self._w is None:\n", + " self.setup_display()\n", + " self.interactive_output()\n", + " self._update(**(self.values | kwargs))\n", + " all_values = self._all_values()\n", + " self._hdlrs = {k:self._hdlrs.get(k, self) for k in all_values}\n", + " self._output(**all_values)\n", + " # ux final touches once everything (including outputs) is setup\n", + " # for visor in [*self.comps.values(), self]:\n", + " # visor.setup_ux()\n", + " self._setup_ux()\n", + " stl = self.styler\n", + " ui: list = [stl] if stl else []\n", + " if (w := self.w) is not NO_UI:\n", + " ui.append(w)\n", + " for comp in [*self.comps.values(), self]:\n", + " if comp._out is not None:\n", + " ui.append(comp._out)\n", + " self._final = W.VBox(ui)\n", + " self._display_handle = display(self._final, display_id=str(id(self)))\n", + " else:\n", + " self.update(**kwargs)\n", + " def _ipython_display_(self): self.display()\n", + "\n", + " def _update(self, 
update_value: bool=True, **kwargs):\n", + " updated = {}\n", + " for visor in self.comps.values():\n", + " updated.update(visor._update(update_value=update_value, **kwargs))\n", + " values = self.values\n", + " my_vals = _pops_(kwargs, self.values.keys())\n", + " for k,v in my_vals.items():\n", + " if v is not None and v != values[k]:\n", + " if update_value: \n", + " values[k] = v\n", + " updated[k] = v\n", + " return updated\n", + " def update(self, **kwargs):\n", + " updated = self._update(update_value=False, **kwargs)\n", + " controls = self.all_controls\n", + " for k,v in updated.items():\n", + " if k in controls:\n", + " if hasattr((ctl := controls[k]), 'value'):\n", + " ctl.value = v # will trigger update (self._observe)\n", + " elif k in (vv := self._name2comp):\n", + " # update manually\n", + " comp = vv[k]\n", + " if v != comp.values[k]:\n", + " comp.values[k] = v\n", + " self._output(**{k:v})\n", + " \n", + "\n", + " def close(self):\n", + " controls = self.all_controls\n", + " for w in controls.values():\n", + " try: w.unobserve(self._observe, 'value')\n", + " except: pass\n", + " if isinstance(w, W.Widget):\n", + " w.close()\n", + " for visor in self._ctxs.values():\n", + " if w := getattr(visor, '_w', None): w.close()\n", + " if visor._out is not self._out:\n", + " if o := getattr(visor, '_out', None): o.close()\n", + " visor.close()\n", + " if w := getattr(self, '_w', None): w.close()\n", + " if o := getattr(self, '_out', None): o.close()\n", + " if f := self._final: f.close()\n", + " if self._display_handle is not None:\n", + " self._display_handle.update(HTML(UPDATE_SCRIPT))\n", + "\n", + "\n", + " def __del__(self): \n", + " self.close()\n", + "\n", + " def __init__(self, \n", + " ctx: Any, \n", + " values: dict[str, Any], \n", + " out: W.Output | None = None,\n", + " ctxs: dict[str, ContextVisor] | None = None,\n", + " hdlrs: dict[str, ContextVisor] | None = None,\n", + " css: str | None = None,\n", + " ):\n", + " # Only setup some state. 
Controls, values and containers will be setup only when explicitly displayed\n", + " self._display_handle = None\n", + " self._final = None\n", + " self.ctx = ctx\n", + " self.values = values or {}\n", + " self._out = out\n", + " self._ctxs = comps = ctxs or {}\n", + " self._hdlrs = hdlrs or {}\n", + " if css is not None:\n", + " self._css = css\n", + " self._name2comp = name2comp = {}\n", + " for n,vv in self.all_values.items():\n", + " comp = comps.get(n, self)\n", + " for k in vv:\n", + " name2comp[k] = comp\n", + " self._inited = True\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cleanupwidgets('test_visor')\n", + "\n", + "test_visor = ContextVisor(None, {'a': 1})\n", + "test_visor\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "test_eq(test_visor.values, {'a': 1})\n", + "test_visor.update(a='2')\n", + "test_eq(test_visor.values, {'a': 2})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "cleanupwidgets('test_visor')\n", + "test_eq(test_visor.w.comm, None)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "24553fa5d9f048939f4ac38bc266ea81", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(VBox(children=(VBox(children=(Text(value='bbb'),), _dom_classes=('context-visor', '13217200496'…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cleanupwidgets('vsr1')\n", + "\n", + "vsr1 = ContextVisor(\n", + " None, {'a': 3}, \n", + " ctxs={'vsr2': ContextVisor(None, {'b': 'bbb'})}, \n", + ")\n", + "test_eq(vsr1.values, {'a': 3})\n", + 
"test_eq(vsr1.all_values, {'vsr2': {'b': 'bbb'}, 'self': {'a': 3}})\n", + "vsr1.display()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "vsr1.update(a=2)\n", + "test_eq(vsr1.all_values, {'vsr2': {'b': 'bbb'}, 'self': {'a': 2}})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "vsr1.update(a=0, b='ccc')\n", + "test_eq(vsr1.all_values, {'vsr2': {'b': 'ccc'}, 'self': {'a': 0}})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c1e38a469327423b876d69297283e9d1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(VBox(children=(VBox(children=(Text(value='bbb'),), _dom_classes=('context-visor', '13218749008'…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cleanupwidgets('vsr2')\n", + "\n", + "vsr3 = ContextVisor(None, {'b': 'bbb'})\n", + "vsr2 = ContextVisor(\n", + " None, {'a': 3}, \n", + " ctxs={'vsr3': vsr3}, \n", + " hdlrs={'b': vsr3}\n", + ")\n", + "vsr2.display()\n", + "test_eq(vsr2.values, {'a': 3})\n", + "test_eq(vsr2.all_values, {'vsr3': {'b': 'bbb'}, 'self': {'a': 3}})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "vsr2.update(a=1, b='ccc')\n", + "test_eq(vsr2.values, {'a': 1})\n", + "test_eq(vsr2.all_values, {'vsr3': {'b': 'ccc'}, 'self': {'a': 1}})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "class Vsr1(ContextVisor):\n", + " _css = \"\"\"\n", + " .vsr1 {\n", + " border: 1px solid red;\n", + " }\n", + " \"\"\"\n", + " def setup_ui(self) -> 
W.DOMWidget | None:\n", + " w = super().setup_ui()\n", + " if w is not None:\n", + " w.add_class('vsr1')\n", + " return w\n", + "\n", + "class Vsr2(ContextVisor):\n", + " _css = \"\"\"\n", + " .vsr2 {\n", + " border: 1px solid green;\n", + " }\n", + " \"\"\"\n", + " def setup_ui(self) -> W.DOMWidget | None:\n", + " uis = [*self.comps_ui(), *self.controls.values()]\n", + " w = W.HBox(uis) if uis else None\n", + " if w is not None:\n", + " w.add_class('vsr2')\n", + " return w\n", + "\n", + "vsr1 = Vsr1(\n", + " None, {'a': 3}, \n", + " ctxs={'vsr2': Vsr2(None, {'b': 'bbb'})}, \n", + " css = \"\"\"\n", + " .vsr1 {\n", + " border: 1px solid red;\n", + " }\n", + " \"\"\"\n", + ")\n", + "test_eq(vsr1.values, {'a': 3})\n", + "test_eq(vsr1.all_values, {'vsr2': {'b': 'bbb'}, 'self': {'a': 3}})\n", + "vsr1.display()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "cleanupwidgets('vsr1')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Spinner" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "\n", + "spinner_css = \"\"\"\n", + " .wrapper-spinner {\n", + " overflow: hidden;\n", + " width: fit-content;\n", + " height: fit-content;\n", + " }\n", + " \n", + " .loading-spinner {\n", + " display: flex;\n", + " align-items: center;\n", + " justify-content: center;\n", + " border: 1px solid white;\n", + " border-radius: 50%;\n", + " }\n", + " \n", + " .spinner {\n", + " border: |border_width|px solid rgba(128,128,128,.5);\n", + " border-radius: 50%;\n", + " border-left-color: red;\n", + " animation: spin 1s infinite linear;\n", + " }\n", + " \n", + " @keyframes spin {\n", + " 0% { transform: rotate(0deg); }\n", + " 100% { transform: rotate(360deg); }\n", + " }\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# 
display(HTML(f\"\"))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# def loading_spinner(size=36, border_width=4):\n", + "# bw = border_width*2\n", + "# html: str = f'''\n", + "#
\n", + "#
\n", + "#
\n", + "# '''\n", + "# return html" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "# cleanupwidgets('spinner')\n", + "\n", + "# spinner = W.HTML(loading_spinner(24), \n", + "# # layout={\n", + "# # 'overflow': 'hidden', \n", + "# # 'width': 'fit-content', \n", + "# # 'height': 'fit-content',\n", + "# # 'border': '1px solid green'\n", + "# # }\n", + "# )\n", + "# spinner.add_class('wrapper-spinner')\n", + "# spinner\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "# spinner.layout.display = 'none'\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "# spinner.layout.display = 'block'\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "# cleanupwidgets('spinner')" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class Spinner(ContextVisor):\n", + " ctx: T.HasTraits\n", + "\n", + " def loading_spinner(self, size=36, border_width=4):\n", + " bw = border_width*2\n", + " html: str = f'''\n", + "
\n", + "
\n", + "
\n", + " '''\n", + " return html\n", + " \n", + " def setup_controls(self):\n", + " spinner = W.HTML(self.loading_spinner(self.size, self.border_width))\n", + " spinner.add_class('wrapper-spinner')\n", + " return {'spinner': spinner}\n", + "\n", + " @property\n", + " def spinner(self) -> W.HTML: return self.controls['spinner'] # type: ignore\n", + "\n", + " def hide(self): self.spinner.layout.display = 'none'\n", + " def show(self): self.spinner.layout.display = 'block'\n", + "\n", + " def setup_ux(self):\n", + " source = (self.ctx, '_running')\n", + " target = (self.spinner.layout, 'display')\n", + " self._link = T.dlink(source, target, lambda x: 'block' if x else 'none')\n", + "\n", + " def close(self):\n", + " if l := getattr(self, '_link', None): l.unlink()\n", + " super().close()\n", + "\n", + " def __init__(self, \n", + " ctx: T.HasTraits,\n", + " size: int = 24,\n", + " border_width: int = 4,\n", + " **kwargs\n", + " ):\n", + " self.size = size\n", + " self.border_width = border_width\n", + " self._link = None\n", + " super().__init__(ctx, {}, css=spinner_css.replace('|border_width|', str(border_width)), **kwargs)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cleanupwidgets('spinner')\n", + "\n", + "class _Test(T.HasTraits):\n", + " _running = T.Bool(True)\n", + "\n", + "test = _Test()\n", + "spinner = Spinner(test)\n", + "spinner.display()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "# spinner.hide()\n", + "test._running = False\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "test._running = True\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + 
"cleanupwidgets('spinner')\n", + "test_eq(test._trait_notifiers, {'_running': {'change': []}})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cleanupwidgets('vsr3')\n", + "\n", + "vsr3 = Vsr1(\n", + " None, {'a': 3}, \n", + " ctxs={'vsr2': Vsr2(None, {'b': 'bbb'}, ctxs={'spinner': Spinner(test, 20, 3)})}, \n", + " css = \"\"\"\n", + " .vsr3 {\n", + " border: 1px solid red;\n", + " }\n", + " \"\"\"\n", + ")\n", + "test_eq(vsr3.values, {'a': 3})\n", + "test_eq(vsr3.all_values, {'vsr2': {'b': 'bbb'}, 'self': {'a': 3}})\n", + "vsr3.display()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "test._running = False\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "test._running = True\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "cleanupwidgets('vsr3')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Colophon\n", + "----\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "import fastcore.all as FC\n", + "from nbdev.export import nb_export\n" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "if FC.IN_NOTEBOOK:\n", + " nb_export('visor.ipynb', '..')\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/pcleaner/_testbed/nbs/web_server.ipynb b/pcleaner/_testbed/nbs/web_server.ipynb new file mode 100644 index 00000000..c505abd4 --- /dev/null +++ b/pcleaner/_testbed/nbs/web_server.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"code","execution_count":1,"metadata":{},"outputs":[],"source":["#| default_exp testbed/web_server"]},{"cell_type":"code","execution_count":2,"metadata":{},"outputs":[],"source":["#| export\n","\n","from __future__ import annotations\n"]},{"cell_type":"code","execution_count":3,"metadata":{},"outputs":[],"source":["#| hide\n","# %reload_ext autoreload\n","# %autoreload 0\n"]},{"cell_type":"markdown","metadata":{},"source":["# install (Colab)"]},{"cell_type":"code","execution_count":4,"metadata":{},"outputs":[],"source":["MOUNT_DRIVE = True\n","DEV_INSTALL = True\n","GDRIVE_MOUNT_POINT = 'drive'\n"]},{"cell_type":"code","execution_count":5,"metadata":{},"outputs":[],"source":["import os\n","from pathlib import Path\n","import fastcore.all as FC\n","from rich import print as cprint\n","from rich.text import Text\n","\n","def info(msg: str):\n"," text = Text(msg)\n"," text.stylize(\"bold red\", 0, 6)\n"," cprint(\"_\" * 10, text, \"_\" * 10)\n","\n","\n","if FC.IN_COLAB:\n"," if MOUNT_DRIVE:\n"," mnt_point = f\"/content/{GDRIVE_MOUNT_POINT}\"\n"," if not Path(mnt_point).exists():\n"," info(\"Mounting Google Drive\")\n"," from google.colab import drive\n","\n"," drive.mount(mnt_point, force_remount=True)\n"]},{"cell_type":"markdown","metadata":{},"source":["Colab has issues with PanelClenar PIL version.: uninstall Colab one, restart wen prompted and rerun from the top.\n"]},{"cell_type":"code","execution_count":6,"metadata":{},"outputs":[],"source":["if FC.IN_COLAB:\n"," from packaging import version\n"," import PIL\n"," pil_version = version.parse(PIL.__version__)\n"," if pil_version < version.parse(\"10\"):\n"," info('Uninstalling Pillow')\n"," !pip uninstall 
Pillow\n"," info('Installing Pillow')\n"," !pip install Pillow\n"]},{"cell_type":"code","execution_count":7,"metadata":{},"outputs":[],"source":["if FC.IN_COLAB:\n"," info('Installing PanelCleaner')\n"," if DEV_INSTALL:\n"," assert MOUNT_DRIVE, \"DEV_INSTALL need a mounted google drive drive\"\n"," info('Installing PanelCleaner from Google Drive')\n"," os.chdir('/content/drive/MyDrive/Shared/PanelCleaner/')\n"," !pip install -e .\n"," else:\n"," info('Installing PanelCleaner from Github')\n"," !pip install -q git+https://github.com/civvic/PanelCleaner.git@testbed-colab\n"]},{"cell_type":"code","execution_count":8,"metadata":{},"outputs":[],"source":["if FC.IN_COLAB:\n"," info('Installing PanelCleaner Colab requirements')\n"," import importlib.resources\n"," if DEV_INSTALL:\n"," os.chdir('pcleaner/_testbed')\n"," \n"," try:\n"," package_path = importlib.resources.files('pcleaner')\n"," info('Installing PanelCleaner testbed requirements')\n"," p = (Path(package_path)/'_testbed/requirements-colab.txt')\n"," if p.exists():\n"," !pip install -r {p}\n"," else:\n"," print(f\"colab requirements {p} not found\")\n"," except Exception:\n"," info(\"Couldn't install PanelCleaner Colab requirements\")\n"]},{"cell_type":"markdown","metadata":{},"source":["# Basic web server for serving images from Google Drive\n"]},{"cell_type":"markdown","metadata":{},"source":["# Prologue"]},{"cell_type":"code","execution_count":9,"metadata":{},"outputs":[],"source":["#| export\n","import getpass\n","import http.server\n","import os\n","import signal\n","import socketserver\n","import threading\n","import uuid\n","from http import HTTPStatus\n","from pathlib import Path\n","from typing import Protocol\n","\n","import portpicker\n","import psutil\n","import requests\n","import rich\n","from IPython.display import display\n","from IPython.display import HTML\n","from loguru import logger\n","from pyngrok import conf\n","from pyngrok import ngrok\n","from rich.console import 
Console\n"]},{"cell_type":"code","execution_count":10,"metadata":{},"outputs":[],"source":["#| exporti\n","from pcleaner._testbed.testbed.bottle import Bottle\n","from pcleaner._testbed.testbed.bottle import HTTPError\n","from pcleaner._testbed.testbed.bottle import response\n","from pcleaner._testbed.testbed.bottle import run\n","from pcleaner._testbed.testbed.bottle import static_file\n"]},{"cell_type":"code","execution_count":11,"metadata":{},"outputs":[],"source":["import fastcore.all as FC\n","import fastcore.xtras # patch Path with some utils\n","from fastcore.test import * # type: ignore\n","\n","import pcleaner._testbed.testbed.bottle as bottle\n"]},{"cell_type":"markdown","metadata":{},"source":["# Helpers"]},{"cell_type":"code","execution_count":12,"metadata":{},"outputs":[],"source":["# pretty print by default\n","# %load_ext rich"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[],"source":["#| exporti\n","console = Console(width=104, tab_size=4, force_jupyter=True)\n","cprint = console.print\n"]},{"cell_type":"code","execution_count":14,"metadata":{},"outputs":[],"source":["#| export\n","\n","def display_ngrok_warning(url):\n"," did = 'ngrokFrame' + str(uuid.uuid4())\n"," html_code = f\"\"\"\n","
\n","
\n","

Ngrok displays a warning page as a security measure to prevent unintentional access to your local servers. This page requires you to confirm that you wish to proceed to the content.

\n","

Please review the ngrok warning page displayed below. If prompted, click 'Visit Page' to proceed.

\n"," Don't worry if you see a 404 or 403 error. Then, you can click the 'Close' button below to hide this section.

\n","
\n"," \n"," \n","
\n","\"\"\"\n","\n"," display(HTML(html_code))\n"]},{"cell_type":"code","execution_count":25,"metadata":{},"outputs":[],"source":["#| export\n","\n","class WebServer(Protocol):\n"," @property\n"," def public_url(self) -> str | None: ...\n"," @property\n"," def unc_share(self) -> Path | None: ...\n"," @property\n"," def prefix(self) -> str: ...\n"," @property\n"," def running(self) -> bool: ...\n"," def __init__(self, directory: Path | str = \"\"): ...\n"," def start(self): ...\n"," def stop(self): ...\n","\n","\n","def setup_ngrok(server_cls: type[WebServer], images_dir: str | Path):\n"," cprint(\n"," \"Enter your ngrok authtoken, which can be copied from \"\n"," \"https://dashboard.ngrok.com/get-started/your-authtoken\"\n"," )\n"," auth_token = getpass.getpass()\n"," conf.get_default().auth_token = auth_token\n"," ngrok.set_auth_token(auth_token)\n","\n"," server = server_cls(directory=str(images_dir))\n"," try:\n"," server.start()\n"," except Exception as e:\n"," cprint(f\"Error starting server: {e}\")\n"," return None\n","\n"," display_ngrok_warning(f\"{server.public_url}/{server.prefix}/pcleaner.png\")\n"," return server\n"]},{"cell_type":"markdown","metadata":{},"source":["----"]},{"cell_type":"markdown","metadata":{},"source":["Modify `cache_dir` path to point to the directory containing the images you want to serve\n"]},{"cell_type":"code","execution_count":26,"metadata":{},"outputs":[],"source":["cache_dir = Path('../experiment/cache')\n","test_eq(cache_dir.exists(), True)\n"]},{"cell_type":"markdown","metadata":{},"source":["# WebServerStdlib\n","> simple web server based on `http.server` and `ngrok` as reverse proxy\n"]},{"cell_type":"code","execution_count":27,"metadata":{},"outputs":[],"source":["#| exporti\n","\n","class ImageHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):\n"," def end_headers(self):\n"," for k,v in {\n"," 'ngrok-skip-browser-warning': 'true',\n"," 'User-Agent': 'MyCustomUserAgent/1.0',\n"," 'Cache-Control': 'public, 
max-age=86400'\n"," }.items():\n"," self.send_header(k, v)\n"," super().end_headers()\n","\n"," def do_GET(self):\n"," if self.is_image_request(self.path):\n"," super().do_GET()\n"," else:\n"," self.send_error(HTTPStatus.FORBIDDEN, \"Only image files are accessible.\")\n","\n"," def is_image_request(self, path):\n"," allowed_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.webp')\n"," _, ext = os.path.splitext(path)\n"," return ext.lower() in allowed_extensions\n"," \n"," def __init__(self, *args, **kwargs):\n"," super().__init__(*args, directory=self.directory, **kwargs)\n"]},{"cell_type":"code","execution_count":28,"metadata":{"executionInfo":{"elapsed":230,"status":"ok","timestamp":1715453246273,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"VJQya1SRr9ce"},"outputs":[],"source":["#| export\n","\n","class WebServerStdlib:\n"," \"\"\"\n"," A simple web server for serving images from a local directory using http.server and ngrok.\n"," \n"," It is intended to be used in environments like Google Colab, where direct\n"," web server hosting might not be feasible. 
It uses ngrok to allow images to be accessed \n"," via a public URL.\n"," \n"," Attributes:\n"," directory (str): The directory from which files are served.\n"," port (int): The local port on which the server listens.\n"," public_url (str): The ngrok public URL where the server is accessible.\n"," tunnel (ngrok.NgrokTunnel): The ngrok tunnel object.\n"," \n"," Methods:\n"," start(): Starts the web server and the ngrok tunnel.\n"," stop(): Stops the web server and disconnects the ngrok tunnel.\n"," make_request(path=\"/\"): Makes a request to the ngrok URL to fetch data from the server.\n"," \"\"\"\n"," \n"," def __init__(self, directory: Path | str=\"\"):\n"," port = portpicker.pick_unused_port()\n"," self.port = port\n"," if isinstance(directory, str): \n"," directory = Path(directory)\n"," assert directory.exists(), f\"Directory {directory} does not exist\"\n"," self.directory = str(directory.resolve())\n"," self.thread = None\n"," self.httpd = None\n"," self.public_url = None\n"," self.prefix = ''\n"," self.unc_share = None\n"," self.tunnel = None\n","\n"," @property\n"," def running(self):\n"," return self.thread is not None and self.thread.is_alive()\n"," \n"," def start_server(self):\n"," Handler = ImageHTTPRequestHandler \n"," Handler.directory = self.directory\n"," try:\n"," with socketserver.TCPServer((\"\", self.port), Handler) as httpd:\n"," self.httpd = httpd\n"," httpd.serve_forever()\n"," except OSError as e:\n"," cprint(f\"Error: {e}\")\n","\n"," def start(self):\n"," if self.thread is None or not self.thread.is_alive():\n"," self.thread = threading.Thread(target=self.start_server)\n"," self.thread.start()\n"," self.tunnel = ngrok.connect(self.port) # type: ignore\n"," self.public_url = self.tunnel.public_url\n"," if self.public_url is not None:\n"," self.unc_share = Path(self.public_url.replace('https:', ''))\n"," cprint(f\"ngrok tunnel: {self.tunnel}\")\n"," cprint(f\"Public URL: {self.public_url}\")\n"," else:\n"," cprint(\"Server is already 
running\")\n","\n"," def stop(self):\n"," if self.httpd:\n"," self.httpd.shutdown()\n"," self.httpd.server_close()\n"," if self.tunnel and self.tunnel.public_url:\n"," ngrok.disconnect(self.tunnel.public_url) # Use the stored tunnel object's URL\n"," cprint(\"Ngrok tunnel disconnected\")\n"," ngrok.kill()\n"," if self.thread:\n"," self.thread.join()\n"," self.thread = self.public_url = self.unc_share = None\n"," cprint(\"Server stopped\")\n","\n"," def make_request(self, path=\"/\"):\n"," \"\"\"Makes a request to the ngrok URL with headers to bypass the ngrok warning.\"\"\"\n"," if self.public_url:\n"," url = f\"{self.public_url}{path}\"\n"," headers = {\n"," \"ngrok-skip-browser-warning\": \"true\",\n"," \"User-Agent\": \"MyCustomUserAgent/1.0\"\n"," }\n"," response = requests.get(url, headers=headers)\n"," return response.text\n"," else:\n"," return \"Server not started or public URL not available.\"\n","\n"," def __enter__(self):\n"," self.start()\n"," return self\n","\n"," def __exit__(self, exc_type, exc_val, exc_tb):\n"," self.stop()\n"]},{"cell_type":"code","execution_count":29,"metadata":{},"outputs":[{"data":{"text/plain":["Path('../experiment/cache')"]},"execution_count":29,"metadata":{},"output_type":"execute_result"}],"source":["serve_dir = cache_dir\n","test_eq(serve_dir.exists(), True)\n","serve_dir"]},{"cell_type":"code","execution_count":30,"metadata":{},"outputs":[{"data":{"text/html":["
Enter your ngrok authtoken, which can be copied from \n","https://dashboard.ngrok.com/get-started/your-authtoken\n","
\n"],"text/plain":["Enter your ngrok authtoken, which can be copied from \n","\u001b[4;94mhttps://dashboard.ngrok.com/get-started/your-authtoken\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
ngrok tunnel: NgrokTunnel: \"https://36a0-83-33-227-209.ngrok-free.app\" -> \"http://localhost:55435\"\n","
\n"],"text/plain":["ngrok tunnel: NgrokTunnel: \u001b[32m\"https://36a0-83-33-227-209.ngrok-free.app\"\u001b[0m -> \u001b[32m\"http://localhost:55435\"\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
Public URL: https://36a0-83-33-227-209.ngrok-free.app\n","
\n"],"text/plain":["Public URL: \u001b[4;94mhttps://36a0-83-33-227-209.ngrok-free.app\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n","
\n","
\n","

Ngrok displays a warning page as a security measure to prevent unintentional access to your local servers. This page requires you to confirm that you wish to proceed to the content.

\n","

Please review the ngrok warning page displayed below. If prompted, click 'Visit Page' to proceed.

\n"," Don't worry if you see a 404 or 403 error. Then, you can click the 'Close' button below to hide this section.

\n","
\n"," \n"," \n","
\n"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["127.0.0.1 - - [21/May/2024 13:22:26] \"GET //pcleaner.png HTTP/1.1\" 200 -\n","127.0.0.1 - - [21/May/2024 13:22:46] \"GET /Strange_Tales_172005/.crop/Strange_Tales_172005_0_Default.png HTTP/1.1\" 200 -\n","127.0.0.1 - - [21/May/2024 13:22:48] \"GET /Strange_Tales_172005/.crop/Strange_Tales_172005_1_Default.png HTTP/1.1\" 200 -\n","t=2024-05-21T13:22:51+0200 lvl=warn msg=\"Stopping forwarder\" name=http-55435-1681ca10-634d-4a9f-b4c3-e9ae3c2b48e2 acceptErr=\"failed to accept connection: Listener closed\"\n"]}],"source":["server = setup_ngrok(WebServerStdlib, serve_dir)\n"]},{"cell_type":"code","execution_count":31,"metadata":{},"outputs":[],"source":["assert server is not None and server.public_url is not None\n","public_url: str = server.public_url\n"]},{"cell_type":"code","execution_count":32,"metadata":{},"outputs":[],"source":["img_path = 'Strange_Tales_172005/.crop/Strange_Tales_172005_0_Default.png'\n"]},{"cell_type":"code","execution_count":33,"metadata":{},"outputs":[{"data":{"text/plain":["''"]},"execution_count":33,"metadata":{},"output_type":"execute_result"}],"source":["f''"]},{"cell_type":"code","execution_count":34,"metadata":{},"outputs":[{"data":{"text/html":[""],"text/plain":[""]},"metadata":{},"output_type":"display_data"}],"source":["display(HTML(f''))"]},{"cell_type":"code","execution_count":35,"metadata":{},"outputs":[{"data":{"text/html":[""],"text/plain":[""]},"metadata":{},"output_type":"display_data"}],"source":["display(HTML(f''))\n"]},{"cell_type":"code","execution_count":37,"metadata":{},"outputs":[{"data":{"text/html":["
Ngrok tunnel disconnected\n","
\n"],"text/plain":["Ngrok tunnel disconnected\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
Server stopped\n","
\n"],"text/plain":["Server stopped\n"]},"metadata":{},"output_type":"display_data"}],"source":["if server is not None: \n"," server.stop()\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["test_eq(ngrok.get_tunnels(), [])\n"]},{"cell_type":"code","execution_count":52,"metadata":{},"outputs":[{"data":{"text/plain":["52794"]},"execution_count":52,"metadata":{},"output_type":"execute_result"}],"source":["PORT = server.port\n","PORT\n"]},{"cell_type":"code","execution_count":53,"metadata":{},"outputs":[{"data":{"text/plain":["[]"]},"execution_count":53,"metadata":{},"output_type":"execute_result"}],"source":["_PID = !lsof -ti :$PORT # Find the process using PORT # type: ignore\n","if len(_PID) > 0: _PID = _PID[0]\n","_PID\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !kill -9 $_PID"]},{"cell_type":"markdown","metadata":{},"source":["# WebServerBottle\n","> simple web server based on `Bottle` and `ngrok` as reverse proxy\n"]},{"cell_type":"code","execution_count":38,"metadata":{},"outputs":[],"source":["#| exporti\n","\n","app = Bottle()\n","\n","@app.route('/images/') # type: ignore\n","def serve_image(filename):\n"," if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')):\n"," return HTTPError(404, \"File not found\")\n"," response.set_header('Cache-Control', 'public, max-age=86400') # Set caching headers\n"," return static_file(filename, root=app.config['image_dir'])\n","\n","@app.route('/shutdown') # type: ignore\n","def shutdown():\n"," current_process = psutil.Process()\n"," current_process.send_signal(signal.SIGTERM)\n"]},{"cell_type":"code","execution_count":39,"metadata":{},"outputs":[],"source":["#| export\n","\n","class WebServerBottle:\n"," \"\"\"\n"," A simple web server for serving images from a local directory using ngrok.\n"," This class uses the Bottle framework to handle HTTP requests and ngrok to expose the \n"," server to the internet.It is designed to 
be used in environments like Google Colab, \n"," where direct web server hosting might not be feasible.\n"," \n"," Attributes:\n"," directory (Path | str): The directory from which files are served.\n"," port (int): The local port on which the server listens.\n"," public_url (str): The ngrok public URL where the server is accessible.\n"," tunnel (ngrok.NgrokTunnel): The ngrok tunnel object.\n"," \n"," Methods:\n"," start(): Starts the web server and the ngrok tunnel.\n"," stop(): Stops the web server and disconnects the ngrok tunnel.\n"," \"\"\"\n"," \n"," def __init__(self, directory: Path | str = \"\"):\n"," self.port = portpicker.pick_unused_port()\n"," if isinstance(directory, str):\n"," directory = Path(directory)\n"," assert directory.exists(), f\"Directory {directory} does not exist\"\n"," self.directory = directory\n"," self.thread = None\n"," self.httpd = None\n"," self.public_url = None\n"," self.unc_share = None\n"," self.prefix = 'images'\n"," self.tunnel = None\n"," app.config['image_dir'] = str(directory) # directory for Bottle\n"," # app.routes[0].callback.__globals__['image_dir'] = str(directory) # directory for Bottle\n","\n"," @property\n"," def running(self):\n"," return self.thread is not None and self.thread.is_alive()\n","\n"," def start_server(self):\n"," def bottle_run():\n"," run(app, host='localhost', port=self.port)\n"," \n"," self.thread = threading.Thread(target=bottle_run)\n"," self.thread.start()\n"," self.tunnel = ngrok.connect(self.port) # type: ignore\n"," self.public_url = self.tunnel.public_url\n"," if self.public_url is not None:\n"," self.unc_share = Path(self.public_url.replace('https:', ''))/self.prefix\n"," cprint(f\"ngrok tunnel: {self.tunnel}\")\n"," cprint(f\"Public URL: {self.public_url}\")\n","\n"," def start(self):\n"," if self.thread is None or not self.thread.is_alive():\n"," self.start_server()\n"," else:\n"," cprint(\"Server is already running\")\n","\n"," def stop(self):\n"," if self.tunnel and 
self.tunnel.public_url:\n"," ngrok.disconnect(self.tunnel.public_url)\n"," cprint(\"Ngrok tunnel disconnected\")\n"," ngrok.kill()\n"," \n"," if self.thread:\n"," self.make_request('/shutdown')\n"," self.thread.join(timeout=10)\n"," if self.thread.is_alive():\n"," print(\"Thread did not terminate, proceeding with forceful shutdown.\")\n"," else:\n"," print(\"Server thread stopped successfully.\")\n"," self.thread = self.tunnel = self.public_url = self.unc_share = None\n"," cprint(\"Server stopped\")\n","\n"," def make_request(self, path=\"/\"):\n"," \"\"\"Makes a request to the ngrok URL with headers to bypass the ngrok warning.\"\"\"\n"," if self.public_url:\n"," url = f\"{self.public_url}{path}\"\n"," headers = {\n"," \"ngrok-skip-browser-warning\": \"true\",\n"," \"User-Agent\": \"MyCustomUserAgent/1.0\"\n"," }\n"," response = requests.get(url, headers=headers)\n"," return response.text\n"," else:\n"," return \"Server not started or public URL not available.\"\n","\n"," def __enter__(self):\n"," self.start()\n"," return self\n","\n"," def __exit__(self, exc_type, exc_val, exc_tb):\n"," self.stop()\n"]},{"cell_type":"code","execution_count":40,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":278,"status":"ok","timestamp":1715452866844,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"RJuv6DOos2zp","outputId":"dbd120fb-f4cd-4cb0-e531-6eb5defce0e6"},"outputs":[{"data":{"text/plain":["Path('../experiment/cache')"]},"execution_count":40,"metadata":{},"output_type":"execute_result"}],"source":["serve_dir = cache_dir\n","test_eq(serve_dir.exists(), True)\n","serve_dir"]},{"cell_type":"code","execution_count":41,"metadata":{},"outputs":[{"data":{"text/html":["
Enter your ngrok authtoken, which can be copied from \n","https://dashboard.ngrok.com/get-started/your-authtoken\n","
\n"],"text/plain":["Enter your ngrok authtoken, which can be copied from \n","\u001b[4;94mhttps://dashboard.ngrok.com/get-started/your-authtoken\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["Bottle v0.13-dev server starting up (using WSGIRefServer())...\n","Listening on http://localhost:55470/\n","Hit Ctrl-C to quit.\n","\n"]},{"data":{"text/html":["
ngrok tunnel: NgrokTunnel: \"https://0836-83-33-227-209.ngrok-free.app\" -> \"http://localhost:55470\"\n","
\n"],"text/plain":["ngrok tunnel: NgrokTunnel: \u001b[32m\"https://0836-83-33-227-209.ngrok-free.app\"\u001b[0m -> \u001b[32m\"http://localhost:55470\"\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
Public URL: https://0836-83-33-227-209.ngrok-free.app\n","
\n"],"text/plain":["Public URL: \u001b[4;94mhttps://0836-83-33-227-209.ngrok-free.app\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n","
\n","
\n","

Ngrok displays a warning page as a security measure to prevent unintentional access to your local servers. This page requires you to confirm that you wish to proceed to the content.

\n","

Please review the ngrok warning page displayed below. If prompted, click 'Visit Page' to proceed.

\n"," Don't worry if you see a 404 or 403 error. Then, you can click the 'Close' button below to hide this section.

\n","
\n"," \n"," \n","
\n"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["127.0.0.1 - - [21/May/2024 13:24:01] \"GET /images/pcleaner.png HTTP/1.1\" 200 17709\n","127.0.0.1 - - [21/May/2024 13:24:45] \"GET /images/Strange_Tales_172005/.crop/Strange_Tales_172005_0_Default.png HTTP/1.1\" 200 137784\n","127.0.0.1 - - [21/May/2024 13:24:48] \"GET /images/images/Strange_Tales_172005/.crop/Strange_Tales_172005_1_Default.png HTTP/1.1\" 404 817\n","127.0.0.1 - - [21/May/2024 13:25:01] \"GET /images/Strange_Tales_172005/.crop/Strange_Tales_172005_1_Default.png HTTP/1.1\" 200 107550\n"]}],"source":["server = setup_ngrok(WebServerBottle, serve_dir)\n"]},{"cell_type":"code","execution_count":44,"metadata":{},"outputs":[],"source":["assert server is not None and server.public_url is not None\n","public_url: str = f\"{server.public_url}/{server.prefix}\"\n"]},{"cell_type":"code","execution_count":43,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":245,"status":"ok","timestamp":1715453635711,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"fl9GFzKXwIpN","outputId":"7f29395f-0956-43b6-bb57-467b6aa2ba6e"},"outputs":[],"source":["img_path = 'Strange_Tales_172005/.crop/Strange_Tales_172005_0_Default.png'\n"]},{"cell_type":"code","execution_count":45,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":35},"executionInfo":{"elapsed":258,"status":"ok","timestamp":1715453674774,"user":{"displayName":"Vicente 
Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"VC-IoChywfNG","outputId":"eb544fac-df9e-478b-a020-0f6b86b1c3ae"},"outputs":[{"data":{"text/plain":["''"]},"execution_count":45,"metadata":{},"output_type":"execute_result"}],"source":["f''"]},{"cell_type":"code","execution_count":46,"metadata":{},"outputs":[{"data":{"text/html":[""],"text/plain":[""]},"metadata":{},"output_type":"display_data"}],"source":["display(HTML(f''))\n"]},{"cell_type":"code","execution_count":48,"metadata":{},"outputs":[{"data":{"text/html":[""],"text/plain":[""]},"metadata":{},"output_type":"display_data"}],"source":["display(HTML(f''))\n"]},{"cell_type":"code","execution_count":49,"metadata":{},"outputs":[{"name":"stderr","output_type":"stream","text":["t=2024-05-21T13:25:04+0200 lvl=warn msg=\"Stopping forwarder\" name=http-55470-05e35e60-2237-402e-b7ab-8507dd7f47dc acceptErr=\"failed to accept connection: Listener closed\"\n"]},{"data":{"text/html":["
Ngrok tunnel disconnected\n","
\n"],"text/plain":["Ngrok tunnel disconnected\n"]},"metadata":{},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["Thread did not terminate, proceeding with forceful shutdown.\n"]},{"data":{"text/html":["
Server stopped\n","
\n"],"text/plain":["Server stopped\n"]},"metadata":{},"output_type":"display_data"}],"source":["if server is not None: \n"," server.stop()\n"]},{"cell_type":"code","execution_count":50,"metadata":{},"outputs":[],"source":["test_eq(ngrok.get_tunnels(), [])"]},{"cell_type":"code","execution_count":117,"metadata":{},"outputs":[{"data":{"text/plain":["58805"]},"execution_count":117,"metadata":{},"output_type":"execute_result"}],"source":["PORT = server.port\n","PORT\n"]},{"cell_type":"code","execution_count":119,"metadata":{},"outputs":[{"data":{"text/plain":["[]"]},"execution_count":119,"metadata":{},"output_type":"execute_result"}],"source":["_PID = !lsof -ti :$PORT # Find the process using PORT # type: ignore\n","if len(_PID) > 0: _PID = _PID[0]\n","_PID\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !kill -9 $_PID"]},{"cell_type":"markdown","metadata":{},"source":["# Colophon\n","----\n"]},{"cell_type":"code","execution_count":51,"metadata":{},"outputs":[],"source":["import fastcore.all as FC\n","from nbdev.export import nb_export\n"]},{"cell_type":"code","execution_count":52,"metadata":{},"outputs":[],"source":["if FC.IN_NOTEBOOK:\n"," nb_export('web_server.ipynb', '..')\n"]}],"metadata":{"colab":{"authorship_tag":"ABX9TyOEgPXBN+hXsRiveyDno/Wy","mount_file_id":"1lU3GiBrmigaiHRD2FPnnnn5rGBg9-S3_","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.9"}},"nbformat":4,"nbformat_minor":0} diff --git a/pcleaner/_testbed/pcleaner.png b/pcleaner/_testbed/pcleaner.png new file mode 100644 index 0000000000000000000000000000000000000000..71355d25c4c6323b519ff5869b2c6e17d3e2a5e3 GIT binary patch literal 17709 
zcmeHvc{o<<{`WG=kjN03B4wU=kTHaenaRv!<}qf)!t{HeXjStuJ=0cb$;((N7`hENe&L98txCD;5|GWktjz#eB5lZ;^0=`Azm==CE!nez{Kc2M;$CvNmBP8(i z3w&RNV|w^G4BzM%>F+=4aD+bfA3xPJwDs6U1x17f#Ux=|VNr1zVG$WoQFdWCkdP6P zKy-bR==tr5p+l+cD62ml$!MjnU(KF>kZn#r9A``Of$_ zcNIPJu33ppmDKn7QC`w01@`vCI6^j9A5lJLb@i}U2kegm1q$VrsS7em5d-45|sd{%%<9mh4Z7v_4 z$w7qky)5r%_E%v`3Y&5li-o1fxMGef&csbzzKJ}v%)a3JVA1})%LC+%)988J7N5V( zbkeH1W*jSC=uktmzox-@#77pJr4I?(11V82uAIdSRLI4(;_X!`b`)VMr=@k+A}Bub z`V%lS|KZZUNMb1ElWtdMXo4VA@-Bv^ex}-5GWIwRL0bo$9ab>V z!wX$8L{=ft%huiv>&I?~b#n2P<5+xE&%y5EAje@Mp)IWKrHplUQ4jXP8U*VY+6TMY zOFM8V$dkzi%D?~~SU+3#Ko56MUztETj^E?Tz%hDSh=cw2C4O#l9H!uA$~Ye^ySSjZ zps;{ypvxsu4tX+mSsw>S8GV&=e+~h^$#FRQ`FY6*2?YcM2nL7=;(VNhM5Lvqg@i?g zL`4PQ3IX3BPe0p00Z(5}bcjF3P{I1z`?z@dx!^q6(J^i9aQ=RB931dD`#h!2nYMW?)UQdasT}~4)#J=cdQ3o>I+X5`Ikqc0r}S*s0o~0JiLD21;+lDEB#y? z|7om$nH&1a@7MYJMBw(n#{HM8|MBj>FNUkMwPjRr_WtPfG*sj`(C5oI;Ot!-WPTqy zVx?@wB*i5JY#p&;0^;`4_5xBO5+VYQB90hQX-BNIBo_0xp)@>w{cJt$vFK1RxS$J+ zBP@my6%`h95RjB|6c!K{6Tt}BV#P!Su)?CEQW#MYTVXNLzYU@5;{sl3>;Csyp+h;q zP?EM-DRC(W2>}sD2?+slDRD;uI|+=afSshBh?Ik^s3S(i?)Oj*_A=*iJ|4EPoGu== zPFNu?Pp97>paz#w($kRR5ET^u=NUbBTR%s*L5@Sq#nV6VpC1^ycwi0uY*A~9V1y;4 zq(w0z7!gSkG2uU6`}cUpSRY^TM0CvGsrC2m=(5Pb%Yb2RQ9p$NexHYTkx}-++WO&q z3~@MjIgY=Q@?RfoLpV9u`q`@3`e9+vztXhfKhiWGWNqO;rjHJ%jdO5u4En!ajRp_9 zEL;yuuI}Os&ky>2>W_#rzp#xB{2yFFL_%6zSXkIj0Auf9Cm?Qzl@zeG6Gm4OD=Z=PV$yuV zqNu@y{^wuOR?1FX7%L_&;9w{20NxH!BLx8kBl2`$(oudTC!QM_1gZ;xd|MHRl$5j*;5Ec;- z75#_XWBzpeKUYy!2+jQe@LO4-|IM@ixZrQ40#^Tzb5Meyq6z&~(*AS{nD77O*Pjvf zKluoD_WwTQ-?Hz2&Glb%{aY6Jw=Vv#cm3B~|CR;*t&9KbUH>t2k^PetV?6=O1_0$f zIe5JW=rXaLrkVqL11*anzBKITgU3j*z8^n3#jl3*Hix`O^(eMDD2!6K@pgXl%yTysmb7C;L=uyelU+BO ztm?xoi#(cpc=AI!7WP9rycCo}2bjh+DhN&y>{AVOe)&QVgqdGJKtx1DsW`Mu{v;Py zxl;$LsHmvAmR8f?V2!z4+PilLd_NDf$ml(_k}E8Yx7(SSzLS=w;p)ok)c#;>zPFf) zh9KNV!VB3%`Z@9SWa%4D-a5DPIETAA(?n12$lmta+rB;qCnqO&clUw|xx_LVhdDWO zukANf`_6dz`Vz47@f8osRb^&o&M%u-hpH+nM)XXpojiT|T1g3iS9iCDj?O7T!P`qq zUSCSg-VWD=#O=f?l(B3cI!5*N&Z|v@bRt5+2i1MXT|3Xq%Jfd@JSb36QQ@AOn`>1V 
zoBniv-n;Lq^;1JkV`F2__mN6m7Duuie0W|(<%+2HxJq+l<5`}cX~ehjK0O+opZXAttV6UnPz+eBh zygW$D$Y@qP`tG6fHJ|w&o^$c6x1K-uP1`fQ#U!z`vO-|fktxR=%dyOte7wEA{U)ni zxz=^Hb1-ign{fDK2X*OrvIr4j zX$;21f0n*Ou5;O@WkYVmc5LOvz7CE-NthosXf zaB`|!SHY-qaiqS!OV<6vhYuVG@i|XVPw)1#JB8VW+jQ6v1-uuZtjo*FY#Ji)=)U|E z(i(a>UpT8h$?X{y5g$(l3CQ+M6p^^cAT1{+XHCFjX&5UjtKfRcQv5anF~zZnn3!Qi zaVW(9aVu#<98tL2*D7!9{FsTkmbP~9)RilQ?6}h160_%|hYs0(xWh$AMD**UP<>g* zhG$fC^x8I0ZEfw+(h_3R{y@6<(kUjH>gsC!9STB70`G1eH>t9EZE957L3*9fc^qO@ z*7*7Nt~|}w&0iCAVq#+0wfWxI9PZX%6Yn*9g?nf6B_^761ZZjUV<)O!*6 zPPY{%KhC$23JeaWp`|tSN}8yvF_bhKf{?b%->C>2wzaj*LW&uyVbr4|%r<&MXKigA zT{Whoqk}9A*WF)T^*hWaueG!O1ClIH=Y{}3e@|KrgOJrpLBaM>Hc@hN@~G&Q>5K{!0Al%qpN7VG&Z+e_ZfW;3i-yD7eL>%J$rNr;(vt=iG z{I+z7V*S+;2KO+dnGbqbuiv_*J*IJ%vN#Iq`D6{RDb!c^I!`ILvtVLtOIesp;$X$w z*OyaR_(+8P``d*qH?C?av0ZIyQYrK&R)GxH6PDNLpD#)t5wstbm&YZO(Qe~e6&LqW zVf*!s8;9hASCM6zUO|g0cHQ!&_;lx(a}V3#QO@MXsdrQspQtB&51aPZ2pYd}Ank5+ z^@Gx_H=L8Xu^H@q<_*uMSh`6&nlG zIVRYc8v3zx8qeJSF*|#`vJ!(lF|X+Ir55CQ`SK+Nn>=$w3^_wlX>&^C#QS*7Tg5-4 z3^IhQ;Qdz0EiEkz&7Mzl@?3h8IkZbnq%0}6vbD2VpGSNhF~qffTG=*B7k621dGGwK zrlRKZWD9ll>)|MttT+atyPsz{kly=mqDhhx5*nIr9A(=&<}virXvlNo-7T@%F~!2A zC{o1}Qrk8jn~Vn5zJ@7<2U4PVxCbyFf#MSadB}?tz9!&w4AQ;jSuW#1D>y6zxGU}rXGv_ zLBcI{h?kR7nM?47Zux$+|?b+!#X+wwB0I zjiaAmh83aovD4E?$|i#gcwBl45^ErE9HVa@G5pL@CF&qfqk`~KLJGHC8hxW^rI!8h z^jPwhQ|M^JhEI$sCCL&pGd(}gxN*-75>iL-Dl4fSSEj1>n8oJ;i?~vV7;fgCRHBBd z(R9@Y`TB1)L=q79b$-cKBJh6!sp2|9iq(3P7y&DF{HtDeVj?vG$zj4#$bI@Y=tJz> z-AnGx_3Gsllf$pl73FHTnDR`2Nigt$VOFstn)nV+Q3&Oe$ zLiM|6jg9XrWn=-ypuIyHq34M)DaJ1Jm(K^I2G5xI{5Ynlb#Kgp! 
z%j*kf(J~m-3xFa|N=WpwPte0kq_6_m6lbEe35-;NALdW!6%-Wi3R=1@MQVbT&tH4~ zTnrvVVzcq{8!-d>3$y2rNbPEu=1pd4{c(a=u&H)^*d8L~KcB1=LU{Gcl``x{dLCgg z5dSB4Dk>`fZ+ZqXQ@-E72=IubCwcncT$=0Rik7K_Qa<P&hDDJ;BYbPG-Q5JbWP~ zB?Vde;v#R4D|J@yv7VN0CIIko9YRA|CpPr2@xG zUS7hQclm9h>QFF>PA1;=g#YM2^ zhYy@)NNi47*j}(z&=g&rxD`=%!qDL0_~>W@RG-D2mk^V7P1mVce}23FSo{8bH4niH zx4y#{FJ6TETRwcSg{2G5I#?gV1t;Dna=|C%IyrK@Ogt1xOG`rwTxYIYcO`Cw9v>{drMeLpm()m$cjUZg zG%+R8|2p(cVPPTFR}3Ft+>fEk?x)rY05~@HcNRbHUOuIjs+g~nk&3~wvdH+2&UWUO zm{-IySOrEtdi2O{hgeihjIuq&Ea8-5UPDXEbtsH9M~_CAdv^|HCM8817jyCQ#IU z<@MV)Z=MttrHL?pV*aboI-L60F?CqGptW9;!i$xHfWW5#=bF^}*6S!MRpE|G9y|>{ z*pJmq25`%3|L*1mdLgS4+FI+4o#SGTH)$9c@+nwk4AX>dqn5|xgm@#K1uR-&0n)X? zC{NM2=ya`U1*6ibUI4~NiyeI7LM8k2d53&4cWln(gT0O4AKp5btD>gH2dUcV_4Yg; zksbiuQ$)tMR{=qKRjVJ(E}~{*>u5_8GpqDuv9-0WU1}u1yz`^7c6a$B*LIkqBI5u3 z3wCw3v&8Zhy_=icRSL#N$fx}kZVVsq3cT&_Pt7hsj+rQrl>+9)B2Ke0#Ilk_C zGkC}eK<-nmDSZ8dgEY*{H{;^Sfj3P-C4lVEU2GyG>G`$g#?jNH6covgv`zTj0zjX5 zmZw8kK4E$>m(HGz5)EFRwHx_->eQ+8T3R>nBqiluiHYIq?Cfj>w9;d*rmC9KN|Pq; zawm8JyqqtBhKZ>NmX%q?PyG-Tl?e?EO>(ixQ(nNzZhfUj$P}0^!Ni}g9D(ANaPuZb zh1ioNYg5zI;nxozK8%IDJS-2;$`~xfYFenN44J>{k;YLXJ^7slF(SR`wzOOrz3JUM z-V-NI1n-(iOqX!bvcvw~VtDa)q6`o4?mUc96-XGWU%fjZ22kE^ zZf^eThx9~O#cWAQiRr6L0+4Lx^+vutxrkR*S=p!QK^)YsRTsR5&(6Vdmh1%?U_%v1J9j~Qd7SH5CzVTk64GU86|pI?CdPBwRF7(u~1iM6sMX) z8i$Zlp>hHmRKawYz`Hg#H-&>&o#7r~4CZ}vbIjXoR4ebrCeF?3HBjD6f9~YEhTD9u*FWz<5L2 zXY%^^_;?NytCP>yb)tYf)@N_q9}D`&{D#8jC;^w8T%A@&08pIs&mBJ{n^*;@N(U~D zhHea1xZMk8J#^?$m+$FZgac{zhGpH6S(zkPl!w5ccY1QM(%AD`jnRULva)gtyboW* zG8F&ezOk7LL~iQ~1FZ(Sa%&B-p)>6)T=4ST_^z}LkY61f9A?+bs90r9Gq%qJEl=*8qILlzfg?Isn1^l8 z<%@ojuyJ*5ec{sW<>M1P8WtHDNk~EhMYKJNi2P9llp5N}-K`a>wYd?0vj3&XsF+q)w_|6WG5NHanb~iis+b0s^jJ|N)CiEr;v&vCbGa^PHCq* zc%{VyUWvs7ouA9YxOAO_EE4Db)FA(G{r)yz9KArWO_xrF&U^5~{25i#3(n$9nL*Qx z%*@QD<#wutO}YR$?@%yH>D|J&NX^d-j)gZ_>M_tQ9D>Sb;xMi&fBW`r(3>VAjS*nd zQ}%HKI4rg~{9wl#0aPzE90?5QIN%RZa8!We#$5*BDKYrc<0I6EKv=ilA25lU#r{{h zxty?O#P`gq*y}Va?1 z>j;vqjWbeGDAS9svx1^)b@`Vn0)>*_7*_BIGjkgRorS~B^3+E>VKFiL{xaJKGXAkn 
zS)q3WhJ*62U$e0l8%cmlgtz=ya-<{a0=%<4ihn`V2o~E~{43|kI^Wow5;-asD5Ukz5RAV!jb*W z394w%?+RK=!5cp*6+s|{Y&Q-mZTSewB`qv1&C2bFP&EA#atFswg>z>VM3-TWq@Sm! zAQT@PC?~-mlno3F7@_()sQG8m8`3qkivRfPW_jXb$N5U=wW>B@b z4?r;)h!Wpo{lV+;`T3&uAnxy@T;pUGlInH|WYcSX)-yl9y}HRH5gSQB;xpQSCo3yU zG2Eo9r8NoqRa?fHdw^!<&4dI61z)%i(40Pf8k8>xf4pTo27nURz#y9tt28ls{Ou(U#B-!G` zDEKc(OG^ONL_kfg>ww7}A|=fRPQkUT>D=|`(A~Rt0Un5ATWQTJTvZ_lO`S;4X_(gr z&D6BQmF`oz1j>r0XR^Y>7R&+wsGHUWOT2#l8nmv$6zlx%&X*n+Q6K~pfD+GNZg8it zkOy#(@fpH=t+eF^FZ(D{q?Z_i`Aey#W|n;DF%dgL5U>ad2@PkZ0i1(>eE;%f0;={X zWUhH9C}l8dU?v8xLQVHEI3M6HIXO8}mljfM_FyWH0V&GOT15#-XF9 z0ILa=c~6{6$Kb3^)MtPa0VH^HHa9-DwjyM8HuBZ0Gsecoc`}ZdFUtZS2pL^y&I;SU z0VO;|CM8X8-B9ITiEAI|U0HN|#x0gYS7&FP(L7k8+FE8gedv*o)dg~me@c7tRj#PVYi&?cRh1O5n^g|xi{|ECIjZC!P5`)g|Me?##69aUvs>&Ba6zaZ zMrNIk;48+n%0c&Jx_TxPmCTr-tg>ySI$^QiOQTBk0_IJ?=z4zu<;({t2N0j`%#{EI zeFFn{V^nfiy^~y1Q`1#wa2ld8R$}qEgsTdS*blCTRx=QAE^kfU(_aEIEDuq=3xYHu zF|nx6q`Kp$^cc|8=7ua|&stb8gYIYx7Ms5WBsDW|{1B6bt0AwLf`Gs+AfC^BXLL%f z>q|G%u3aNhQc|j_sj+i#@H<$4d`=lO4nTS{05Y{QWz+6i1#6_^!_&qjS3pXb>&~b8 z`Sa(^$38kbhsVao+y|b;fNokm1S#zkKv9UCF#-Y6Xs=~s;0s{j_$H(>U)Hs3tYiNu?NkqYwx^)c5 z&Hz6jgKgsdJV0&k9~w$ZO>KdaeoWX#F@`D}Bm!bxfL&YD_i@KCp3Lyz$ruJJ?q?lf zfGuEm1Fr&}%Ea_TgQKBA5jA4b^jnCys=!|y^#}VQ)cKDfvwQ|Y2BfL|U;fbP=mK;i z!7vV!aOuox<|L1WTEo>j=ZB`Xo*qMDN~SA-+W}? 
z$W^uLgKi7oUTwU#3W5{|0zmR+KVTjxYF8Bwb`6ByKF!Er0ZAuE`@Uq}JU2fj(=Fog z@NfWO1Pup!BL|(31f#PU&3Y3u@10kKB>=8^__S{3WMO`$9dMp`CkfXc?%r}*)iD6s zP)fzZ_b;Oa(l`t{B3CJozk}T54AZSP+P@5#JP-At_Hp8R`A+9qY4E+T!K$PFi?qO8@aqf2?U;+PmxeD@R*~c zqwL;lcTb6#0m~t&;8pG)gB6^Tl6L{}%*GsB{Pqe{T3WjC>EdP;Y^voD7EUNA;6~eK z_4{i=kRA`zSrb5FOyT0;(a6xAnVO0zFBbuz3q{hPJ}lI@#PkiM?}7So1qh+}4G?MK zv$CuU^Vf|}LjLtz97gqfUl7I|J2IcleBf4RQ>-{GAfWBPUS3&ga%KkC|R`UtUhmjD4J07u22GXz5cXowmIzX^`H6&0i!Ek>K=4nwIm4j?2A!3>Pk30LzYobHJC9rCM;X43M#f z!HT%l)Kt5TUn?u9$9=pf^xXC>F#sI4j}M5bkvG{8r7XblO@bpfq)BE>mq}GK#$jERRu9ZHm^ML*2$ty5vxtBo*G$bFK3AkNWR%SP{PRS(R@cums zdu9ik+7m(Co z4?rBqa~y(=0BnlXA#2_PT8|2*r86n_?`yic7NtuREODb8qZi5Di1T}(`)00v%d-(0 zC|>bab-{%nVd+#aA3$j%B_q?{!OzLf{iuyVTNbMM30~f!e&$F(Z<||NW3#iS9DI{N zwN%cX!(SS^#y0-*ry!BlyB1*^uJ<24+yLf@=LM~6yKj!i#hoKSmrTI86qJ06_Vw{m zS68=zyuoi#Ri-|*v9^YtY)O<|X%pL!Sc~J7kuj&#m4^g+UaU|9a$zI1LTb0?i*GWD zoui9{oeST!nY&yo06|hSGJb#z22<;dw5YEfsA5g#n<+Pb%&pW(!k(8ZTGH=q@sXt&1~ z+X#(N0FS2BPAtGEcK7yvt*;9jJsukmvxi6l0DA2(3tAGm{8Pp8u8^1kTcyyO*iltc zDKLm3H^+m}AzHpV3C&B3`4QNJ;Rxb1igAKdtaCd**Z8{)2hFf#Lb|TA;fhCL4wNQB zo6V)MJ0j}*$e1lS&(iWoAu#|9hgoISLHLU^#gmkjl--(4f@1p&_L+cOxuVC1ERv zAp=8%eSiJ_B_B>?c zCr1(vObSiQ)8|tT*S=tuznlu&RiHi2D1W(Xbr<9!F3Fh}=AcG50A5C*#VWvoKv3vp z$iU*?$l+f2TAc(Y_nLS|2?8|8H`!L)-XN~Z_{~ND5(IK3*hiL@np$Aa1!5)tc+W|I z{ZuGs!wd&TgI@OB|GP0*DsFPK*t%XGl9UtB#|QGE=?A+r3eBwVKYk=9A-P7T!3vtd zTad-@0s*bArHe(a;*oMSf%FY^nZOIWB|x(c+`}U)D=V!gvoC+o2JBc1qP#0jVr(lj zEj|6?cC$@rYV;hLfnd`+&xG+r88^k`)ZoXK7Vc;9I{DonYr6rgK&n{Prv-Xj~}nSmVIAggsE9-|wm=B*5(;Q#>N($D8fSBL@Ml>*#%eDZbF= zkqP+Ob{toG;%yj!4y-C{yg>mdf(oJ1_{-0UVI7i#h8v*e?b!a{12`N=NMjX!R?rhq zfH)}!4ByLC9kwR}f-LbeO@yJ}NHjaWV#>MP&ZV!u9lt(dMvx#eqUh-8PG@n~*Ady- z*`is_Nr{P(6%`VpJL}DF-rzkk)`SRyX7s7>y)_1Sfb7BU3bR5ObHwSa%Xa`w+(!frpw`76q15R^5!XlXLQ27L*lj#<;Cak&iqloM?a8j4ARpqqFu;x~& zX=#NTsxYY0%1BE~L1VN#6C)$&;%cxvL4b%8pWToa%z#!Lr~nr-JYcU^34>}?Bdg4l zyWn~R!VWD*bh6|Hh)zO7F(5BwCL;hu)6)_XbS*1H>gUgM@bKI?j`2*$$au1!5@7xd zpj;<(b=ZZroc0T@=#{r{D@vd(A)t_=;3^O>A6i@O0IfFvbaQgbmzk21mbP_pxB?U$ 
zy1ICJ->yqAed3J&wzIwM0|5pYRZU%;RH$zC&hPeTCiF6*0o9g6kX3LVS+=-Zzw`6V z3v&Sw?}AxksB%DLf!4rr5Sv_ki-;7#S3p^M+ts!Fa1U>*e4m7pNo8gF6Rc6`B`(r394L&49+PzMSDslte*E~v z_wOIUJX?MB`>HTMKyv!(bSkQwmPOu$+Z&4=v2=Wsr6&5&2frR4e;PXPw+6EA+{qEt z&6G96g_Dqw07~5dyr7W4aL5@DoK~uEBy^*y1|1PrVH*T?la!Rn@X|-<*`W~nSSXM# zRrxF_@0^-lkTuXyfqd)SThyx@|KrCGZ50*5qo=g80jYCs-(KoS1kJZiSVF?+0J03! zB4`L)f&DPav#+4SH#Id~0czyCv+hy-5GX0OxwkjvV5j~7d?fnw=g%~uz$|3YHX*!u z8c24H6cFIowpviR;cizbj`NZA4xuwR_TU%2*?f#uMq2P)Y89udnWC6x-J`GPJ`YEi8} z{#X%4Lyn0#Y9;ckqFGO)z^Its;IS5-4J0_OpWg!f`~X>9gM={Mk!9^3=nFeD3JCqa zf1w2I4wdH}RCmp<*Aj4Ykc%HK*UkgRkK&FylaS4JxoF}XM#d;ljW&pBh0MCFdaT{8ay#6>SKh?o&uI0aX z2w63~2P9ZX)cas`)CSE^fNnwM;7iH34m-aBnjy3#O!9teiuh0X86AnpB{Er_cthSh zMAp(_g>Jrq$k*Jp)YR03-WguQHMX#VQ}j>BXbmv8jfwF3bt8w{#zlL{dgFJmHzeFE zFAz6YgzcnE2@U|$z-0}$cc2aS24HNDe}>Mzkf%QlgYmB8kFS`l|NO@1JxavS&u>~A zD1rc#;Wz633_KPT1tjQ(jurGcXml}>hLFWR0g~%o146wS;-B9|u6Q_`VbUE^tUduD z$SK#L?U{h#H+81ijfAXJXxs$wp@;u%N{ZclQ!CtIQCQFt*B9AW#W1?GH1?K)NnHJw zh>#%&t6F6h6qCVzDV5)pu4p#8xfVhbSqhKzqrCI;u}zR82PunF=I*Xz{-Rgbe99 zWpf7zHQM_GfCLVYs&IXM8;Q^95zS7RPhZZ;9L3*MLdXk zNGEt8rur9V-2gGDvo0h6Id}8e8%WZ8&HnlM`3xWOyq?~^V%Q^X_y}QW9_%~;G|hW!;8e2QmoHz?h5_gjfGz<1YZpEvNV|XE9?B(b zQNA7*_x9a8RG5jr{FqkN7VotV@Ox>&?$D!4m2jhn2r@M{9|!uu%LjYHGKVU0Ab_cZf`%f1;o;#?-UHxBXd4YC zN~9ojm;zt4_yvNb0XZHV1fXLAA3Fx?()L$*bArm654*|HM?fV3c%bIsa1z~(b>nnq zLr>3(rV)&!8UOj|?`8+IVMfp!CJGgC9OPN44kEN|P|DD3U+ z+cV`pLdp{n70rPzZ2Vr{oy^R3fDnx0&U#~*aHh{$_-`kBl@LS_M0eHz=;5IoZr%~? 
z5JE)%T3v;<^SP2p*M5dF*x_JjZ!ZnogF#IM)|@OS)wW1^8U8?J|lW-6^uibnw~D7k(6Y>89o6>aO>B5wnEESlF-i@ z7cspBjmkH$Z3!q6evpQ;=Ea~}H*b3R`8j~>tC-pG0lF4Y@QXjY!OjzV*eDAn>RPT^ zoYsP7^~UtbuGC`XiIcYV39wlh{{_iB!!EnHcrv^`3dR5{06lB!Hx*4c@ke()lQuwW zhn<_dpl8Y|_Z%fq1fYHBKI)#Q*47yTdYRDt^a2bD_CVE;pP0p|%{s~O{)yLpR~@V` z!ggv*q@Cs&Ztm-0d)s5MF#%*0piw;q7YH(ROo~o$zF9JR;he~XzmRv&DhIZ$#P|HP zf(`~s$Cgh~Zf$L)eAt?roJ>ebQ3WId+l8B&o2j953Q@G`xJJiu{P$LR(JMRd*C`h+ zH8_ku^zi^W0~!ZFfnWu)d(Wpw7ePe^g(w~%XKwc#+TMiS#oTM&u<7~@mBOy#xpTxI zi2E({Q)_B|4EOVdAcW#{Cq2CtN{nXuk69#0N1mAOO_N?o|INt!NUF$}>i9{nx=*d@ zddkN0reL3r>q*PhXZrieSo)5@M-R$@w5 ziHgvThk3Ad11iw-o{@@Y&&C13!ao`?QL&1(7^PVf6DCUyTr`tZ58r4tOuLB)u_Mkz zf%ue>H4)XOhm6=D0b~Zx7pl!KR$uzT)Sa(;`plVE{CwLi%Vz`L-(hH2;SRE`%_)em kVU1!Z{ZIToCV4;=?D?9$%3>M)pO7FLsyZsAXKkYX7cKa282|tP literal 0 HcmV?d00001 diff --git a/pcleaner/_testbed/requirements-colab.txt b/pcleaner/_testbed/requirements-colab.txt new file mode 100644 index 00000000..cd5b2548 --- /dev/null +++ b/pcleaner/_testbed/requirements-colab.txt @@ -0,0 +1,7 @@ +matplotlib +rich +fastcore +nbdev +ipywidgets +pyngrok +portpicker diff --git a/pcleaner/_testbed/requirements-idefics.txt b/pcleaner/_testbed/requirements-idefics.txt new file mode 100644 index 00000000..38a68b15 --- /dev/null +++ b/pcleaner/_testbed/requirements-idefics.txt @@ -0,0 +1,4 @@ +accelerate +peft +bitsandbytes +flash-attn diff --git a/pcleaner/_testbed/requirements.txt b/pcleaner/_testbed/requirements.txt new file mode 100644 index 00000000..769650ec --- /dev/null +++ b/pcleaner/_testbed/requirements.txt @@ -0,0 +1,8 @@ +pandas +matplotlib +rich +notebook +jupyterlab +ipywidgets +fastcore +nbdev diff --git a/pcleaner/_testbed/test_idefics.ipynb b/pcleaner/_testbed/test_idefics.ipynb new file mode 100644 index 00000000..f57d250b --- /dev/null +++ b/pcleaner/_testbed/test_idefics.ipynb @@ -0,0 +1 @@ 
+{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":281,"status":"ok","timestamp":1716482606479,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"b6_yW3IxgHHv","outputId":"8d5ea16b-e93a-4a01-a416-714bf5042168"},"outputs":[],"source":["!nvidia-smi"]},{"cell_type":"code","execution_count":2,"metadata":{"executionInfo":{"elapsed":256,"status":"ok","timestamp":1716482609343,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"BP0Y1GtRFz-b"},"outputs":[],"source":["import fastcore.all as FC\n"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":261},"executionInfo":{"elapsed":93698,"status":"ok","timestamp":1716482708865,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"lxwhxBydCiDj","outputId":"a4959f37-4f70-48e4-a460-6dc8c424fd22"},"outputs":[{"data":{"text/plain":["'4.40.2'"]},"execution_count":3,"metadata":{},"output_type":"execute_result"}],"source":["if FC.IN_COLAB:\n"," !pip install -q flash-attn --no-build-isolation\n"," !pip install -q transformers accelerate datasets peft bitsandbytes pyngrok\n","\n","import transformers\n","transformers.__version__"]},{"cell_type":"markdown","metadata":{"id":"FO-koL4wUMUg"},"source":["# Testing `Idefics` OCR for Comics\n","> Accuracy Enhancements for OCR in `PanelCleaner`\n"]},{"cell_type":"markdown","metadata":{"id":"HoQukfO-UMUi"},"source":["## Settings for Google Colab"]},{"cell_type":"markdown","metadata":{"id":"L4ZVmMW5UMUi"},"source":["We will install the more up-to-date version of PanelCleaner from GitHub. 
This only affects Colab notebooks."]},{"cell_type":"code","execution_count":4,"metadata":{"executionInfo":{"elapsed":6,"status":"ok","timestamp":1716482708865,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"37Viz6DZUMUi"},"outputs":[],"source":["DEV_INSTALL = True"]},{"cell_type":"markdown","metadata":{"id":"TZpfdXHLUMUj"},"source":["The best way to get the source images for the experiments is to mount your Google Drive.\n"]},{"cell_type":"code","execution_count":5,"metadata":{"executionInfo":{"elapsed":6,"status":"ok","timestamp":1716482708866,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"6hLwMGKdUMUj"},"outputs":[],"source":["MOUNT_DRIVE = DEV_INSTALL\n","GDRIVE_MOUNT_POINT = 'drive'\n"]},{"cell_type":"markdown","metadata":{"id":"-MhrGXCMUMUk"},"source":["# install"]},{"cell_type":"code","execution_count":6,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":50},"executionInfo":{"elapsed":15792,"status":"ok","timestamp":1716482736022,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"yFeqVBJRUMUk","outputId":"49b20079-fe9c-4472-ea37-03a9be63ae2a"},"outputs":[],"source":["import os\n","from pathlib import Path\n","from rich import print as cprint\n","from rich.text import Text\n","\n","def info(msg: str):\n"," text = Text(msg)\n"," text.stylize(\"bold red\", 0, 6)\n"," cprint(\"_\" * 10, text, \"_\" * 10)\n","\n","\n","if FC.IN_COLAB:\n"," if MOUNT_DRIVE:\n"," mnt_point = f\"/content/{GDRIVE_MOUNT_POINT}\"\n"," if not Path(mnt_point).exists():\n"," info(\"Mounting Google Drive\")\n"," from google.colab import drive\n","\n"," drive.mount(mnt_point, force_remount=True)\n"]},{"cell_type":"code","execution_count":7,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"elapsed":53809,"status":"ok","timestamp":1716482795312,"user":{"displayName":"Vicente 
Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"braPNCfcUMUk","outputId":"ee60cf75-55f9-437e-e83f-e683f2fd7ede"},"outputs":[],"source":["if FC.IN_COLAB:\n"," info('Installing PanelCleaner')\n"," if DEV_INSTALL:\n"," assert MOUNT_DRIVE, \"DEV_INSTALL need a mounted google drive\"\n"," info('Installing PanelCleaner from Google Drive')\n"," os.chdir(f\"/content/{GDRIVE_MOUNT_POINT}/MyDrive/Shared/PanelCleaner/\")\n"," !pip install -e .\n"," else:\n"," info('Installing PanelCleaner from Github')\n"," !pip install -q git+https://github.com/civvic/PanelCleaner.git@testbed\n"]},{"cell_type":"markdown","metadata":{"id":"gG-H7p4IPHMI"},"source":["**PanelCleaner** is a heavyweight package, and sometimes **Colab** refuses (*silently*) to install it. If the cell below gives an error, re-run the cell above. That usually fixes the problem."]},{"cell_type":"code","execution_count":8,"metadata":{"executionInfo":{"elapsed":506,"status":"ok","timestamp":1716482823315,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"t3AW6j-JJ544"},"outputs":[],"source":["import importlib.resources\n","package_path = importlib.resources.files('pcleaner')\n","assert package_path.name == 'pcleaner'\n","\n","os.chdir(package_path/'_testbed')"]},{"cell_type":"code","execution_count":9,"metadata":{"executionInfo":{"elapsed":24334,"status":"ok","timestamp":1716482859750,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"q_7GYpnWUMUl"},"outputs":[],"source":["from pcleaner._testbed.testbed.experiments import ExperimentsVisor, CropMethod\n","from pcleaner._testbed.testbed.ocr_idefics import IdeficsExperimentContext\n"]},{"cell_type":"markdown","metadata":{"id":"Op4kiDaAUMUl"},"source":["----\n","# Idefics experiments"]},{"cell_type":"markdown","metadata":{"id":"rVXnbndZCQdS"},"source":["## Experiment directory"]},{"cell_type":"markdown","metadata":{"id":"RtGrr3D1CQdS"},"source":["Directory where the images reside 
(`EXP_DIR/source/`), the auxiliary images will be cached (`EXP_DIR/cache/`), and the experiment results will be saved. You can change the default location here.\n"]},{"cell_type":"code","execution_count":10,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":33},"executionInfo":{"elapsed":249,"status":"ok","timestamp":1716482865122,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"bL51d4LCCQdS","outputId":"7768d79b-0487-462d-fed8-3ccfe80538d6"},"outputs":[{"data":{"text/html":["
experiment\n","
\n"],"text/plain":["experiment\n"]},"metadata":{},"output_type":"display_data"}],"source":["EXP_DIR = Path('./experiment')\n","cprint(EXP_DIR)\n"]},{"cell_type":"markdown","metadata":{"id":"8Iondm2oUMUl"},"source":["# Setup ngrok (Colab)"]},{"cell_type":"markdown","metadata":{"id":"nuHPp1U7UMUl"},"source":["The experiments can generate hundreds of images, and maintaining the **PIL** images in memory is not efficient. All the generated images are cached and visualized on demand through a URL pointing to the local cache. This approach prevents the kernel from being overloaded with **PIL** images, with the front-end responsible for fetching the image and the backend web server (not the kernel) for serving the image in another process. This method is quick and efficient. As an added bonus, the saved notebook remains lean and fit; it doesn't store the Base64 versions of all the output cell images.\n","\n","Unfortunately, this approach does not work as is in **Colab**. Google Colab runs on an older Ubuntu 18.04 VM, so all the usual networking challenges with Docker, or whatever VMs Google is using, apply. Google also goes to great lengths to avoid exposing its internal architecture. We have two options:\n","- Let the Jupyter kernel serve the images itself, which is slow and memory-consuming.\n","- Use a tunnel to map localhost (server) to whatever IP and port the front-end (the browser you're currently using) is running on. We can use **ngrok** for this, but *ngrok* is a commercial service that has been abused and now requires confirmation the first time the tunnel connects, which can be inconvenient for the user. It also requires the user to open a free account and obtain an auth token.\n","\n","You choose.\n","\n","If the notebook is running in Colab and ngrok has been successfully installed and the tunnel has been created, the default setting is USE_PIL=False. 
You can set the environment variable USE_PIL=True to force the use of PIL images, but note that in certain circumstances, Colab will complain because the free tiers are usually memory constrained.\n","\n","If you don't change the default settings and\n","- the notebook is running locally, it'll serve the images directly without any additional setup.\n","- the notebook is running in Colab, it'll serve the images through a web server and ngrok.\n"]},{"cell_type":"code","execution_count":11,"metadata":{"executionInfo":{"elapsed":255,"status":"ok","timestamp":1716482875511,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"vaf4PGLlUMUm"},"outputs":[],"source":["os.environ['USE_TUNNEL'] = 'True' if FC.IN_COLAB else 'False'\n","os.environ['USE_PIL'] = 'True' if FC.IN_COLAB and os.environ['USE_TUNNEL'] == 'False' else 'False'\n"]},{"cell_type":"code","execution_count":12,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":168},"executionInfo":{"elapsed":6227,"status":"ok","timestamp":1716482883934,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"kivkfTrcUMUm","outputId":"8b7e7b3a-04fc-45d8-8627-9ba4509d9e67"},"outputs":[],"source":["SERVER = None\n","if os.environ['USE_PIL'].lower() == 'false' and os.environ['USE_TUNNEL'].lower() == 'true':\n"," import pcleaner._testbed.testbed.web_server as web_server\n"," SERVER = web_server.setup_ngrok(web_server.WebServerBottle, Path(EXP_DIR))\n"]},{"cell_type":"markdown","metadata":{"id":"AUgSvi6CUMUm"},"source":["Creates the `IdeficsExperimentContext` object we'll use to manage the 
experiments.\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000,"referenced_widgets":["b73009bd316d474c83984e19bbe1dd5d","8173703d1170428f9f632d0f9b3c51fb","3a90615cbf054a2c9518f6c7715a483b","37f933c907154d3cb326214e88da14f5","8eac691047c84830b008fa8c74de8e59","e8a8f2864e2346e9abd9ea7e6371901f","9493f753d3d746ebaf860074a43f8b5c","d8416277ca174bdebc879a5eeb559203","bd0155b640a9429fab35df32d32d3b5c","58335b09c62a4c77bb4dbbd2180f00f5","d991113b7e924a9ba14b2ee034bc58bb","b32a63ac227a43989f0bff7182007770","2f658b34d97e48a9accb4f8e69db0524","ba5183d590eb4269984812564b9136d1","8f04aee77f594579a7b257df7760ba07","9175ea3267514e2b946b7f583bd6c448","895dc88a4a704b849983166ffe18428e","6d8ec2e2d9f54c08b42c4fd907012cc4","bc735fd356114f6aab9a94e4ff2e88a0","c6cc1ed486e340ca9c10f83fb6d93473","bca7db38272d4b5dba307fbc893512a9","319aebd2d96f4f1c845fef5543ae98ed","f10a031bce3948e2bccca61bbb6f101c","e2f25159f7b249b9a2d820201413e190","56a8217adf3944b980a92b2aaea2bd39","5090734da55b47d5a2a6087e51891e0d","6aceed8edb8e4105b1f807e57bb25d5e","9c002b71e1f144998d8f5ce683766c2a","304ef8bb5c894f3fb9fbb9309fccb2db","935ded8cabb04c31b334a30acd346ae7","eed1b76b9a824cca8f7ddb490bcec2ff","ae0c642d219a41629b62cf8560823c1c","ea5a56efc11845e2bf38a65d8e93af7c","bbdcdc291ad74258b554e72073e1ede8","ae61d812a96a48cf84a788af5337d51e","9fd71f03002d445c8e70342c0513e369","0e5837b643174914b9a6b6c9e6ac3e22","0888d628f34b463eb01e9e5393a4f4c4","2770478ce8044a83b127904a8f588d5b","3fdbba3cf67d46cfb1632f6dcfb0210c","c7a74d0ee2254e6da29eeef1a3298656","58541ced68a4479d9926c09f14276f23","beaea513e569447fba29ca3df3a089f5","283e34e62cbd43108fea85e4408e1451","d9c15d9a41f24f1a86084b0c863d3af8","69faa9295aa143629f8ba0ce018454d4","14e4633237104f15850409f4ffcfa30a","413317a231404aa9a2a6580775f06fcb","10442533920f484dbf1974e528c6ec1f","ac43bf53858a4c1dadc600f3322f2aca","b7d7719b807048cb945fb958b06402a7","dac4186c487e46c6b56ba7c292629a41","3a50e27548d741118286fabf
9630c443","7746e9fad11f4a64996a1c41d0abfc33","74ea647617734b52b8e07d4246d18109","75c820e2cdda451f9ac7daabe0fcc8ca","b7838cbddd984b368ea471cee96ff02e","c3485b737f59434aa7c757ce7d4f86d2","3c4f71788c75498faf2738f751e924f3","d20d3a3712f64dceaeea4b6aeb435b8d","e98b093c952c4e3293abdf5b736b3bd7","1c322cc01c284e21a18f06db8d481636","053288662f604af8a6fbea87e8ef9014","b02ed5020f71474a9ed0c008abb40efb","f0d73ff6d04040f1962fd50d2352f23d","a1b9b514669e4a2fae3a401d770a699e","2c3a3903877e49a58181f813aa0e37b9","28f525d59c094c03955d9215cf189007","789385e36d1e432c95036b6e3a0054b8","b8cd67195ca6414daa236d21c9189e39","ef92abc3751b4b03a673a92e817d39de","4438153338484d178ac76562ada78fc4","6a3306e72c4b447e989229c28e555e22","1b8ecfca8fc64677969da2b325b13ee9","d338945414dc4b23bb1dc1b81cc17be0","6cf2d63005134660a189daae0ad36707","3f3922cb87fb407499422ab1b43ca6a5","05a94a01013d47a5b523bb322a2ef939","1adbc348c55d4e68ba66eb1d597c926e","717d027f1da74bb5affa93b72b99a0ed","9e076e294d964217a1e3ceff9b390b7a","c604bd0d89304ea7973a6819fac981ac","1822f3f564d843dd843d580c3e120a80","3bc4fe63c9574d35bc8a0f3e940212c3","e71b8879e55848479fa6f20a4f5010ea","cd3f6165ed144369a54e4b784cac2a99","bf8adc8c29eb4021a532561b374cade2","786b35b5a60b4ef3afd8f04c9240bf1a","8e263c7c07e34d9e8cf475f8f9b0c707","d753fc74422941cfa7031efe2c996f50","5cb88a274e4c40b3a505582d54578a75","cefeb098a7b04f2782b00e28cd809833","dedf69f896794fdab878d78a44db1fcd","6d0fe13087244d38b7e2c81442467ad0","4cda5365a57e453e9bbdcc0de548f1e3","7515ab4558124374a66e7af8ac063846","9f9efc028dc1431ba717dfdac07ba7da","ac247fdb754c460dac3d2a731a04f74b","47000f0cc4ce485b9f249ac32a0dc75b","d6f64705e9da407f9959f1b124e8e47e","2305ed3b768e4930af09fa8b2809df1a","16fcee1797ec48fdba6c556d996eb0c9","397f4e0b79e84838ba3640db134d7b0d","0e7f53ea18fb47bba3f007b7116b8994","80ae139aadb6462db2ce3671a7903026","a33e2a16d266421aa5658d1ad3576fc7","099748fb95ab4942a89cc37ba2f67046","abd1fbe978bd46208e54ec410778c382","11a3455d0dd4450f87dd73430c1413ae","f54949799fe747e2b4757d73d4096
0e6","976de42087f34ed28f2efcab0737e107","0279524ba7574791bd706fa97b3b403f","8b9c3fb54710444eaea4519160ca1c2a","2d0c0d03b7234f87b66ee70da99f75d0","df002956627b496a8fe33a02bcec05a8","abcf597b372b42428cdf70189ccfe494","64830881d8854c39ba2f756d314e36b6","144ebe17cc25423a8d5ac32aa7e5c237","8acae939581c453eb0cc904684b730b0","b33ddee7648640558f1fcd15b1c3b5ea","e5d23d25fb4646d48d863ba26b592273","8256e914c50644669d8c273e6a100d43","924ac8d3f282407585506197682db1f5","3884716282074678bc09a72aa2a1719c","8c88d57999e84ea99a3c5166a2c0ed83","53b2c177789c405bb266916416e93109","00fb9f12f98b4e22a58f4b0030eec5b2","6d2ebf4bfad74f0d8995503b31de46cf","e7f41c7ef7b8464899301192fed93749","7f54d75d9ae6463a8779a9215ac24ecc","fb0037405add4963a01046b1ce4d84da","5cef08747ab6487bad4c1dfedc90dc2c","bc43e4251966462eaab4cb1767998f2c","0c7e4532bb5840869ab4d96ebac0d003","8e507b39dcb9489e8f660c2940aa5521","77d5ad9314534675a66ac3ba0b30a906","cdfdd7698432492586d0d19783b20b79","b0708dc0d1764e87b73cc301cdd13276","acf7496b27cd40469ca712e27c399d12","1385375ab40a450f8f23a8fe10c1184b","3b6f8dd8a65d4a389d5eb8f42665c6f2","5ef114d178f846d28108cf4ab47d6cbe","198cb1e53b974f1aa6ac623629b32f70","473175186eda43f8911ffddb1b44fd65","0b47ab0fa4aa4726a6907aaad9a8045f","30ff45a7d5fc445bb954fd9360bfe1bb","317f8b9163d84963a04c75eedff6dba9","64a9f8f2483b46c58fcdb9453cbbfd4f","d9fdbddf880c4a0cb12ae568c7c3bc02","92fc9ec04a1b4a94b0d480e79641c8e0","fc58444ff3d545fcb1714ac56106734c","babb5904ef30440b866ce0f1ead58b26","676319ec997a4ef1b77b6ca5bd1454a3","896dacde6f98494ea11c19059a4c21e1","5eb41f9599a04241816c359ab2654930","36c7b5884cb44aa389c2f12bedf90f09","1a1ad0162b574edb8ce2540c1d5e477b","8c47b62e4954445c99ceec249651bf6a","4f0e6fdfb7674ae8951e3072beef9930","9db747425d2c491a868b931156a4071e","625c065a3ba146c190d3f6b024869459","08da46aa6895493b9d8e425f917d0713","f66a7d2bb62d4ef9aec868904a60d8f7","19317ccee5a04fe4b2bade87cd24240c","60a576a0a174400b92b59f02b7ce24da","7b9b4f63980147a4964ef7cad66ee08e","b8917b9b5ed143df8aebdb2cfa259ac1",
"421f6c061d744875936807e23f319003","a784f312c3ed412fb4da37803dc623d8","b04c576af2aa4412b9756aa9bd683f0e","7daf76bd51514afe8984782317765a5a","e41cf0472cbf40f9b8422faef0f7df6a","05b278a44567413284655e330cc2df18","ed19a655de0e42529a3eca5b105a8e92","6e98f45681374a9aa3cf231a9327dab4","d7f7174351c44565a4778ae02f7b2d73","2d9290c11d0d45b4a6aa958e0572575d","e49c2a376b28457fab3517524e5841d9","b2397c0db81345bd9bcbecd54eb45efa","2cad0f40664d4dbd984288ddba4071c6","6a0d32fb84a84c49960baeb808bd98d4","b50cb561a7444f488f477b8a1dedbf6a","700c364b9ed742e39d7d066f38456853","a16c8faf8266441db835b2e2a5d79bbf","09ab31f01e0b43149d8abc6d4391796f","3641f29a77d34081afc26bbfb495ecda","47edd13b1e2e4d168bcd27d128c46723","31ec7df081cd4350a7ca8cc554b9f49d","75b1e6f11ab44c15a2da8fec9c9f543a","42bb6c3f4bda46b9aa3e4ca3b01e92d8","8c3a3d31ca2f4967b3f4f6f3d2ae6f90","56180eaabbda472ba401a4c033ee73df","9fa4a57f420841c9affb97477ae0fc41","2dcfc01bb0314e15937a823f9a19143b","1f198dcb97f84081bb02c70bd9c33860","293d509277c54e20bbf463ea33234aa9","2e6a59eb2a8c46aebd1776dea21d5f25","6584ea25504e48999bde51541b648769","bb9faa43334140d29e732b3bf6e86723","29dd2822500349bdb5e76d53e93072ce","eabc054fcd6b447da92f402662469c8e","af08ec8225e948648a4534d4b42b880a","38c0b2d95f2842389c12837771a8e42e","50cb4765423047cc88083d04187f5371","c68f99824c684d668f5e860a8afb2060","71a15c3988114259961d1943dd4e931c","43115512206d4717a02ccd0ef4133555","9bb79599f274472dac09277f81cf0bd1","9190d15fbcd1407b9abc3e16fc910437"]},"executionInfo":{"elapsed":436567,"status":"ok","timestamp":1716483359245,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"wroznCt0CQdT","outputId":"d288c8de-1780-44aa-d782-8d540d43f38a"},"outputs":[],"source":["# CONTEXT = IdeficsExperimentContext(EXP_DIR, server=SERVER) # quantization 'bfloat16' # Colab pro with A100 or L4, bfloat16 and FlashAttention\n","# CONTEXT = IdeficsExperimentContext(EXP_DIR, '4bits', server=SERVER) # Linux, Ampere\n","CONTEXT = 
IdeficsExperimentContext(EXP_DIR, '4bits', False, server=SERVER) # Colab Free tier, T4 GPUs don't support FlashAttention\n","CONTEXT.show()\n"]},{"cell_type":"markdown","metadata":{},"source":["# Test images\n"]},{"cell_type":"markdown","metadata":{},"source":["Copy your images to the source directory:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["cprint((EXP_DIR/'source').resolve())"]},{"cell_type":"markdown","metadata":{},"source":["or download the standard set:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !gdown --id 18TSXLCYAPxAlUsdHmgAe6FZM5d8K6gcT -O experiment.zip"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !unzip -qn experiment.zip -d ."]},{"cell_type":"markdown","metadata":{},"source":["Check the images are in place"]},{"cell_type":"code","execution_count":14,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":257,"status":"ok","timestamp":1716483368007,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"ZXL84T8dUMUm","outputId":"23d7fc26-340e-4c6f-e8e8-b104463a6cf9"},"outputs":[{"data":{"text/plain":["['00: Action_Comics_1960-01-00_(262).JPG',\n"," '01: Adolf_Cap_01_008.jpg',\n"," '02: Barnaby_v1-028.png',\n"," '03: Barnaby_v1-029.png',\n"," '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n"," '05: Cannon-292.jpg',\n"," '06: Contrato_con_Dios_028.jpg',\n"," '07: Erase_una_vez_en_Francia_02_88.jpg',\n"," '08: FOX_CHILLINTALES_T17_012.jpg',\n"," '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n"," '10: Galactus_12.jpg',\n"," '11: INOUE_KYOUMEN_002.png',\n"," '12: MCCALL_ROBINHOOD_T31_010.jpg',\n"," '13: MCCAY_LITTLENEMO_090.jpg',\n"," '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n"," '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n"," '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n"," '17: 
Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n"," '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n"," '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n"," '20: Strange_Tales_172005.jpg',\n"," '21: Strange_Tales_172021.jpg',\n"," '22: Tarzan_014-21.JPG',\n"," '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n"," '24: Transformers_-_Unicron_000-004.jpg',\n"," '25: Transformers_-_Unicron_000-016.jpg',\n"," '26: WARE_ACME_024.jpg',\n"," '27: Yoko_Tsuno_T01_1972-10.jpg',\n"," '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n"," '29: manga_0033.jpg',\n"," '30: ronson-031.jpg',\n"," '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']"]},"execution_count":14,"metadata":{},"output_type":"execute_result"}],"source":["[f\"{i:02}: {_.name}\" for i,_ in enumerate(CONTEXT.image_paths)]\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":249,"referenced_widgets":["3d836d5327a74372b7683abcfff2db7f","cce5c58aa59a4defb50cc973cadc191a","e4494cbb70ef4e2fb04a138e12c84e34","6c875b66bc4f4c2c82c1ab733a5688a5","4dfd8a5fd8e4484facbd9c0744725b62","9505f71206d54784aa9643f0732ce5ac","35cac4342a9044d991e3f53df06fd959","c84073b5707747e5994e7e21c097c0c8","7f5e49d03e4e4dc699479b28c9861e86","e498874bec694db7930da19e87551331","2ab7e8393c274604a8a33d5f84efcd58","9f20aa7c868144f5bcf5fafa2c366d7f","1ca13fea7ccb43dea1cb1994c60e2af2","bf27826f32934db290de83c493474cce","796a5da82e134de2a279039d3a120b0f","5628322d2c6c44bd882acbd629a634ea","33d7a2f0e1f74398b7fa7b5f24ca8b2d","1e23724ddd3341c4970f0f084c32c11f","1a36a60101aa405d96f154646e1b04fa","0951a0674c6b416d888f8cc62a0a0bd7","09c2ceaac90d48018d137ee2fd656f5d","08d82c48cc1e4019bc3c7c701736b52d","d468d37a817f44ebb5c8b844fc98182a","213d5015356540c19c6e529ce18fa248","2c1daabcfc484227bb8b378e4ab06e03","14620ca258c64c03ba5ed90af4ed7733","3a1dde9734a246a6af73d9b5a52bb090","d19ccb7ec79a40a483fe40e712c74c8c","04a42dffe0854eee9b3b4f12eb7683fb","3
d322afd0d754829abfc365a9044a6c0","28b2030d9fc7433baa1111795433b2d2","b654d25c42f849ac9d11ae0c76618d6d","98f0ce2e0be94481ac9bfb50d4305f62","7e90d469d6f44c22ae83f58f19afa6c1","2161a826684744bb95847f56fee5f7a2","035cea2946064cd7bd5f94a4001bffbd","10bb5a346e284e9790de482f77f34109","5d06574fe54043409073268d797ed2d6","81a51fac93124032b47605db442da668","4409342a72ed4d47b00ef8b57104943a","8c8083f3c88c493bb2c86be2ee831cc9","2cf68b7c35634bb684f1a9b5364eebff","6a39213f6bdd4f0aa6f31bc89e58f309","ce3896c172d4497b805ba27a6464c0da","0e10e9ee0da34a5fbd38d18cebc37dcb","f56c9aac9b874cb79be45bb93af2519d","83c57712c02b49b6aa44400265017bb1","902a25db5df048f9a3f5e1b3a37d3d47","852814ab86b64357bf34a7b3d5647259","d47ae1fd98304f549bb15af1885ba182","e3b557d743624c2680fedc10edc679fe","56c75f8d9290496f9f8466515cc63924","06dfdd832b014336bbf0c03a0afde588","be7840a17d064a7c8b2e2d4af600702c","f52b8b4144bd414b97fddf1e63f114b4","3293295b5c254ab8b616e97449ce3e25","4088047b645d4e27830799f7560d049f","96b6b7275b844d949a1fbda10ff8121b","016502353f614814b4e69c704e3b30fa","e7bc0e0e5fed4f6ebf068100805c2439","bc28472a9a3a4b169d12b97e7cce3559","be379bd1475147a3bd8f1bd609a41f19","ba294a8e7603470eb492fd1b64f31b24","7dbd1ebf371a45f7a04d2dbdb19619d6","857613bc6fb345988cb1d55c8b210cab","aa69c29e07674f1480124cf4a5700276"]},"executionInfo":{"elapsed":10562,"status":"ok","timestamp":1716483395560,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"GmAca8fVUMUm","outputId":"8f9ff268-bf15-48fa-e1ba-364b658f3eb2"},"outputs":[],"source":["idefics_experiment = ExperimentsVisor(\n"," CONTEXT,\n"," 'Idefics',\n"," image_idx='Strange_Tales_172005.jpg',\n"," box_idx=1,\n"," method=CropMethod.DEFAULT_GREY_PAD\n"," 
)\n","idefics_experiment\n"]},{"cell_type":"markdown","metadata":{"id":"26mWhbfGCQdU"},"source":["----"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":104},"executionInfo":{"elapsed":10458,"status":"ok","timestamp":1716483552101,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"L8ILu2EgUMUm","outputId":"771fe6ec-c630-4e49-f55a-9e0cabd2f4dc"},"outputs":[],"source":["CONTEXT.cleanup_model()\n","\n","if SERVER is not None:\n"," SERVER.stop()\n"," SERVER = None\n"," os.environ['USE_TUNNEL'] = 'False'\n"]}],"metadata":{"accelerator":"GPU","colab":{"gpuType":"T4","machine_shape":"hm","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.14"},"widgets":{"application/vnd.jupyter.widget-state+json":{"00fb9f12f98b4e22a58f4b0030eec5b2":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,
"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"016502353f614814b4e69c704e3b30fa":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":"none","flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0279524ba7574791bd706fa97b3b403f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_abcf597b372b42428cdf70189ccfe494","placeholder":"​","style":"IPY_MODEL_64830881d8854c39ba2f756d314e36b6","value":"model-00001-of-00007.safetensors: 
100%"}},"035cea2946064cd7bd5f94a4001bffbd":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"04a42dffe0854eee9b3b4f12eb7683fb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"LabelModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_83c57712c02b49b6aa44400265017bb1","placeholder":"​","style":"IPY_MODEL_902a25db5df048f9a3f5e1b3a37d3d47","value":"Box # (of 
15):"}},"053288662f604af8a6fbea87e8ef9014":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"05a94a01013d47a5b523bb322a2ef939":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_1adbc348c55d4e68ba66eb1d597c926e","IPY_MODEL_717d027f1da74bb5affa93b72b99a0ed","IPY_MODEL_9e076e294d964217a1e3ceff9b390b7a"],"layout":"IPY_MODEL_c604bd0d89304ea7973a6819fac981ac"}},"05b278a44567413284655e330cc2df18":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutVie
w","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"06dfdd832b014336bbf0c03a0afde588":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":"0px 0px 0px 
10px","right":null,"top":null,"visibility":null,"width":"initial"}},"0888d628f34b463eb01e9e5393a4f4c4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"08d82c48cc1e4019bc3c7c701736b52d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position
":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"08da46aa6895493b9d8e425f917d0713":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0951a0674c6b416d888f8cc62a0a0bd7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width
":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":"visible","width":null}},"099748fb95ab4942a89cc37ba2f67046":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"09ab31f01e0b43149d8abc6d4391796f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"09c2ceaac90d48018d137ee2fd656f5d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["model_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyte
r-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_81a51fac93124032b47605db442da668"],"layout":"IPY_MODEL_4409342a72ed4d47b00ef8b57104943a"}},"0b47ab0fa4aa4726a6907aaad9a8045f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d9fdbddf880c4a0cb12ae568c7c3bc02","placeholder":"​","style":"IPY_MODEL_92fc9ec04a1b4a94b0d480e79641c8e0","value":"model-00004-of-00007.safetensors: 100%"}},"0c7e4532bb5840869ab4d96ebac0d003":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b0708dc0d1764e87b73cc301cdd13276","placeholder":"​","style":"IPY_MODEL_acf7496b27cd40469ca712e27c399d12","value":"model-00003-of-00007.safetensors: 
100%"}},"0e10e9ee0da34a5fbd38d18cebc37dcb":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"6em"}},"0e5837b643174914b9a6b6c9e6ac3e22":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_beaea513e569447fba29ca3df3a089f5","placeholder":"​","style":"IPY_MODEL_283e34e62cbd43108fea85e4408e1451","value":" 493k/493k [00:00<00:00, 
5.69MB/s]"}},"0e7f53ea18fb47bba3f007b7116b8994":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"10442533920f484dbf1974e528c6ec1f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"10bb5a346e284e9790de482f77f34109":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":["wrapper-spinner"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_016502353f614814b4e69c704e3b30fa","placeholder":"​","style":"IPY_MODEL_e7bc0e0e5fed4f6ebf068100805c2439","value":"\n
\n
\n
\n "}},"11a3455d0dd4450f87dd73430c1413ae":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1385375ab40a450f8f23a8fe10c1184b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overfl
ow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"144ebe17cc25423a8d5ac32aa7e5c237":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"14620ca258c64c03ba5ed90af4ed7733":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"
object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"14e4633237104f15850409f4ffcfa30a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_dac4186c487e46c6b56ba7c292629a41","max":1795881,"min":0,"orientation":"horizontal","style":"IPY_MODEL_3a50e27548d741118286fabf9630c443","value":1795881}},"16fcee1797ec48fdba6c556d996eb0c9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_099748fb95ab4942a89cc37ba2f67046","max":7,"min":0,"orientation":"horizontal","style":"IPY_MODEL_abd1fbe978bd46208e54ec410778c382","value":7}},"1822f3f564d843dd843d580c3e120a80":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows"
:null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"19317ccee5a04fe4b2bade87cd24240c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"198cb1e53b974f1aa6ac623629b32f70":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1a1ad0162b574edb8ce2540c1d5e477b":{"model_module":"@jupyter-widgets/controls","mo
del_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_08da46aa6895493b9d8e425f917d0713","max":4999813712,"min":0,"orientation":"horizontal","style":"IPY_MODEL_f66a7d2bb62d4ef9aec868904a60d8f7","value":4999813712}},"1a36a60101aa405d96f154646e1b04fa":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"VBoxModel","state":{"_dom_classes":["context-visor","134608506364128"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_10bb5a346e284e9790de482f77f34109"],"layout":"IPY_MODEL_5d06574fe54043409073268d797ed2d6"}},"1adbc348c55d4e68ba66eb1d597c926e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1822f3f564d843dd843d580c3e120a80","placeholder":"​","style":"IPY_MODEL_3bc4fe63c9574d35bc8a0f3e940212c3","value":"config.json: 
100%"}},"1b8ecfca8fc64677969da2b325b13ee9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1c322cc01c284e21a18f06db8d481636":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1ca13fea7ccb43dea1cb1994c60e2af2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"VBoxModel","state":{"_dom_classes":["context-visor","134608517037552"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_d468d37a817f44ebb5c8b844fc98182a"],"layout":"IPY_MODEL_213d5015356540c19c6e529ce18fa248"}},"1
e23724ddd3341c4970f0f084c32c11f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["method_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_98f0ce2e0be94481ac9bfb50d4305f62","IPY_MODEL_7e90d469d6f44c22ae83f58f19afa6c1","IPY_MODEL_2161a826684744bb95847f56fee5f7a2"],"layout":"IPY_MODEL_035cea2946064cd7bd5f94a4001bffbd"}},"1f198dcb97f84081bb02c70bd9c33860":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"213d5015356540c19c6e529ce18fa248":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutV
iew","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2161a826684744bb95847f56fee5f7a2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Initial box","Default","Default, grey pad","Padded 4px","Padded 8px","Extracted, init box","Padded 4, extracted","Padded 8, extracted","Padded 8, dilation 1","Pad 8, fract. 0.5","Pad 8, fract. 
0.2"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":2,"layout":"IPY_MODEL_4088047b645d4e27830799f7560d049f","style":"IPY_MODEL_96b6b7275b844d949a1fbda10ff8121b"}},"2305ed3b768e4930af09fa8b2809df1a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_80ae139aadb6462db2ce3671a7903026","placeholder":"​","style":"IPY_MODEL_a33e2a16d266421aa5658d1ad3576fc7","value":"Downloading shards: 100%"}},"2770478ce8044a83b127904a8f588d5b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"283e34e62cbd43108fea85e4408e1451":{"model_module":"@jupyter-widgets
/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"28b2030d9fc7433baa1111795433b2d2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"BoundedIntTextModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"BoundedIntTextModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"IntTextView","continuous_update":false,"description":"","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_e3b557d743624c2680fedc10edc679fe","max":14,"min":0,"step":1,"style":"IPY_MODEL_56c75f8d9290496f9f8466515cc63924","value":5}},"28f525d59c094c03955d9215cf189007":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4438153338484d178ac76562ada78fc4","placeholder":"​","style":"IPY_MODEL_6a3306e72c4b447e989229c28e555e22","value":"special_tokens_map.json: 
100%"}},"293d509277c54e20bbf463ea33234aa9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"29dd2822500349bdb5e76d53e93072ce":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_50cb4765423047cc88083d04187f5371","placeholder":"​","style":"IPY_MODEL_c68f99824c684d668f5e860a8afb2060","value":"generation_config.json: 
100%"}},"2ab7e8393c274604a8a33d5f84efcd58":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2c1daabcfc484227bb8b378e4ab06e03":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["display_option_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_6a39213f6bdd4f0aa6f31bc89e58f309"],"layout":"IPY_MODEL_ce3896c172d4497b805ba27a6464c0da"}},"2c3a3903877e49a58181f813aa0e37b9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_28f52
5d59c094c03955d9215cf189007","IPY_MODEL_789385e36d1e432c95036b6e3a0054b8","IPY_MODEL_b8cd67195ca6414daa236d21c9189e39"],"layout":"IPY_MODEL_ef92abc3751b4b03a673a92e817d39de"}},"2cad0f40664d4dbd984288ddba4071c6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3641f29a77d34081afc26bbfb495ecda","placeholder":"​","style":"IPY_MODEL_47edd13b1e2e4d168bcd27d128c46723","value":" 4.25G/4.25G [00:33<00:00, 120MB/s]"}},"2cf68b7c35634bb684f1a9b5364eebff":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"2d0c0d03b7234f87b66ee70da99f75d0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b33ddee7648640558f1fcd15b1c3b5ea","placeholder":"​","style":"IPY_MODEL_e5d23d25fb4646d48d863ba26b592273","value":" 4.64G/4.64G [00:34<00:00, 
135MB/s]"}},"2d9290c11d0d45b4a6aa958e0572575d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e49c2a376b28457fab3517524e5841d9","IPY_MODEL_b2397c0db81345bd9bcbecd54eb45efa","IPY_MODEL_2cad0f40664d4dbd984288ddba4071c6"],"layout":"IPY_MODEL_6a0d32fb84a84c49960baeb808bd98d4"}},"2dcfc01bb0314e15937a823f9a19143b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2e6a59eb2a8c46aebd1776dea21d5f25":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":nul
l,"right":null,"top":null,"visibility":null,"width":null}},"2f658b34d97e48a9accb4f8e69db0524":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_895dc88a4a704b849983166ffe18428e","placeholder":"​","style":"IPY_MODEL_6d8ec2e2d9f54c08b42c4fd907012cc4","value":"preprocessor_config.json: 100%"}},"304ef8bb5c894f3fb9fbb9309fccb2db":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"30ff45a7d5fc445bb954fd9360bfe1bb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_fc58444ff3d545fcb1714ac56106734c","max":4999813704,"min":0,"orientation":"horizontal","style":"IPY_MODEL_babb5904ef30440b866ce0f1ead58b26","value":4999813704}},"317f8b9163d84963a04c75eedff6dba9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_
module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_676319ec997a4ef1b77b6ca5bd1454a3","placeholder":"​","style":"IPY_MODEL_896dacde6f98494ea11c19059a4c21e1","value":" 5.00G/5.00G [00:36<00:00, 137MB/s]"}},"319aebd2d96f4f1c845fef5543ae98ed":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"31ec7df081cd4350a7ca8cc554b9f49d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_75b1e6f11ab44c15a2da8fec9c9f543a","IPY_MODEL_42bb6c3f4bda46b9aa3e4ca3b01e92d8","IPY_MODEL_8c3a3d31ca2f4967b3f4f6f3d2ae6f90"],"layout":"IPY_MODEL_56180eaabbda472ba401a4c033ee73df"}},"3293295b5c254ab8b616e97449ce3e25":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"0px"}},"33d7a2f0e1f74398b7fa7b5f24ca8b2d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["box_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel
","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_04a42dffe0854eee9b3b4f12eb7683fb","IPY_MODEL_3d322afd0d754829abfc365a9044a6c0","IPY_MODEL_28b2030d9fc7433baa1111795433b2d2"],"layout":"IPY_MODEL_b654d25c42f849ac9d11ae0c76618d6d"}},"35cac4342a9044d991e3f53df06fd959":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3641f29a77d34081afc26bbfb495ecda":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,
"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"36c7b5884cb44aa389c2f12bedf90f09":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9db747425d2c491a868b931156a4071e","placeholder":"​","style":"IPY_MODEL_625c065a3ba146c190d3f6b024869459","value":"model-00005-of-00007.safetensors: 100%"}},"37f933c907154d3cb326214e88da14f5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_58335b09c62a4c77bb4dbbd2180f00f5","placeholder":"​","style":"IPY_MODEL_d991113b7e924a9ba14b2ee034bc58bb","value":" 483/483 [00:00<00:00, 
33.4kB/s]"}},"3884716282074678bc09a72aa2a1719c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e7f41c7ef7b8464899301192fed93749","max":4987246632,"min":0,"orientation":"horizontal","style":"IPY_MODEL_7f54d75d9ae6463a8779a9215ac24ecc","value":4987246632}},"38c0b2d95f2842389c12837771a8e42e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"397f4e0b79e84838ba3640db134d7b0d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@
jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_11a3455d0dd4450f87dd73430c1413ae","placeholder":"​","style":"IPY_MODEL_f54949799fe747e2b4757d73d40960e6","value":" 7/7 [04:16<00:00, 36.44s/it]"}},"3a1dde9734a246a6af73d9b5a52bb090":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"save","disabled":false,"icon":"","layout":"IPY_MODEL_0e10e9ee0da34a5fbd38d18cebc37dcb","style":"IPY_MODEL_f56c9aac9b874cb79be45bb93af2519d","tooltip":""}},"3a50e27548d741118286fabf9630c443":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"3a90615cbf054a2c9518f6c7715a483b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_d8416277ca174bdebc879a5eeb559203","max":483,"min":0,"orientation":"horizontal","style":"IPY_MODEL_bd0155b640a9429fab35df32d32d3b5c","value":483}},"3b6f8dd8a65d4a389d5eb8f42665c6f2":{"model_module":"@jupyter-widgets/controls"
,"model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"3bc4fe63c9574d35bc8a0f3e940212c3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"3c4f71788c75498faf2738f751e924f3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f0d73ff6d04040f1962fd50d2352f23d","placeholder":"​","style":"IPY_MODEL_a1b9b514669e4a2fae3a401d770a699e","value":" 92.0/92.0 [00:00<00:00, 
6.42kB/s]"}},"3d322afd0d754829abfc365a9044a6c0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"CheckboxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"all","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_852814ab86b64357bf34a7b3d5647259","style":"IPY_MODEL_d47ae1fd98304f549bb15af1885ba182","value":false}},"3d836d5327a74372b7683abcfff2db7f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"VBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_cce5c58aa59a4defb50cc973cadc191a","IPY_MODEL_e4494cbb70ef4e2fb04a138e12c84e34","IPY_MODEL_6c875b66bc4f4c2c82c1ab733a5688a5","IPY_MODEL_4dfd8a5fd8e4484facbd9c0744725b62"],"layout":"IPY_MODEL_9505f71206d54784aa9643f0732ce5ac"}},"3f3922cb87fb407499422ab1b43ca6a5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"3fdbba3cf67d46cfb1632f6dcfb0210c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_modu
le_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4088047b645d4e27830799f7560d049f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"150px"}},"413317a231404aa9a2a6580775f06fcb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7746e9fad11f4a64996a1c41d0abfc33","placeholder":"​","style":"IPY_MODEL_74ea647617734b52b8e07d4246d18109","value":" 1.80M/1.80M [00:00<00:00, 
13.8MB/s]"}},"421f6c061d744875936807e23f319003":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_05b278a44567413284655e330cc2df18","max":4832008016,"min":0,"orientation":"horizontal","style":"IPY_MODEL_ed19a655de0e42529a3eca5b105a8e92","value":4832008016}},"42bb6c3f4bda46b9aa3e4ca3b01e92d8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_1f198dcb97f84081bb02c70bd9c33860","max":7,"min":0,"orientation":"horizontal","style":"IPY_MODEL_293d509277c54e20bbf463ea33234aa9","value":7}},"43115512206d4717a02ccd0ef4133555":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"4409342a72ed4d47b00ef8b57104943a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-w
idgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4438153338484d178ac76562ada78fc4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"47000f0cc4ce485b9f249ac32a0dc75b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_mod
ule_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"473175186eda43f8911ffddb1b44fd65":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0b47ab0fa4aa4726a6907aaad9a8045f","IPY_MODEL_30ff45a7d5fc445bb954fd9360bfe1bb","IPY_MODEL_317f8b9163d84963a04c75eedff6dba9"],"layout":"IPY_MODEL_64a9f8f2483b46c58fcdb9453cbbfd4f"}},"47edd13b1e2e4d168bcd27d128c46723":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4cda5365a57e453e9bbdcc0de548f1e3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4dfd8a5fd8e4484facbd9c0744725b62":{"model_module":"@jupyter-widgets/output","model_module_version":"1.0.0","model_name":"OutputModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/output","_model_module_version":"1.0.0","_model_name":"OutputModel","_view_count":null,"_view_module":"@jupyter-widgets/output","_view_module_version":"1.0.0","_view_name":"OutputView","la
yout":"IPY_MODEL_aa69c29e07674f1480124cf4a5700276","msg_id":"","outputs":[{"data":{"text/html":"
Tell me master how may bambi serve?
0.90
\n
\n
Tell me, master-- how may Bambu serve?

Tell me master⎕⎕ how may bambi serve?
","text/plain":""},"metadata":{},"output_type":"display_data"}]}},"4f0e6fdfb7674ae8951e3072beef9930":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5090734da55b47d5a2a6087e51891e0d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ae0c642d219a41629b62cf8560823c1c","placeholder":"​","style":"IPY_MODEL_ea5a56efc11845e2bf38a65d8e93af7c","value":" 1.64k/1.64k [00:00<00:00, 
97.0kB/s]"}},"50cb4765423047cc88083d04187f5371":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"53b2c177789c405bb266916416e93109":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"56180eaabbda472ba401a4c033ee73df":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5628322d2c6c44bd882acbd629a634ea":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":
null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"56a8217adf3944b980a92b2aaea2bd39":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_935ded8cabb04c31b334a30acd346ae7","max":1636,"min":0,"orientation":"horizontal","style":"IPY_MODEL_eed1b76b9a824cca8f7ddb490bcec2ff","value":1636}},"56c75f8d9290496f9f8466515cc63924":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"58335b09c62a4c77bb4dbbd2180f00f5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margi
n":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"58541ced68a4479d9926c09f14276f23":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"5cb88a274e4c40b3a505582d54578a75":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_7515ab4558124374a66e7af8ac063846","max":74354,"min":0,"orientation":"horizontal","style":"IPY_MODEL_9f9efc028dc1431ba717dfdac07ba7da","value":74354}},"5cef08747ab6487bad4c1dfedc90dc2c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"5d06574fe54043409073268d797ed2d6":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":
"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5eb41f9599a04241816c359ab2654930":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_36c7b5884cb44aa389c2f12bedf90f09","IPY_MODEL_1a1ad0162b574edb8ce2540c1d5e477b","IPY_MODEL_8c47b62e4954445c99ceec249651bf6a"],"layout":"IPY_MODEL_4f0e6fdfb7674ae8951e3072beef9930"}},"5ef114d178f846d28108cf4ab47d6cbe":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template
_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"60a576a0a174400b92b59f02b7ce24da":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"625c065a3ba146c190d3f6b024869459":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"64830881d8854c39ba2f756d314e36b6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"64a9f8f2483b46c58fcdb9453cbbfd4f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":
null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6584ea25504e48999bde51541b648769":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"676319ec997a4ef1b77b6ca5bd1454a3":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":n
ull,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"69faa9295aa143629f8ba0ce018454d4":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ac43bf53858a4c1dadc600f3322f2aca","placeholder":"​","style":"IPY_MODEL_b7d7719b807048cb945fb958b06402a7","value":"tokenizer.json: 100%"}},"6a0d32fb84a84c49960baeb808bd98d4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6a3306e72c4b447e989229c28e555e22":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleMode
l","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6a39213f6bdd4f0aa6f31bc89e58f309":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Boxes","Image","Mask","Image & Mask","Page data","Ground truth","Image All","Results","Best results","Accuracy","Dataframe","Config"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":7,"layout":"IPY_MODEL_ba294a8e7603470eb492fd1b64f31b24","style":"IPY_MODEL_7dbd1ebf371a45f7a04d2dbdb19619d6"}},"6aceed8edb8e4105b1f807e57bb25d5e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6c875b66bc4f4c2c82c1ab733a5688a5":{"model_module":"@jupyter-widgets/output","model_module_ve
rsion":"1.0.0","model_name":"OutputModel","state":{"_dom_classes":["message_visor-yXy"],"_model_module":"@jupyter-widgets/output","_model_module_version":"1.0.0","_model_name":"OutputModel","_view_count":null,"_view_module":"@jupyter-widgets/output","_view_module_version":"1.0.0","_view_name":"OutputView","layout":"IPY_MODEL_857613bc6fb345988cb1d55c8b210cab","msg_id":"","outputs":[]}},"6cf2d63005134660a189daae0ad36707":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6d0fe13087244d38b7e2c81442467ad0":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"
grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6d2ebf4bfad74f0d8995503b31de46cf":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6d8ec2e2d9f54c08b42c4fd907012cc4":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6e98f45681374a9aa3cf231a9327dab4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":nul
l,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"700c364b9ed742e39d7d066f38456853":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"717d027f1da74bb5affa93b72b99a0ed":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e71b8879e55848479fa6f20a4f5010ea","max":684,"min":0,"orientation":"horizontal","style":"IPY_MODEL_cd3f6165ed144369a54e4b784cac2a99","value":684}},"71a15c3988114259961d1943dd4e931c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_templa
te_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"74ea647617734b52b8e07d4246d18109":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7515ab4558124374a66e7af8ac063846":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"75b1e6f11ab44c15a2da8fec9c9f543a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_c
lasses":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9fa4a57f420841c9affb97477ae0fc41","placeholder":"​","style":"IPY_MODEL_2dcfc01bb0314e15937a823f9a19143b","value":"Loading checkpoint shards: 100%"}},"75c820e2cdda451f9ac7daabe0fcc8ca":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_b7838cbddd984b368ea471cee96ff02e","IPY_MODEL_c3485b737f59434aa7c757ce7d4f86d2","IPY_MODEL_3c4f71788c75498faf2738f751e924f3"],"layout":"IPY_MODEL_d20d3a3712f64dceaeea4b6aeb435b8d"}},"7746e9fad11f4a64996a1c41d0abfc33":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow
_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"77d5ad9314534675a66ac3ba0b30a906":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5ef114d178f846d28108cf4ab47d6cbe","placeholder":"​","style":"IPY_MODEL_198cb1e53b974f1aa6ac623629b32f70","value":" 4.90G/4.90G [00:38<00:00, 138MB/s]"}},"786b35b5a60b4ef3afd8f04c9240bf1a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"789385e36d1e432c95036b6e3a0054b8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_1b8ecfca8fc64677969da2b325b13ee9","max":1041,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d338945414dc4b23bb1dc1b81cc17be0","value":1041}},"796a5da82e134de2a279039d3a120b0f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"VBoxModel","state":{"_dom_classes":["context-visor","134608506364320"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_mo
del_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_3a1dde9734a246a6af73d9b5a52bb090"],"layout":"IPY_MODEL_d19ccb7ec79a40a483fe40e712c74c8c"}},"7b9b4f63980147a4964ef7cad66ee08e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_b8917b9b5ed143df8aebdb2cfa259ac1","IPY_MODEL_421f6c061d744875936807e23f319003","IPY_MODEL_a784f312c3ed412fb4da37803dc623d8"],"layout":"IPY_MODEL_b04c576af2aa4412b9756aa9bd683f0e"}},"7daf76bd51514afe8984782317765a5a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7dbd1ebf371a45f7a04d2dbdb19619d6":{"model_module":"@jupyter-widgets/controls","model_module_v
ersion":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"7e90d469d6f44c22ae83f58f19afa6c1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"CheckboxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"all","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_f52b8b4144bd414b97fddf1e63f114b4","style":"IPY_MODEL_3293295b5c254ab8b616e97449ce3e25","value":false}},"7f54d75d9ae6463a8779a9215ac24ecc":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"7f5e49d03e4e4dc699479b28c9861e86":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_9f20aa7c868144f5bcf5fafa2c366d7f","IPY_MODEL_1ca13fea7ccb43dea1cb1994c60e2af2","IPY_MODEL_bf27826f32934db290de83c493474cce","IPY_MODEL_796a5da82e134de2a279039d3a120b0f"],"layout":"IPY_MODEL_5628322d2c6c44bd882acbd629a634ea"}},"80ae139aadb6462db2
ce3671a7903026":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8173703d1170428f9f632d0f9b3c51fb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e8a8f2864e2346e9abd9ea7e6371901f","placeholder":"​","style":"IPY_MODEL_9493f753d3d746ebaf860074a43f8b5c","value":"processor_config.json: 
100%"}},"81a51fac93124032b47605db442da668":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Idefics-crop-post"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":0,"layout":"IPY_MODEL_bc28472a9a3a4b169d12b97e7cce3559","style":"IPY_MODEL_be379bd1475147a3bd8f1bd609a41f19"}},"8256e914c50644669d8c273e6a100d43":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_924ac8d3f282407585506197682db1f5","IPY_MODEL_3884716282074678bc09a72aa2a1719c","IPY_MODEL_8c88d57999e84ea99a3c5166a2c0ed83"],"layout":"IPY_MODEL_53b2c177789c405bb266916416e93109"}},"83c57712c02b49b6aa44400265017bb1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"ma
rgin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":"0px 0px 0px 10px","right":null,"top":null,"visibility":null,"width":"initial"}},"852814ab86b64357bf34a7b3d5647259":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"initial"}},"857613bc6fb345988cb1d55c8b210cab":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null
,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"895dc88a4a704b849983166ffe18428e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"896dacde6f98494ea11c19059a4c21e1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8acae939581c453eb0cc904684b730b0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/control
s","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"8b9c3fb54710444eaea4519160ca1c2a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_144ebe17cc25423a8d5ac32aa7e5c237","max":4644107632,"min":0,"orientation":"horizontal","style":"IPY_MODEL_8acae939581c453eb0cc904684b730b0","value":4644107632}},"8c3a3d31ca2f4967b3f4f6f3d2ae6f90":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_2e6a59eb2a8c46aebd1776dea21d5f25","placeholder":"​","style":"IPY_MODEL_6584ea25504e48999bde51541b648769","value":" 7/7 [02:49<00:00, 
23.27s/it]"}},"8c47b62e4954445c99ceec249651bf6a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_19317ccee5a04fe4b2bade87cd24240c","placeholder":"​","style":"IPY_MODEL_60a576a0a174400b92b59f02b7ce24da","value":" 5.00G/5.00G [00:38<00:00, 139MB/s]"}},"8c8083f3c88c493bb2c86be2ee831cc9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"fit-content"}},"8c88d57999e84ea99a3c5166a2c0ed83":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_v
ersion":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_fb0037405add4963a01046b1ce4d84da","placeholder":"​","style":"IPY_MODEL_5cef08747ab6487bad4c1dfedc90dc2c","value":" 4.99G/4.99G [00:36<00:00, 138MB/s]"}},"8e263c7c07e34d9e8cf475f8f9b0c707":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_d753fc74422941cfa7031efe2c996f50","IPY_MODEL_5cb88a274e4c40b3a505582d54578a75","IPY_MODEL_cefeb098a7b04f2782b00e28cd809833"],"layout":"IPY_MODEL_dedf69f896794fdab878d78a44db1fcd"}},"8e507b39dcb9489e8f660c2940aa5521":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_1385375ab40a450f8f23a8fe10c1184b","max":4899116968,"min":0,"orientation":"horizontal","style":"IPY_MODEL_3b6f8dd8a65d4a389d5eb8f42665c6f2","value":4899116968}},"8eac691047c84830b008fa8c74de8e59":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_a
rea":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8f04aee77f594579a7b257df7760ba07":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_bca7db38272d4b5dba307fbc893512a9","placeholder":"​","style":"IPY_MODEL_319aebd2d96f4f1c845fef5543ae98ed","value":" 460/460 [00:00<00:00, 
35.6kB/s]"}},"902a25db5df048f9a3f5e1b3a37d3d47":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9175ea3267514e2b946b7f583bd6c448":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9190d15fbcd1407b9abc3e16fc910437":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"924ac8d3f282407585506197682db1f5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLMod
el","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_00fb9f12f98b4e22a58f4b0030eec5b2","placeholder":"​","style":"IPY_MODEL_6d2ebf4bfad74f0d8995503b31de46cf","value":"model-00002-of-00007.safetensors: 100%"}},"92fc9ec04a1b4a94b0d480e79641c8e0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"935ded8cabb04c31b334a30acd346ae7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9493f753d3d746ebaf860074a43f8b5c":{"model_module":"@jupyter-widgets/controls","mod
el_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9505f71206d54784aa9643f0732ce5ac":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"96b6b7275b844d949a1fbda10ff8121b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"976de42087f34ed28f2efcab0737e107":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module
_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0279524ba7574791bd706fa97b3b403f","IPY_MODEL_8b9c3fb54710444eaea4519160ca1c2a","IPY_MODEL_2d0c0d03b7234f87b66ee70da99f75d0"],"layout":"IPY_MODEL_df002956627b496a8fe33a02bcec05a8"}},"98f0ce2e0be94481ac9bfb50d4305f62":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"LabelModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_06dfdd832b014336bbf0c03a0afde588","placeholder":"​","style":"IPY_MODEL_be7840a17d064a7c8b2e2d4af600702c","value":"Method:"}},"9bb79599f274472dac09277f81cf0bd1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9c002b71
e1f144998d8f5ce683766c2a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9db747425d2c491a868b931156a4071e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padd
ing":null,"right":null,"top":null,"visibility":null,"width":null}},"9e076e294d964217a1e3ceff9b390b7a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_bf8adc8c29eb4021a532561b374cade2","placeholder":"​","style":"IPY_MODEL_786b35b5a60b4ef3afd8f04c9240bf1a","value":" 684/684 [00:00<00:00, 50.8kB/s]"}},"9f20aa7c868144f5bcf5fafa2c366d7f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","134608383824016"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_09c2ceaac90d48018d137ee2fd656f5d"],"layout":"IPY_MODEL_08d82c48cc1e4019bc3c7c701736b52d"}},"9f9efc028dc1431ba717dfdac07ba7da":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"9fa4a57f420841c9affb97477ae0fc41":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":nu
ll,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9fd71f03002d445c8e70342c0513e369":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_c7a74d0ee2254e6da29eeef1a3298656","max":493443,"min":0,"orientation":"horizontal","style":"IPY_MODEL_58541ced68a4479d9926c09f14276f23","value":493443}},"a16c8faf8266441db835b2e2a5d79bbf":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_r
ows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a1b9b514669e4a2fae3a401d770a699e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a33e2a16d266421aa5658d1ad3576fc7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a784f312c3ed412fb4da37803dc623d8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6e98f45681374a9aa3cf231a9327dab4","placeholder":"​","style":"IPY_MODEL_d7f7174351c44565a4778ae02f7b2d73","value":" 4.83G/4.83G [00:37<00:00, 
139MB/s]"}},"aa69c29e07674f1480124cf4a5700276":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"abcf597b372b42428cdf70189ccfe494":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"o
verflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"abd1fbe978bd46208e54ec410778c382":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"ac247fdb754c460dac3d2a731a04f74b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ac43bf53858a4c1dadc600f3322f2aca":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":n
ull,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"acf7496b27cd40469ca712e27c399d12":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ae0c642d219a41629b62cf8560823c1c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"v
isibility":null,"width":null}},"ae61d812a96a48cf84a788af5337d51e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_2770478ce8044a83b127904a8f588d5b","placeholder":"​","style":"IPY_MODEL_3fdbba3cf67d46cfb1632f6dcfb0210c","value":"tokenizer.model: 100%"}},"af08ec8225e948648a4534d4b42b880a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9bb79599f274472dac09277f81cf0bd1","placeholder":"​","style":"IPY_MODEL_9190d15fbcd1407b9abc3e16fc910437","value":" 185/185 [00:00<00:00, 
13.2kB/s]"}},"b02ed5020f71474a9ed0c008abb40efb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b04c576af2aa4412b9756aa9bd683f0e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b0708dc0d1764e87b73cc301cdd13276":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,
"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b2397c0db81345bd9bcbecd54eb45efa":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_a16c8faf8266441db835b2e2a5d79bbf","max":4249064488,"min":0,"orientation":"horizontal","style":"IPY_MODEL_09ab31f01e0b43149d8abc6d4391796f","value":4249064488}},"b32a63ac227a43989f0bff7182007770":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2f658b34d97e48a9accb4f8e69db0524","IPY_MODEL_ba5183d590eb4269984812564b9136d1","IPY_MODEL_8f04aee77f594579a7b257df7760ba07"],"layout":"IPY_MODEL_9175ea3267514e2b946b7f583bd6c448"}},"b33ddee7648640558f1fcd15b1c3b5ea":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel",
"_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b50cb561a7444f488f477b8a1dedbf6a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b654d25c42f849ac9d11ae0c76618d6d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyte
r-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b73009bd316d474c83984e19bbe1dd5d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_8173703d1170428f9f632d0f9b3c51fb","IPY_MODEL_3a90615cbf054a2c9518f6c7715a483b","IPY_MODEL_37f933c907154d3cb326214e88da14f5"],"layout":"IPY_MODEL_8eac691047c84830b008fa8c74de8e59"}},"b7838cbddd984b368ea471cee96ff02e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e98b093c952c4e3293abdf5b736b3bd7","placeholder":"​","style":"IPY_MODEL_1c322cc01c28
4e21a18f06db8d481636","value":"added_tokens.json: 100%"}},"b7d7719b807048cb945fb958b06402a7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"b8917b9b5ed143df8aebdb2cfa259ac1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7daf76bd51514afe8984782317765a5a","placeholder":"​","style":"IPY_MODEL_e41cf0472cbf40f9b8422faef0f7df6a","value":"model-00006-of-00007.safetensors: 100%"}},"b8cd67195ca6414daa236d21c9189e39":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6cf2d63005134660a189daae0ad36707","placeholder":"​","style":"IPY_MODEL_3f3922cb87fb407499422ab1b43ca6a5","value":" 1.04k/1.04k [00:00<00:00, 
63.8kB/s]"}},"ba294a8e7603470eb492fd1b64f31b24":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"120px"}},"ba5183d590eb4269984812564b9136d1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_bc735fd356114f6aab9a94e4ff2e88a0","max":460,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c6cc1ed486e340ca9c10f83fb6d93473","value":460}},"babb5904ef30440b866ce0f1ead58b26":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-wid
gets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"bb9faa43334140d29e732b3bf6e86723":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_29dd2822500349bdb5e76d53e93072ce","IPY_MODEL_eabc054fcd6b447da92f402662469c8e","IPY_MODEL_af08ec8225e948648a4534d4b42b880a"],"layout":"IPY_MODEL_38c0b2d95f2842389c12837771a8e42e"}},"bbdcdc291ad74258b554e72073e1ede8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_ae61d812a96a48cf84a788af5337d51e","IPY_MODEL_9fd71f03002d445c8e70342c0513e369","IPY_MODEL_0e5837b643174914b9a6b6c9e6ac3e22"],"layout":"IPY_MODEL_0888d628f34b463eb01e9e5393a4f4c4"}},"bc28472a9a3a4b169d12b97e7cce3559":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_templ
ate_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"fit-content"}},"bc43e4251966462eaab4cb1767998f2c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0c7e4532bb5840869ab4d96ebac0d003","IPY_MODEL_8e507b39dcb9489e8f660c2940aa5521","IPY_MODEL_77d5ad9314534675a66ac3ba0b30a906"],"layout":"IPY_MODEL_cdfdd7698432492586d0d19783b20b79"}},"bc735fd356114f6aab9a94e4ff2e88a0":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bca7d
b38272d4b5dba307fbc893512a9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bd0155b640a9429fab35df32d32d3b5c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"be379bd1475147a3bd8f1bd609a41f19":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"be7840a17d064a7c8b2e2d4af600702c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"Descript
ionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"beaea513e569447fba29ca3df3a089f5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bf27826f32934db290de83c493474cce":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","134608517036592"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2c1daabcfc484227bb8b378e4ab06e03"],"layout":"IPY_MODEL_14620ca258c64c03ba5ed90af4ed7733"}},"bf8adc8c29eb4021a532561b374cade2":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{
"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c3485b737f59434aa7c757ce7d4f86d2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_053288662f604af8a6fbea87e8ef9014","max":92,"min":0,"orientation":"horizontal","style":"IPY_MODEL_b02ed5020f71474a9ed0c008abb40efb","value":92}},"c604bd0d89304ea7973a6819fac981ac":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"
flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c68f99824c684d668f5e860a8afb2060":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c6cc1ed486e340ca9c10f83fb6d93473":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"c7a74d0ee2254e6da29eeef1a3298656":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":n
ull,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c84073b5707747e5994e7e21c097c0c8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"cce5c58aa59a4defb50cc973cadc191a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_35cac4342a9044d991e3f53df06fd959","placeholder":"​","style":"IPY_MODEL_c84073b5707747e5994e7e21c097c0c8","value":""}},"cd3f6165ed144369a54e4b784cac2a99":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"cdfdd7698432492586d0d19783b20b79":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widget
s/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ce3896c172d4497b805ba27a6464c0da":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cefeb098a7b04f2782b00e28cd809833":{"model_module":"@jupyter-widgets/controls","model_module_versi
on":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ac247fdb754c460dac3d2a731a04f74b","placeholder":"​","style":"IPY_MODEL_47000f0cc4ce485b9f249ac32a0dc75b","value":" 74.4k/74.4k [00:00<00:00, 5.22MB/s]"}},"d19ccb7ec79a40a483fe40e712c74c8c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d20d3a3712f64dceaeea4b6aeb435b8d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"fl
ex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d338945414dc4b23bb1dc1b81cc17be0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d468d37a817f44ebb5c8b844fc98182a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Action_Comics_1960-01-00_(262)","Adolf_Cap_01_008","Barnaby_v1-028","Barnaby_v1-029","Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013","Cannon-292","Contrato_con_Dios_028","Erase_una_vez_en_Francia_02_88","FOX_CHILLINTALES_T17_012","Furari_-_Jiro_Taniguchi_selma_056","Galactus_12","INOUE_KYOUMEN_002","MCCALL_ROBINHOOD_T31_010","MCCAY_LITTLENEMO_090","Mary_Perkins_On_Stage_v2006_1_-_P00068","PIKE_BOYLOVEGIRLS_T41_012","Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1","Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K","Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2","Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024","Strange_Tales_172005","Strange_Tales_172021","Tarzan_014-
21","Tintin_21_Les_Bijoux_de_la_Castafiore_page_39","Transformers_-_Unicron_000-004","Transformers_-_Unicron_000-016","WARE_ACME_024","Yoko_Tsuno_T01_1972-10","Your_Name_Another_Side_Earthbound_T02_084","manga_0033","ronson-031","哀心迷図のバベル 第01巻 - 22002_00_059"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":20,"layout":"IPY_MODEL_8c8083f3c88c493bb2c86be2ee831cc9","style":"IPY_MODEL_2cf68b7c35634bb684f1a9b5364eebff"}},"d47ae1fd98304f549bb15af1885ba182":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"0px"}},"d6f64705e9da407f9959f1b124e8e47e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2305ed3b768e4930af09fa8b2809df1a","IPY_MODEL_16fcee1797ec48fdba6c556d996eb0c9","IPY_MODEL_397f4e0b79e84838ba3640db134d7b0d"],"layout":"IPY_MODEL_0e7f53ea18fb47bba3f007b7116b8994"}},"d753fc74422941cfa7031efe2c996f50":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,
"layout":"IPY_MODEL_6d0fe13087244d38b7e2c81442467ad0","placeholder":"​","style":"IPY_MODEL_4cda5365a57e453e9bbdcc0de548f1e3","value":"model.safetensors.index.json: 100%"}},"d7f7174351c44565a4778ae02f7b2d73":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d8416277ca174bdebc879a5eeb559203":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d991113b7e924a9ba14b2ee034bc58bb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleVie
w","description_width":""}},"d9c15d9a41f24f1a86084b0c863d3af8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_69faa9295aa143629f8ba0ce018454d4","IPY_MODEL_14e4633237104f15850409f4ffcfa30a","IPY_MODEL_413317a231404aa9a2a6580775f06fcb"],"layout":"IPY_MODEL_10442533920f484dbf1974e528c6ec1f"}},"d9fdbddf880c4a0cb12ae568c7c3bc02":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dac4186c487e46c6b56ba7c292629a41":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_v
iew_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dedf69f896794fdab878d78a44db1fcd":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"df002956627b496a8fe33a02bcec05a8":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":nu
ll,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e2f25159f7b249b9a2d820201413e190":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9c002b71e1f144998d8f5ce683766c2a","placeholder":"​","style":"IPY_MODEL_304ef8bb5c894f3fb9fbb9309fccb2db","value":"tokenizer_config.json: 
100%"}},"e3b557d743624c2680fedc10edc679fe":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50px"}},"e41cf0472cbf40f9b8422faef0f7df6a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e4494cbb70ef4e2fb04a138e12c84e34":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"VBoxModel","state":{"_dom_classes":["context-visor","134608384422912"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_7f5e49d03e4e4dc699479b28c9861e86","IPY_MODEL_e498874bec694db7930da19e87551331"],"layout":"I
PY_MODEL_2ab7e8393c274604a8a33d5f84efcd58"}},"e498874bec694db7930da19e87551331":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","134608506364032"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_33d7a2f0e1f74398b7fa7b5f24ca8b2d","IPY_MODEL_1e23724ddd3341c4970f0f084c32c11f","IPY_MODEL_1a36a60101aa405d96f154646e1b04fa"],"layout":"IPY_MODEL_0951a0674c6b416d888f8cc62a0a0bd7"}},"e49c2a376b28457fab3517524e5841d9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b50cb561a7444f488f477b8a1dedbf6a","placeholder":"​","style":"IPY_MODEL_700c364b9ed742e39d7d066f38456853","value":"model-00007-of-00007.safetensors: 
100%"}},"e5d23d25fb4646d48d863ba26b592273":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e71b8879e55848479fa6f20a4f5010ea":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e7bc0e0e5fed4f6ebf068100805c2439":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e7f41c7ef7b8464899301192fed93749":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","st
ate":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e8a8f2864e2346e9abd9ea7e6371901f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e98b093c952c4e3293abdf5b736b3bd7":{"model_module":"@jupyter
-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ea5a56efc11845e2bf38a65d8e93af7c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"eabc054fcd6b447da92f402662469c8e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_71a15c3988114259961d1943dd4e931c","max":185,"min":0,"orientation":"horizontal","style":"IPY_MODEL_43115512206d4717a02ccd0ef4
133555","value":185}},"ed19a655de0e42529a3eca5b105a8e92":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"eed1b76b9a824cca8f7ddb490bcec2ff":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"ef92abc3751b4b03a673a92e817d39de":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f0d73ff6d04040f1962fd50d2352f23d":{"model_module":"@jupyter-widgets/base","model_module_version":"1
.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f10a031bce3948e2bccca61bbb6f101c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e2f25159f7b249b9a2d820201413e190","IPY_MODEL_56a8217adf3944b980a92b2aaea2bd39","IPY_MODEL_5090734da55b47d5a2a6087e51891e0d"],"layout":"IPY_MODEL_6aceed8edb8e4105b1f807e57bb25d5e"}},"f52b8b4144bd414b97fddf1e63f114b4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"fle
x":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"initial"}},"f54949799fe747e2b4757d73d40960e6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f56c9aac9b874cb79be45bb93af2519d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":"lightblue","font_weight":"bold"}},"f66a7d2bb62d4ef9aec868904a60d8f7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"fb0037405add4963a01046b1ce4d84da":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyte
r-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fc58444ff3d545fcb1714ac56106734c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/pcleaner/_testbed/test_tesseract.ipynb 
b/pcleaner/_testbed/test_tesseract.ipynb new file mode 100644 index 00000000..24ca973d --- /dev/null +++ b/pcleaner/_testbed/test_tesseract.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"XLuXdPExZlIq"},"source":["# Testing `Tesseract` OCR for Comics\n","> Accuracy Enhancements for OCR in `PanelCleaner`\n"]},{"cell_type":"markdown","metadata":{"id":"O02O4qXwZlIr"},"source":["## Settings for Google Colab"]},{"cell_type":"markdown","metadata":{"id":"dD_MtS4cZlIr"},"source":["We will install the more up-to-date version of PanelCleaner from GitHub. Only affects Colab notebooks."]},{"cell_type":"code","execution_count":1,"metadata":{"id":"V3ABUVumZlIr"},"outputs":[],"source":["DEV_INSTALL = True"]},{"cell_type":"markdown","metadata":{"id":"jgisVUTBZlIs"},"source":["The best way to get the images source of the experiments is to mount your Google Drive.\n"]},{"cell_type":"code","execution_count":2,"metadata":{"id":"fFZBHyWLZlIs"},"outputs":[],"source":["MOUNT_DRIVE = DEV_INSTALL\n","GDRIVE_MOUNT_POINT = 'drive'\n"]},{"cell_type":"markdown","metadata":{"id":"X8I28oA1ZlIs"},"source":["# install (Colab)"]},{"cell_type":"code","execution_count":3,"metadata":{"id":"nT4E7IXbcUbp"},"outputs":[],"source":["import fastcore.all as FC\n"]},{"cell_type":"code","execution_count":4,"metadata":{"id":"jpnPDDFIcFh0"},"outputs":[],"source":["if FC.IN_COLAB:\n"," !pip install -q pyngrok\n"]},{"cell_type":"markdown","metadata":{"id":"gqYSM7iBcZsH"},"source":["Mount Google Drive"]},{"cell_type":"code","execution_count":5,"metadata":{"id":"bPF0CtUaZlIs"},"outputs":[],"source":["import os\n","import re\n","from pathlib import Path\n","\n","from rich import print as cprint\n","from rich.text import Text\n","\n","def info(msg: str):\n"," text = Text(msg)\n"," text.stylize(\"bold red\", 0, 6)\n"," cprint(\"_\" * 10, text, \"_\" * 10)\n","\n","\n","if FC.IN_COLAB:\n"," if MOUNT_DRIVE:\n"," mnt_point = f\"/content/{GDRIVE_MOUNT_POINT}\"\n"," if not 
Path(mnt_point).exists():\n"," info(\"Mounting Google Drive\")\n"," from google.colab import drive\n","\n"," drive.mount(mnt_point, force_remount=True)\n"]},{"cell_type":"markdown","metadata":{"id":"UjPTi7MfcdxU"},"source":["Install **PanelCleaner**"]},{"cell_type":"code","execution_count":6,"metadata":{"id":"7y3qqqy6ZlIt"},"outputs":[],"source":["if FC.IN_COLAB:\n"," info('Installing PanelCleaner')\n"," if DEV_INSTALL:\n"," assert MOUNT_DRIVE, \"DEV_INSTALL need a mounted google drive\"\n"," info('Installing PanelCleaner from Google Drive')\n"," os.chdir(f\"/content/{GDRIVE_MOUNT_POINT}/MyDrive/Shared/PanelCleaner/\")\n"," !pip install -e .\n"," else:\n"," info('Installing PanelCleaner from Github')\n"," !pip install -q git+https://github.com/civvic/PanelCleaner.git@testbed\n"]},{"cell_type":"markdown","metadata":{"id":"6ylyum-ibQdL"},"source":["**PanelCleaner** is a heavy-weight and sometimes **Colab** refuses (*silently*) to install it. If the cell below gives an error, re-run the cell above. That usually fixes the problem."]},{"cell_type":"code","execution_count":7,"metadata":{"id":"oE3PogewbUQO"},"outputs":[],"source":["import importlib.resources\n","package_path = importlib.resources.files('pcleaner')\n","assert package_path.name == 'pcleaner'\n","\n","os.chdir(package_path/'_testbed')"]},{"cell_type":"code","execution_count":8,"metadata":{"id":"18HJJTQebWoV"},"outputs":[],"source":["from pcleaner._testbed.testbed.experiments import ExperimentsVisor, CropMethod, OCRExperimentContext\n"]},{"cell_type":"markdown","metadata":{},"source":["## Tesseract setup"]},{"cell_type":"markdown","metadata":{},"source":["Get current version of Tesseract"]},{"cell_type":"code","execution_count":9,"metadata":{},"outputs":[{"data":{"text/html":["
[\n","    'tesseract 5.3.4',\n","    ' leptonica-1.84.1',\n","    '  libgif 5.2.1 : libjpeg 8d (libjpeg-turbo 3.0.0) : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : libwebp \n","1.4.0 : libopenjp2 2.5.2',\n","    ' Found NEON',\n","    ' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6',\n","    ' Found libcurl/8.6.0 SecureTransport (LibreSSL/3.3.6) zlib/1.2.12 nghttp2/1.61.0'\n","]\n","
\n"],"text/plain":["\u001b[1m[\u001b[0m\n"," \u001b[32m'tesseract 5.3.4'\u001b[0m,\n"," \u001b[32m' leptonica-1.84.1'\u001b[0m,\n"," \u001b[32m' libgif 5.2.1 : libjpeg 8d \u001b[0m\u001b[32m(\u001b[0m\u001b[32mlibjpeg-turbo 3.0.0\u001b[0m\u001b[32m)\u001b[0m\u001b[32m : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : libwebp \u001b[0m\n","\u001b[32m1.4.0 : libopenjp2 2.5.2'\u001b[0m,\n"," \u001b[32m' Found NEON'\u001b[0m,\n"," \u001b[32m' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6'\u001b[0m,\n"," \u001b[32m' Found libcurl/8.6.0 SecureTransport \u001b[0m\u001b[32m(\u001b[0m\u001b[32mLibreSSL/3.3.6\u001b[0m\u001b[32m)\u001b[0m\u001b[32m zlib/1.2.12 nghttp2/1.61.0'\u001b[0m\n","\u001b[1m]\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["out = !tesseract --version # type: ignore\n","cprint(out)\n","if 'tesseract 5.' not in out[0]:\n"," if 'tesseractd 4.' in out[0]:\n"," cprint('Old Tesseract 4.x is installed. You should uninstall it and install Tesseract 5.x')\n"," else:\n"," cprint('You should install Tesseract 5.x')\n"]},{"cell_type":"markdown","metadata":{},"source":["> **NOTE: in the cells below, when you encounter lines starting with the exclamation mark `!` (`bang`), uncomment them if you want to execute the shell commands**\n"]},{"cell_type":"markdown","metadata":{},"source":["### Remove Tesseract installation\n","> If you have the old 4.x version, you should consider removing the installation with the following commands.\n"]},{"cell_type":"markdown","metadata":{},"source":["#### Mac (TBD)"]},{"cell_type":"markdown","metadata":{},"source":["#### Windows (TBD)"]},{"cell_type":"markdown","metadata":{},"source":["#### Ubuntu"]},{"cell_type":"code","execution_count":10,"metadata":{},"outputs":[],"source":["# !sudo apt-get remove tesseract-ocr\n"]},{"cell_type":"markdown","metadata":{},"source":["### Tesseract installation"]},{"cell_type":"markdown","metadata":{},"source":["#### Mac 
(TBD)"]},{"cell_type":"markdown","metadata":{},"source":["#### Windows (TBD)"]},{"cell_type":"markdown","metadata":{},"source":["#### Ubuntu"]},{"cell_type":"markdown","metadata":{},"source":["The **5.x** release series is available in [another PPA](https://launchpad.net/~alex-p/+archive/ubuntu/tesseract-ocr5) for Ubuntu **18.04**, **20.04**, and **22.04**.\n"]},{"cell_type":"code","execution_count":11,"metadata":{},"outputs":[],"source":["# !sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5"]},{"cell_type":"markdown","metadata":{},"source":["refresh system package cache in case you’re still running old Ubuntu 18.04"]},{"cell_type":"code","execution_count":12,"metadata":{},"outputs":[],"source":["# !sudo apt update"]},{"cell_type":"markdown","metadata":{},"source":["install the software engine"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[],"source":["# !sudo apt install -y tesseract-ocr"]},{"cell_type":"markdown","metadata":{},"source":["and check version:"]},{"cell_type":"code","execution_count":14,"metadata":{},"outputs":[{"data":{"text/html":["
[\n","    'tesseract 5.3.4',\n","    ' leptonica-1.84.1',\n","    '  libgif 5.2.1 : libjpeg 8d (libjpeg-turbo 3.0.0) : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : libwebp \n","1.4.0 : libopenjp2 2.5.2',\n","    ' Found NEON',\n","    ' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6',\n","    ' Found libcurl/8.6.0 SecureTransport (LibreSSL/3.3.6) zlib/1.2.12 nghttp2/1.61.0'\n","]\n","
\n"],"text/plain":["\u001b[1m[\u001b[0m\n"," \u001b[32m'tesseract 5.3.4'\u001b[0m,\n"," \u001b[32m' leptonica-1.84.1'\u001b[0m,\n"," \u001b[32m' libgif 5.2.1 : libjpeg 8d \u001b[0m\u001b[32m(\u001b[0m\u001b[32mlibjpeg-turbo 3.0.0\u001b[0m\u001b[32m)\u001b[0m\u001b[32m : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : libwebp \u001b[0m\n","\u001b[32m1.4.0 : libopenjp2 2.5.2'\u001b[0m,\n"," \u001b[32m' Found NEON'\u001b[0m,\n"," \u001b[32m' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6'\u001b[0m,\n"," \u001b[32m' Found libcurl/8.6.0 SecureTransport \u001b[0m\u001b[32m(\u001b[0m\u001b[32mLibreSSL/3.3.6\u001b[0m\u001b[32m)\u001b[0m\u001b[32m zlib/1.2.12 nghttp2/1.61.0'\u001b[0m\n","\u001b[1m]\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["out = !tesseract --version # type: ignore\n","cprint(out)"]},{"cell_type":"markdown","metadata":{},"source":["### Install Tesseract languages"]},{"cell_type":"code","execution_count":15,"metadata":{},"outputs":[{"data":{"text/html":["
tessdata path: /opt/homebrew/share/tessdata\n","
\n"],"text/plain":["tessdata path: \u001b[35m/opt/homebrew/share/\u001b[0m\u001b[95mtessdata\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
Installed languages:\n","[\n","    'afr, amh, ara, asm, aze, aze_cyrl, bel, ben, bod, bos, bre, bul, cat, ceb, ces',\n","    'chi_sim, chi_sim_vert, chi_tra, chi_tra_vert, chr, cos, cym, dan, deu, div, dzo, ell, eng, enm, epo',\n","    'equ, est, eus, fao, fas, fil, fin, fra, frk, frm, fry, gla, gle, glg, grc',\n","    'guj, hat, heb, hin, hrv, hun, hye, iku, ind, isl, ita, ita_old, jav, jpn, jpn_vert',\n","    'kan, kat, kat_old, kaz, khm, kir, kmr, kor, kor_vert, lao, lat, lav, lit, ltz, mal',\n","    'mar, mkd, mlt, mon, mri, msa, mya, nep, nld, nor, oci, ori, osd, pan, pol',\n","    'por, pus, que, ron, rus, san, script/Arabic, script/Armenian, script/Bengali, script/Canadian_Aboriginal, \n","script/Cherokee, script/Cyrillic, script/Devanagari, script/Ethiopic, script/Fraktur',\n","    'script/Georgian, script/Greek, script/Gujarati, script/Gurmukhi, script/HanS, script/HanS_vert, script/HanT, \n","script/HanT_vert, script/Hangul, script/Hangul_vert, script/Hebrew, script/Japanese, script/Japanese_vert, \n","script/Kannada, script/Khmer',\n","    'script/Lao, script/Latin, script/Malayalam, script/Myanmar, script/Oriya, script/Sinhala, script/Syriac, \n","script/Tamil, script/Telugu, script/Thaana, script/Thai, script/Tibetan, script/Vietnamese, sin, slk',\n","    'slv, snd, snum, spa, spa_old, sqi, srp, srp_latn, sun, swa, swe, syr, tam, tat, tel',\n","    'tgk, tha, tir, ton, tur, uig, ukr, urd, uzb, uzb_cyrl, vie, yid, yor'\n","]\n","
\n"],"text/plain":["Installed languages:\n","\u001b[1m[\u001b[0m\n"," \u001b[32m'afr, amh, ara, asm, aze, aze_cyrl, bel, ben, bod, bos, bre, bul, cat, ceb, ces'\u001b[0m,\n"," \u001b[32m'chi_sim, chi_sim_vert, chi_tra, chi_tra_vert, chr, cos, cym, dan, deu, div, dzo, ell, eng, enm, epo'\u001b[0m,\n"," \u001b[32m'equ, est, eus, fao, fas, fil, fin, fra, frk, frm, fry, gla, gle, glg, grc'\u001b[0m,\n"," \u001b[32m'guj, hat, heb, hin, hrv, hun, hye, iku, ind, isl, ita, ita_old, jav, jpn, jpn_vert'\u001b[0m,\n"," \u001b[32m'kan, kat, kat_old, kaz, khm, kir, kmr, kor, kor_vert, lao, lat, lav, lit, ltz, mal'\u001b[0m,\n"," \u001b[32m'mar, mkd, mlt, mon, mri, msa, mya, nep, nld, nor, oci, ori, osd, pan, pol'\u001b[0m,\n"," \u001b[32m'por, pus, que, ron, rus, san, script/Arabic, script/Armenian, script/Bengali, script/Canadian_Aboriginal, \u001b[0m\n","\u001b[32mscript/Cherokee, script/Cyrillic, script/Devanagari, script/Ethiopic, script/Fraktur'\u001b[0m,\n"," \u001b[32m'script/Georgian, script/Greek, script/Gujarati, script/Gurmukhi, script/HanS, script/HanS_vert, script/HanT, \u001b[0m\n","\u001b[32mscript/HanT_vert, script/Hangul, script/Hangul_vert, script/Hebrew, script/Japanese, script/Japanese_vert, \u001b[0m\n","\u001b[32mscript/Kannada, script/Khmer'\u001b[0m,\n"," \u001b[32m'script/Lao, script/Latin, script/Malayalam, script/Myanmar, script/Oriya, script/Sinhala, script/Syriac, \u001b[0m\n","\u001b[32mscript/Tamil, script/Telugu, script/Thaana, script/Thai, script/Tibetan, script/Vietnamese, sin, slk'\u001b[0m,\n"," \u001b[32m'slv, snd, snum, spa, spa_old, sqi, srp, srp_latn, sun, swa, swe, syr, tam, tat, tel'\u001b[0m,\n"," \u001b[32m'tgk, tha, tir, ton, tur, uig, ukr, urd, uzb, uzb_cyrl, vie, yid, yor'\u001b[0m\n","\u001b[1m]\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["out = !tesseract --list-langs # type: ignore\n","tessdata = Path(out[0].split('\"')[1])\n","cprint(f\"tessdata path: {tessdata}\")\n","cprint(\"Installed languages:\", 
[', '.join(sub) for sub in [out[i:i + 15] for i in range(1, len(out), 15)]])"]},{"cell_type":"markdown","metadata":{},"source":["#### Install **best** languages and **jpn_vert** Tesseract lang\n","> Much better results than default langs and `jpn` language model.\n"]},{"cell_type":"markdown","metadata":{},"source":["Download from [tessdata_best](https://github.com/tesseract-ocr/tessdata_best). \n","Download from [here](https://groups.google.com/g/tesseract-ocr/c/FwjSZzoVgeg/m/u-zyFYQiBgAJ) a model trained for vertical Japanese text as found in manga.\n","\n","See [here](https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html) for the language codes.\n","\n","> Note: I haven't played much with `jpn` or `jpn_vert`, `manga-ocr` is surely a much better fit, but it can be educational to compare."]},{"cell_type":"markdown","metadata":{},"source":["Uncomment and execute to download the best language models:\n"]},{"cell_type":"code","execution_count":16,"metadata":{},"outputs":[],"source":["# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/osd.traineddata\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/eng.traineddata\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/jpn.traineddata\n","\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/jpn_vert.traineddata\n","# or\n","# !wget -O jpn_vert.traineddata https://github.com/zodiac3539/jpn_vert/blob/master/jpn_ver5.traineddata\n","\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/spa.traineddata\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/fra.traineddata"]},{"cell_type":"markdown","metadata":{},"source":["Copy downloaded models to tessdata folder (double check that `tessdata` variable points to the right folder):\n"]},{"cell_type":"code","execution_count":17,"metadata":{},"outputs":[{"data":{"text/html":["
tessdata path: /opt/homebrew/share/tessdata\n","
\n"],"text/plain":["tessdata path: \u001b[35m/opt/homebrew/share/\u001b[0m\u001b[95mtessdata\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["cprint(f\"tessdata path: {tessdata}\")"]},{"cell_type":"code","execution_count":18,"metadata":{},"outputs":[],"source":["# !sudo mv *.traineddata $tessdata"]},{"cell_type":"markdown","metadata":{},"source":["and remove the downloaded models:\n"]},{"cell_type":"code","execution_count":19,"metadata":{},"outputs":[],"source":["# !rm *.traineddata"]},{"cell_type":"markdown","metadata":{},"source":["Check installed languages\n"]},{"cell_type":"code","execution_count":20,"metadata":{},"outputs":[{"data":{"text/html":["
[\n","    Path('/opt/homebrew/share/tessdata/spa.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/eng.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/jpn_vert.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/spa_old.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/fra.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/jpn.traineddata')\n","]\n","
\n"],"text/plain":["\u001b[1m[\u001b[0m\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/spa.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/eng.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/jpn_vert.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/spa_old.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/fra.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/jpn.traineddata'\u001b[0m\u001b[1m)\u001b[0m\n","\u001b[1m]\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["cprint(list(filter(lambda x: re.match(r'eng|jpn|jpn_vert|fra|spa', x.name), tessdata.ls()))) # type: ignore\n","# cprint(pytesseract.get_languages())\n"]},{"cell_type":"markdown","metadata":{"id":"fErGl5xSZlI5"},"source":["----\n","# Tesseract experiments"]},{"cell_type":"markdown","metadata":{"id":"op24JaWwfsSv"},"source":["## Experiment directory"]},{"cell_type":"markdown","metadata":{"id":"Av0ceG7efw2L"},"source":["Directory where the images reside (`EXP_DIR/source/`), the auxiliary images will be cached (`EXP_DIR/cache/`), and the experiment results will be saved. You can change the default location here.\n"]},{"cell_type":"code","execution_count":21,"metadata":{"id":"jfMv_sdZfwVY"},"outputs":[{"data":{"text/html":["
    Working dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed\n","Experiments dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment\n","
\n"],"text/plain":[" Working dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/\u001b[0m\u001b[95m_testbed\u001b[0m\n","Experiments dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/\u001b[0m\u001b[95mexperiment\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["EXP_DIR = Path('./experiment')\n","cprint(f\"{'Working dir':>15}: {Path('.').resolve()}\\nExperiments dir: {EXP_DIR.resolve()}\")\n"]},{"cell_type":"markdown","metadata":{"id":"z62cVR57ZlI5"},"source":["# Setup ngrok (Colab)"]},{"cell_type":"markdown","metadata":{"id":"qMAn1mOSZlI5"},"source":["The experiments can generate hundreds of images, and maintaining the **PIL** images in memory is not efficient. All the generated images are cached and visualized on demand through a URL pointing to the local cache. This approach prevents the kernel from being overloaded with **PIL** images, with the front-end responsible for fetching the image and the backend web server (not the kernel) for serving the image in another process. This method is quick and efficient. As an added bonus, the saved notebook remains lean and fit; it doesn't store the Base64 versions of all the output cell images.\n","\n","Unfortunately, this approach does not work as is in **Colab**. Google Colab runs on an older Ubuntu 18.04 VM, so all the usual networking challenges with Docker, or whatever VMs Google is using, apply. Google also goes to great lengths to avoid exposing its internal architecture. We have two options:\n","- Let the Jupyter kernel serve the images itself, which is slow and memory-consuming.\n","- Use a tunnel to map localhost (server) to whatever IP and port the front-end (the browser you're currently using) is running on. We can use **ngrok** for this, but *ngrok* is a commercial service that has been abused and now requires confirmation the first time the tunnel connects, which can be inconvenient for the user. 
It also requires the user to open a free account and obtain an auth token.\n","\n","You choose.\n","\n","If the notebook is running in Colab and ngrok has been successfully installed and the tunnel has been created, the default setting is `USE_PIL=False`. You can set the environment variable `USE_PIL=True` to force the use of PIL images, but note that in certain circumstances, Colab will complain because the free tiers are usually memory constrained.\n","\n","If you don't change the default settings and\n","- the notebook is running locally, it'll serve the images directly without any additional setup.\n","- the notebook is running in Colab, it'll serve the images through a web server and ngrok.\n"]},{"cell_type":"code","execution_count":22,"metadata":{"id":"A0il2cCMZlI5"},"outputs":[],"source":["os.environ['USE_TUNNEL'] = 'True' if FC.IN_COLAB else 'False'\n","os.environ['USE_PIL'] = 'True' if FC.IN_COLAB and os.environ['USE_TUNNEL'] == 'False' else 'False'\n"]},{"cell_type":"code","execution_count":23,"metadata":{"id":"Su0YJikGZlI5"},"outputs":[],"source":["SERVER = None\n","if os.environ['USE_PIL'].lower() == 'false' and os.environ['USE_TUNNEL'].lower() == 'true':\n"," import pcleaner._testbed.testbed.web_server as web_server\n","\n"," SERVER = web_server.setup_ngrok(web_server.WebServerBottle, Path(EXP_DIR))\n"]},{"cell_type":"markdown","metadata":{"id":"2ara95FoZlI5"},"source":["Creates the `OCRExperimentContext` object we'll use to manage the experiments.\n"]},{"cell_type":"code","execution_count":24,"metadata":{"id":"qhQ3nY1OhgdS"},"outputs":[{"name":"stdout","output_type":"stream","text":["Current Configuration:\n","\n","Locale: System default\n","Default Profile: Built-in\n","Saved Profiles:\n","- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n","- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n","\n","Profile Editor: cursor\n","Cache Directory: System default\n","Default Torch Model Path: 
/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n","Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n","GUI Theme: System default\n","\n","--------------------\n","\n","Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n","System default cache directory: /Users/vic/Library/Caches/pcleaner\n"]},{"data":{"text/html":["
 config cache_dir: None\n","       model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n","           device: 'mps'\n","
\n"],"text/plain":[" config cache_dir: \u001b[3;35mNone\u001b[0m\n"," model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n"," device: \u001b[32m'mps'\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
        force_PIL: False\n","       use_tunnel: False\n","       server_url: \n","   experiment dir: experiment\n","       source_dir: experiment/source\n","        cache_dir: experiment/cache\n","\n","
\n"],"text/plain":[" force_PIL: \u001b[3;91mFalse\u001b[0m\n"," use_tunnel: \u001b[3;91mFalse\u001b[0m\n"," server_url: \n"," experiment dir: experiment\n"," source_dir: experiment/source\n"," cache_dir: experiment/cache\n","\n"]},"metadata":{},"output_type":"display_data"}],"source":["CONTEXT = OCRExperimentContext('Tesseract', EXP_DIR, server=SERVER)\n","CONTEXT.show()"]},{"cell_type":"markdown","metadata":{"id":"5gEd0ZYOZlI5"},"source":["# Test images\n"]},{"cell_type":"markdown","metadata":{"id":"8ecqlXd_h2yc"},"source":["Copy your images to the source directory:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["cprint((EXP_DIR/'source').resolve())"]},{"cell_type":"markdown","metadata":{},"source":["or download the standard set:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !gdown --id 18TSXLCYAPxAlUsdHmgAe6FZM5d8K6gcT -O experiment.zip"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !unzip -qn experiment.zip -d ."]},{"cell_type":"markdown","metadata":{},"source":["Check the images are in place"]},{"cell_type":"code","execution_count":25,"metadata":{"id":"Ha8wqfTHZlI5"},"outputs":[{"data":{"text/plain":["['00: Action_Comics_1960-01-00_(262).JPG',\n"," '01: Adolf_Cap_01_008.jpg',\n"," '02: Barnaby_v1-028.png',\n"," '03: Barnaby_v1-029.png',\n"," '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n"," '05: Cannon-292.jpg',\n"," '06: Contrato_con_Dios_028.jpg',\n"," '07: Erase_una_vez_en_Francia_02_88.jpg',\n"," '08: FOX_CHILLINTALES_T17_012.jpg',\n"," '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n"," '10: Galactus_12.jpg',\n"," '11: INOUE_KYOUMEN_002.png',\n"," '12: MCCALL_ROBINHOOD_T31_010.jpg',\n"," '13: MCCAY_LITTLENEMO_090.jpg',\n"," '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n"," '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n"," '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n"," '17: 
Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n"," '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n"," '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n"," '20: Strange_Tales_172005.jpg',\n"," '21: Strange_Tales_172021.jpg',\n"," '22: Tarzan_014-21.JPG',\n"," '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n"," '24: Transformers_-_Unicron_000-004.jpg',\n"," '25: Transformers_-_Unicron_000-016.jpg',\n"," '26: WARE_ACME_024.jpg',\n"," '27: Yoko_Tsuno_T01_1972-10.jpg',\n"," '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n"," '29: manga_0033.jpg',\n"," '30: ronson-031.jpg',\n"," '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']"]},"execution_count":25,"metadata":{},"output_type":"execute_result"}],"source":["[f\"{i:02}: {_.name}\" for i,_ in enumerate(CONTEXT.image_paths)]\n"]},{"cell_type":"markdown","metadata":{"id":"VHtWWQnKj2eU"},"source":["----"]},{"cell_type":"code","execution_count":26,"metadata":{"id":"TZIA3E3jZlI5"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7153dba0227b423f99fe3417327523ed","version_major":2,"version_minor":0},"text/plain":["VBox(children=(HTML(value=\" + + +

Error: {{e.status}}

+

Sorry, the requested URL {{repr(request.url)}} + caused an error:

+
{{e.body}}
+ %%if DEBUG and e.exception: +

Exception:

+ %%try: + %%exc = repr(e.exception) + %%except: + %%exc = '' %% type(e.exception).__name__ + %%end +
{{exc}}
+ %%end + %%if DEBUG and e.traceback: +

Traceback:

+
{{e.traceback}}
+ %%end + + +%%except ImportError: + ImportError: Could not generate the error page. Please add bottle to + the import path. +%%end +""" % __name__ + +#: A thread-safe instance of :class:`LocalRequest`. If accessed from within a +#: request callback, this instance always refers to the *current* request +#: (even on a multi-threaded server). +request = LocalRequest() + +#: A thread-safe instance of :class:`LocalResponse`. It is used to change the +#: HTTP response for the *current* request. +response = LocalResponse() + +#: A thread-safe namespace. Not used by Bottle. +local = threading.local() + +# Initialize app stack (create first empty Bottle app now deferred until needed) +# BC: 0.6.4 and needed for run() +apps = app = default_app = AppStack() + +#: A virtual package that redirects import statements. +#: Example: ``import bottle.ext.sqlite`` actually imports `bottle_sqlite`. +ext = _ImportRedirect('bottle.ext' if __name__ == '__main__' else + __name__ + ".ext", 'bottle_%s').module + + +def _main(argv): # pragma: no coverage + args, parser = _cli_parse(argv) + + def _cli_error(cli_msg): + parser.print_help() + _stderr('\nError: %s\n' % cli_msg) + sys.exit(1) + + if args.version: + print('Bottle %s' % __version__) + sys.exit(0) + if not args.app: + _cli_error("No application entry point specified.") + + sys.path.insert(0, '.') + sys.modules.setdefault('bottle', sys.modules['__main__']) + + host, port = (args.bind or 'localhost'), 8080 + if ':' in host and host.rfind(']') < host.rfind(':'): + host, port = host.rsplit(':', 1) + host = host.strip('[]') + + config = ConfigDict() + + for cfile in args.conf or []: + try: + if cfile.endswith('.json'): + with open(cfile, 'rb') as fp: + config.load_dict(json_loads(fp.read())) + else: + config.load_config(cfile) + except configparser.Error as parse_error: + _cli_error(parse_error) + except IOError: + _cli_error("Unable to read config file %r" % cfile) + except (UnicodeError, TypeError, ValueError) as error: + 
_cli_error("Unable to parse config file %r: %s" % (cfile, error)) + + for cval in args.param or []: + if '=' in cval: + config.update((cval.split('=', 1),)) + else: + config[cval] = True + + run(args.app, + host=host, + port=int(port), + server=args.server, + reloader=args.reload, + plugins=args.plugin, + debug=args.debug, + config=config) + + +if __name__ == '__main__': # pragma: no coverage + _main(sys.argv) diff --git a/pcleaner/_testbed/testbed/experiments.py b/pcleaner/_testbed/testbed/experiments.py new file mode 100644 index 00000000..e630487b --- /dev/null +++ b/pcleaner/_testbed/testbed/experiments.py @@ -0,0 +1,2285 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/experiments.ipynb. + +# %% ../nbs/experiments.ipynb 1 +from __future__ import annotations + + +# %% auto 0 +__all__ = ['CM', 'SubjIdT', 'RunIdT', 'SubjSpecT', 'ImgIdT', 'ImgSpecT', 'BoxIdT', 'remove_multiple_whitespaces', + 'postprocess_ocr', 'accuracy_ocr_naive', 'accuracy_ocr_difflib', 'ground_truth_path', 'read_ground_truth', + 'dilate_by_fractional_pixel', 'extract_text', 'lang2pcleaner', 'lang2tesseract', 'CropMethod', + 'crop_by_image', 'crop_by_extracted', 'ExperimentSubject', 'Result', 'ExperimentRun', 'ExperimentContext', + 'ImageContext', 'ResultOCR', 'ResultOCRExtracted', 'ResultSet', 'ResultSetDefault', 'resultset_to_dict', + 'dict_to_resultset', 'OCRModel', 'OCRExperimentRun', 'OCRExperimentContext', 'SimpleResultVisor', + 'RunSelector', 'MessageVisor', 'StatusVisor', 'OCRModelSelector', 'DisplayOptions', 'ContentSelector', + 'ImageSelector', 'OCRContextVisor', 'ImageContextVisor', 'ExperimentOCR', 'ExperimentOCRMethod', + 'ResultVisor', 'ExperimentVisor', 'ExperimentsVisor'] + +# %% ../nbs/experiments.ipynb 11 +import contextlib +import dataclasses +import datetime +import difflib +import functools +import json +import os +import shutil +import tempfile +from collections import defaultdict +from enum import Enum +from pathlib import Path +from typing import Any +from typing 
import Callable +from typing import cast +from typing import Mapping +from typing import TypeAlias + +import fastcore.all as FC +import ipywidgets as W +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import pcleaner.config as cfg +import pcleaner.ctd_interface as ctm +import pcleaner.image_ops as ops +import pcleaner.ocr.ocr as ocr +import pcleaner.structures as st +import torch +import traitlets as T +from IPython.display import clear_output +from IPython.display import display +from IPython.display import HTML +from loguru import logger +from pcleaner.ocr.ocr_tesseract import TesseractOcr +from PIL import Image +from PIL import ImageFilter +from rich.console import Console +from tqdm.notebook import tqdm + + +# %% ../nbs/experiments.ipynb 12 +from pcleaner._testbed.testbed.helpers import * +from pcleaner._testbed.testbed.ocr_metric import * +from pcleaner._testbed.testbed.visor import ContextVisor +from pcleaner._testbed.testbed.visor import Spinner +import pcleaner._testbed.testbed.web_server as web_server + + +# %% ../nbs/experiments.ipynb 16 +console = Console(width=104, tab_size=4, force_jupyter=True) +cprint = console.print + + +# %% ../nbs/experiments.ipynb 19 +os.environ['USE_PIL'] = 'False' +os.environ['USE_TUNNEL'] = 'False' +SERVER = None + + +# %% ../nbs/experiments.ipynb 55 +def remove_multiple_whitespaces(text): + return ' '.join(text.split()) + + +def postprocess_ocr(text): + "Basic postprocessing for English Tesseract OCR results." + return ' '.join(remove_multiple_whitespaces(text).splitlines()).capitalize() + +def accuracy_ocr_naive(text, ground_truth): + return sum(1 for a, b in zip(text, ground_truth) if a == b) / len(text) + + +def accuracy_ocr_difflib(text, ground_truth): + """ + Calculates the OCR accuracy based on the similarity between the OCR text and the ground truth text, + using difflib's SequenceMatcher to account for differences in a manner similar to git diffs. + + :param text: The OCR-generated text. 
+ :param ground_truth: The ground truth text. + :return: A float representing the similarity ratio between the OCR text and the ground truth, + where 1.0 is identical. + """ + # Initialize the SequenceMatcher with the OCR text and the ground truth + matcher = difflib.SequenceMatcher(None, text, ground_truth) + + # Get the similarity ratio + similarity_ratio = matcher.ratio() + + return similarity_ratio + +# %% ../nbs/experiments.ipynb 57 +def ground_truth_path(page_data: st.PageData): + path = Path(page_data.original_path) + return path.with_stem(path.stem + '_gt').with_suffix('.txt') + + +def read_ground_truth(page_data: st.PageData, root_dir: Path): + gts_path = root_dir / ground_truth_path(page_data) + if gts_path.exists(): + gts = gts_path.read_text(encoding="utf-8").splitlines() + else: + gts = ["" for _ in range(len(page_data.boxes))] + return gts + + +# %% ../nbs/experiments.ipynb 59 +def dilate_by_fractional_pixel(image, dilation_fraction, filter_base_size=3): + """ + Dilates an image by a specified fractional pixel amount. The function calculates + the necessary scaling factor and filter size based on the desired dilation fraction. + + :param image: A PIL Image object (1-bit mode). + :param dilation_fraction: The desired fractional pixel amount for dilation (e.g., 0.2). + :param filter_base_size: The base size of the dilation filter to apply on the scaled image. + This size is adjusted based on the scaling factor to achieve the + desired dilation effect. + :return: A PIL Image object after dilation, converted back to grayscale. 
+ """ + from PIL.Image import Resampling + # Calculate the scale factor based on the desired dilation fraction + scale_factor = int(1 / dilation_fraction) + + # Adjust the filter size based on the scale factor + # This ensures the dilation effect is proportional to the desired fraction + filter_size = max(1, filter_base_size * scale_factor // 5) + + # Convert the image to grayscale for more nuanced intermediate values + image_gray = image.convert("L") + + # Resize the image to a larger size using bicubic interpolation + larger_size = (int(image.width * scale_factor), int(image.height * scale_factor)) + image_resized = image_gray.resize(larger_size, Resampling.BICUBIC) + + # Apply the dilation filter to the resized image + dilated_image = image_resized.filter(ImageFilter.MaxFilter(filter_size)) + + # Resize the image back to its original size using bicubic interpolation + image_dilated_fractional_pixel = dilated_image.resize(image.size, Resampling.BICUBIC) + + return image_dilated_fractional_pixel + + +# %% ../nbs/experiments.ipynb 60 +def extract_text(image, text_mask, box): + cropped_image = crop_box(box, image) + cropped_mask = crop_box(box, text_mask) + extracted = ops.extract_text(cropped_image, cropped_mask) + return cropped_image, cropped_mask, extracted + + +# %% ../nbs/experiments.ipynb 62 +_lang2pcleaner = {'English': st.DetectedLang.ENG, 'Japanese': st.DetectedLang.JA, 'Spanish': st.DetectedLang.ENG, + 'French':st.DetectedLang.ENG} +# _lang2tesseract = {'English': 'eng', 'Japanese': 'jpn'} +_lang2tesseract = {'English': 'eng', 'Japanese': 'jpn_vert', 'Spanish': 'spa', 'French': 'fra'} + + +# %% ../nbs/experiments.ipynb 63 +def lang2pcleaner(lang: str): + return _lang2pcleaner[lang] + +def lang2tesseract(lang: str): + return _lang2tesseract[lang] + + +# %% ../nbs/experiments.ipynb 66 +class CropMethod(Enum): + INITIAL_BOX = 'Initial box' + DEFAULT = 'Default' + DEFAULT_GREY_PAD = 'Default, grey pad' + PADDED_4 = 'Padded 4px' + PADDED_8 = 'Padded 8px' + 
EXTRACTED_INIT_BOX = 'Extracted, init box' + PADDED_4_EXTRACTED = 'Padded 4, extracted' + PADDED_8_EXTRACTED = 'Padded 8, extracted' + PADDED_8_DILATION_1 = 'Padded 8, dilation 1' + PAD_8_FRACT_0_5 = 'Pad 8, fract. 0.5' + PAD_8_FRACT_0_2 = 'Pad 8, fract. 0.2' + + @classmethod + def __display_names__(cls): + return dict( + zip([_.value for _ in cls], + cls)) + + +CM = CropMethod + +_IMAGE_METHODS = [CM.INITIAL_BOX, CM.DEFAULT, CM.DEFAULT_GREY_PAD, + CM.PADDED_4, CM.PADDED_8] +_EXTRACTED_METHODS = [CM.EXTRACTED_INIT_BOX, CM.PADDED_4_EXTRACTED, + CM.PADDED_8_EXTRACTED, CM.PADDED_8_DILATION_1, + CM.PAD_8_FRACT_0_5, CM.PAD_8_FRACT_0_2] + + +def crop_by_image(method: CM, + box: st.Box, + base: Image.Image, + preproc: cfg.PreprocessorConfig, + ): + image = None + match method: + case CM.INITIAL_BOX : + image = crop_box(box, base) + case CM.DEFAULT: + padded2_4 = ( + box.pad(preproc.box_padding_initial, base.size).right_pad( + preproc.box_right_padding_initial, base.size)) + image = crop_box(padded2_4, base) + case CM.DEFAULT_GREY_PAD: + image = crop_box(box, base) + image = ops.pad_image(image, 8, fill_color=(128, 128, 128)) + case CM.PADDED_4: + padded4 = box.pad(4, base.size) + image = crop_box(padded4, base) + case CM.PADDED_8: + padded4 = box.pad(8, base.size) + image = crop_box(padded4, base) + case _: pass + return image + + +def crop_by_extracted(method: CM, + box: st.Box, + base: Image.Image, + mask: Image.Image, + cropped_image_path: Path, + cropped_mask_path: Path, + dilated: dict[float, Image.Image] + ): + cropped_image, cropped_mask, image = None, None, None + if method in _EXTRACTED_METHODS: + if not cropped_image_path.exists() or not cropped_mask_path.exists(): + match method: + case CM.EXTRACTED_INIT_BOX: + cropped_image, cropped_mask, image = extract_text(base, mask, box) + case CM.PADDED_4_EXTRACTED: + padded4 = box.pad(4, base.size) + cropped_image, cropped_mask, image = extract_text(base, mask, padded4) + case CM.PADDED_8_EXTRACTED: + padded8 = 
box.pad(8, base.size) + cropped_image, cropped_mask, image = extract_text(base, mask, padded8) + case CM.PADDED_8_DILATION_1: + padded8 = box.pad(8, base.size) + cropped_image, cropped_mask, image = extract_text( + base, dilated[1], padded8) + case CM.PAD_8_FRACT_0_5: + padded8 = box.pad(8, base.size) + cropped_image, cropped_mask, image = extract_text( + base, dilated[0.5], padded8) + case CM.PAD_8_FRACT_0_2: + padded8 = box.pad(8, base.size) + cropped_image, cropped_mask, image = extract_text( + base, dilated[0.2], padded8) + case _: pass + + return image, cropped_image, cropped_mask + + + +# %% ../nbs/experiments.ipynb 68 +SubjIdT: TypeAlias = int +RunIdT: TypeAlias = str +SubjSpecT: TypeAlias = SubjIdT | str | Path + + +class ExperimentSubject: + exp: ExperimentContext + idx: SubjIdT + + def setup(self, exp: ExperimentContext, idx: Any, *args, **kwargs): + self.exp = exp + self.idx = cast(SubjIdT, exp.normalize_idx(idx)) + return self + + def __new__(cls, exp: ExperimentContext, idx: Any, *args, **kwargs): + idx = exp.normalize_idx(idx) + self = exp.subject_context(idx) + if self is None: + self = super().__new__(cls) + self = exp.setup_subject_context(idx, self, *args, **kwargs) + if self is None: + raise ValueError(f"Can't create new subject with idx: {idx}: out of range") + return self + + +class Result: + subject_ctx: ExperimentSubject + +class ExperimentRun: + "A set of experiment results obtained with the same parameters." 
+ exp: ExperimentContext + name: RunIdT + dt: datetime.datetime + + def setup(self, exp: ExperimentContext, name: RunIdT, *args, **kwargs): + self.exp = exp + self.name = name + return self + + @classmethod + def setup_run_name(cls, name: RunIdT, dt: datetime.datetime): + return f"{name}_{dt.strftime('%Y%m%d-%H%M%S')}" + + def setup_run(self, args, **kwargs): + pass + def before_result(self, *args, **kwargs): + pass + def after_result(self, result: Result, *args, **kwargs): + pass + + def __new__(cls, exp: ExperimentContext, name: RunIdT, *args, **kwargs): + self = exp.experiment_run(name) + if self is None: + self = super().__new__(cls) + self = exp.setup_experiment_run(name, self, *args, **kwargs) + return self + + +class ExperimentContext(T.HasTraits): + "Class to maintain shared state across all file-based experiments within the experiment domain." + name: str + _results: dict[RunIdT, dict[SubjIdT, Any]] + + _dirty = T.Bool(default_value=False) + + CACHE_DIR_NAME: str = 'cache' + SOURCE_DIR_NAME: str = 'source' + EXP_DIR: Path = Path("../experiment") + + subject_cls: Callable[..., ExperimentSubject] + def subject_factory(self) -> Callable[..., ExperimentSubject]: return type(self).subject_cls + + def normalize_idx(self, idx: SubjSpecT) -> SubjIdT | None: + nidx = None + if isinstance(idx, int) and idx < self.subject_count: + nidx = idx + elif isinstance(idx, str): + try: + nidx = [_.name for _ in self._paths].index(idx) + except Exception: + pass + elif isinstance(idx, Path): + if idx in self._paths: + nidx = self._paths.index(idx) + return nidx + + def path_from_idx(self, idx: SubjSpecT): + "Relative path to the subject with the given index." 
+ _idx = self.normalize_idx(idx) + if _idx is None: + raise ValueError(f"{_idx} not found in context.") + path = Path(self._paths[_idx]) + if not path.resolve().exists(): + raise ValueError(f"{path} not found in context.") + return path + + @property + def subject_count(self): return len(self._paths) + + @property + def run_names(self): + return list(self._exp_runs.keys()) + + @property + def root_dir(self): return self._root + # Relative paths to `root_dir` + @property + def cache_dir(self): return Path(self.CACHE_DIR_NAME) + @property + def source_dir(self): return Path(self.SOURCE_DIR_NAME) + @functools.lru_cache() + def _subject_cache_dir(self, idx: SubjIdT): + path = self.path_from_idx(idx) + subject_cache_dir = self.cache_dir / path.stem + self.final(subject_cache_dir).mkdir(parents=True, exist_ok=True) + return subject_cache_dir + def subject_cache_dir(self, idx: SubjSpecT): + "Folder to cache and save subject results. Create if needed." + return self._subject_cache_dir(idx) + def final(self, path: Path | str): + """`path` relative to the root of the experiment. + If `path` is absolute, it must be in the experiment directory. 
+ """ + if isinstance(path, str): + path = Path(path) + if path.is_absolute(): + try: + path = path.relative_to(self.root_dir.resolve()) + except Exception: + return Path('_not_found_/'+path.name) + return self.root_dir / path + + def empty_cache(self, idx: SubjIdT | None = None): + if idx is None: + cache_dir = self.final(self.cache_dir) + shutil.rmtree(cache_dir, ignore_errors=True) + cache_dir.mkdir(parents=True, exist_ok=True) + else: + subject_cache_dir = self.final(self.subject_cache_dir(idx)) + shutil.rmtree(subject_cache_dir, ignore_errors=True) + subject_cache_dir.mkdir(parents=True, exist_ok=True) + def empty_cache_warning(self, + idx: SubjIdT | None=None, *, warn: bool=True, out: W.Output | None=None): + def on_confirm_clicked(b): + try: + self.empty_cache(idx) + print("Cache cleared successfully.") + except Exception as e: + print(f"Failed to clear cache: {e}") + finally: + for widget in confirmation_box.children: + widget.close() + + def on_cancel_clicked(b): + print("Cache clear cancelled.") + for widget in confirmation_box.children: + widget.close() + + if out is None: + out = W.Output() + cache_name = '' if idx is None else f" of '{self.subject_cache_dir(idx).name}'" + text = f"Are you sure you want to clear the cache{cache_name}? This action cannot be undone." + with out: + if FC.IN_NOTEBOOK: + confirm_button = W.Button(description="Confirm") + cancel_button = W.Button(description="Cancel") + confirm_button.on_click(on_confirm_clicked) + cancel_button.on_click(on_cancel_clicked) + label = W.Label(text, style={'font_size': '1.25em', 'font_weight': 'bold'}) + confirmation_box = W.VBox([label, W.HBox([confirm_button, cancel_button])]) + display(confirmation_box) + else: + on_confirm_clicked(None) + return out + + def subject_context(self, idx: SubjSpecT): + "Cached subject." 
+ if (nidx := self.normalize_idx(idx)) is None: + return None + return self._subjects.get(nidx) + def setup_subject_context(self, idx: SubjIdT, /, subject: ExperimentSubject, *args, **kwargs): + "Setup and set cached subject." + if idx < 0 or idx >= self.subject_count: + raise ValueError(f"Can't create new subject with idx: {idx}: out of range") + subject.setup(self, idx, *args, **kwargs) + self._subjects[idx] = subject + return subject + + def experiment_run(self, name: RunIdT | None = None): + "Experiment run `name` or the last one if `name` is None or None" + if name: + return self._exp_runs.get(name) + if self._exp_runs: + return self._exp_runs[list(self._exp_runs.keys())[-1]] + return None + def setup_experiment_run(self, name: str, run: ExperimentRun, *args, **kwargs): + "Set or replace cached experiment run." + run.setup(self, name, *args, **kwargs) + self._exp_runs[name] = run + return run + + def _reset_(self): + # start over. Note: doesn't change _dirty status, intended for use in testing. + self._subjects.clear() + self._subject_cache_dir.cache_clear() + + def cleanup_model(self): + pass + + def __init__(self, name: str, paths: list[Path], root: Path | None = None, run_name: str = 'run1'): + self.name = name + self._root = root or type(self).EXP_DIR + self._paths = paths # relative paths wrt self._root + self._subjects: dict[SubjIdT, ExperimentSubject] = {} + self._exp_runs: dict[str, ExperimentRun] = {} + self._results = {} + # default run + ExperimentRun(self, run_name) + + +# %% ../nbs/experiments.ipynb 78 +ImgIdT = SubjIdT +ImgSpecT: TypeAlias = ImgIdT | str | Path + + +class ImageContext(ExperimentSubject): + """ + A utility class to maintain image state for a ExperimentContext. + + Attributes: + json_data (dict): JSON data loaded from cached files. + page_data (st.PageData): PanelClaner page data. + base_image (Image.Image): The base image loaded from the cache. + mask (Image.Image): The mask image used for text detection. 
+ gts (list[str]): Ground truth data for the text in the images. + mask_dilated1 (Image.Image): Image mask dilated by 1 pixel. + mask_dilated05 (Image.Image): Image mask dilated by 0.5 pixels. + mask_dilated02 (Image.Image): Image mask dilated by 0.2 pixels. + + """ + exp: ExperimentContext + idx: ImgIdT + base_image: Image.Image + mask: Image.Image + json_data: dict | None + page_data: st.PageData + _page_lang: str + _gts: list[str] + _mask_dilated1: Image.Image | None + _mask_dilated05: Image.Image | None + _mask_dilated02: Image.Image | None + + def to_dict(self): + return { + 'image_idx': self.idx, + 'page_lang': self.page_lang, + } + + @property + def image_idx(self): return self.idx + @property + def cache_dir(self): + "Relative path of the cache directory for this image." + return self.exp.subject_cache_dir(self.idx) + + @property + def image_info(self): + img = self.base_image + w, h = img.size + print_size_in = size(w, h, 'in', 300) + print_size_cm = size(w, h, 'cm', 300) + required_dpi = dpi(w, h, 'Modern Age') + return (w, h), print_size_in, print_size_cm, required_dpi + + @property + def original_image_path(self): return Path(self.page_data.original_path) + @property + def image_path(self): return Path(self.page_data.image_path) + @property + def mask_path(self): return Path(self.page_data.mask_path) + @property + def image_boxes_path(self): + exp, img_path = self.exp, self.image_path + if exp.final( p := img_path.with_stem(img_path.stem + '_boxes')).exists(): + return p + _, p = page_boxes(self.page_data, exp.final(img_path)) + return p.relative_to(exp.root_dir) + + # Base image properties + @property + def image_name(self): return self.original_image_path.name + @property + def image_size(self): return self.base_image.size + @property + def image_dim(self):return size(*self.image_size) + @property + def image_dpi(self): return dpi(*self.image_size) + @property + def image_print(self): + return self.image_size, self.image_dim, self.image_dpi + 
@property + def image_name_rich(self): + siz, dim, res = self.image_print + return f"{self.image_name} - {siz[0]}x{siz[1]} px: {dim[0]:.2f}x{dim[1]:.2f}\" @ {res:.2f} dpi" + + def setup_page_lang(self, page_lang: str | None = None): + path = self.exp.final(self.page_data.original_path).with_suffix('.json') + metadata = json.load(open(path)) if path.exists() else {} + if 'lang' in metadata and (page_lang == metadata['lang'] or page_lang is None): + self._page_lang = metadata['lang'] + return + self._page_lang = metadata['lang'] = page_lang or 'English' + json.dump(metadata, open(path, 'w'), indent=2) + @property + def page_lang(self): + if self._page_lang == None: + self.setup_page_lang() + return self._page_lang + + @property + def boxes(self): return self.page_data.boxes + + def setup_ground_truth(self): + self._gts = read_ground_truth(self.page_data, self.exp.root_dir) + @property + def gts(self): + if self._gts is None: + self.setup_ground_truth() + return self._gts + + @functools.lru_cache(typed=True) + def dilated_mask(self, fraction: float): + return dilate_by_fractional_pixel(self.mask, fraction) + + def mask_dilated1(self): + if self._mask_dilated1 is None: + self._mask_dilated1 = self.mask.filter(ImageFilter.MaxFilter(3)) + return self._mask_dilated1 + + def mask_dilated05(self): + if self._mask_dilated05 is None: + self._mask_dilated05 = self.dilated_mask(0.5) + return self._mask_dilated05 + + def mask_dilated02(self): + if self._mask_dilated02 is None: + self._mask_dilated02 = self.dilated_mask(0.2) + return self._mask_dilated02 + + def dilated(self): + return {1: self.mask_dilated1(), + 0.5: self.mask_dilated05(), + 0.2: self.mask_dilated02(),} + + def __new__(cls, + exp: ExperimentContext, + idx: ImgSpecT, + *args, **kwargs) -> 'ImageContext': + return super().__new__(cls, exp, idx, *args, **kwargs) # type: ignore + + +# %% ../nbs/experiments.ipynb 80 +@dataclasses.dataclass +class ResultOCR(Result): + subject_ctx: ImageContext + block_idx: int + ocr: 
str | None = None + image: Image.Image | None = None + description: str = dataclasses.field(default='', kw_only=True) + + def __post_init__(self): + self._acc = None + if self.image is None: + cache_path = self.image_ctx.exp.final(self.cache_path()) + if cache_path.exists(): + self.image = Image.open(cache_path) + + @property + def image_ctx(self): return self.subject_ctx + + @property + def acc(self): + if self.ocr is not None: + self._acc = accuracy_ocr_difflib(self.ocr, self.image_ctx.gts[self.block_idx]) + return self._acc + @property + def suffix(self): return f"{self.block_idx}_{self.description}" + + def cache_path(self, suffix: str | None = None): + img_ctx = self.image_ctx + suffix = self.suffix + (('_'+suffix) if suffix else '') + img_name = img_ctx.image_path.stem + return img_ctx.cache_dir / '.crop' / f"{img_name}_{suffix}.png" + + def cache_image(self, image: Image.Image | None = None, suffix: str | None = None): + image = image or (self.image if not suffix else None) + box_image_path = self.cache_path(suffix) + final_path = self.image_ctx.exp.final(box_image_path) + if image and not final_path.exists(): + final_path.parent.mkdir(parents=True, exist_ok=True) + image.save(final_path) + return box_image_path + + def to_dict(self): + d = dataclasses.asdict(self) + d['image_ctx'] = d['image'] = d['page_data'] = d['gts'] = None + return d + + # @classmethod + # def from_dict(cls, d: dict, page_data: st.PageData, gts: list[str]): + # return cls(**(d | {'page_data':page_data, 'gts':gts})) + + def __repr__(self): + return f"{type(self).__name__}#block {self.block_idx:02}: {self.acc:.2f}||{self.ocr}" + + def display(self): + visor = getattr(self, '_default_visor_type', None) + display(HTML( + visor(self).as_html() if visor else f"{self}" + )) + def _ipython_display_(self): self.display() + + +class ResultOCRExtracted(ResultOCR): + def __repr__(self): return super().__repr__() + + +# %% ../nbs/experiments.ipynb 82 +BoxIdT: TypeAlias = int + +class 
ResultSet(dict[BoxIdT, dict[CropMethod, ResultOCR]]): ... + +class ResultSetDefault(defaultdict[BoxIdT, dict[CropMethod, ResultOCR]]): ... + +def resultset_to_dict(results: ResultSet) -> dict[BoxIdT, dict[str, str]]: + d = {} + for box, box_methods in results.items(): + for method, result in box_methods.items(): + if box not in d: + d[box] = {} + d[box][method.name] = result.ocr + return d + +def dict_to_resultset( + image_idx: ImgIdT, + results_dict: dict[str, dict[str, str]], + result_factory: Callable + ) -> ResultSetDefault: + results = ResultSetDefault(dict[CropMethod, ResultOCR]) + for box_idx, box_methods in results_dict.items(): + box_idx = int(box_idx) + for method, ocr in box_methods.items(): + m = CM[method] + results[box_idx][m] = result_factory(image_idx, box_idx, m, ocr) + return results + + +# %% ../nbs/experiments.ipynb 84 +class OCRModel(Enum): + TESSERACT = 0 + IDEFICS = 1 + @staticmethod + def __display_names__() -> dict[str, OCRModel]: + return dict( + zip("Tesseract, Idefics".split(', '), + OCRModel)) + + +class OCRExperimentRun(ExperimentRun): ... + + +class OCRExperimentContext(ExperimentContext): + """ + A utility class to maintain shared state across all experiments within OCR domain. + This class encapsulates state necessary for conducting PanelCleaner OCR experiments. + """ + + config: cfg.Config + image_paths: list[Path] + ocr_model: str + force_PIL: bool + use_tunnel: bool + server: web_server.WebServer | None + + # ExperimentRun name -> Image index -> Box index -> Crop method -> Result + _results: dict[RunIdT, dict[ImgIdT, ResultSet]] + + _running = T.Bool(False) + + engines = { + 'Tesseract': cfg.OCREngine.TESSERACT, + 'Idefics': None, + 'manga-ocr': cfg.OCREngine.MANGAOCR} + + + @classmethod + def get_config(cls) -> cfg.Config: + config = cfg.load_config() + profile = config.current_profile + preprocessor_conf = profile.preprocessor + # Modify the profile to OCR all boxes. + # Make sure OCR is enabled. 
+ preprocessor_conf.ocr_enabled = True + # Make sure the max size is infinite, so no boxes are skipped in the OCR process. + preprocessor_conf.ocr_max_size = 10**10 + # Make sure the sus box min size is infinite, so all boxes with "unknown" language are skipped. + preprocessor_conf.suspicious_box_min_size = 10**10 + # Set the OCR blacklist pattern to match everything, so all text gets reported in the analytics. + preprocessor_conf.ocr_blacklist_pattern = ".*" + + # Load models if needed + gpu = torch.cuda.is_available() or torch.backends.mps.is_available() + model_path = config.get_model_path(gpu) + if model_path is None: + # don't mess with normal PanelCleaner, download models directly + import pcleaner.model_downloader as md + model_dir = config.get_model_cache_dir() + config.default_torch_model_path = md.download_torch_model(model_dir) + config.default_cv2_model_path = md.download_cv2_model(model_dir) + + return config + + @functools.lru_cache() + def mocr(self, lang: str): + engine = self.engines[self.ocr_model] + ocr_processor = ocr.get_ocr_processor(True, engine) + proc = ocr_processor[lang2pcleaner(lang)] + if isinstance(proc, TesseractOcr): + proc.lang = lang2tesseract(lang) + return proc + + @contextlib.contextmanager + def running(self, value: bool): + _running = self._running + self._running = value + yield + self._running = _running + + def ocr_box(self, result: ResultOCR, lang: str): + assert result.image is not None + text = self.mocr(lang)(result.image) + result.ocr = postprocess_ocr(text) + self._dirty = True + return result + + image_cache_dir = ExperimentContext.subject_cache_dir + + def _detect_text(self, img_path: Path, dest_dir: Path): + config = self.config + root_dir = self.root_dir + image_name = img_path.stem + pfl = config.current_profile + gpu = torch.cuda.is_available() or torch.backends.mps.is_available() + model_path = config.get_model_path(gpu) + json_data = None + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = 
Path(temp_dir) + ctm.model2annotations(pfl.general, pfl.text_detector, model_path, [img_path], temp_path) + contents = list(temp_path.glob(f"*{image_name}*")) + json_path = [_ for _ in contents if _.suffix == '.json'][0] + json_data = json.loads(json_path.read_text(encoding="utf-8")) + # we don't need unique names for this tests, strip uuids + for temp_file in contents: + if temp_file.suffix == ".json": continue + new_path = dest_dir / strip_uuid(temp_file.name) + shutil.copy(temp_file, new_path) + if 'mask' in temp_file.name: + json_data['mask_path'] = str(new_path.relative_to(root_dir)) + elif new_path.stem == image_name: + json_data['image_path'] = str(new_path.relative_to(root_dir)) + json_data['original_path'] = str(img_path.relative_to(root_dir)) + json.dump(json_data, open(dest_dir / f"{image_name}#raw.json", "w"), indent=2) + return json_data + + @functools.lru_cache() + def _load_page_data(self, image_idx: ImgIdT): + root_dir = self.root_dir + dest_dir = self.final(self.image_cache_dir(image_idx)) + img_path = self.final(self.image_paths[image_idx]) + image_name = img_path.stem + # read cached json + jsons = [_ for _ in dest_dir.glob("*#raw.json") if image_name in _.stem] + assert len(jsons) <= 1 + # generate text boxes if needed + if not jsons: + json_data = self._detect_text(img_path, dest_dir) + else: + json_file_path = jsons[0] + json_data = json.loads(json_file_path.read_text(encoding="utf-8")) + + if not all((root_dir / json_data[_]).exists() + for _ in ("image_path", "mask_path", "original_path")): + raise ValueError(f"Inconsistent page_data of {img_path}") + + page_data = st.PageData( + json_data["image_path"], json_data["mask_path"], + json_data["original_path"], json_data["scale"], + [st.Box(*data["xyxy"]) for data in json_data["blk_list"]], + [], [], []) + # Merge boxes that have mutually overlapping centers. 
+ page_data.resolve_total_overlaps() + return json_data, page_data + + def page_data(self, image_idx: int): + _, page_data = self._load_page_data(image_idx) + return page_data + def json_data(self, image_idx: int): + json_data, _ = self._load_page_data(image_idx) + return json_data + + def path_from_idx(self, image_idx: ImgSpecT, cached: bool = False): + "(Relative path) to subject image. Raises ValueError if not found." + _idx = self.normalize_idx(image_idx) + if _idx is None: + raise ValueError(f"{_idx} not found in context.") + if cached: + page_data = self.page_data(_idx) # load if needed + path = Path(page_data.image_path) + else: + path = self.image_paths[_idx] + if not self.final(path).exists(): + raise ValueError(f"{path} not found in context.") + return path + + def cached_image(self, image_idx: ImgSpecT): + "Relative path to cached image." + _idx = self.normalize_idx(image_idx) + if _idx is None: + return None + page_data = self.page_data(_idx) # load if needed + path = Path(page_data.image_path) + if not self.final(path).exists(): + return None + return path + + def image_context(self, image_idx: ImgSpecT) -> ImageContext | None: + "Cached image context." + return cast(ImageContext, self.subject_context(image_idx)) + + def display_image(self, image_path: Path | str): + "Return a PIL image, a url, or the given (final) path." 
+ final_image_path = self.final(image_path) + if self.force_PIL: + return Image.open(final_image_path) + elif self.use_tunnel: + if (server := self.server) is not None and (url := server.unc_share) is not None: + return url/final_image_path.relative_to(self.root_dir) + return final_image_path + DI = display_image + + def _update_results(self, run_name: str, img_idx: ImgIdT, results: ResultSetDefault): + self._results[run_name][img_idx] = cast(ResultSet, results) + + def _result_from(self, + image_idx: ImgIdT, box_idx: BoxIdT, method: CropMethod, ocr: str | None = None): + img_ctx = ImageContext(self, image_idx) + extracted = method in _EXTRACTED_METHODS + result_cls = ResultOCRExtracted if extracted else ResultOCR + result = result_cls(img_ctx, int(box_idx), None, None, description=f"{method.value}") + if ocr is not None: + result.ocr = ocr + return result + + def result(self, + run_name: str, + image_idx: ImgIdT, box_idx: BoxIdT, method: CropMethod, + ocr: bool=True, + rebuild: bool=False) -> ResultOCR | None: + _result = self._results[run_name][image_idx][box_idx].get(method) + if not rebuild and _result is not None: + return _result + + result: ResultOCR = self._result_from(image_idx, box_idx, method) + image, cropped_image, cropped_mask = result.image, None, None + img_ctx = ImageContext(self, image_idx) + base_image = img_ctx.base_image + box = img_ctx.boxes[box_idx] + if image is None and method in _IMAGE_METHODS: + image = crop_by_image( + method, box, base_image, self.config.current_profile.preprocessor) + + if image is None and method in _EXTRACTED_METHODS: + mask = img_ctx.mask + cropped_image_path = result.cache_image(cropped_image, "cropped") + cropped_mask_path = result.cache_image(cropped_mask, "mask") + if not cropped_image_path.exists() or not cropped_mask_path.exists(): + image, cropped_image, cropped_mask = crop_by_extracted( + method, box, base_image, mask, + cropped_image_path, cropped_mask_path, img_ctx.dilated()) + + assert image is not None + 
if result.image is None: + result.image = image + result.cache_image() + if cropped_image is not None: + result.cache_image(cropped_image, "cropped") + if cropped_mask is not None: + result.cache_image(cropped_mask, "mask") + + exp_run = OCRExperimentRun(self, run_name) + if ocr: + exp_run.before_result(result) + result = self.ocr_box(result, img_ctx.page_lang) + exp_run.after_result(result) + self._results[run_name][image_idx][box_idx][method] = result + self._dirty = True + return result + + def results(self, run_name: str | None = None, img_idx: ImgIdT | None = None): + if run_name is None: return self._results + if img_idx is None: return self._results[run_name] + return self._results[run_name][img_idx] + def run_results(self, run_name: str): + return cast(dict[ImgIdT, ResultSet], self.results(run_name)) + def image_results(self, run_name: str, img_idx: ImgIdT): + return cast(ResultSet, self.results(run_name, img_idx)) + def box_results(self, run_name: str, img_idx: ImgIdT, box_idx: BoxIdT): + return cast(ResultSet, self.results(run_name, img_idx))[box_idx] + def method_results(self, run_name: str, img_idx: ImgIdT, method: CropMethod): + image_results = self.image_results(run_name, img_idx) + return {i: box_results.get(method) for i,box_results in image_results.items()} + + def _reset_results(self): + results = defaultdict(lambda: defaultdict(lambda: ResultSetDefault(dict))) + self._results = cast(dict[str, dict[ImgIdT, ResultSet]], results) + def _reset_results_(self, + run_name: str | None = None, + image_idx: int | None = None, + box_idx: int | None = None, + method: CropMethod | None = None): + if run_name is None and image_idx is None and box_idx is None and method is None: + self._reset_results() + return + results = self._results + models = tuple(results.keys()) if run_name is None else [run_name] if run_name in results else [] + for run_name in models: + img_nodes = results[run_name] + imgs = (tuple(img_nodes.keys()) + if image_idx is None else 
[image_idx] if image_idx in img_nodes else []) + for img_idx in imgs: + box_nodes = img_nodes[img_idx] + boxes = (tuple(box_nodes.keys()) + if box_idx is None else [box_idx] if box_idx in box_nodes else []) + for box_idx in boxes: + if method is None: + del box_nodes[box_idx] + else: + methods = box_nodes[box_idx] + if method in methods: + del methods[method] + if not box_nodes[box_idx]: + del box_nodes[box_idx] + if not img_nodes[img_idx]: + del img_nodes[img_idx] + if not results[run_name]: + del results[run_name] + def reset_results(self, + run_name: str | None = None, + image_idx: int | None = None, + box_idx: int | None = None, + method: CropMethod | None = None): + self._reset_results_(run_name, image_idx, box_idx, method) + self._dirty = True + def _reset_(self): + super()._reset_() + self._reset_results_() + self._load_page_data.cache_clear() + self.mocr.cache_clear() + + @classmethod + def get_image_paths(cls, root_dir: Path): + if root_dir is None: return [] + source_dir = root_dir / cls.SOURCE_DIR_NAME + return sorted( + [_.relative_to(root_dir) for _ in source_dir.glob("*") + if _.is_file() and _.suffix.lower() in [".jpg", ".png", ".jpeg"]]) + + def run_to_dict(self, run_name: RunIdT) -> dict[str, dict[BoxIdT, dict[str, str]]]: + "JSON serializable dict of the experiment run" + results = {} + idx2name = {i: p.name for i, p in enumerate(self.image_paths)} + run_results = cast(dict[ImgIdT, ResultSet], self.results(run_name)) + for img_idx, rset in run_results.items(): + results[idx2name[img_idx]] = resultset_to_dict(cast(ResultSet, rset)) + return results + + # def run_to_json(self, run_name: RunIdT): + # data = { + # 'run_name': run_name, + # 'results': self.run_to_dict(run_name), + # } + # fp = self._root / f'{run_name}.json' + # with open(fp, 'w') as f: + # json.dump(data, f, indent=2) + # return fp + + # def save_results(self): + # for run_name, _ in self._exp_runs.items(): + # self.run_to_json(run_name) + + def _load_run_results(self, run_name: str, 
run_data: dict[str, dict[str, dict[str, str]]]): + self._exp_runs[run_name] = OCRExperimentRun(self, run_name) + name2idx = {p.name: i for i, p in enumerate(self.image_paths)} + for img_name, data in run_data.items(): + img_idx = name2idx.get(img_name, None) + if img_idx is None: + logger.warning(f"Image {img_name} not found in context.") + continue + rset: ResultSetDefault = dict_to_resultset( + ImgIdT(img_idx), + data, + result_factory=self._result_from) + self._update_results(run_name, img_idx, rset) + + # def load_results(self): + # for json_path in self.root_dir.glob("*.json"): + # try: + # with open(json_path, 'r') as f: + # data = json.load(f) + # if data.keys() != {'run_name', 'results'}: + # continue + # except Exception as e: + # continue + # self._load_run_results(data) + + def to_dict(self): + data: dict = { + "ocr_model": self.ocr_model, + "runs": (rr := {}) + } + for run_name, _ in self._exp_runs.items(): + rr[run_name] = self.run_to_dict(run_name) + return data + + def to_json(self): + data = self.to_dict() + _ = datetime.datetime.now() + fp = self._root / f'{self.name}.json' + with open(fp, 'w') as f: + json.dump(data, f, indent=2) + return fp + + def _from_json(self): + json_path = self.root_dir / f'{self.name}.json' + if not json_path.exists(): return + try: + with open(json_path, 'r') as f: + data = json.load(f) + except Exception as e: + logger.error(f"Error loading {json_path}: {e}") + return + self.ocr_model = data['ocr_model'] + for run_name, run_results in data['runs'].items(): + self._load_run_results(run_name, run_results) + + @classmethod + def from_json(cls, root_dir: Path, name: str, config: cfg.Config | None = None): + json_path = root_dir / f'{name}.json' + try: + with open(json_path, 'r') as f: + data = json.load(f) + except Exception as e: + logger.error(f"Error loading {json_path}: {e}") + raise e + config = config or cls.get_config() + self = cls(data['ocr_model'], root_dir, config=config) + for run_name, run_results in 
data['runs'].items(): + self._load_run_results(run_name, run_results) + return self + + def save(self): + fp = self.to_json() + self._dirty = False + return fp + + @classmethod + def load(cls, root_dir: Path, name: str): + return cls.from_json(root_dir, name) + + + def show(self): + config = self.config + gpu = torch.cuda.is_available() or torch.backends.mps.is_available() + model_path = config.get_model_path(gpu) + device = ("mps" if torch.backends.mps.is_available() else "cuda") if model_path.suffix == ".pt" else "cpu" + + config.show() + cprint( + f"{'config cache_dir':>17}: {repr(config.cache_dir)}\n" + f"{'model_path':>17}: {repr(model_path)}\n" + f"{'device':>17}: {repr(device)}") + + cprint( + f"{'force_PIL':>17}: {self.force_PIL}\n" + f"{'use_tunnel':>17}: {self.use_tunnel}\n" + f"{'server_url':>17}: {repr(self.server.unc_share) if self.server else ''}\n" + f"{'experiment dir':>17}: {self.root_dir}\n" + f"{'source_dir':>17}: {self.final(self.source_dir)}\n" + f"{'cache_dir':>17}: {self.final(self.cache_dir)}\n" + ) + + + def __init__(self, + ocr_model: str, + root_dir: Path | str | None = None, + *, + config: cfg.Config | None = None, + server: web_server.WebServer | None = None, + run_name: str = 'Tesseract-crop-post', + load: bool = True): + if root_dir is None: + root_dir = type(self).EXP_DIR + self.config = config or type(self).get_config() + self.ocr_model = ocr_model + root_dir = Path(root_dir) + super().__init__( + ocr_model, self.get_image_paths(root_dir), root=root_dir, run_name=run_name) + self.image_paths = self._paths + self._reset_results() + self._images = self._subjects + use_pil = os.environ['USE_PIL'].lower() == 'true' + self.force_PIL = use_pil + use_tunnel = os.environ['USE_TUNNEL'].lower() == 'true' + self.use_tunnel = use_tunnel + self.server = server or SERVER + if load: + self._from_json() + + +# %% ../nbs/experiments.ipynb 85 +@FC.patch_to(ImageContext) +def setup(self, exp: OCRExperimentContext, image_idx: ImgSpecT, page_lang: str | 
None = None): + super(type(self), self).setup(exp, image_idx) + self._mask_dilated1 = self._mask_dilated05 = self._mask_dilated02 = None + self.json_data, self.page_data = exp._load_page_data(self.idx) + self.setup_page_lang(page_lang) + self.mask = Image.open(exp.final(self.mask_path)) + self.base_image = Image.open(exp.final(self.image_path)) + self.setup_ground_truth() + + +# %% ../nbs/experiments.ipynb 88 +class SimpleResultVisor: + ctx: ResultOCR + + @classmethod + def diff_tagged(cls, result: ResultOCR): + _, html = get_text_diffs_html(result.image_ctx.gts[result.block_idx], result.ocr, False) + return f"{html}" + + def as_html(self): + result = self.ctx + DI = cast(OCRExperimentContext, result.image_ctx.exp).DI + if isinstance(result, ResultOCRExtracted): + return self.as_html_extracted() + has_ocr = result.ocr is not None + acc_html = '' + if has_ocr: + acc_html = f"
{result.acc:.2f}" + html_str1, html_str2 = get_text_diffs_html(result.image_ctx.gts[result.block_idx], result.ocr) + box_image_path = result.cache_image() + html1 = get_columns_html([[DI(box_image_path)], [(result.ocr or '') + acc_html]]) + if has_ocr: + html2 = f"
{html_str1}
{html_str2}
" + else: + html2 = f"
{html_str1}
" + return html1 + '\n
\n' + html2 + + def as_html_extracted(self): + result = self.ctx + has_ocr = result.ocr is not None + DI = cast(OCRExperimentContext, result.image_ctx.exp).DI + html_str1, html_str2 = get_text_diffs_html(result.image_ctx.gts[result.block_idx], result.ocr) + if has_ocr: + diff_html = f"
{html_str1}
{html_str2}
" + else: + diff_html = f"
{html_str1}
" + cropped_image_path = result.cache_image(None, "cropped") + cropped_mask_path = result.cache_image(None, "mask") + result_path = result.cache_image() + return '\n
\n'.join([ + get_image_grid_html([ + DI(cropped_image_path), DI(cropped_mask_path), DI(result_path)], 1, 3), + acc_as_html(result.acc) if has_ocr else '', + diff_html + ]) + + def display(self): display(HTML(self.as_html())) + def _ipython_display_(self): self.display() + + def __init__(self, ctx: ResultOCR): + self.ctx = ctx + + +ResultOCR._default_visor_type = SimpleResultVisor # type: ignore + + +# %% ../nbs/experiments.ipynb 107 +class RunSelector(ContextVisor): + def setup_controls(self): + options = self.run_names + w = W.Dropdown( + options=options, + value=self.values['run_name'], + layout={'width': 'fit-content'}, + style={'description_width': 'initial'}) + return {'run_name': w} + + def setup_ui(self): + ctls = self.controls + model_grp = W.HBox([ctls['run_name']]) + model_grp.add_class('model_grp') + ui = W.HBox([*super().comps_ui(), model_grp]) + return ui + + def __init__(self, + exp_ctx: OCRExperimentContext, + run_name: str | None=None, + run_names: list[str] | None = None, + **kwargs + ): + self.run_names: list[str] = run_names or exp_ctx.run_names + super().__init__(exp_ctx, + {'run_name': run_name or self.run_names[0]}) + + +# %% ../nbs/experiments.ipynb 110 +class MessageVisor(ContextVisor): + EMPTY = '' + + # _css = """ + # .message_visor-yXy { + # border: 1px solid red; + # } + # """ + + @property + def msg(self): return self.values['msg'] + + def update_output(self, /, msg: str | None = None, **kwargs): + if msg is not None and msg != self.EMPTY: + cprint(msg) + self.values['msg'] = None + else: + clear_output() + + def setup_style(self): + self.out.add_class('message_visor-yXy') + return super().setup_style() + + def setup_controls(self): + # w = W.Label(value=self.values['msg'] if self.values['msg'] != self.EMPTY else None, + # layout={'width': 'fit-content'}) + return {}#{'msg': w} + + def __init__(self, + msg: str | None = None, + **kwargs + ): + super().__init__(None, + {'msg': msg}, + **kwargs) + + +# %% ../nbs/experiments.ipynb 117 
+class StatusVisor(ContextVisor): + ctx: OCRExperimentContext + + @property + def save_button(self) -> W.Button: return self.controls['save'] # type: ignore + @property + def reset_button(self) -> W.Button: return self.controls['reset'] # type: ignore + + def setup_controls(self): + style={'font_size': '1em', 'font_weight': 'bold'} + sw = W.Button(description = 'save', style=style, + layout={'width': '4em'}, + ) + rw = W.Button(description = 'reset', style=style, + layout={'width': '5em', 'margin': '0px 0px 0px 3em'}, + ) + return {'save': sw, 'reset': rw} + + def setup_ux(self): + source = (self.ctx, '_dirty') + target = (self.save_button, 'disabled') + T.dlink(source, target, lambda x: not x) + target = (self.save_button.style, 'button_color') + T.dlink(source, target, lambda x: 'lightblue' if x else None) + + + def __init__(self, exp_ctx: OCRExperimentContext, **kwargs): + super().__init__(exp_ctx, {}, **kwargs) + + +# %% ../nbs/experiments.ipynb 122 +class OCRModelSelector(ContextVisor): + ctx: OCRExperimentContext + + def setup_controls(self): + options = self.models + w = W.Dropdown( + options=options, + value=self.values['model'], + layout={'width': 'fit-content'}, + style={'description_width': 'initial'}) + return {'model': w} + + def setup_ui(self): + ctls = self.controls + model_grp = W.HBox([ctls['model']]) + model_grp.add_class('model_grp') + ui = W.HBox([*super().comps_ui(), model_grp]) + return ui + + def __init__(self, + exp_ctx: OCRExperimentContext, + ocr_model: OCRModel | None=OCRModel.TESSERACT, + ocr_models: dict[str, OCRModel] | None = None, + out: W.Output | None = None + ): + self.models: dict[str, OCRModel] = ocr_models or OCRModel.__display_names__() + super().__init__(exp_ctx, + {'model': ocr_model or OCRModel.TESSERACT}, + out=out or self.out)#, ctxs=[exp_visor]) + + +# %% ../nbs/experiments.ipynb 125 +class DisplayOptions(Enum): + BOXES = 0 + IMAGE = 1 + MASK = 2 + IMAGE_MASK = 3 + PAGE_DATA = 4 + GROUND_TRUTH = 5 + ALL = 6 + RESULTS = 7 
+ BEST_RESULTS = 8 + ACCURACY = 9 + DATAFRAME = 10 + CONFIG = 11 + + @staticmethod + def __display_names__(): + return dict( + zip("Boxes, Image, Mask, Image & Mask, Page data, Ground truth, Image All, Results, " + "Best results, Accuracy, Dataframe, Config".split(', '), + DisplayOptions)) + + +class ContentSelector(ContextVisor): + ctx: OCRExperimentContext + + def image_info(self, image_ctx: ImageContext): + img = image_ctx.base_image + (w, h), print_size_in, print_size_cm, required_dpi = image_ctx.image_info + format = PRINT_FORMATS['Modern Age'] + cprint( f"{'Width x Height':>30}: {w} x {h} pixels\n" + f"{'PIL Info DPI':>30}: {repr(img.info.get('dpi', None))}\n" + f"{'Print Size 300 DPI':>30}: {print_size_in[0]:.3f} x {print_size_in[1]:.3f} in" + f" / {print_size_cm[0]:.3f} x {print_size_cm[1]:.3f} cm\n" + f"Required DPI Modern Age format: {required_dpi:.3f} dpi " + f"({format[0]:.3f} x {format[1]:.3f} in)") + + + def display_content(self, image_ctx: ImageContext, display_option: DisplayOptions): + DI = self.ctx.DI + if display_option in (DisplayOptions.ALL, DisplayOptions.PAGE_DATA): + self.image_info(image_ctx) + RenderJSON(image_ctx.json_data, 350, 2).display() + if display_option in (DisplayOptions.ALL, DisplayOptions.GROUND_TRUTH): + cprint(image_ctx.gts) + if display_option == DisplayOptions.CONFIG: + self.ctx.show() + if display_option == DisplayOptions.IMAGE: + display_image_grid([DI(image_ctx.image_path)], 1, 1) + if display_option == DisplayOptions.MASK: + display_image_grid([DI(image_ctx.mask_path)], 1, 1) + if display_option in (DisplayOptions.ALL, DisplayOptions.IMAGE_MASK): + display_image_grid([DI(image_ctx.image_path), DI(image_ctx.mask_path)], 1, 2) + if display_option in (DisplayOptions.ALL, DisplayOptions.BOXES): + display_image_grid([DI(image_ctx.image_boxes_path)], 1, 1) + + + def setup_controls(self): + options = self.display_options or {**DisplayOptions.__display_names__()} + display_option_wdgt = W.Dropdown( + options=options, + 
value=self.values['display_option'], + layout={'width': '120px'}, + style={'description_width': 'initial'}) + return {'display_option': display_option_wdgt} + + + def setup_ui(self): + ctls = self.controls + display_option_grp = W.HBox([ctls['display_option']]) + display_option_grp.add_class('display_option_grp') + comps = self.comps_ui() + ui = W.HBox([*comps, display_option_grp]) + return ui + + + def __init__(self, + exp_ctx: OCRExperimentContext, + display_option: DisplayOptions | None=DisplayOptions.BOXES, + display_options: Mapping[str, DisplayOptions] | None = None, + **kwargs + ): + self.display_options = display_options + super().__init__(exp_ctx, + {'display_option': display_option or DisplayOptions.BOXES}, + **kwargs)#, ctxs=[exp_visor]) + + +# %% ../nbs/experiments.ipynb 129 +class ImageSelector(ContextVisor): + ctx: OCRExperimentContext + + @property + def image_ctx(self): + return ImageContext(self.ctx, self.values['image_idx']) + + def setup_controls(self): + paths = self.ctx.image_paths + w = W.Dropdown( + options={_.stem:i for i,_ in enumerate(paths)}, + value=self.values['image_idx'], + # layout={'width': 'fit-content'}, + layout={'width': '25em'}, + style={'description_width': 'initial'}) + return {'image_idx': w} + + def update(self, image_idx: ImgSpecT | None = None, **kwargs): + if image_idx is None: return + idx = self.ctx.normalize_idx(image_idx) + if idx is None: return + super().update(image_idx=idx, **kwargs) + + + def __init__(self, + ctx: OCRExperimentContext, /, + image_idx: ImgSpecT = 0, + **kwargs): + idx = ctx.normalize_idx(image_idx) + assert idx is not None, f"Image {image_idx} not found in experiment context" + super().__init__(ctx, {'image_idx': idx}, **kwargs) + + +# %% ../nbs/experiments.ipynb 133 +class OCRContextVisor(ContextVisor): + ctx: OCRExperimentContext + + def update_output(self, /, image_idx: ImgIdT, **kwargs): + ctx = self.ctx + img_path = ctx.path_from_idx(image_idx, cached=ctx.force_PIL) + img = 
# %% ../nbs/experiments.ipynb 165
def trimmed_mean(data, trim_percent):
    """
    Mean of `data` after dropping the lowest and highest `trim_percent` fraction of values.

    `int(trim_percent * len(data))` values are removed from each end of the sorted data.
    When that count is zero (e.g. fewer than 10 values with `trim_percent=0.1`) nothing is
    trimmed and the plain mean is returned.

    :param data: 1-D sequence or array of numbers.
    :param trim_percent: Fraction of the values to cut from *each* end.
    :return: The trimmed mean (numpy's `nan` result if no value remains after trimming).
    """
    sorted_data = np.sort(data)
    n = len(sorted_data)
    trim_count = int(trim_percent * n)
    # Use an explicit end index: the former `[trim_count:-trim_count]` slice turned into
    # `[0:-0]` == `[0:0]` (empty, hence a NaN mean) whenever `trim_count` was 0.
    return np.mean(sorted_data[trim_count:n - trim_count])

def mad_based_outlier(points, threshold=3.5):
    """
    Return the entries of `points` whose modified z-score is below `threshold`.

    The score is `0.6745 * |x - median| / MAD`, where MAD is the median absolute deviation.

    :param points: 1-D sequence or array of numbers.
    :param threshold: Scores greater than or equal to this value are treated as outliers.
    :return: A new array with the retained values.
    """
    points = np.asarray(points)
    median = np.median(points)
    diff = np.abs(points - median)
    mad = np.median(diff)
    if mad == 0:
        # More than half of the values coincide with the median, so the score is undefined
        # (x/0 -> inf, 0/0 -> nan) and the comparison below would reject every point.
        # Without a usable spread estimate nothing is flagged.
        return points.copy()
    modified_z_score = 0.6745 * diff / mad
    return points[modified_z_score < threshold]

def iqr_outlier_removal(data):
    """
    Return the entries of `data` lying within 1.5 * IQR of the first and third quartiles.

    :param data: 1-D sequence or array of numbers.
    :return: A new array with the values inside `[q1 - 1.5*iqr, q3 + 1.5*iqr]`.
    """
    data = np.asarray(data)  # boolean-mask indexing below needs an ndarray, not a list
    q1 = np.percentile(data, 25)
    q3 = np.percentile(data, 75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    return data[(data >= lower_bound) & (data <= upper_bound)]
'results': results, + } + + @classmethod + def from_image(cls, + ctx: OCRExperimentContext, + run_name: RunIdT, + image_idx: ImgSpecT): + idx = cast(ImgIdT, ctx.normalize_idx(image_idx)) + img_ctx = ImageContext(ctx, idx) + return cls(img_ctx, run_name) + + @classmethod + def from_method(cls, + ctx: OCRExperimentContext, + image_idx: ImgIdT | str | Path, + run_name: str, + method: CropMethod) -> ExperimentOCRMethod | None: + experiment = cls.from_image(ctx, run_name, image_idx) + if experiment is None: + return None + return experiment.method_experiment(method) + + + def result(self, box_idx: BoxIdT, method: CropMethod, ocr: bool=True, rebuild: bool=False): + ctx, img_ctx = self.ctxs + return ctx.result(self.run.name, img_ctx.image_idx, box_idx, method, ocr, rebuild) + + def results(self): + ctx, img_ctx = self.ctxs + return cast(ResultSet, ctx.results(self.run.name, img_ctx.image_idx)) + + def has_run(self): + "at least one method has run" + return len(self.results()) == len(self.img_ctx.page_data.boxes) # pylint: disable=no-member + + def best_results(self): + methods = list(CropMethod.__members__.values()) + _ = [self.method_experiment(m).results() for m in methods] + results = self.results() + # at least one method has run + if len(results) < len(self.img_ctx.page_data.boxes): # pylint: disable=no-member + return None + best = [] + for box_idx in results: + methods = results[box_idx] + best_method = max(methods, key=lambda m: methods[m].acc) # type: ignore + best.append((best_method, methods[best_method])) + return best + + def save_results_as_ground_truth(self, overwrite=False): + ctx, img_ctx = self.ctxs + gts_path = ctx.final(ground_truth_path(img_ctx.page_data)) # pylint: disable=no-member + if overwrite or not gts_path.exists(): + best_results = self.best_results() + if best_results: + tt = [r.ocr for m,r in best_results] + gts_path.write_text('\n'.join(tt), encoding="utf-8") + img_ctx.setup_ground_truth() + logger.info(f"Ground truth data saved 
successfully to {gts_path}") + return True + else: + logger.info("No best results available to save.") + return False + else: + return False + + @property + def experiments(self): + if not hasattr(self, '_experiments'): + self._experiments = {} + return self._experiments + def method_experiment(self, method: CropMethod) -> ExperimentOCRMethod: + if method not in self.experiments: + self.experiments[method] = ExperimentOCRMethod(self, method) + return self.experiments[method] + + + def to_dataframe(self): + "Dataframe with crop methods as columns and box ids as rows" + methods = list(CropMethod.__members__.values()) + experiments = [self.method_experiment(m) for m in methods] + accuracies = [[result.acc for result in exp.results()] for exp in experiments] + # transpose accuracies + accuracies = list(zip(*accuracies)) + return pd.DataFrame(accuracies, columns=CropMethod.__display_names__()) + + def plot_accuracies(self, + methods: list[CropMethod] | None = None, + ): + "Plots a horizontal bar chart of the accuracies for a list of method experiments." 
+ from matplotlib.colors import Normalize + methods = methods or list(CropMethod.__members__.values()) + experiments = [self.method_experiment(m) for m in methods] + if not experiments: return + + _, img_ctx = self.ctxs + page_data = img_ctx.page_data # pylint: disable=no-member + _ = [[result.acc or 0.0 for result in exp.results()] for exp in experiments] + accuracies: list[float] = [np.mean(a) for a in _] + # accuracies = [np.mean([result.acc for result in exp.results()]) for exp in experiments] + + _, ax = plt.subplots(figsize=(10, 5)) + + # Normalize the accuracies for color mapping + norm = Normalize(min(accuracies), max(accuracies)) + # Color map from red to green + cmap = plt.get_cmap('RdYlGn') + colors = cmap(norm(accuracies)) + + ax.barh([m.value for m in methods], accuracies, color=colors) + + ax.set_xscale('log') # Set the x-axis to a logarithmic scale + ax.set_xlabel('Average Accuracy (log scale)', fontsize=12, fontweight='bold') + + ax.set_ylabel('Method', fontsize=12, fontweight='bold') + ax.set_yticks(range(len(methods))) + ax.set_yticklabels([f'{method.value} ({acc:.2f})' + for method, acc in zip(methods, accuracies)], fontsize=12) + max_acc_index = np.argmax(accuracies) + ax.get_yticklabels()[max_acc_index].set(color='blue', fontweight='bold') + + model = self.ocr_model + title_text = (f"{page_data.original_path} - OCR model: {model}") + ax.set_title(title_text, fontsize=12, fontweight='bold') + + plt.tight_layout() + plt.show() + + + def summary_box(self, box_idx: int): + ctx, _ = self.ctxs + results: list[tuple[CropMethod, ResultOCR]] = [] + pb = tqdm(CropMethod.__members__.values(), leave=False, desc=f"Box #{box_idx+1}") + for m in pb: + r = cast(ResultOCR, self.result(box_idx, m)) + results.append((m, r)) + methods, images, ocrs, accs = zip(*map( + lambda t: (t[0].value, ctx.DI(t[1].cache_image()), + SimpleResultVisor.diff_tagged(t[1]), acc_as_html(t[1].acc)), + results)) + display_columns([methods, images, accs, ocrs], + headers=["Method", 
f"Box #{box_idx+1}", "Accuracy", "OCR"]) + + + def summary_method(self, method: CropMethod): + ctx, _ = self.ctxs + results = self.method_experiment(method).results() + methods, images, ocrs, accs = zip(*map( + lambda r: (r.block_idx+1, ctx.DI(r.cache_image()), + SimpleResultVisor.diff_tagged(r), acc_as_html(r.acc)), + results)) + display_columns([methods, images, accs, ocrs], + headers=["Box #", "Box", "Accuracy", f"{method.value} OCR"]) + + + def display(self): + out = [] + for method in CropMethod: + out.append(f"---------- {method.value} ----------") + results = self.method_experiment(method).results() + out.extend(results) + out.append('\n') + cprint(*out, soft_wrap=True) + + + def reset(self, box_idx: int | None = None, method: CropMethod | None = None): + ctx, img_ctx = self.ctxs + ctx.reset_results(None, img_ctx.image_idx, box_idx, method) + + def perform_methods(self, + methods: CropMethod | list[CropMethod] | None = None, + box_idxs: BoxIdT | list[BoxIdT] | None = None, + rebuild: bool = False, + plot_acc: bool = False + ): + if methods is None: + methods = [*CropMethod.__members__.values()] + elif isinstance(methods, CropMethod): + methods = [methods] + if rebuild: + _methods = tqdm(methods, desc="Methods") + else: + _methods = methods + for method in _methods: + method_exp = self.method_experiment(method) + if method_exp: + if rebuild: + method_exp(box_idxs, rebuild=rebuild) + if plot_acc: + self.plot_accuracies() + + def __call__(self, + box_idxs: BoxIdT | list[BoxIdT] | None = None, + methods: CropMethod | list[CropMethod] | None = None, + # save: bool = True, + display=False, + rebuild: bool=False, + save_as_ground_truth=False): + self.perform_methods(methods, box_idxs, rebuild=rebuild) + if save_as_ground_truth: + self.save_results_as_ground_truth(overwrite=True) + # if save: + # self.to_json() + if display: + self.display() + + def __repr__(self): + return f"ExperimentOCR#{self.ctx.exp.name}_{self.run.name}" + + def __init__(self, ctx: 
ImageContext, run_name: RunIdT, **kwargs): + # super().__init__(ctx, **kwargs) + self.ctx = ctx + run = ctx.exp.experiment_run(run_name) + assert run is not None + self.run = run + + +@dataclasses.dataclass +class ExperimentOCRMethod: + ctx: ExperimentOCR + method: CropMethod + + @property + def exp_ctx(self): return self.ctx + @property + def img_ctx(self): return self.ctx.ctx + @property + def ctxs(self): + img_ctx = self.img_ctx + return cast(OCRExperimentContext, img_ctx.exp), img_ctx, self.ctx + + def result(self, box_idx: BoxIdT, ocr: bool=True, rebuild: bool=False) -> ResultOCR | None: + ctx, img_ctx, exp_ctx = self.ctxs + return ctx.result(exp_ctx.run.name, img_ctx.image_idx, box_idx, self.method, ocr, rebuild) + + def results(self, + box_idxs: BoxIdT | list[BoxIdT] | None = None, + ocr: bool=True, rebuild: bool=False) -> list[ResultOCR]: + ctx, img_ctx, exp_ctx = self.ctxs + if box_idxs is None: + box_idxs = list(range(len(img_ctx.boxes))) + elif isinstance(box_idxs, int): + box_idxs = [box_idxs] + run_name = exp_ctx.run.name + results = ctx.method_results(run_name, img_ctx.image_idx, self.method) + results = {i:results[i] if i in results else None for i in box_idxs} + pb = rebuild or not results or any(r is None for r in results.values()) + if pb and len(results) > 2: + model = exp_ctx.ocr_model + progress_bar = tqdm(list(results.keys()), desc=f"{self.method.value} - {model}") + else: + progress_bar = list(results.keys()) + results = [] + for i in progress_bar: + results.append(self.result(i, ocr, rebuild=rebuild)) + return results + + + def get_results_html(self, + box_idxs: BoxIdT | list[BoxIdT] | None = None, + max_image_width: int | None = None): + ctx, img_ctx, exp_ctx = self.ctxs + results: list[ResultOCR] = self.results(box_idxs) + accs = np.array([r.acc for r in results]) + mean_accuracy = np.mean(accs) + mean_trimmed = trimmed_mean(accs, 0.1) + # filtered_data = mad_based_outlier(accs) + # mean_mad = np.mean(filtered_data) + # filtered_data = 
iqr_outlier_removal(accs) + # mean_iqr = np.mean(filtered_data) + + DI = ctx.DI + descriptions, images, ocrs, accs = zip(*map( + lambda r: ( + r.block_idx+1, + DI(r.cache_image()), + SimpleResultVisor.diff_tagged(r), + acc_as_html(r.acc) + ), results)) + non_breakin_space = u'\u00A0' # pylint: disable=unused-variable + tmpl = "{}" + padded_s = lambda s,n: tmpl.format(s.rjust(n)) + acc_fmt = f"{mean_accuracy:.2f}/{mean_trimmed:.2f}" + w, h = img_ctx.base_image.size + dim, _dpi = size(w, h), dpi(w, h) + dim_fmt = f"{w}x{h} px: {dim[0]:.2f} x {dim[1]:.2f} in @ {_dpi:.2f} dpi" + return '\n
\n'.join([ + ("
" + f"{padded_s('Page', 24)}: {img_ctx.page_data.original_path}
" + f"{padded_s('Size', 24)}: {dim_fmt}
" + f"{padded_s('Run', 24)}: {exp_ctx.run.name}
" + f"{padded_s('Model', 24)}: {exp_ctx.ocr_model}
" + f"{padded_s('Crop Method', 24)}: {self.method.value}
" + f"{padded_s('Accuracy Mean/Trimmed', 24)}: {acc_fmt}" + "
"), + get_columns_html( + [descriptions, images, accs, ocrs], + max_image_width, + headers=["Box #", "Image", "Accuracy", "OCR"]), + ]) + + def display(self, + box_idxs: BoxIdT | list[BoxIdT] | None = None, max_image_width: int | None = None): + display(HTML(self.get_results_html(box_idxs, max_image_width))) + + + def summary(self): + ctx, _, _ = self.ctxs + DI = ctx.DI + results = self.results() + methods, images, ocrs, accs = zip(*map( + lambda r: (r.block_idx+1, DI(r.cache_image()), SimpleResultVisor.diff_tagged(r), acc_as_html(r.acc)), + results)) + display_columns([methods, images, accs, ocrs], + headers=["Box #", "Box", "Accuracy", f"{self.method.value} OCR"]) + + + def reset(self): + _, _, exp_ctx = self.ctxs + exp_ctx.reset(method=self.method) + + def __call__(self, box_idxs: BoxIdT | list[BoxIdT] | None = None, display=False, rebuild=False): + if isinstance(box_idxs, int): + result = self.result(cast(BoxIdT, box_idxs), rebuild=rebuild) + if result is not None and display: + result.display() + else: + results = self.results(box_idxs, rebuild=rebuild) + if results and display: + self.display(box_idxs) + + +# %% ../nbs/experiments.ipynb 202 +class ResultVisor(ContextVisor): + ctx: ExperimentOCR + control_names: list[str] = ['all_boxes', 'box_idx', 'all_methods', 'method'] + + _css = """ + .box_grp { + background-color: aliceblue; + } + .method_grp { + background-color: #ededed; + } + """ + + def best_results(self): + ll = self.ctx.best_results() + if ll: + cprint([(m.value, f"{r.acc:.3f}", r.ocr) for m,r in ll]) + + def pd_to_html(self): + df = self.ctx.to_dataframe() + # set float precision + df = df.round(3) + # display floats with 3 decimal digits + df = df.applymap(lambda x: f"{x:.3f}") + # highlight max value in each row + stl = df.style.highlight_max(axis=0) + display(HTML(stl.to_html())) + + def update_output(self, **kwargs): + all_boxes: bool = self.values['all_boxes'] + box_idx: int = self.values['box_idx'] + all_methods: bool = 
self.values['all_methods'] + method: CropMethod = self.values['method'] + + # cprint(f"all_boxes: {all_boxes}, box_idx: {box_idx}, all_methods: {all_methods}, method: {method}") + + exp, _ = self.ctx.ctxs + if all_boxes and all_methods: + self.ctx.plot_accuracies() + self.ctx.display() + elif all_boxes: + self.ctx.summary_method(method) + elif all_methods: + self.ctx.summary_box(box_idx) + else: + with exp.running(True): + result = self.ctx.result(box_idx, method) + if result is not None: + result.display() + # from time import sleep + # sleep(2) + + def setup_controls(self): + _, img_ctx = self.ctx.ctxs + values = self.values + box_wdgt = W.BoundedIntText( + value=values['box_idx'], min=0, max=len(img_ctx.boxes)-1, step=1, + disabled=values['all_boxes'], + layout={'width': '50px'}, + style={'description_width': 'initial'}) + methods_wdgt = W.Dropdown( + options=CropMethod.__display_names__(), + value=values['method'], + layout={'width': '150px'}, + style={'description_width': 'initial'}) + all_boxes_wdgt = W.Checkbox(label='All', value=values['all_boxes'], + description="all", + layout={'width': 'initial'}, + style={'description_width': '0px'}) + all_methods_wdgt = W.Checkbox(label='All', value=values['all_methods'], + description="all", + layout={'width': 'initial'}, + style={'description_width': '0px'}) + return {'all_boxes': all_boxes_wdgt, 'box_idx': box_wdgt, + 'all_methods': all_methods_wdgt, 'method': methods_wdgt} + + def setup_ui(self): + ctls = self.controls + _, img_ctx = self.ctx.ctxs + box_label = W.Label( + value=f"Box # (of {len(img_ctx.boxes)}):", + layout={'width': 'initial', 'padding': '0px 0px 0px 10px'}) + method_label = W.Label(value='Method:', layout={'width': 'initial', 'padding': '0px 0px 0px 10px'}) + + box_grp = W.HBox([box_label, ctls['all_boxes'], ctls['box_idx']]) + box_grp.add_class('box_grp') + method_grp = W.HBox([method_label, ctls['all_methods'], ctls['method']]) + method_grp.add_class('method_grp') + uis = self.comps_ui() + 
return W.HBox([box_grp, method_grp, *uis]) + + def __init__(self, + ctx: OCRExperimentContext | ExperimentOCR, + img_idx: int | str | Path | None = None, + all_boxes: bool = False, + box_idx: int = 0, + all_methods: bool = False, + method: CropMethod=CropMethod.INITIAL_BOX, + **kwargs + ): + if isinstance(ctx, OCRExperimentContext): + assert img_idx is not None, "img_idx must be provided if ctx is an ExperimentContext" + exp = ExperimentOCR.from_image(ctx, 'Tesseract', img_idx) + if not exp: + raise ValueError(f"Image {img_idx} not found in experiment context") + ctx = exp + else: + if not isinstance(ctx, ExperimentOCR): + raise ValueError("ctx must be an ExperimentOCR or OCRExperimentContext") + + super().__init__(ctx, {'all_boxes': all_boxes, 'box_idx': box_idx, + 'all_methods': all_methods, 'method': method}, **kwargs) + + +# %% ../nbs/experiments.ipynb 205 +class ExperimentVisor(ContextVisor): + ctx: ExperimentOCR + + def update_output(self, + image_idx: int | None = None, + **kwargs): + exp_ctx, img_ctx = self.ctx.ctxs + if image_idx is not None and image_idx != img_ctx.image_idx: + ctx = ImageContext(exp_ctx, image_idx) + assert ctx is not None + self.ctx.ctx = ctx + result_visor = self.comp('result_visor') + if result_visor is not None: + result_visor.update_output(**kwargs) + + def __init__(self, + ctx: OCRExperimentContext | ExperimentOCR, + img_idx: int | str | Path | None = None, + all_boxes: bool = False, + box_idx: int = 0, + all_methods: bool = False, + method: CropMethod=CropMethod.INITIAL_BOX, + out: W.Output | None = None, + ): + if isinstance(ctx, OCRExperimentContext): + assert img_idx is not None, "img_idx must be provided if ctx is an ExperimentContext" + exp = ExperimentOCR.from_image(ctx, 'Tesseract', img_idx) + if not exp: + raise ValueError(f"Image {img_idx} not found in experiment context") + ctx = exp + else: + if not issubclass(type(ctx), ExperimentOCR): + raise ValueError("ctx must be an ExperimentOCR or OCRExperimentContext") + + 
exp_ctx, img_ctx = ctx.ctxs + out = out or self.out + image_selector = ImageSelector(exp_ctx, image_idx=img_ctx.image_idx, out=out) + result_visor = ResultVisor(ctx, out=out, + all_boxes=all_boxes, box_idx=box_idx, all_methods=all_methods, method=method) + + super().__init__(ctx, {}, out=out, + ctxs={'image_selector': image_selector, 'result_visor': result_visor}, + hdlrs={'display_option': result_visor} + ) + + +# %% ../nbs/experiments.ipynb 253 +class ExperimentsVisor(ContextVisor): + ctx: OCRExperimentContext + + def update_output(self, + run_name: RunIdT | None = None, + image_idx: ImgIdT | None = None, + display_option: DisplayOptions | None = None, + msg: str | None = None, + **kwargs): + (run_selector, image_selector, content_selector, + result_visor, _, message_visor) = self._comps() + if msg is not None and msg != MessageVisor.EMPTY: + message_visor.update_output(msg=msg) + if run_name is not None: + run = OCRExperimentRun(run_selector.ctx, run_name) + exp_ctx = result_visor.ctx + exp_ctx.run = run + result_visor.ctx = exp_ctx + if image_idx is not None: + img_ctx = ImageContext(self.ctx, image_idx) + result_visor.ctx.ctx = img_ctx + display_option = content_selector.values['display_option'] + if display_option is not None and display_option != DisplayOptions.RESULTS: + result_visor.hide() + if display_option == DisplayOptions.BEST_RESULTS: + result_visor.best_results() + elif display_option == DisplayOptions.ACCURACY: + result_visor.ctx.plot_accuracies() + elif display_option == DisplayOptions.DATAFRAME: + result_visor.pd_to_html() + else: + content_selector.display_content(image_selector.image_ctx, display_option) + else: + result_visor.show() + result_visor.update_output(**kwargs) + + + @property + def status_visor(self) -> StatusVisor: return cast(StatusVisor, self.comps['status_visor']) + @property + def message_visor(self) -> MessageVisor: return cast(MessageVisor, self.comps['message_visor']) + @property + def image_context(self) -> ImageContext: + 
return ImageContext(self.ctx, self.comps['image_selector'].values['image_idx']) + @property + def run_name(self) -> str: return cast(str, self.comps['run_selector'].values['run_name']) + @property + def image_experiment(self) -> ExperimentOCR: return ExperimentOCR(self.image_context, self.run_name) + + def _comps(self): + cc = self.comps + rsel: RunSelector = cc['run_selector'] # type: ignore + isel: ImageSelector = cc['image_selector'] # type: ignore + cs: ContentSelector = cc['content_selector'] # type: ignore + rv: ResultVisor = cc['result_visor'] # type: ignore + sv: StatusVisor = cc['status_visor'] # type: ignore + mv: MessageVisor = cc['message_visor'] # type: ignore + return rsel, isel, cs, rv, sv, mv + + def setup_ui(self): + ctls = self.controls.values() + rselw, iselw, csw, rvw, svw, _ = [_.w for _ in self._comps()] + return W.VBox([W.HBox([rselw, iselw, csw, svw, *ctls]), rvw]) + + def save(self, b): + ctx = self.ctx + fp = ctx.save() + self.update(msg=f"'{ctx.name}' experiment saved on {fp}") + + def reset(self, b): + image_idx = self.comps['image_selector'].values['image_idx'] + self.ctx.reset_results(image_idx=image_idx) + + def setup_ux(self): + self.status_visor.save_button.on_click(self.save) + self.status_visor.reset_button.on_click(self.reset) + + def setup_experiment_context(self, + ocr_model: str | None = None, + root_dir: Path | str | None = None, + config: cfg.Config | None = None, + run_names: list[str] | None = None, + ): + ctx = OCRExperimentContext(ocr_model or 'Tesseract', root_dir, config=config) # type: ignore + + for run_name in run_names or ['Tesseract-crop-post', 'Tesseract-crop']: + OCRExperimentRun(ctx, run_name) + return ctx + + def __init__(self, + ctx: OCRExperimentContext | None = None, + ocr_model: str | None = None, + root_dir: Path | str | None = None, + config: cfg.Config | None = None, + image_idx: ImgIdT | str | Path = 0, + run_name: str | None = None, + run_names: list[str] | None = None, + display_option: 
DisplayOptions = DisplayOptions.RESULTS, + all_boxes: bool = False, + box_idx: int = 0, + all_methods: bool = False, + method: CropMethod=CropMethod.INITIAL_BOX, + # ocr_model: OCRModel = OCRModel.TESSERACT, + # ocr_models: dict[str, OCRModel] = {'Tesseract': OCRModel.TESSERACT}, + out: W.Output | None = None, + ): + if ctx is None: + ctx = self.setup_experiment_context(ocr_model, root_dir, config, run_names) + _ = ImageContext(ctx, image_idx) # raises if image_idx is out of range + + out = out or self.out + run_selector = RunSelector(ctx, run_name, run_names, out=out) + exp = ExperimentOCR.from_image(ctx, run_selector.values['run_name'], image_idx) + image_selector = ImageSelector(ctx, image_idx=image_idx, out=out) + content_selector = ContentSelector(ctx, display_option=display_option, out=out) + result_visor = ResultVisor(exp, out=out, + all_boxes=all_boxes, box_idx=box_idx, all_methods=all_methods, method=method, + ctxs={'spinner': Spinner(ctx, 20, 3)} + ) + status_visor = StatusVisor(ctx, out=None) + message_visor = MessageVisor(MessageVisor.EMPTY, out=out) + + super().__init__(ctx, {}, out=out, + ctxs={'run_selector': run_selector, #'model_selector': model_selector, + 'image_selector': image_selector, 'content_selector': content_selector, + 'result_visor': result_visor, + 'status_visor': status_visor, + 'message_visor': message_visor, + }, + ) + diff --git a/pcleaner/_testbed/testbed/helpers.py b/pcleaner/_testbed/testbed/helpers.py new file mode 100644 index 00000000..f5a99a6b --- /dev/null +++ b/pcleaner/_testbed/testbed/helpers.py @@ -0,0 +1,381 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/helpers.ipynb. 
# %% ../nbs/helpers.ipynb 13
_all_ = ['_pops_', '_pops_values_', '_gets_']


# %% ../nbs/helpers.ipynb 14
def _pops_(d: dict, ks: Iterable) -> dict:
    "Pops `ks` keys from `d` and returns them in a dict. Note: `d` is changed in-place."
    # Keys missing from `d` are skipped rather than reported.
    return {k:d.pop(k) for k in ks if k in d}


# %% ../nbs/helpers.ipynb 16
def _pops_values_(d: dict, ks: Iterable) -> tuple:
    "Pops `ks` keys from `d` and returns them as a tuple. Note: `d` is changed in-place."
    # Unlike `_pops_`, a missing key still yields a slot (filled with `None`).
    return tuple(d.pop(k, None) for k in ks)


# %% ../nbs/helpers.ipynb 18
def _gets_(d: Mapping[str, Any], ks: Iterable):
    "Fetches values from a mapping for a given list of keys, returning `None` for missing keys."
    # Lazy: this is a generator, so the lookups happen when the result is consumed.
    return (d.get(k, None) for k in ks)


# %% ../nbs/helpers.ipynb 21
def _get_globals(mod: str):
    "Globals of the frame two levels up the call stack, or module `mod`'s namespace if frames are unavailable."
    if hasattr(sys, '_getframe'):
        # Depth 2 == the caller of the function that called `_get_globals`.
        glb = sys._getframe(2).f_globals
    else:
        glb = sys.modules[mod].__dict__
    return glb


# %% ../nbs/helpers.ipynb 23
def cleanupwidgets(*ws, mod: str|None=None, clear=True):
    """
    Close the given widgets on a best-effort basis.

    :param ws: Objects exposing `close()`, or names (str) looked up in the caller's globals.
        Falsy entries and unknown names are ignored.
    :param mod: Module name used for the globals lookup when stack frames are unavailable;
        defaults to this module.
    :param clear: When true, clear the current cell output first.
    """
    # Must be called directly from here: `_get_globals` inspects a fixed stack depth.
    glb = _get_globals(mod or __name__)
    if clear: clear_output(wait=True)
    for w in ws:
        _w = glb.get(w) if isinstance(w, str) else w
        if _w:
            # Closing stays best-effort, but a bare `except:` would also swallow
            # KeyboardInterrupt/SystemExit, so only ordinary exceptions are ignored.
            try: _w.close() # type: ignore
            except Exception: pass
+
+ +
+ """ + display(HTML(html_content)) + + def _ipython_display_(self): + self.display() + +# %% ../nbs/helpers.ipynb 29 +def page_boxes(self: st.PageData, + image_path: Path, out_dir: Path | None = None) -> tuple[Image.Image, Path]: + """ + Visualize the boxes on an image. + Typically, this would be used to check where on the original image the + boxes are located. + + :param image_path: The path to the image to visualize the boxes on. + """ + image = Image.open(image_path) + draw = ImageDraw.Draw(image) + data_path = resources.files(pcleaner.data) + font_path = str(data_path / "LiberationSans-Regular.ttf") + # Figure out the optimal font size based on the image size. E.g. 30 for a 1600px image. + font_size = int(image.size[0] / 50) + 5 + + for index, box in enumerate(self.boxes): + draw.rectangle(box.as_tuple, outline="green") + # Draw the box number, with a white background, respecting font size. + draw.text( + (box.x1 + 4, box.y1), + str(index + 1), + fill="green", + font=ImageFont.truetype(font_path, font_size), + stroke_fill="white", + stroke_width=3, + ) + + for box in self.extended_boxes: + draw.rectangle(box.as_tuple, outline="red") + for box in self.merged_extended_boxes: + draw.rectangle(box.as_tuple, outline="purple") + for box in self.reference_boxes: + draw.rectangle(box.as_tuple, outline="blue") + + # Save the image. 
# %% ../nbs/helpers.ipynb 31
def crop_box(box: st.Box, image: Image.Image) -> Image.Image:
    "Return the region of `image` delimited by `box.as_tuple`."
    return image.crop(box.as_tuple)

# %% ../nbs/helpers.ipynb 33
# Page sizes as (width, height) in inches, keyed by print format name.
PRINT_FORMATS = {
    'Golden Age': (7.75, 10.5),  # (1930s-40s)
    'Silver Age': (7, 10.375),  # (1950s-60s)
    'Siver Age': (7, 10.375),  # misspelled legacy key, kept so existing lookups keep working
    'Modern Age': (6.625,10.25),  # North American comic books
    'Magazine': (8.5, 11),
    'Digest': (5.5, 8.5),
    'Manga': (5.0, 7.5),
}


def size(w: int, h: int, unit: str = 'in', dpi: float = 300.) -> tuple:
    """
    Calculate the print size of an image in inches or centimeters.

    Args:
        w (int): Width of the image in pixels.
        h (int): Height of the image in pixels.
        unit (str): Unit of measurement ('cm' for centimeters; any other value means inches).
        dpi (float): Dots per inch (resolution).

    Returns:
        tuple: Width and height of the image in the specified unit.
    """
    if unit == 'cm':
        return (w / dpi * 2.54, h / dpi * 2.54)
    else:  # default to inches
        return (w / dpi, h / dpi)


def dpi(w: int, h: int, print_format: str = 'Modern Age') -> float:
    """
    Calculate the dpi (dots per inch) needed to print an image at a specified format size.

    Args:
        w (int): Width of the image in pixels.
        h (int): Height of the image in pixels.
        print_format (str): Key of `PRINT_FORMATS`; unknown names fall back to 'Modern Age'.

    Returns:
        float: Average of the horizontal and vertical dpi for that format's page size.
    """
    # Default to 'Modern Age' if format not found
    format_size = PRINT_FORMATS.get(print_format, PRINT_FORMATS['Modern Age'])
    width_inch, height_inch = format_size
    dpi_w = w / width_inch
    dpi_h = h / height_inch
    return (dpi_w + dpi_h) / 2  # Average dpi for width and height
must match the number of columns.") + html_str += ( + "" + + "".join( + f"" + for header in headers + ) + + "" + ) + + for row_items in zip(*columns): + html_str += "" + for i, item in enumerate(row_items): + if isinstance(item, (Image.Image, Path)): + img_html = get_image_html(item, max_width=max_image_width) + html_str += f"" + else: # Assume the item is a string + style = "font-weight: bold;" if i == 0 else "" + html_str += f"" + html_str += "" + + html_str += "
{header}
{img_html}{item}
" + return html_str + + +def display_columns( + columns: list[list], max_image_width: int | None = None, headers: list[str] | None = None +): + """ + Displays a table with any combination of columns, which can be lists of strings or lists + of PIL Image objects, within a Jupyter notebook cell. + + :param columns: A list of lists, where each sublist represents a column in the table. + Each sublist can contain either strings or PIL Image objects. + :param max_image_width: The maximum size of the images in pixels. This controls the max-height + of the images. + :param headers: A list of header labels for the table. If None, no headers are displayed. + """ + return display(HTML(get_columns_html(columns, max_image_width, headers))) + + +def get_image_grid_html( + images: list[Image.Image | Path | str], + rows: int, + columns: int, + titles: list[str] | None = None, + max_image_width: int | None = None, + caption: str | None = None +): + if titles and len(titles) != len(images): + raise ValueError("Titles list must match the number of images if provided.") + + html_str = "" + + if caption: + html_str += (f"") + + image_index = 0 + for _ in range(rows): + html_str += "" + for _ in range(columns): + if image_index < len(images): + img_html = get_image_html(images[image_index], max_width=max_image_width) + title_html = ( + f"
{titles[image_index]}
" + if titles + else "" + ) + html_str += f"" + else: + html_str += "" # Empty cell if no more images + image_index += 1 + html_str += "" + + html_str += "
{caption}
{title_html}{img_html}
" + return html_str + + +def display_image_grid( + images: list[Image.Image | Path | str], + rows: int, + columns: int, + titles: list[str] | None = None, + max_image_width: int | None = None, + caption: str | None = None, +): + """ + Displays a grid of images in a HTML table within a Jupyter notebook cell. + + :param images: A list of PIL Image objects to be displayed. + :param rows: The number of rows in the grid. + :param columns: The number of columns in the grid. + :param titles: An optional list of titles for each image. If provided, it must match the length + of the images list. + :param max_image_width: The maximum width of the images in pixels. + """ + display(HTML(get_image_grid_html(images, rows, columns, titles, max_image_width, caption))) + + +def acc_as_html(acc): + return f"
{acc:.2f}
" + + +# %% ../nbs/helpers.ipynb 37 +def strip_uuid(p: Path | str): + _p: Path = p if isinstance(p, Path) else Path(p) + new_stem = re.sub(r'(?i)[a-f0-9]{8}-([a-f0-9]{4}-){3}[a-f0-9]{12}', '', _p.stem).strip('_') + return _p.with_stem(new_stem) + + +# %% ../nbs/helpers.ipynb 40 +# Deep copy a defaultdict of defaultdicts to a dict of dicts if it is not already a dict +def defaultdict_to_dict(d) -> dict: + if not isinstance(d, defaultdict): + return d + return {k: defaultdict_to_dict(v) for k, v in d.items()} + diff --git a/pcleaner/_testbed/testbed/ocr_idefics.py b/pcleaner/_testbed/testbed/ocr_idefics.py new file mode 100644 index 00000000..2b8b8f07 --- /dev/null +++ b/pcleaner/_testbed/testbed/ocr_idefics.py @@ -0,0 +1,310 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/ocr_idefics.ipynb. + +# %% ../nbs/ocr_idefics.ipynb 1 +from __future__ import annotations + + +# %% auto 0 +__all__ = ['IdeficsOCR', 'IdeficsExperimentContext'] + +# %% ../nbs/ocr_idefics.ipynb 5 +import functools +import subprocess +from pathlib import Path +from typing import Any +from typing import Literal +from typing import TypeAlias + +import pcleaner.config as cfg +import pcleaner.ocr.ocr as ocr +import torch +from pcleaner.ocr.ocr_tesseract import TesseractOcr +from PIL import Image +from rich.console import Console +from transformers import AutoProcessor +from transformers import Idefics2ForConditionalGeneration + + +# %% ../nbs/ocr_idefics.ipynb 14 +console = Console(width=104, tab_size=4, force_jupyter=True) +cprint = console.print + + +# %% ../nbs/ocr_idefics.ipynb 17 +import pcleaner._testbed.testbed.experiments as exp_testbed +from pcleaner._testbed.testbed.experiments import * +from pcleaner._testbed.testbed.helpers import RenderJSON +import pcleaner._testbed.testbed.web_server as web_server + + +# %% ../nbs/ocr_idefics.ipynb 18 +def load_image(img_or_path) -> Image.Image: + if isinstance(img_or_path, (str, Path)): + return Image.open(img_or_path) + elif 
isinstance(img_or_path, Image.Image): + return img_or_path + else: + raise ValueError(f"img_or_path must be a path or PIL.Image, got: {type(img_or_path)}") + + +# %% ../nbs/ocr_idefics.ipynb 19 +def get_gpu_vram(total=True): + if total: + command = "nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits" + else: + command = "nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits" + try: + vram = subprocess.check_output(command, shell=True).decode('utf-8').strip() + return vram + except subprocess.CalledProcessError: + return "Failed to get VRAM" + + +# %% ../nbs/ocr_idefics.ipynb 43 +def _setup_processor(): + return AutoProcessor.from_pretrained( + "HuggingFaceM4/idefics2-8b", + do_image_splitting=False # cropped boxes are usually small + ) + +# %% ../nbs/ocr_idefics.ipynb 45 +QuantT: TypeAlias = Literal['bfloat16'] | Literal['8bits'] | Literal['4bits'] + +def _setup_model(quant: QuantT, flashattn: bool=True): + kwargs: dict = dict( + torch_dtype=torch.bfloat16, + ) + if quant == 'bfloat16': + pass + else: + from transformers import BitsAndBytesConfig + quantization_config = None + if quant == '8bits': + quantization_config = BitsAndBytesConfig( + load_in_8bit=True, + ) + if quant == '4bits': + quantization_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_use_double_quant=True, + bnb_4bit_compute_dtype=torch.float16 + ) + if quantization_config is not None: + kwargs.update(quantization_config=quantization_config) + if flashattn: + kwargs.update(_attn_implementation="flash_attention_2") + model = Idefics2ForConditionalGeneration.from_pretrained( + "HuggingFaceM4/idefics2-8b", + device_map='auto', + **kwargs) + return model + +# %% ../nbs/ocr_idefics.ipynb 46 +prompt_text_tmpl = ( + "Please perform optical character recognition (OCR) on this image, which displays " + "speech balloons from a comic book. The text is in {}. 
Extract the text and " + "format it as follows: transcribe in standard sentence case, avoid using all capital " + "letters. Provide the transcribed text clearly and double check the sentence is not all capital letters.") + +# prompt_text_tmpl = ("Please perform optical character recognition (OCR) on this image, which displays " +# f"speech balloons from a manga comic. The text is in {}. Extract the text and " +# "format it without newlines. Provide the transcribed text clearly.") + +# prompt_text_tmpl = ("Please perform optical character recognition (OCR) on this image, which displays " +# "speech balloons from a comic book. The text is in {}. Extract the text and " +# "format it as follows: transcribe in standard sentence case (avoid using all capital " +# "letters) and use asterisks to denote any words that appear in bold within the image. " +# "Provide the transcribed text clearly.") + +# prompt_text_tmpl = ("Please perform optical character recognition (OCR) on this image, which displays " +# "speech balloons from a comic book. The text is in {}. Extract the text and " +# "format it as follows: transcribe in standard sentence case, capitalized. Avoid using " +# "all capital letters. In comics, it is common to use two hyphens '--' to interrupt a sentence. " +# "Retain any hyphens as they appear in the original text. Provide the transcribed text " +# "clearly, ensuring it is capitalized where appropriate, including proper nouns.") + +prompt_text_tmpl = ( + "Please perform optical character recognition (OCR) on this image, which displays " + "speech balloons from a comic book. The text is in {}. Extract the text and " + "format it as follows: transcribe in standard sentence case, capitalized. Avoid using " + "all capital letters, but ensure it is capitalized where appropriate, including proper nouns. " + "Provide the transcribed text clearly. 
Double check the text is not all capital letters.") + + +# prompt_text_tmpl = ( +# "Please perform optical character recognition (OCR) on this image, which contains speech " +# "balloons from a comic book. The text is in English. Carefully transcribe the text, " +# "ensuring that you preserve the original formatting and line breaks as they appear " +# "in the speech balloon." +# ) + +default_prompt_text_tmpl = prompt_text_tmpl + +# %% ../nbs/ocr_idefics.ipynb 48 +class IdeficsOCR: + prompt_text_tmpl: str = default_prompt_text_tmpl + PROCESSOR: Any = None + MODEL: Any = None + + + @classmethod + def setup_processor(cls): + cls.PROCESSOR = _setup_processor() + return cls.PROCESSOR + + @classmethod + def setup_model(cls, quant: QuantT='bfloat16', flashattn: bool=True): + cls.MODEL = _setup_model(quant, flashattn) + return cls.MODEL + + @staticmethod + def is_idefics_available() -> bool: + return True + + def show_info(self): + cprint( + f"{'model':>17}: {type(self.MODEL)}\n" + f"{'quantization':>17}: {type(self.quant)}\n" + f"{'device':>17}: {repr(self.MODEL.device)}\n" + f"{'current VRAM':>17}: {get_gpu_vram(False)} MiB\n" + ) + + + def __init__(self, + lang: str | None = None, + prompt_text_tmpl: str|None = None, + quant: QuantT | None = None, + flashattn: bool | None = None, + ): + self.lang = lang + self.prompt_text_tmpl = prompt_text_tmpl or self.prompt_text_tmpl + self.quant = quant or 'bfloat16'#'4bits' + self.flashattn = flashattn or True + if self.PROCESSOR is None: + type(self).setup_processor() + if self.MODEL is None: + type(self).setup_model(self.quant, self.flashattn) + self.device = self.MODEL.device + + def _generation_args(self, image: Image.Image, resulting_messages: list[dict]): + prompt = self.PROCESSOR.apply_chat_template(resulting_messages, add_generation_prompt=True) + inputs = self.PROCESSOR(text=prompt, images=[image], return_tensors="pt") + inputs = {k: v.to(self.device) for k, v in inputs.items()} + + max_new_tokens = 512 + 
repetition_penalty = 1.2 + decoding_strategy = "Greedy" + temperature = 0.4 + top_p = 0.8 + + generation_args = { + "max_new_tokens": max_new_tokens, + "repetition_penalty": repetition_penalty, + } + + assert decoding_strategy in [ + "Greedy", + "Top P Sampling", + ] + + if decoding_strategy == "Greedy": + generation_args["do_sample"] = False + elif decoding_strategy == "Top P Sampling": + generation_args["temperature"] = temperature + generation_args["do_sample"] = True + generation_args["top_p"] = top_p + + generation_args.update(inputs) + return prompt, generation_args + + def __call__( + self, + img_or_path: Image.Image | Path | str, + prompt_text: str | None = None, + lang: str | None = None, + config: str | None = None, + show_prompt: bool = False, + **kwargs, + ) -> str: + if not self.is_idefics_available(): + raise RuntimeError("Idefics is not installed or not found.") + resulting_messages = [ + { + "role": "user", + "content": [{"type": "image"}] + [ + {"type": "text", "text": prompt_text or self.prompt_text_tmpl.format(lang or self.lang)} + ] + } + ] + image = load_image(img_or_path) + prompt, generation_args = self._generation_args(image, resulting_messages) + generated_ids = self.MODEL.generate(**generation_args) + generated_texts = self.PROCESSOR.batch_decode( + generated_ids[:, generation_args["input_ids"].size(1):], skip_special_tokens=True) + if show_prompt: + cprint("INPUT:", prompt, "|OUTPUT:", generated_texts) + return generated_texts[0]#.strip('"') + + def postprocess_ocr(self, text): + return ' '.join(remove_multiple_whitespaces(text).splitlines()) + + +# %% ../nbs/ocr_idefics.ipynb 50 +class IdeficsExperimentContext(OCRExperimentContext): + @functools.lru_cache() + def mocr(self, lang: str): + if self.ocr_model == 'Idefics': + proc = IdeficsOCR(lang) + else: + engine = self.engines[self.ocr_model] + ocr_processor = ocr.get_ocr_processor(True, engine) + proc = ocr_processor[lang2pcleaner(lang)] + if isinstance(proc, TesseractOcr): + proc.lang = 
lang2tesseract(lang) + return proc + + def cleanup_model(self): + del IdeficsOCR.MODEL + torch.cuda.empty_cache() + import gc + gc.collect() + IdeficsOCR.MODEL = None + + def setup_idefics(self, quant: QuantT = 'bfloat16', flashattn: bool = True): + if IdeficsOCR.PROCESSOR is None: + IdeficsOCR.setup_processor() + if IdeficsOCR.MODEL is not None: + self.cleanup_model() + if IdeficsOCR.MODEL is None: + IdeficsOCR.setup_model(quant=quant, flashattn=flashattn) + + def show(self): + super().show() + cfg = IdeficsOCR.MODEL.config + if hasattr(cfg, 'quantization_config'): + qcfg = cfg.quantization_config + quant = '4bits' if qcfg.load_in_4bit else '8bits' + else: + quant = 'bfloat16' + cprint( + f"{'Quantization':>17}: {quant!r}\n" + f"{'Flash attention 2':>17}: {cfg._attn_implementation == 'flash_attention_2'}\n" + f"{'VRAM':>17}: {get_gpu_vram(False)}/{get_gpu_vram()} MiB\n" + ) + + def __init__(self, + root_dir: Path | str | None = None, + quant: QuantT = 'bfloat16', + flashattn: bool = True, + *, + config: cfg.Config | None = None, + server: web_server.WebServer | None = None, + run_name: str = 'Idefics-crop-post', + setup_idefics: bool = True, + ): + super().__init__('Idefics', root_dir, config=config, server=server, run_name=run_name) + if setup_idefics: + self.setup_idefics(quant, flashattn) + + diff --git a/pcleaner/_testbed/testbed/ocr_metric.py b/pcleaner/_testbed/testbed/ocr_metric.py new file mode 100644 index 00000000..07ef9a60 --- /dev/null +++ b/pcleaner/_testbed/testbed/ocr_metric.py @@ -0,0 +1,64 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/ocr_metric.ipynb. 
+ +# %% ../nbs/ocr_metric.ipynb 6 +from __future__ import annotations + +import difflib +import html + +from IPython.display import display +from IPython.display import HTML +from rich.console import Console + + +# %% auto 0 +__all__ = ['get_text_diffs_html', 'display_text_diffs'] + +# %% ../nbs/ocr_metric.ipynb 10 +console = Console(width=104, tab_size=4, force_jupyter=True) +cprint = console.print + + +# %% ../nbs/ocr_metric.ipynb 12 +def get_text_diffs_html(str1, str2: str | None=None, ignore_align: bool = False): + matcher = difflib.SequenceMatcher(None, str1, str2 or '') + html_str1, html_str2 = "", "" + _ch ='⎕' # ▿ + ch = f'&#x{ord(_ch):x};' + span1_g = lambda l: f"{ch*l}" if l > 0 else "" + span1_r = lambda l: f"{ch*l}" if l > 0 else "" + span2 = lambda s: f"{html.escape(s)}" if s else "" + + for opcode in matcher.get_opcodes(): + tag, i1, i2, j1, j2 = opcode + if tag == "equal": + html_str1 += html.escape(str1[i1:i2]) + html_str2 += html.escape(str2[j1:j2]) + elif tag == "replace": + max_span = max(i2 - i1, j2 - j1) + # str1_segment = str1[i1:i2].ljust(max_span) + html_str1 += html.escape(str1[i1:i2]) + span1_g(max_span - (i2 - i1)) + html_str2 += span2(str2[j1:j2]) + (span1_r(max_span - (j2 - j1)) if not ignore_align else '') + elif tag == "delete": + deleted_segment = str1[i1:i2] + html_str1 += html.escape(deleted_segment) + if not ignore_align: html_str2 += span1_r(len(deleted_segment)) + elif tag == "insert": + inserted_segment = str2[j1:j2].replace(" ", _ch) + html_str1 += span1_g(len(inserted_segment)) + html_str2 += span2(inserted_segment) + html_str1 = f"
{html_str1}
" + html_str2 = f"
{html_str2}
" + return html_str1, html_str2 + +def display_text_diffs(str1, str2): + """ + Displays two strings one above the other, with differing characters highlighted in red in the + second string only, using difflib.SequenceMatcher to align the strings and ensure matching + sequences are vertically aligned. + + :param str1: The first string to compare. + :param str2: The second string to compare. + """ + html_str1, html_str2 = get_text_diffs_html(str1, str2) + display(HTML(f"
{html_str1}
{html_str2}
")) diff --git a/pcleaner/_testbed/testbed/visor.py b/pcleaner/_testbed/testbed/visor.py new file mode 100644 index 00000000..a8a7c820 --- /dev/null +++ b/pcleaner/_testbed/testbed/visor.py @@ -0,0 +1,391 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/visor.ipynb. + +# %% ../nbs/visor.ipynb 1 +from __future__ import annotations + + +# %% auto 0 +__all__ = ['ContextVisor', 'Spinner'] + +# %% ../nbs/visor.ipynb 5 +from collections import defaultdict +from typing import Any +from typing import TypeAlias + +import ipywidgets as W +import traitlets as T +from IPython.display import clear_output +from IPython.display import display +from IPython.display import HTML +from ipywidgets.widgets.interaction import show_inline_matplotlib_plots +from rich.console import Console + + +# %% ../nbs/visor.ipynb 6 +from pcleaner._testbed.testbed.helpers import _pops_ +from pcleaner._testbed.testbed.helpers import cleanupwidgets + + +# %% ../nbs/visor.ipynb 10 +console = Console(width=104, tab_size=4, force_jupyter=True) +cprint = console.print + + +# %% ../nbs/visor.ipynb 12 +CtlT: TypeAlias = W.ValueWidget | W.fixed + +NO_UI = W.DOMWidget(layout={'display': 'none'}) +NO_UI.close() + +#| export + +UPDATE_SCRIPT = f""" + +""" + + +# %% ../nbs/visor.ipynb 13 +class ContextVisor: + ctx: Any + values: dict[str, Any] = {} + + _ctxs: dict[str, ContextVisor] = {} + _hdlrs: dict[str, ContextVisor] = {} + _css: str = '' + _ctl2name: dict[CtlT, str] + _name2comp: dict[str, ContextVisor] + _out: W.Output | None = None + _w: W.DOMWidget | None = None + _controls: dict[str, CtlT] | None = None + _all_controls: dict[str, CtlT] | None = None + _ui_cls: type[W.Box] = W.HBox + _inited = False + + @property + def w(self) -> W.DOMWidget: + "Container (DOM)widget of this comp." 
+ if self._inited and self._w is None: + self._w = self._setup_ui() or NO_UI + return self._w # type: ignore + @property + def out(self) -> W.Output: + if self._inited and self._out is None: + self._out = W.Output() + self._out.clear_output(wait=True) + return self._out # type: ignore + @property + def controls(self) -> dict[str, CtlT]: + if self._controls is None: + self._controls = self.setup_controls() if self._inited else {} + return self._controls + @property + def all_controls(self) -> dict[str, CtlT]: + if self._all_controls is None: + controls = {} + if self._inited: + for visor in self._ctxs.values(): + controls.update(visor.all_controls) + controls.update(self.controls) + self._all_controls = controls + self._ctl2name = {v:k for k,v in controls.items()} + return self._all_controls # type: ignore + + @property + def all_values(self): + """Values from first level comps, keyed by comp name. + NOTE: will fail for nested contexts or more than one level. + """ + return {**{k:v.values for k,v in (self.comps | {'self': self}).items()}} + + def _all_values(self): + "Flattened values from all comps" + all_values = {} + for comp in [*self.comps.values(), self]: all_values.update(comp.values) + return all_values + + @property + def comps(self): return self._ctxs + def comp(self, k: str) -> ContextVisor | None: return self._ctxs.get(k) + def handler(self, k: str) -> ContextVisor | None: return self._hdlrs.get(k) + + @property + def styler(self) -> W.HTML | str: + if getattr(self, '_style', None) is None: + stl = self.setup_style() + if stl: + stl_id = 'stl-' + str(id(self)) + stl = f"" + self._style = W.HTML(stl) + else: + self._style = '' + return self._style + def setup_style(self): + collate = [_.setup_style() for _ in self.comps.values()] + if self._css: collate.append(self._css) + return '\n'.join([_ for _ in collate if _]) + + def update_output(self, **kwargs): + cprint(kwargs) + + def setup_controls(self) -> dict[str, CtlT]: + widgets = 
[W.interactive.widget_from_abbrev(v) for k, v in self.values.items()] + widgets = {k:W.fixed(v) if w is None else w for (k, v), w in zip(self.values.items(), widgets)} + # return {k: W.Label(value=str(k)) for k,w in self.values.items()} + return widgets + + def hide(self): + if (w := self.w) is not NO_UI: + w.layout.visibility = 'hidden' + def show(self): + if (w := self.w) is not NO_UI: + w.layout.visibility = 'visible' + + def comps_ui(self): + comps = [] + if self._inited: + for visor in self.comps.values(): + if (visor_ui := visor._setup_ui()) is not None: + comps.append(visor_ui) + return comps + + + def _setup_ui(self): + if not self._inited: return + w = self.setup_ui() + if w is not None: + w.add_class('context-visor') + w.add_class(str(id(self))) + return w + + def setup_ui(self) -> W.DOMWidget | None: + """Get the container widget for this comp. + This method should be the only one called when the comp is nested inside other comp. + """ + uis = [*self.comps_ui(), *self.controls.values()] + return self._ui_cls(uis) if uis else None + + def setup_display(self): + "Generates one time ui" + if not self._inited: return + if self._w is None: + self._w = self._setup_ui() + + + def _output(self, **kwargs): + # group keys by comp + collator = defaultdict(dict) + for k,v in kwargs.items(): + if (comp := self.handler(k)) is not None: + collator[comp][k] = v + else: + # keys w/out control assigned, considered internal state + collator[self][k] = v + # group comps by output + outs = defaultdict(list) + for comp, kw in collator.items(): + outs[comp.out].append((comp, kw)) + for out, g in outs.items(): + show_inline_matplotlib_plots() + with out: + clear_output(wait=True) + for comp, kw in g: + comp.update_output(**kw) + show_inline_matplotlib_plots() + + def _observe(self, change): + control_name = self._ctl2name[change['owner']] + kwargs = {control_name: change['new']} + updated = self._update(**kwargs) + self._output(**updated) + def setup_ux(self): pass + def 
_setup_ux(self): + for visor in self.comps.values(): + visor._setup_ux() + self.setup_ux() + def interactive_output(self): + controls = self.all_controls + all_values = self._all_values() + for k,w in controls.items(): + if k in all_values: + w.observe(self._observe, 'value') + + def display(self, **kwargs): + if not self._inited: return + if self._w is None: + self.setup_display() + self.interactive_output() + self._update(**(self.values | kwargs)) + all_values = self._all_values() + self._hdlrs = {k:self._hdlrs.get(k, self) for k in all_values} + self._output(**all_values) + # ux final touches once everything (including outputs) is setup + # for visor in [*self.comps.values(), self]: + # visor.setup_ux() + self._setup_ux() + stl = self.styler + ui: list = [stl] if stl else [] + if (w := self.w) is not NO_UI: + ui.append(w) + for comp in [*self.comps.values(), self]: + if comp._out is not None: + ui.append(comp._out) + self._final = W.VBox(ui) + self._display_handle = display(self._final, display_id=str(id(self))) + else: + self.update(**kwargs) + def _ipython_display_(self): self.display() + + def _update(self, update_value: bool=True, **kwargs): + updated = {} + for visor in self.comps.values(): + updated.update(visor._update(update_value=update_value, **kwargs)) + values = self.values + my_vals = _pops_(kwargs, self.values.keys()) + for k,v in my_vals.items(): + if v is not None and v != values[k]: + if update_value: + values[k] = v + updated[k] = v + return updated + def update(self, **kwargs): + updated = self._update(update_value=False, **kwargs) + controls = self.all_controls + for k,v in updated.items(): + if k in controls: + if hasattr((ctl := controls[k]), 'value'): + ctl.value = v # will trigger update (self._observe) + elif k in (vv := self._name2comp): + # update manually + comp = vv[k] + if v != comp.values[k]: + comp.values[k] = v + self._output(**{k:v}) + + + def close(self): + controls = self.all_controls + for w in controls.values(): + try: 
w.unobserve(self._observe, 'value') + except: pass + if isinstance(w, W.Widget): + w.close() + for visor in self._ctxs.values(): + if w := getattr(visor, '_w', None): w.close() + if visor._out is not self._out: + if o := getattr(visor, '_out', None): o.close() + visor.close() + if w := getattr(self, '_w', None): w.close() + if o := getattr(self, '_out', None): o.close() + if f := self._final: f.close() + if self._display_handle is not None: + self._display_handle.update(HTML(UPDATE_SCRIPT)) + + + def __del__(self): + self.close() + + def __init__(self, + ctx: Any, + values: dict[str, Any], + out: W.Output | None = None, + ctxs: dict[str, ContextVisor] | None = None, + hdlrs: dict[str, ContextVisor] | None = None, + css: str | None = None, + ): + # Only setup some state. Controls, values and containers will be setup only when explicitly displayed + self._display_handle = None + self._final = None + self.ctx = ctx + self.values = values or {} + self._out = out + self._ctxs = comps = ctxs or {} + self._hdlrs = hdlrs or {} + if css is not None: + self._css = css + self._name2comp = name2comp = {} + for n,vv in self.all_values.items(): + comp = comps.get(n, self) + for k in vv: + name2comp[k] = comp + self._inited = True + + +# %% ../nbs/visor.ipynb 25 +spinner_css = """ + .wrapper-spinner { + overflow: hidden; + width: fit-content; + height: fit-content; + } + + .loading-spinner { + display: flex; + align-items: center; + justify-content: center; + border: 1px solid white; + border-radius: 50%; + } + + .spinner { + border: |border_width|px solid rgba(128,128,128,.5); + border-radius: 50%; + border-left-color: red; + animation: spin 1s infinite linear; + } + + @keyframes spin { + 0% { transform: rotate(0deg); } + 100% { transform: rotate(360deg); } + } +""" + + +# %% ../nbs/visor.ipynb 32 +class Spinner(ContextVisor): + ctx: T.HasTraits + + def loading_spinner(self, size=36, border_width=4): + bw = border_width*2 + html: str = f''' +
+
+
+ ''' + return html + + def setup_controls(self): + spinner = W.HTML(self.loading_spinner(self.size, self.border_width)) + spinner.add_class('wrapper-spinner') + return {'spinner': spinner} + + @property + def spinner(self) -> W.HTML: return self.controls['spinner'] # type: ignore + + def hide(self): self.spinner.layout.display = 'none' + def show(self): self.spinner.layout.display = 'block' + + def setup_ux(self): + source = (self.ctx, '_running') + target = (self.spinner.layout, 'display') + self._link = T.dlink(source, target, lambda x: 'block' if x else 'none') + + def close(self): + if l := getattr(self, '_link', None): l.unlink() + super().close() + + def __init__(self, + ctx: T.HasTraits, + size: int = 24, + border_width: int = 4, + **kwargs + ): + self.size = size + self.border_width = border_width + self._link = None + super().__init__(ctx, {}, css=spinner_css.replace('|border_width|', str(border_width)), **kwargs) + diff --git a/pcleaner/_testbed/testbed/web_server.py b/pcleaner/_testbed/testbed/web_server.py new file mode 100644 index 00000000..bb401890 --- /dev/null +++ b/pcleaner/_testbed/testbed/web_server.py @@ -0,0 +1,330 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/web_server.ipynb. 
+ +# %% ../nbs/web_server.ipynb 1 +from __future__ import annotations + + +# %% auto 0 +__all__ = ['display_ngrok_warning', 'WebServer', 'setup_ngrok', 'WebServerStdlib', 'WebServerBottle'] + +# %% ../nbs/web_server.ipynb 12 +import getpass +import http.server +import os +import signal +import socketserver +import threading +import uuid +from http import HTTPStatus +from pathlib import Path +from typing import Protocol + +import portpicker +import psutil +import requests +import rich +from IPython.display import display +from IPython.display import HTML +from loguru import logger +from pyngrok import conf +from pyngrok import ngrok +from rich.console import Console + + +# %% ../nbs/web_server.ipynb 13 +from pcleaner._testbed.testbed.bottle import Bottle +from pcleaner._testbed.testbed.bottle import HTTPError +from pcleaner._testbed.testbed.bottle import response +from pcleaner._testbed.testbed.bottle import run +from pcleaner._testbed.testbed.bottle import static_file + + +# %% ../nbs/web_server.ipynb 17 +console = Console(width=104, tab_size=4, force_jupyter=True) +cprint = console.print + + +# %% ../nbs/web_server.ipynb 18 +def display_ngrok_warning(url): + did = 'ngrokFrame' + str(uuid.uuid4()) + html_code = f""" +
+
+

Ngrok displays a warning page as a security measure to prevent unintentional access to your local servers. This page requires you to confirm that you wish to proceed to the content.

+

Please review the ngrok warning page displayed below. If prompted, click 'Visit Page' to proceed.

+ Don't worry if you see a 404 or 403 error. Then, you can click the 'Close' button below to hide this section.

+
+ + +
+""" + + display(HTML(html_code)) + + +# %% ../nbs/web_server.ipynb 19 +class WebServer(Protocol): + @property + def public_url(self) -> str | None: ... + @property + def unc_share(self) -> Path | None: ... + @property + def prefix(self) -> str: ... + @property + def running(self) -> bool: ... + def __init__(self, directory: Path | str = ""): ... + def start(self): ... + def stop(self): ... + + +def setup_ngrok(server_cls: type[WebServer], images_dir: str | Path): + cprint( + "Enter your ngrok authtoken, which can be copied from " + "https://dashboard.ngrok.com/get-started/your-authtoken" + ) + auth_token = getpass.getpass() + conf.get_default().auth_token = auth_token + ngrok.set_auth_token(auth_token) + + server = server_cls(directory=str(images_dir)) + try: + server.start() + except Exception as e: + cprint(f"Error starting server: {e}") + return None + + display_ngrok_warning(f"{server.public_url}/{server.prefix}/pcleaner.png") + return server + + +# %% ../nbs/web_server.ipynb 24 +class ImageHTTPRequestHandler(http.server.SimpleHTTPRequestHandler): + def end_headers(self): + for k,v in { + 'ngrok-skip-browser-warning': 'true', + 'User-Agent': 'MyCustomUserAgent/1.0', + 'Cache-Control': 'public, max-age=86400' + }.items(): + self.send_header(k, v) + super().end_headers() + + def do_GET(self): + if self.is_image_request(self.path): + super().do_GET() + else: + self.send_error(HTTPStatus.FORBIDDEN, "Only image files are accessible.") + + def is_image_request(self, path): + allowed_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.webp') + _, ext = os.path.splitext(path) + return ext.lower() in allowed_extensions + + def __init__(self, *args, **kwargs): + super().__init__(*args, directory=self.directory, **kwargs) + + +# %% ../nbs/web_server.ipynb 25 +class WebServerStdlib: + """ + A simple web server for serving images from a local directory using http.server and ngrok. 
+ + It is intended to be used in environments like Google Colab, where direct + web server hosting might not be feasible. It uses ngrok to allow images to be accessed + via a public URL. + + Attributes: + directory (str): The directory from which files are served. + port (int): The local port on which the server listens. + public_url (str): The ngrok public URL where the server is accessible. + tunnel (ngrok.NgrokTunnel): The ngrok tunnel object. + + Methods: + start(): Starts the web server and the ngrok tunnel. + stop(): Stops the web server and disconnects the ngrok tunnel. + make_request(path="/"): Makes a request to the ngrok URL to fetch data from the server. + """ + + def __init__(self, directory: Path | str=""): + port = portpicker.pick_unused_port() + self.port = port + if isinstance(directory, str): + directory = Path(directory) + assert directory.exists(), f"Directory {directory} does not exist" + self.directory = str(directory.resolve()) + self.thread = None + self.httpd = None + self.public_url = None + self.prefix = '' + self.unc_share = None + self.tunnel = None + + @property + def running(self): + return self.thread is not None and self.thread.is_alive() + + def start_server(self): + Handler = ImageHTTPRequestHandler + Handler.directory = self.directory + try: + with socketserver.TCPServer(("", self.port), Handler) as httpd: + self.httpd = httpd + httpd.serve_forever() + except OSError as e: + cprint(f"Error: {e}") + + def start(self): + if self.thread is None or not self.thread.is_alive(): + self.thread = threading.Thread(target=self.start_server) + self.thread.start() + self.tunnel = ngrok.connect(self.port) # type: ignore + self.public_url = self.tunnel.public_url + if self.public_url is not None: + self.unc_share = Path(self.public_url.replace('https:', '')) + cprint(f"ngrok tunnel: {self.tunnel}") + cprint(f"Public URL: {self.public_url}") + else: + cprint("Server is already running") + + def stop(self): + if self.httpd: + self.httpd.shutdown() 
+ self.httpd.server_close() + if self.tunnel and self.tunnel.public_url: + ngrok.disconnect(self.tunnel.public_url) # Use the stored tunnel object's URL + cprint("Ngrok tunnel disconnected") + ngrok.kill() + if self.thread: + self.thread.join() + self.thread = self.public_url = self.unc_share = None + cprint("Server stopped") + + def make_request(self, path="/"): + """Makes a request to the ngrok URL with headers to bypass the ngrok warning.""" + if self.public_url: + url = f"{self.public_url}{path}" + headers = { + "ngrok-skip-browser-warning": "true", + "User-Agent": "MyCustomUserAgent/1.0" + } + response = requests.get(url, headers=headers) + return response.text + else: + return "Server not started or public URL not available." + + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + + +# %% ../nbs/web_server.ipynb 39 +app = Bottle() + +@app.route('/images/') # type: ignore +def serve_image(filename): + if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')): + return HTTPError(404, "File not found") + response.set_header('Cache-Control', 'public, max-age=86400') # Set caching headers + return static_file(filename, root=app.config['image_dir']) + +@app.route('/shutdown') # type: ignore +def shutdown(): + current_process = psutil.Process() + current_process.send_signal(signal.SIGTERM) + + +# %% ../nbs/web_server.ipynb 40 +class WebServerBottle: + """ + A simple web server for serving images from a local directory using ngrok. + This class uses the Bottle framework to handle HTTP requests and ngrok to expose the + server to the internet.It is designed to be used in environments like Google Colab, + where direct web server hosting might not be feasible. + + Attributes: + directory (Path | str): The directory from which files are served. + port (int): The local port on which the server listens. + public_url (str): The ngrok public URL where the server is accessible. 
+ tunnel (ngrok.NgrokTunnel): The ngrok tunnel object. + + Methods: + start(): Starts the web server and the ngrok tunnel. + stop(): Stops the web server and disconnects the ngrok tunnel. + """ + + def __init__(self, directory: Path | str = ""): + self.port = portpicker.pick_unused_port() + if isinstance(directory, str): + directory = Path(directory) + assert directory.exists(), f"Directory {directory} does not exist" + self.directory = directory + self.thread = None + self.httpd = None + self.public_url = None + self.unc_share = None + self.prefix = 'images' + self.tunnel = None + app.config['image_dir'] = str(directory) # directory for Bottle + # app.routes[0].callback.__globals__['image_dir'] = str(directory) # directory for Bottle + + @property + def running(self): + return self.thread is not None and self.thread.is_alive() + + def start_server(self): + def bottle_run(): + run(app, host='localhost', port=self.port) + + self.thread = threading.Thread(target=bottle_run) + self.thread.start() + self.tunnel = ngrok.connect(self.port) # type: ignore + self.public_url = self.tunnel.public_url + if self.public_url is not None: + self.unc_share = Path(self.public_url.replace('https:', ''))/self.prefix + cprint(f"ngrok tunnel: {self.tunnel}") + cprint(f"Public URL: {self.public_url}") + + def start(self): + if self.thread is None or not self.thread.is_alive(): + self.start_server() + else: + cprint("Server is already running") + + def stop(self): + if self.tunnel and self.tunnel.public_url: + ngrok.disconnect(self.tunnel.public_url) + cprint("Ngrok tunnel disconnected") + ngrok.kill() + + if self.thread: + self.make_request('/shutdown') + self.thread.join(timeout=10) + if self.thread.is_alive(): + print("Thread did not terminate, proceeding with forceful shutdown.") + else: + print("Server thread stopped successfully.") + self.thread = self.tunnel = self.public_url = self.unc_share = None + cprint("Server stopped") + + def make_request(self, path="/"): + """Makes a 
request to the ngrok URL with headers to bypass the ngrok warning.""" + if self.public_url: + url = f"{self.public_url}{path}" + headers = { + "ngrok-skip-browser-warning": "true", + "User-Agent": "MyCustomUserAgent/1.0" + } + response = requests.get(url, headers=headers) + return response.text + else: + return "Server not started or public URL not available." + + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + From 730e4943c91dc235ea9949deb8ab6089a1d754ad Mon Sep 17 00:00:00 2001 From: Voxel <41875513+VoxelCubes@users.noreply.github.com> Date: Tue, 28 May 2024 20:53:09 +0200 Subject: [PATCH 08/27] tiny grammar fix --- pcleaner/_testbed/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcleaner/_testbed/README.md b/pcleaner/_testbed/README.md index 0bead663..2adfe12a 100644 --- a/pcleaner/_testbed/README.md +++ b/pcleaner/_testbed/README.md @@ -24,7 +24,7 @@ Instructions to use Google Colab are included in the notebooks. ## Install Test Images The test images are not included in the repository but can be downloaded from the following link: -- [Test images PCISet](https://drive.google.com/file/d/18TSXLCYAPxAlUsdHmgAe6FZM5d8K6gcT/view?usp=drive_link). The notebooks have also instructions and code to download directly the set +- [Test images PCISet](https://drive.google.com/file/d/18TSXLCYAPxAlUsdHmgAe6FZM5d8K6gcT/view?usp=drive_link). The notebooks also have instructions and code to download the set directly After downloading, place the test images in the [source](source) directory. If you want to use your own, each image should have a corresponding text file with the same name, but with the extension `.txt`, which contains the ground truth data, one line per box (as calculated by PanelCleaner). 
Optionally, you can also include a `.json` file with the same name, specifying the language of the page: ```json From 0b236fe452440166b2866d1a9bac5c64e5e62dd4 Mon Sep 17 00:00:00 2001 From: Spikey Date: Sat, 25 May 2024 16:00:32 +0200 Subject: [PATCH 09/27] move accurecy metric logic to its notebook; change ground_truth file format; start refactorin Experiment_run --- pcleaner/_testbed/nbs/experiments.ipynb | 907 +++++++++++------------- pcleaner/_testbed/nbs/ocr_metric.ipynb | 96 ++- pcleaner/_testbed/testbed/ocr_metric.py | 39 +- 3 files changed, 514 insertions(+), 528 deletions(-) diff --git a/pcleaner/_testbed/nbs/experiments.ipynb b/pcleaner/_testbed/nbs/experiments.ipynb index bde864b5..0d9e564c 100644 --- a/pcleaner/_testbed/nbs/experiments.ipynb +++ b/pcleaner/_testbed/nbs/experiments.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -11,12 +11,8 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "notebookRunGroups": { - "groupValue": "1" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -26,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -44,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -60,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -69,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -85,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -109,12 +105,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "notebookRunGroups": { - "groupValue": 
"1" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -161,12 +153,8 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "notebookRunGroups": { - "groupValue": "1" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -180,12 +168,8 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "notebookRunGroups": { - "groupValue": "1" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "import copy\n", @@ -210,12 +194,8 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": { - "notebookRunGroups": { - "groupValue": "1" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "# pretty print by default\n", @@ -224,12 +204,8 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": { - "notebookRunGroups": { - "groupValue": "1" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| exporti\n", @@ -261,12 +237,8 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "notebookRunGroups": { - "groupValue": "1" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| exporti\n", @@ -292,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -373,7 +345,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -417,7 +389,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -433,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -449,7 +421,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -465,7 +437,7 @@ }, { "cell_type": "code", 
- "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -512,7 +484,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -618,7 +590,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -643,7 +615,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -666,7 +638,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -682,7 +654,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -698,7 +670,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -738,104 +710,73 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## OCR results clean-up" + "## Ground truth" ] }, { "cell_type": "code", - "execution_count": 8, - "metadata": { - "notebookRunGroups": { - "groupValue": "1" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", "\n", - "def remove_multiple_whitespaces(text):\n", - " return ' '.join(text.split())\n", - "\n", - " \n", - "def postprocess_ocr(text):\n", - " \"Basic postprocessing for English Tesseract OCR results.\"\n", - " return ' '.join(remove_multiple_whitespaces(text).splitlines()).capitalize()\n", - "\n", - "def accuracy_ocr_naive(text, ground_truth):\n", - " return sum(1 for a, b in zip(text, ground_truth) if a == b) / len(text)\n", + "def ground_truth_path_old(page_data: st.PageData):\n", + " path = Path(page_data.original_path)\n", + " return path.with_stem(path.stem + '_gt').with_suffix('.txt')\n", "\n", "\n", - "def accuracy_ocr_difflib(text, ground_truth):\n", - " \"\"\"\n", - " Calculates the OCR accuracy based on the similarity between the OCR text and the 
ground truth text,\n", - " using difflib's SequenceMatcher to account for differences in a manner similar to git diffs.\n", + "def read_ground_truth_old(page_data: st.PageData, root_dir: Path):\n", + " gts_path = root_dir / ground_truth_path_old(page_data)\n", + " if gts_path.exists():\n", + " gts = gts_path.read_text(encoding=\"utf-8\").splitlines()\n", + " else:\n", + " gts = [\"\" for _ in range(len(page_data.boxes))]\n", + " return gts\n", "\n", - " :param text: The OCR-generated text.\n", - " :param ground_truth: The ground truth text.\n", - " :return: A float representing the similarity ratio between the OCR text and the ground truth, \n", - " where 1.0 is identical.\n", - " \"\"\"\n", - " # Initialize the SequenceMatcher with the OCR text and the ground truth\n", - " matcher = difflib.SequenceMatcher(None, text, ground_truth)\n", - " \n", - " # Get the similarity ratio\n", - " similarity_ratio = matcher.ratio()\n", - " \n", - " return similarity_ratio" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Ground truth" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "notebookRunGroups": { - "groupValue": "1" - } - }, - "outputs": [], - "source": [ - "#| export\n", "\n", "def ground_truth_path(page_data: st.PageData):\n", " path = Path(page_data.original_path)\n", - " return path.with_stem(path.stem + '_gt').with_suffix('.txt')\n", + " return path.with_stem(path.stem + '_gt').with_suffix('.json')\n", "\n", "\n", - "def read_ground_truth(page_data: st.PageData, root_dir: Path):\n", + "def read_ground_truth(page_data: st.PageData, root_dir: Path) -> dict[str, list[str]]:\n", " gts_path = root_dir / ground_truth_path(page_data)\n", + " empty = [\"\" for _ in range(len(page_data.boxes))]\n", " if gts_path.exists():\n", - " gts = gts_path.read_text(encoding=\"utf-8\").splitlines()\n", - " else:\n", - " gts = [\"\" for _ in range(len(page_data.boxes))]\n", - " return gts\n" + " try:\n", + " with open(gts_path, 
'r') as f:\n", + " data = json.load(f)\n", + " if 'all-caps' not in data:\n", + " data['all-caps'] = empty\n", + " if 'capitalized' not in data:\n", + " data['capitalized'] = empty.copy()\n", + " return data\n", + " except Exception as e:\n", + " logger.error(f\"Error loading {gts_path}: {e}\")\n", + " gts = {'all-caps': empty,\n", + " 'capitalized': empty.copy()}\n", + " return gts\n", + "\n", + "\n", + "def save_ground_truth(gts: dict[str, list[str]], page_data: st.PageData, root_dir: Path):\n", + " fp = root_dir / ground_truth_path(page_data)\n", + " with open(fp, 'w') as f:\n", + " json.dump(gts, f, indent=2)\n", + " return fp\n" ] }, { "cell_type": "markdown", - "metadata": { - "notebookRunGroups": { - "groupValue": "1" - } - }, + "metadata": {}, "source": [ "## Cropping" ] }, { "cell_type": "code", - "execution_count": 10, - "metadata": { - "notebookRunGroups": { - "groupValue": "1" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -878,12 +819,8 @@ }, { "cell_type": "code", - "execution_count": 11, - "metadata": { - "notebookRunGroups": { - "groupValue": "1" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -906,12 +843,8 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": { - "notebookRunGroups": { - "groupValue": "1" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| exporti\n", @@ -924,12 +857,8 @@ }, { "cell_type": "code", - "execution_count": 13, - "metadata": { - "notebookRunGroups": { - "groupValue": "1" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -958,12 +887,8 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": { - "notebookRunGroups": { - "groupValue": "1" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -1072,12 +997,8 @@ }, { "cell_type": "code", - "execution_count": 15, - 
"metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -1291,15 +1212,13 @@ " def cleanup_model(self):\n", " pass\n", "\n", - " def __init__(self, name: str, paths: list[Path], root: Path | None = None, run_name: str = 'run1'):\n", + " def __init__(self, name: str, paths: list[Path], root: Path | None = None):\n", " self.name = name\n", " self._root = root or type(self).EXP_DIR\n", " self._paths = paths # relative paths wrt self._root\n", " self._subjects: dict[SubjIdT, ExperimentSubject] = {}\n", " self._exp_runs: dict[str, ExperimentRun] = {}\n", - " self._results = {}\n", - " # default run\n", - " ExperimentRun(self, run_name)\n" + " self._results = {}\n" ] }, { @@ -1311,7 +1230,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1340,7 +1259,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1356,7 +1275,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1377,14 +1296,14 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "run = exp.experiment_run('test')\n", "test_eq(run, None)\n", "\n", - "test_eq(exp.run_names, ['run1'])\n", + "test_eq(exp.run_names, [])\n", "\n", "# Instantiate run\n", "run = ExperimentRun(exp, 'test')\n", @@ -1393,11 +1312,11 @@ "test_is(_, run)\n", "test_is(run, ExperimentRun(exp, 'test'))\n", "\n", - "test_eq(exp.run_names, ['run1', 'test'])\n", + "test_eq(exp.run_names, ['test'])\n", "\n", "run2 = ExperimentRun(exp, 'test2')\n", "\n", - "test_eq(exp.run_names, ['run1', 'test', 'test2'])\n" + "test_eq(exp.run_names, ['test', 'test2'])\n" ] }, { @@ -1410,12 +1329,8 @@ }, { "cell_type": "code", - "execution_count": 16, - "metadata": { - 
"notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -1446,7 +1361,7 @@ " json_data: dict | None\n", " page_data: st.PageData\n", " _page_lang: str\n", - " _gts: list[str]\n", + " _gts: dict[str, list[str]]\n", " _mask_dilated1: Image.Image | None\n", " _mask_dilated05: Image.Image | None\n", " _mask_dilated02: Image.Image | None\n", @@ -1523,11 +1438,13 @@ " \n", " def setup_ground_truth(self):\n", " self._gts = read_ground_truth(self.page_data, self.exp.root_dir)\n", - " @property\n", - " def gts(self): \n", + " \n", + " def gts(self, version: str = 'capitalized'): \n", " if self._gts is None:\n", " self.setup_ground_truth()\n", - " return self._gts\n", + " if version not in self._gts:\n", + " self._gts[version] = [\"\" for _ in range(len(self.boxes))]\n", + " return self._gts[version]\n", " \n", " @functools.lru_cache(typed=True)\n", " def dilated_mask(self, fraction: float):\n", @@ -1570,12 +1487,8 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -1601,7 +1514,7 @@ " @property\n", " def acc(self):\n", " if self.ocr is not None:\n", - " self._acc = accuracy_ocr_difflib(self.ocr, self.image_ctx.gts[self.block_idx])\n", + " self._acc = accuracy_ocr_difflib(self.ocr, self.image_ctx.gts()[self.block_idx])\n", " return self._acc\n", " @property\n", " def suffix(self): return f\"{self.block_idx}_{self.description}\"\n", @@ -1655,12 +1568,8 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -1703,12 +1612,8 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + 
"metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -1723,7 +1628,16 @@ " OCRModel))\n", "\n", "\n", - "class OCRExperimentRun(ExperimentRun): ...\n", + "class OCRExperimentRun(ExperimentRun): \n", + " def _result_from(self, \n", + " image_idx: ImgIdT, box_idx: BoxIdT, method: CropMethod, ocr: str | None = None):\n", + " img_ctx = ImageContext(self.exp, image_idx)\n", + " extracted = method in _EXTRACTED_METHODS\n", + " result_cls = ResultOCRExtracted if extracted else ResultOCR\n", + " result = result_cls(img_ctx, int(box_idx), None, None, description=f\"{method.value}\")\n", + " if ocr is not None:\n", + " result.ocr = ocr\n", + " return result\n", "\n", "\n", "class OCRExperimentContext(ExperimentContext):\n", @@ -1741,6 +1655,7 @@ "\n", " # ExperimentRun name -> Image index -> Box index -> Crop method -> Result\n", " _results: dict[RunIdT, dict[ImgIdT, ResultSet]]\n", + " _exp_runs: dict[str, OCRExperimentRun]\n", "\n", " _running = T.Bool(False)\n", " \n", @@ -1796,7 +1711,7 @@ " def ocr_box(self, result: ResultOCR, lang: str): \n", " assert result.image is not None\n", " text = self.mocr(lang)(result.image)\n", - " result.ocr = postprocess_ocr(text)\n", + " result.ocr = text\n", " self._dirty = True\n", " return result\n", "\n", @@ -1908,15 +1823,15 @@ " def _update_results(self, run_name: str, img_idx: ImgIdT, results: ResultSetDefault):\n", " self._results[run_name][img_idx] = cast(ResultSet, results)\n", " \n", - " def _result_from(self, \n", - " image_idx: ImgIdT, box_idx: BoxIdT, method: CropMethod, ocr: str | None = None):\n", - " img_ctx = ImageContext(self, image_idx)\n", - " extracted = method in _EXTRACTED_METHODS\n", - " result_cls = ResultOCRExtracted if extracted else ResultOCR\n", - " result = result_cls(img_ctx, int(box_idx), None, None, description=f\"{method.value}\")\n", - " if ocr is not None:\n", - " result.ocr = ocr\n", - " return result\n", + " # def _result_from(self, \n", + " # image_idx: ImgIdT, box_idx: BoxIdT, method: 
CropMethod, ocr: str | None = None):\n", + " # img_ctx = ImageContext(self, image_idx)\n", + " # extracted = method in _EXTRACTED_METHODS\n", + " # result_cls = ResultOCRExtracted if extracted else ResultOCR\n", + " # result = result_cls(img_ctx, int(box_idx), None, None, description=f\"{method.value}\")\n", + " # if ocr is not None:\n", + " # result.ocr = ocr\n", + " # return result\n", " \n", " def result(self, \n", " run_name: str,\n", @@ -1927,7 +1842,8 @@ " if not rebuild and _result is not None:\n", " return _result\n", " \n", - " result: ResultOCR = self._result_from(image_idx, box_idx, method)\n", + " run = self._exp_runs[run_name]\n", + " result: ResultOCR = run._result_from(image_idx, box_idx, method)\n", " image, cropped_image, cropped_mask = result.image, None, None\n", " img_ctx = ImageContext(self, image_idx)\n", " base_image = img_ctx.base_image\n", @@ -1958,6 +1874,7 @@ " if ocr:\n", " exp_run.before_result(result)\n", " result = self.ocr_box(result, img_ctx.page_lang)\n", + " result.ocr = postprocess_ocr(result.ocr)\n", " exp_run.after_result(result)\n", " self._results[run_name][image_idx][box_idx][method] = result\n", " self._dirty = True\n", @@ -2056,7 +1973,8 @@ " # self.run_to_json(run_name)\n", " \n", " def _load_run_results(self, run_name: str, run_data: dict[str, dict[str, dict[str, str]]]):\n", - " self._exp_runs[run_name] = OCRExperimentRun(self, run_name)\n", + " run: OCRExperimentRun = cast(OCRExperimentRun, OCRExperimentRun(self, run_name))\n", + " self._exp_runs[run_name] = run\n", " name2idx = {p.name: i for i, p in enumerate(self.image_paths)}\n", " for img_name, data in run_data.items():\n", " img_idx = name2idx.get(img_name, None)\n", @@ -2066,7 +1984,7 @@ " rset: ResultSetDefault = dict_to_resultset(\n", " ImgIdT(img_idx), \n", " data, \n", - " result_factory=self._result_from)\n", + " result_factory=run._result_from)\n", " self._update_results(run_name, img_idx, rset)\n", "\n", " # def load_results(self):\n", @@ -2171,7 +2089,7 
@@ " self.ocr_model = ocr_model\n", " root_dir = Path(root_dir)\n", " super().__init__(\n", - " ocr_model, self.get_image_paths(root_dir), root=root_dir, run_name=run_name)\n", + " ocr_model, self.get_image_paths(root_dir), root=root_dir)\n", " self.image_paths = self._paths\n", " self._reset_results()\n", " self._images = self._subjects\n", @@ -2181,17 +2099,16 @@ " self.use_tunnel = use_tunnel\n", " self.server = server or SERVER\n", " if load:\n", - " self._from_json()\n" + " self._from_json()\n", + " # default run\n", + " ExperimentRun(self, run_name)\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 20, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| exporti\n", @@ -2209,7 +2126,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -2231,12 +2148,8 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -2246,7 +2159,7 @@ "\n", " @classmethod\n", " def diff_tagged(cls, result: ResultOCR):\n", - " _, html = get_text_diffs_html(result.image_ctx.gts[result.block_idx], result.ocr, False)\n", + " _, html = get_text_diffs_html(result.image_ctx.gts()[result.block_idx], result.ocr, False)\n", " return f\"{html}\"\n", "\n", " def as_html(self):\n", @@ -2258,7 +2171,7 @@ " acc_html = ''\n", " if has_ocr:\n", " acc_html = f\"
{result.acc:.2f}\"\n", - " html_str1, html_str2 = get_text_diffs_html(result.image_ctx.gts[result.block_idx], result.ocr)\n", + " html_str1, html_str2 = get_text_diffs_html(result.image_ctx.gts()[result.block_idx], result.ocr)\n", " box_image_path = result.cache_image()\n", " html1 = get_columns_html([[DI(box_image_path)], [(result.ocr or '') + acc_html]])\n", " if has_ocr:\n", @@ -2271,7 +2184,7 @@ " result = self.ctx\n", " has_ocr = result.ocr is not None\n", " DI = cast(OCRExperimentContext, result.image_ctx.exp).DI\n", - " html_str1, html_str2 = get_text_diffs_html(result.image_ctx.gts[result.block_idx], result.ocr)\n", + " html_str1, html_str2 = get_text_diffs_html(result.image_ctx.gts()[result.block_idx], result.ocr)\n", " if has_ocr:\n", " diff_html = f\"
{html_str1}
{html_str2}
\"\n", " else:\n", @@ -2308,12 +2221,8 @@ }, { "cell_type": "code", - "execution_count": 22, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "EXP_DIR = ExperimentContext.EXP_DIR\n", @@ -2345,12 +2254,8 @@ }, { "cell_type": "code", - "execution_count": 23, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "# CONFIG = cfg.load_config()\n", @@ -2387,12 +2292,8 @@ }, { "cell_type": "code", - "execution_count": 24, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "CONTEXT = OCRExperimentContext('Tesseract')\n" @@ -2400,7 +2301,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2500,12 +2401,8 @@ }, { "cell_type": "code", - "execution_count": 25, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2544,7 +2441,7 @@ " '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']" ] }, - "execution_count": 25, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2564,7 +2461,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -2590,12 +2487,8 @@ }, { "cell_type": "code", - "execution_count": 26, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2603,7 +2496,7 @@ "'Tesseract-crop'" ] }, - "execution_count": 26, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2631,12 +2524,8 @@ }, { "cell_type": "code", - "execution_count": 27, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": 
{}, "outputs": [], "source": [ "#| export\n", @@ -2671,7 +2560,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2705,12 +2594,8 @@ }, { "cell_type": "code", - "execution_count": 28, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -2754,7 +2639,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2785,7 +2670,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -2794,7 +2679,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -2803,7 +2688,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -2812,7 +2697,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -2828,12 +2713,8 @@ }, { "cell_type": "code", - "execution_count": 29, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -2870,7 +2751,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2898,7 +2779,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2923,7 +2804,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -2939,12 +2820,8 @@ }, { "cell_type": "code", - "execution_count": 30, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -2982,7 +2859,7 @@ 
}, { "cell_type": "code", - "execution_count": 68, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3014,12 +2891,8 @@ }, { "cell_type": "code", - "execution_count": 31, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -3067,7 +2940,7 @@ " self.image_info(image_ctx)\n", " RenderJSON(image_ctx.json_data, 350, 2).display()\n", " if display_option in (DisplayOptions.ALL, DisplayOptions.GROUND_TRUTH):\n", - " cprint(image_ctx.gts)\n", + " cprint(image_ctx.gts())\n", " if display_option == DisplayOptions.CONFIG:\n", " self.ctx.show()\n", " if display_option == DisplayOptions.IMAGE:\n", @@ -3113,7 +2986,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3138,7 +3011,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3154,12 +3027,8 @@ }, { "cell_type": "code", - "execution_count": 32, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -3199,7 +3068,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3226,7 +3095,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3243,12 +3112,8 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -3277,7 +3142,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3306,7 +3171,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ 
-3323,12 +3188,8 @@ }, { "cell_type": "code", - "execution_count": 34, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "BASE_IMAGE_IDX: ImgIdT = cast(ImgIdT, CONTEXT.normalize_idx(\"Strange_Tales_172005.jpg\"))\n", @@ -3356,7 +3217,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3365,7 +3226,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3389,7 +3250,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3398,23 +3259,19 @@ }, { "cell_type": "code", - "execution_count": 35, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - "
\n", - "
\n", + "
\n", + "
\n", " \n", "
\n", @@ -3442,7 +3299,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3458,7 +3315,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3483,7 +3340,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3520,12 +3377,8 @@ }, { "cell_type": "code", - "execution_count": 36, - "metadata": { - "notebookRunGroups": { - "groupValue": "2" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -3600,7 +3453,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3609,7 +3462,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3640,7 +3493,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3649,7 +3502,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3673,7 +3526,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3713,7 +3566,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3765,36 +3618,133 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
[\n",
+       "    'Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New \n",
+       "Orleans, kept tidy by a white-haired old man known only as Bambu.',\n",
+       "    'The house and the old man are alike in many ways; tall, proud, patient, contented always to wait \n",
+       "until their master comes home--',\n",
+       "    'And one in need of some help, it would appear.',\n",
+       "    'Bambu-- we have a guest.',\n",
+       "    '--and tonight, he comes most urgently, slamming open the oaken front doors!',\n",
+       "    'Tell me, master-- how may Bambu serve?',\n",
+       "    'Some blankets to keep her warm, Bambu-- and perhaps some dry clothes',\n",
+       "    \"The echo of the old man's footsteps fades down the hall as...\",\n",
+       "    'How curious the whims of fate. Had I not chanced to stroll along the river tonight--',\n",
+       "    'As quickly as I can, master',\n",
+       "    '--the girl would most surely be dead by now.',\n",
+       "    'Ghede has been generous. the Death God has given the girl a second chance at--',\n",
+       "    \"Easy, girl-- there's nothing to scream about anymore.\",\n",
+       "    \"You're among friends now, you're safe!\",\n",
+       "    'Continued after next page'\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\n", + " \u001b[32m'Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New \u001b[0m\n", + "\u001b[32mOrleans, kept tidy by a white-haired old man known only as Bambu.'\u001b[0m,\n", + " \u001b[32m'The house and the old man are alike in many ways; tall, proud, patient, contented always to wait \u001b[0m\n", + "\u001b[32muntil their master comes home--'\u001b[0m,\n", + " \u001b[32m'And one in need of some help, it would appear.'\u001b[0m,\n", + " \u001b[32m'Bambu-- we have a guest.'\u001b[0m,\n", + " \u001b[32m'--and tonight, he comes most urgently, slamming open the oaken front doors!'\u001b[0m,\n", + " \u001b[32m'Tell me, master-- how may Bambu serve?'\u001b[0m,\n", + " \u001b[32m'Some blankets to keep her warm, Bambu-- and perhaps some dry clothes'\u001b[0m,\n", + " \u001b[32m\"The echo of the old man's footsteps fades down the hall as...\"\u001b[0m,\n", + " \u001b[32m'How curious the whims of fate. Had I not chanced to stroll along the river tonight--'\u001b[0m,\n", + " \u001b[32m'As quickly as I can, master'\u001b[0m,\n", + " \u001b[32m'--the girl would most surely be dead by now.'\u001b[0m,\n", + " \u001b[32m'Ghede has been generous. the Death God has given the girl a second chance at--'\u001b[0m,\n", + " \u001b[32m\"Easy, girl-- there's nothing to scream about anymore.\"\u001b[0m,\n", + " \u001b[32m\"You're among friends now, you're safe!\"\u001b[0m,\n", + " \u001b[32m'Continued after next page'\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
[\n",
+       "    'EMBOWERED BY GREAT GNARLED CYPRESS TREES, THE ANCIENT MANOR STANDS ALONE ON THE OUTSKIRTS OF NEW \n",
+       "ORLEANS, KEPT TIDY BY A WHITE-HAIRED OLD MAN KNOWN ONLY AS BAMBU.',\n",
+       "    'THE HOUSE AND THE OLD MAN ARE ALIKE IN MANY WAYS; TALL, PROUD, PATIENT, CONTENTED ALWAYS TO WAIT \n",
+       "UNTIL THEIR MASTER COMES HOME--',\n",
+       "    'AND ONE IN NEED OF SOME HELP, IT WOULD APPEAR.',\n",
+       "    'BAMBU-- WE HAVE A GUEST.',\n",
+       "    '--AND TONIGHT, HE COMES MOST URGENTLY, SLAMMING OPEN THE OAKEN FRONT DOORS!',\n",
+       "    'TELL ME, MASTER-- HOW MAY BAMBU SERVE?',\n",
+       "    'SOME BLANKETS TO KEEP HER WARM, BAMBU-- AND PERHAPS SOME DRY CLOTHES',\n",
+       "    \"THE ECHO OF THE OLD MAN'S FOOTSTEPS FADES DOWN THE HALL AS...\",\n",
+       "    'HOW CURIOUS THE WHIMS OF FATE. HAD I NOT CHANCED TO STROLL ALONG THE RIVER TONIGHT--',\n",
+       "    'AS QUICKLY AS I CAN, MASTER',\n",
+       "    '--THE GIRL WOULD MOST SURELY BE DEAD BY NOW.',\n",
+       "    'GHEDE HAS BEEN GENEROUS. THE DEATH GOD HAS GIVEN THE GIRL A SECOND CHANCE AT--',\n",
+       "    \"EASY, GIRL-- THERE'S NOTHING TO SCREAM ABOUT ANYMORE.\",\n",
+       "    \"YOU'RE AMONG FRIENDS NOW, YOU'RE SAFE!\",\n",
+       "    'CONTINUED AFTER NEXT PAGE'\n",
+       "]\n",
+       "
\n" + ], "text/plain": [ - "['Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.',\n", - " 'The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their master comes home--',\n", - " 'And one in need of some help, it would appear.',\n", - " 'Bambu-- we have a guest.',\n", - " '--and tonight, he comes most urgently, slamming open the oaken front doors!',\n", - " 'Tell me, master-- how may Bambu serve?',\n", - " 'Some blankets to keep her warm, Bambu-- and perhaps some dry clothes',\n", - " \"The echo of the old man's footsteps fades down the hall as...\",\n", - " 'How curious the whims of fate. Had I not chanced to stroll along the river tonight--',\n", - " 'As quickly as I can, master',\n", - " '--the girl would most surely be dead by now.',\n", - " 'Ghede has been generous. the Death God has given the girl a second chance at--',\n", - " \"Easy, girl-- there's nothing to scream about anymore.\",\n", - " \"You're among friends now, you're safe!\",\n", - " 'Continued after next page']" + "\u001b[1m[\u001b[0m\n", + " \u001b[32m'EMBOWERED BY GREAT GNARLED CYPRESS TREES, THE ANCIENT MANOR STANDS ALONE ON THE OUTSKIRTS OF NEW \u001b[0m\n", + "\u001b[32mORLEANS, KEPT TIDY BY A WHITE-HAIRED OLD MAN KNOWN ONLY AS BAMBU.'\u001b[0m,\n", + " \u001b[32m'THE HOUSE AND THE OLD MAN ARE ALIKE IN MANY WAYS; TALL, PROUD, PATIENT, CONTENTED ALWAYS TO WAIT \u001b[0m\n", + "\u001b[32mUNTIL THEIR MASTER COMES HOME--'\u001b[0m,\n", + " \u001b[32m'AND ONE IN NEED OF SOME HELP, IT WOULD APPEAR.'\u001b[0m,\n", + " \u001b[32m'BAMBU-- WE HAVE A GUEST.'\u001b[0m,\n", + " \u001b[32m'--AND TONIGHT, HE COMES MOST URGENTLY, SLAMMING OPEN THE OAKEN FRONT DOORS!'\u001b[0m,\n", + " \u001b[32m'TELL ME, MASTER-- HOW MAY BAMBU SERVE?'\u001b[0m,\n", + " \u001b[32m'SOME BLANKETS TO KEEP HER WARM, BAMBU-- AND PERHAPS SOME DRY CLOTHES'\u001b[0m,\n", + 
" \u001b[32m\"THE ECHO OF THE OLD MAN'S FOOTSTEPS FADES DOWN THE HALL AS...\"\u001b[0m,\n", + " \u001b[32m'HOW CURIOUS THE WHIMS OF FATE. HAD I NOT CHANCED TO STROLL ALONG THE RIVER TONIGHT--'\u001b[0m,\n", + " \u001b[32m'AS QUICKLY AS I CAN, MASTER'\u001b[0m,\n", + " \u001b[32m'--THE GIRL WOULD MOST SURELY BE DEAD BY NOW.'\u001b[0m,\n", + " \u001b[32m'GHEDE HAS BEEN GENEROUS. THE DEATH GOD HAS GIVEN THE GIRL A SECOND CHANCE AT--'\u001b[0m,\n", + " \u001b[32m\"EASY, GIRL-- THERE'S NOTHING TO SCREAM ABOUT ANYMORE.\"\u001b[0m,\n", + " \u001b[32m\"YOU'RE AMONG FRIENDS NOW, YOU'RE SAFE!\"\u001b[0m,\n", + " \u001b[32m'CONTINUED AFTER NEXT PAGE'\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" ] }, - "execution_count": 93, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "IMAGE_CONTEXT.gts\n" + "cprint(IMAGE_CONTEXT.gts())\n", + "cprint(IMAGE_CONTEXT.gts('all-caps'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# for img_path in CONTEXT.image_paths:\n", + "# gt_path = img_path.with_stem(img_path.stem + '_gt').with_suffix('.txt')\n", + "# print(gt_path)\n", + "# gt_path = CONTEXT.root_dir/gt_path\n", + "# if gt_path.exists():\n", + "# gts = gt_path.read_text(encoding=\"utf-8\").splitlines()\n", + "# data = {\n", + "# 'all-caps': [gt.upper() for gt in gts],\n", + "# 'capitalized': gts\n", + "# }\n", + "# fp = gt_path.with_suffix('.json')\n", + "# with open(fp, 'w') as f:\n", + "# json.dump(data, f, indent=2)\n", + "\n" ] }, { @@ -3807,12 +3757,8 @@ }, { "cell_type": "code", - "execution_count": 37, - "metadata": { - "notebookRunGroups": { - "groupValue": "3" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| exporti\n", @@ -3843,12 +3789,8 @@ }, { "cell_type": "code", - "execution_count": 38, - "metadata": { - "notebookRunGroups": { - "groupValue": "3" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], 
"source": [ "#| export\n", @@ -4214,12 +4156,8 @@ }, { "cell_type": "code", - "execution_count": 39, - "metadata": { - "notebookRunGroups": { - "groupValue": "3" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "BOX_IDX = 0" @@ -4234,12 +4172,8 @@ }, { "cell_type": "code", - "execution_count": 40, - "metadata": { - "notebookRunGroups": { - "groupValue": "3" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -4247,7 +4181,7 @@ "ExperimentOCR#Tesseract_Tesseract-crop" ] }, - "execution_count": 40, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4283,7 +4217,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4330,7 +4264,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4354,7 +4288,8 @@ "result = image_experiment.result(BOX_IDX, method, ocr=False)\n", "assert result is not None\n", "\n", - "CONTEXT.ocr_box(result, page_lang)\n" + "CONTEXT.ocr_box(result, page_lang)\n", + "result.ocr = postprocess_ocr(result.ocr)\n" ] }, { @@ -4366,7 +4301,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4399,7 +4334,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4430,7 +4365,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4470,7 +4405,7 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4496,7 +4431,7 @@ "# image = results[method].image\n", "# assert image is not None\n", "# results[method].ocr = postprocess_ocr(IMAGE_CONTEXT.mocr(image))\n", - "# display_extracted_result(None, None, results[method], IMAGE_CONTEXT.gts[BOX_IDX])\n", + "# display_extracted_result(None, None, 
results[method], IMAGE_CONTEXT.gts()[BOX_IDX])\n", "image_experiment.result(BOX_IDX, method)\n" ] }, @@ -4509,7 +4444,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4542,7 +4477,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4575,7 +4510,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4608,7 +4543,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4641,7 +4576,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4675,7 +4610,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4723,7 +4658,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4747,7 +4682,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4780,12 +4715,8 @@ }, { "cell_type": "code", - "execution_count": 41, - "metadata": { - "notebookRunGroups": { - "groupValue": "3" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -4906,7 +4837,7 @@ }, { "cell_type": "code", - "execution_count": 155, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4940,12 +4871,8 @@ }, { "cell_type": "code", - "execution_count": 42, - "metadata": { - "notebookRunGroups": { - "groupValue": "3" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#| export\n", @@ -4998,22 +4925,18 @@ }, { "cell_type": "code", - "execution_count": 43, - "metadata": { - "notebookRunGroups": { - "groupValue": "3" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { 
"application/vnd.jupyter.widget-view+json": { - "model_id": "7890ae3c54f44e4b8f42ba1fc487d044", + "model_id": "ae489cc65bb54dcf8215c8807fab2701", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HTML(value=\"" + } + }, + "b7ec0258d59d4c4cbe7fda2596748497": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b9ddec6ffc97453d9cabf78f36bd8152": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "0px" + } + }, + "bb37887f61cb487da67815a8441e4606": { 
+ "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "BoundedIntTextModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "BoundedIntTextModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "IntTextView", + "continuous_update": false, + "description": "", + "description_tooltip": null, + "disabled": false, + "layout": "IPY_MODEL_bc91ad9852e646928b2491514ce5c1fb", + "max": 14, + "min": 0, + "step": 1, + "style": "IPY_MODEL_84d6e759885547feac1bb40b9f6ba449", + "value": 0 + } + }, + "bc824c4742924a72b4a3cd4ff16047fc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": "0px 0px 0px 3em", + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "5em" + } + }, + "bc91ad9852e646928b2491514ce5c1fb": { + 
"model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "50px" + } + }, + "bebaa8285f044081bd907db14eda2876": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "initial" + } + }, + "c1038ace4996476db1708eaee3ea062c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "display_option_grp" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_cf182da14bd84e1d934f8a29c3986d95" + ], + "layout": "IPY_MODEL_eb898c12964d48dca6dc7a82c79d2b73" + } + }, + "c4d45b30e1c8461088df86349146d0e4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "cf182da14bd84e1d934f8a29c3986d95": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DropdownModel", + "_options_labels": [ + "Boxes", + "Image", + "Mask", + "Image & Mask", + "Page data", + "Ground truth", + "Image All", + "Results", + "Best results", + "Accuracy", + "Dataframe", + "Config" + ], + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "DropdownView", + "description": "", + "description_tooltip": null, + "disabled": false, + "index": 7, + "layout": "IPY_MODEL_37adfad0552c4b95b892e04a68eed45d", + "style": "IPY_MODEL_ab354dd44b9448868e84aa96c3a0c8ac" + } + }, + "d264052799214924961df8ad52df35c0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d33528dc39004a07ab6094b00a8371ba": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DropdownModel", + "_options_labels": [ + "Tesseract-crop-post", + "Tesseract-crop" + ], + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "DropdownView", + "description": "", + "description_tooltip": null, + "disabled": false, + "index": 0, + "layout": "IPY_MODEL_7d7c5cfe0470438095fab691ac17977e", + "style": "IPY_MODEL_bebaa8285f044081bd907db14eda2876" + } + }, + "d55ff8c2cd4343c1a7c2394a0cbb7044": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": "0px 0px 0px 10px", + "right": null, + "top": null, + "visibility": null, + "width": "initial" + } + }, + "e15e755d1aed4ef1bea36095ef4e71c6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "0px" + } + }, + "e21fb5c1948d4317965fd788878f2e41": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "139619740843360" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3a8042f94a2f466e94457b14fc91b53e" + ], + "layout": "IPY_MODEL_fc606d552a9d40a5908ef27939278f29" + } + }, + 
"e493f35b8f21428a975e738588df8222": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "box_grp" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_8029556124d34fdda0849febd878aefc", + "IPY_MODEL_0b84340f11284915a7056b60f3592676", + "IPY_MODEL_bb37887f61cb487da67815a8441e4606" + ], + "layout": "IPY_MODEL_9a1a5f51138c4c349e913786b0488d08" + } + }, + "e6a83dde359f4060baa853d233d56142": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e9851a001fa54db1a4e0da0e52efad28": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e21fb5c1948d4317965fd788878f2e41", + "IPY_MODEL_19643c8a1d674222a20626830402afef", + "IPY_MODEL_a342b0c239ae4c8c966a197f346698bc", + "IPY_MODEL_4f28be29339d4155bff460a3ccd30b8e" + ], + "layout": "IPY_MODEL_6ee898c098d5463199a9efb201f0f90e" + } + }, + "eb898c12964d48dca6dc7a82c79d2b73": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "edb42bd2e8054ac381230f2b426cc477": { + "model_module": "@jupyter-widgets/base", 
+ "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": "none", + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f1f2978a05b64b50ab0206498135b7e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "initial" + } + }, + "f4b70788e3b249e3bdc7ee247896fa21": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "initial" + } + }, + "f6cfe60a03ee4d66b6f7aa4208c9bcab": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fd8d32ff95bb4b6ba9985214a93ce713", + "placeholder": "​", + "style": "IPY_MODEL_8e79c3c66cef4b96b6c23b27fddef358", + "value": " 11/11 [00:03<00:00,  3.17it/s]" + } + }, + "f7481c76692e45d183a74797d7636601": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": 
null, + "visibility": null, + "width": null + } + }, + "fc606d552a9d40a5908ef27939278f29": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fc6c361cfc6e427ab9b80d4518c386fa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "139619740843744" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e9851a001fa54db1a4e0da0e52efad28", + "IPY_MODEL_3a8ad8eaf7f54d77bb1c781160aba961" + ], + "layout": "IPY_MODEL_416f0fbdd95648a688f25a67f6a9e66f" + } + }, + 
"fd8d32ff95bb4b6ba9985214a93ce713": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + } } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 0 } diff --git a/pcleaner/_testbed/nbs/web_server.ipynb b/pcleaner/_testbed/nbs/web_server.ipynb index 1e6d9f25..ea761ca9 100644 --- a/pcleaner/_testbed/nbs/web_server.ipynb +++ b/pcleaner/_testbed/nbs/web_server.ipynb @@ -35,7 +35,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# install (Colab)" + "## Settings for Google Colab" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To efficiently manage the image sources for our experiments, we recommend mounting your Google Drive and storing the experiment files there. 
If you are not familiar with Colab or Jupyter environments, it's best to leave these settings at their default values to ensure smooth operation.\n", + "\n", + "- Set `MOUNT_DRIVE` to `True` to enable mounting Google Drive in the Colab environment.\n", + "This allows the notebook to access files stored in your Google Drive.\n", + "\n", + "- `GDRIVE_MOUNT_POINT` specifies the local directory in Colab where your Google Drive will be mounted.\n", + "This acts as the root directory for accessing any files within your Google Drive from the notebook.\n", + "\n", + "- `PANELCLEANER_IN_GDRIVE` specifies the path within your Google Drive where the PanelCleaner project is located.\n", + "This path is used to access or store any files related to the PanelCleaner project directly from Google Drive.\n" ] }, { @@ -45,8 +61,16 @@ "outputs": [], "source": [ "MOUNT_DRIVE = True\n", - "DEV_INSTALL = True\n", - "GDRIVE_MOUNT_POINT = 'drive'\n" + "GDRIVE_MOUNT_POINT = 'drive'\n", + "PANELCLEANER_IN_GDRIVE = 'MyDrive/Shared/PanelCleaner'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# install (Colab)\n", + "> Install necessary libraries in Colab\n" ] }, { @@ -55,33 +79,27 @@ "metadata": {}, "outputs": [], "source": [ + "import fastcore.all as FC\n", "import os\n", + "import re\n", "from pathlib import Path\n", - "import fastcore.all as FC\n", + "\n", "from rich import print as cprint\n", "from rich.text import Text\n", "\n", - "def info(msg: str):\n", - " text = Text(msg)\n", - " text.stylize(\"bold red\", 0, 6)\n", - " cprint(\"_\" * 10, text, \"_\" * 10)\n", - "\n", + "def info(msg: str): \n", + " (t := Text(msg)).stylize(\"bold red\", 0, 6)\n", + " cprint(\"_\" * 10, t, \"_\" * 10)\n", "\n", "if FC.IN_COLAB:\n", - " if MOUNT_DRIVE:\n", - " mnt_point = f\"/content/{GDRIVE_MOUNT_POINT}\"\n", - " if not Path(mnt_point).exists():\n", - " info(\"Mounting Google Drive\")\n", - " from google.colab import drive\n", - "\n", - " drive.mount(mnt_point, 
force_remount=True)\n" + " !pip install -q pyngrok\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Colab has issues with PanelClenar PIL version.: uninstall Colab one, restart wen prompted and rerun from the top.\n" + "Mount Google Drive" ] }, { @@ -90,15 +108,24 @@ "metadata": {}, "outputs": [], "source": [ + "mnt_point = Path(f\"/content/{GDRIVE_MOUNT_POINT}\")\n", "if FC.IN_COLAB:\n", - " from packaging import version\n", - " import PIL\n", - " pil_version = version.parse(PIL.__version__)\n", - " if pil_version < version.parse(\"10\"):\n", - " info('Uninstalling Pillow')\n", - " !pip uninstall Pillow\n", - " info('Installing Pillow')\n", - " !pip install Pillow\n" + " if MOUNT_DRIVE:\n", + " if not mnt_point.exists():\n", + " info(\"Mounting Google Drive\")\n", + " from google.colab import drive\n", + " drive.mount(str(mnt_point), force_remount=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install **PanelCleaner**\n", + "\n", + "> We will attempt to use the version of **PanelCleaner** stored in your Google Drive. If it's not available, we'll install it from GitHub. \n", + "\n", + "Note that we specifically require the `testbed` branch of the **PanelCleaner** repository, not the main trunk. This branch contains necessary configurations and experimental features that are crucial for the tests conducted in this notebook." 
] }, { @@ -109,14 +136,32 @@ "source": [ "if FC.IN_COLAB:\n", " info('Installing PanelCleaner')\n", - " if DEV_INSTALL:\n", - " assert MOUNT_DRIVE, \"DEV_INSTALL need a mounted google drive drive\"\n", - " info('Installing PanelCleaner from Google Drive')\n", - " os.chdir('/content/drive/MyDrive/Shared/PanelCleaner/')\n", - " !pip install -e .\n", + " pc_path = mnt_point / PANELCLEANER_IN_GDRIVE\n", + " testbed_path = pc_path / 'pcleaner/_testbed'\n", + " if testbed_path.exists():\n", + " info('Installing PanelCleaner from your Google Drive')\n", + " os.chdir(pc_path)\n", " else:\n", - " info('Installing PanelCleaner from Github')\n", - " !pip install -q git+https://github.com/civvic/PanelCleaner.git@testbed-colab\n" + " info('Installing PanelCleaner from GitHub')\n", + " !git clone -b testbed https://github.com/civvic/PanelCleaner.git\n", + " os.chdir('PanelCleaner')\n", + " assert testbed_path.exists(), \"PanelCleaner not found\"\n", + " !pip install -e ." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note:** If you encounter installation issues with **PanelCleaner** in Colab, re-running the installation cell above may resolve them." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Navigate to the Testbed Directory\n", + "> Ensure you are in the correct directory to start the experiments." 
] }, { @@ -126,21 +171,7 @@ "outputs": [], "source": [ "if FC.IN_COLAB:\n", - " info('Installing PanelCleaner Colab requirements')\n", - " import importlib.resources\n", - " if DEV_INSTALL:\n", - " os.chdir('pcleaner/_testbed')\n", - " \n", - " try:\n", - " package_path = importlib.resources.files('pcleaner')\n", - " info('Installing PanelCleaner testbed requirements')\n", - " p = (Path(package_path)/'_testbed/requirements-colab.txt')\n", - " if p.exists():\n", - " !pip install -r {p}\n", - " else:\n", - " print(f\"colab requirements {p} not found\")\n", - " except Exception:\n", - " info(\"Couldn't install PanelCleaner Colab requirements\")\n" + " os.chdir('./pcleaner/_testbed')" ] }, { @@ -184,21 +215,13 @@ "from loguru import logger\n", "from pyngrok import conf\n", "from pyngrok import ngrok\n", - "from rich.console import Console\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "from pcleaner._testbed.testbed.bottle import Bottle\n", - "from pcleaner._testbed.testbed.bottle import HTTPError\n", - "from pcleaner._testbed.testbed.bottle import response\n", - "from pcleaner._testbed.testbed.bottle import run\n", - "from pcleaner._testbed.testbed.bottle import static_file\n" + "from rich.console import Console\n", + "\n", + "from testbed.bottle import Bottle\n", + "from testbed.bottle import HTTPError\n", + "from testbed.bottle import response\n", + "from testbed.bottle import run\n", + "from testbed.bottle import static_file\n" ] }, { @@ -209,9 +232,7 @@ "source": [ "import fastcore.all as FC\n", "import fastcore.xtras # patch Path with some utils\n", - "from fastcore.test import * # type: ignore\n", - "\n", - "import pcleaner._testbed.testbed.bottle as bottle\n" + "from fastcore.test import * # type: ignore\n" ] }, { @@ -440,6 +461,7 @@ " self.unc_share = Path(self.public_url.replace('https:', ''))\n", " cprint(f\"ngrok tunnel: {self.tunnel}\")\n", " cprint(f\"Public URL: 
{self.public_url}\")\n", + " cprint(f\"Serving images from: {self.directory}\")\n", " else:\n", " cprint(\"Server is already running\")\n", "\n", @@ -522,11 +544,11 @@ { "data": { "text/html": [ - "
ngrok tunnel: NgrokTunnel: \"https://36a0-83-33-227-209.ngrok-free.app\" -> \"http://localhost:55435\"\n",
+       "
ngrok tunnel: NgrokTunnel: \"https://5029-83-33-227-209.ngrok-free.app\" -> \"http://localhost:63057\"\n",
        "
\n" ], "text/plain": [ - "ngrok tunnel: NgrokTunnel: \u001b[32m\"https://36a0-83-33-227-209.ngrok-free.app\"\u001b[0m -> \u001b[32m\"http://localhost:55435\"\u001b[0m\n" + "ngrok tunnel: NgrokTunnel: \u001b[32m\"https://5029-83-33-227-209.ngrok-free.app\"\u001b[0m -> \u001b[32m\"http://localhost:63057\"\u001b[0m\n" ] }, "metadata": {}, @@ -535,11 +557,24 @@ { "data": { "text/html": [ - "
Public URL: https://36a0-83-33-227-209.ngrok-free.app\n",
+       "
Public URL: https://5029-83-33-227-209.ngrok-free.app\n",
        "
\n" ], "text/plain": [ - "Public URL: \u001b[4;94mhttps://36a0-83-33-227-209.ngrok-free.app\u001b[0m\n" + "Public URL: \u001b[4;94mhttps://5029-83-33-227-209.ngrok-free.app\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Serving images from: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/cache\n",
+       "
\n" + ], + "text/plain": [ + "Serving images from: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/\u001b[0m\u001b[95mcache\u001b[0m\n" ] }, "metadata": {}, @@ -549,14 +584,14 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", "
\n", "

Ngrok displays a warning page as a security measure to prevent unintentional access to your local servers. This page requires you to confirm that you wish to proceed to the content.

\n", "

Please review the ngrok warning page displayed below. If prompted, click 'Visit Page' to proceed.

\n", " Don't worry if you see a 404 or 403 error. Then, you can click the 'Close' button below to hide this section.

\n", "
\n", - " \n", - " \n", + " \n", + " \n", "
\n" ], "text/plain": [ @@ -570,10 +605,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "127.0.0.1 - - [21/May/2024 13:22:26] \"GET //pcleaner.png HTTP/1.1\" 200 -\n", - "127.0.0.1 - - [21/May/2024 13:22:46] \"GET /Strange_Tales_172005/.crop/Strange_Tales_172005_0_Default.png HTTP/1.1\" 200 -\n", - "127.0.0.1 - - [21/May/2024 13:22:48] \"GET /Strange_Tales_172005/.crop/Strange_Tales_172005_1_Default.png HTTP/1.1\" 200 -\n", - "t=2024-05-21T13:22:51+0200 lvl=warn msg=\"Stopping forwarder\" name=http-55435-1681ca10-634d-4a9f-b4c3-e9ae3c2b48e2 acceptErr=\"failed to accept connection: Listener closed\"\n" + "127.0.0.1 - - [30/May/2024 20:04:54] code 404, message File not found\n", + "127.0.0.1 - - [30/May/2024 20:04:54] \"GET //pcleaner.png HTTP/1.1\" 404 -\n", + "127.0.0.1 - - [30/May/2024 20:05:28] \"GET /Strange_Tales_172005/_crop/Strange_Tales_172005_0_Default.png HTTP/1.1\" 200 -\n", + "127.0.0.1 - - [30/May/2024 20:05:32] \"GET /Strange_Tales_172005/_crop/Strange_Tales_172005_1_Default.png HTTP/1.1\" 200 -\n", + "t=2024-05-30T20:05:45+0200 lvl=warn msg=\"Stopping forwarder\" name=http-63057-fe3f0d56-9db5-4936-bb03-cb610aa88a40 acceptErr=\"failed to accept connection: Listener closed\"\n" ] } ], @@ -597,7 +633,7 @@ "metadata": {}, "outputs": [], "source": [ - "img_path = 'Strange_Tales_172005/.crop/Strange_Tales_172005_0_Default.png'\n" + "img_path = 'Strange_Tales_172005/_crop/Strange_Tales_172005_0_Default.png'\n" ] }, { @@ -608,7 +644,7 @@ { "data": { "text/plain": [ - "''" + "''" ] }, "execution_count": null, @@ -628,7 +664,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -650,7 +686,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -661,7 +697,7 @@ } ], "source": [ - "display(HTML(f''))\n" + "display(HTML(f''))\n" ] }, { @@ -718,7 +754,7 @@ { "data": { "text/plain": [ - "52794" + "63057" ] }, "execution_count": null, @@ -850,6 +886,7 @@ " self.unc_share = Path(self.public_url.replace('https:', 
''))/self.prefix\n", " cprint(f\"ngrok tunnel: {self.tunnel}\")\n", " cprint(f\"Public URL: {self.public_url}\")\n", + " cprint(f\"Serving images from: {self.directory}\")\n", "\n", " def start(self):\n", " if self.thread is None or not self.thread.is_alive():\n", @@ -861,16 +898,16 @@ " if self.tunnel and self.tunnel.public_url:\n", " ngrok.disconnect(self.tunnel.public_url)\n", " cprint(\"Ngrok tunnel disconnected\")\n", - " ngrok.kill()\n", " \n", " if self.thread:\n", " self.make_request('/shutdown')\n", - " self.thread.join(timeout=10)\n", + " self.thread.join(timeout=5)\n", " if self.thread.is_alive():\n", " print(\"Thread did not terminate, proceeding with forceful shutdown.\")\n", " else:\n", " print(\"Server thread stopped successfully.\")\n", " self.thread = self.tunnel = self.public_url = self.unc_share = None\n", + " ngrok.kill()\n", " cprint(\"Server stopped\")\n", "\n", " def make_request(self, path=\"/\"):\n", @@ -941,7 +978,7 @@ "output_type": "stream", "text": [ "Bottle v0.13-dev server starting up (using WSGIRefServer())...\n", - "Listening on http://localhost:55470/\n", + "Listening on http://localhost:63275/\n", "Hit Ctrl-C to quit.\n", "\n" ] @@ -949,11 +986,24 @@ { "data": { "text/html": [ - "
ngrok tunnel: NgrokTunnel: \"https://0836-83-33-227-209.ngrok-free.app\" -> \"http://localhost:55470\"\n",
+       "
ngrok tunnel: NgrokTunnel: \"https://ef4e-83-33-227-209.ngrok-free.app\" -> \"http://localhost:63275\"\n",
+       "
\n" + ], + "text/plain": [ + "ngrok tunnel: NgrokTunnel: \u001b[32m\"https://ef4e-83-33-227-209.ngrok-free.app\"\u001b[0m -> \u001b[32m\"http://localhost:63275\"\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Public URL: https://ef4e-83-33-227-209.ngrok-free.app\n",
        "
\n" ], "text/plain": [ - "ngrok tunnel: NgrokTunnel: \u001b[32m\"https://0836-83-33-227-209.ngrok-free.app\"\u001b[0m -> \u001b[32m\"http://localhost:55470\"\u001b[0m\n" + "Public URL: \u001b[4;94mhttps://ef4e-83-33-227-209.ngrok-free.app\u001b[0m\n" ] }, "metadata": {}, @@ -962,11 +1012,11 @@ { "data": { "text/html": [ - "
Public URL: https://0836-83-33-227-209.ngrok-free.app\n",
+       "
Serving images from: ../experiment/cache\n",
        "
\n" ], "text/plain": [ - "Public URL: \u001b[4;94mhttps://0836-83-33-227-209.ngrok-free.app\u001b[0m\n" + "Serving images from: ..\u001b[35m/experiment/\u001b[0m\u001b[95mcache\u001b[0m\n" ] }, "metadata": {}, @@ -976,14 +1026,14 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", "
\n", "

Ngrok displays a warning page as a security measure to prevent unintentional access to your local servers. This page requires you to confirm that you wish to proceed to the content.

\n", "

Please review the ngrok warning page displayed below. If prompted, click 'Visit Page' to proceed.

\n", " Don't worry if you see a 404 or 403 error. Then, you can click the 'Close' button below to hide this section.

\n", "
\n", - " \n", - " \n", + " \n", + " \n", "
\n" ], "text/plain": [ @@ -997,10 +1047,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "127.0.0.1 - - [21/May/2024 13:24:01] \"GET /images/pcleaner.png HTTP/1.1\" 200 17709\n", - "127.0.0.1 - - [21/May/2024 13:24:45] \"GET /images/Strange_Tales_172005/.crop/Strange_Tales_172005_0_Default.png HTTP/1.1\" 200 137784\n", - "127.0.0.1 - - [21/May/2024 13:24:48] \"GET /images/images/Strange_Tales_172005/.crop/Strange_Tales_172005_1_Default.png HTTP/1.1\" 404 817\n", - "127.0.0.1 - - [21/May/2024 13:25:01] \"GET /images/Strange_Tales_172005/.crop/Strange_Tales_172005_1_Default.png HTTP/1.1\" 200 107550\n" + "127.0.0.1 - - [30/May/2024 20:13:37] \"GET /images/pcleaner.png HTTP/1.1\" 404 761\n", + "127.0.0.1 - - [30/May/2024 20:13:44] \"GET /images/Strange_Tales_172005/_crop/Strange_Tales_172005_0_Default.png HTTP/1.1\" 200 137784\n", + "127.0.0.1 - - [30/May/2024 20:13:45] \"GET /images/Strange_Tales_172005/_crop/Strange_Tales_172005_1_Default.png HTTP/1.1\" 200 107550\n", + "t=2024-05-30T20:13:50+0200 lvl=warn msg=\"Stopping forwarder\" name=http-63275-08b9c9cf-549d-441e-a6c5-74a0cd0fab91 acceptErr=\"failed to accept connection: Listener closed\"\n" ] } ], @@ -1024,7 +1074,7 @@ "metadata": {}, "outputs": [], "source": [ - "img_path = 'Strange_Tales_172005/.crop/Strange_Tales_172005_0_Default.png'\n" + "img_path = 'Strange_Tales_172005/_crop/Strange_Tales_172005_0_Default.png'\n" ] }, { @@ -1035,7 +1085,7 @@ { "data": { "text/plain": [ - "''" + "''" ] }, "execution_count": null, @@ -1055,7 +1105,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -1077,7 +1127,7 @@ { "data": { "text/html": [ - "" + "" ], "text/plain": [ "" @@ -1088,7 +1138,7 @@ } ], "source": [ - "display(HTML(f''))\n" + "display(HTML(f''))\n" ] }, { @@ -1096,13 +1146,6 @@ "execution_count": null, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "t=2024-05-21T13:25:04+0200 lvl=warn msg=\"Stopping forwarder\" 
name=http-55470-05e35e60-2237-402e-b7ab-8507dd7f47dc acceptErr=\"failed to accept connection: Listener closed\"\n" - ] - }, { "data": { "text/html": [ @@ -1159,7 +1202,7 @@ { "data": { "text/plain": [ - "58805" + "63275" ] }, "execution_count": null, @@ -1180,7 +1223,7 @@ { "data": { "text/plain": [ - "[]" + "'15604'" ] }, "execution_count": null, diff --git a/pcleaner/_testbed/testbed/web_server.py b/pcleaner/_testbed/testbed/web_server.py index 95d1e3e2..41358da3 100644 --- a/pcleaner/_testbed/testbed/web_server.py +++ b/pcleaner/_testbed/testbed/web_server.py @@ -37,12 +37,12 @@ from .bottle import static_file -# %% ../nbs/web_server.ipynb 17 +# %% ../nbs/web_server.ipynb 16 console = Console(width=104, tab_size=4, force_jupyter=True) cprint = console.print -# %% ../nbs/web_server.ipynb 18 +# %% ../nbs/web_server.ipynb 17 def display_ngrok_warning(url): did = 'ngrokFrame' + str(uuid.uuid4()) html_code = f""" @@ -60,7 +60,7 @@ def display_ngrok_warning(url): display(HTML(html_code)) -# %% ../nbs/web_server.ipynb 19 +# %% ../nbs/web_server.ipynb 18 class WebServer(Protocol): @property def public_url(self) -> str | None: ... @@ -95,7 +95,7 @@ def setup_ngrok(server_cls: type[WebServer], images_dir: str | Path): return server -# %% ../nbs/web_server.ipynb 24 +# %% ../nbs/web_server.ipynb 23 class ImageHTTPRequestHandler(http.server.SimpleHTTPRequestHandler): def end_headers(self): for k,v in { @@ -121,7 +121,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, directory=self.directory, **kwargs) -# %% ../nbs/web_server.ipynb 25 +# %% ../nbs/web_server.ipynb 24 class WebServerStdlib: """ A simple web server for serving images from a local directory using http.server and ngrok. 
@@ -180,6 +180,7 @@ def start(self): self.unc_share = Path(self.public_url.replace('https:', '')) cprint(f"ngrok tunnel: {self.tunnel}") cprint(f"Public URL: {self.public_url}") + cprint(f"Serving images from: {self.directory}") else: cprint("Server is already running") @@ -217,7 +218,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.stop() -# %% ../nbs/web_server.ipynb 39 +# %% ../nbs/web_server.ipynb 38 app = Bottle() @app.route('/images/') # type: ignore @@ -233,7 +234,7 @@ def shutdown(): current_process.send_signal(signal.SIGTERM) -# %% ../nbs/web_server.ipynb 40 +# %% ../nbs/web_server.ipynb 39 class WebServerBottle: """ A simple web server for serving images from a local directory using ngrok. @@ -283,6 +284,7 @@ def bottle_run(): self.unc_share = Path(self.public_url.replace('https:', ''))/self.prefix cprint(f"ngrok tunnel: {self.tunnel}") cprint(f"Public URL: {self.public_url}") + cprint(f"Serving images from: {self.directory}") def start(self): if self.thread is None or not self.thread.is_alive(): @@ -294,16 +296,16 @@ def stop(self): if self.tunnel and self.tunnel.public_url: ngrok.disconnect(self.tunnel.public_url) cprint("Ngrok tunnel disconnected") - ngrok.kill() if self.thread: self.make_request('/shutdown') - self.thread.join(timeout=10) + self.thread.join(timeout=5) if self.thread.is_alive(): print("Thread did not terminate, proceeding with forceful shutdown.") else: print("Server thread stopped successfully.") self.thread = self.tunnel = self.public_url = self.unc_share = None + ngrok.kill() cprint("Server stopped") def make_request(self, path="/"): From 158020d6ffa045412b139a67bce2ee6039f9249d Mon Sep 17 00:00:00 2001 From: Spikey Date: Fri, 31 May 2024 20:01:35 +0200 Subject: [PATCH 21/27] PaliGemma support --- pcleaner/_testbed/nbs/ocr_paligemma.ipynb | 2092 ++++++++++++++++++++ pcleaner/_testbed/testbed/ocr_paligemma.py | 292 +++ 2 files changed, 2384 insertions(+) create mode 100644 pcleaner/_testbed/nbs/ocr_paligemma.ipynb create mode 
100644 pcleaner/_testbed/testbed/ocr_paligemma.py diff --git a/pcleaner/_testbed/nbs/ocr_paligemma.ipynb b/pcleaner/_testbed/nbs/ocr_paligemma.ipynb new file mode 100644 index 00000000..115649a9 --- /dev/null +++ b/pcleaner/_testbed/nbs/ocr_paligemma.ipynb @@ -0,0 +1,2092 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp ocr_paligemma" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from __future__ import annotations\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# %reload_ext autoreload\n", + "# %autoreload 0\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing `PaliGemma` OCR for Comics\n", + "> Accuracy Enhancements for OCR in `PanelCleaner`\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prologue" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import subprocess\n", + "from pathlib import Path\n", + "from typing import Any\n", + "from typing import Literal\n", + "from typing import TypeAlias\n", + "\n", + "import torch\n", + "import transformers.image_utils as image_utils\n", + "from PIL import Image\n", + "from rich.console import Console\n", + "from transformers import AutoProcessor\n", + "from transformers import BitsAndBytesConfig\n", + "from transformers import PaliGemmaForConditionalGeneration\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "import sys\n", + "from typing import cast\n", + "import requests\n", + "\n", + "import fastcore.all as FC\n", + "import fastcore.xtras # patch pathlib.Path with some utils\n", + "import transformers\n", + 
"from fastcore.test import * # type: ignore\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "need version >4.41 of transformers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'4.42.0.dev0'" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transformers.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# %pip install git+https://github.com/huggingface/transformers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Helpers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pretty print by default\n", + "# %load_ext rich" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "\n", + "console = Console(width=104, tab_size=4, force_jupyter=True)\n", + "cprint = console.print\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Force reload of `experiments` module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if 'pcleaner._testbed.testbed.experiments' in sys.modules:\n", + " import importlib; importlib.reload(pcleaner._testbed.testbed.experiments) # type: ignore\n", + "else:\n", + " import pcleaner._testbed.testbed.experiments\n", + " from pcleaner._testbed.testbed.experiments import *\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from testbed.experiments import *\n", + "from testbed.helpers import RenderJSON\n", + "from testbed.helpers import IN_MAC\n", + "from testbed.helpers import IN_LINUX\n", + "from testbed.helpers import default_device\n", + "from 
testbed.ocr_metric import *\n", + "from testbed.web_server import setup_ngrok\n", + "from testbed.web_server import WebServerBottle\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "\n", + "if IN_MAC:\n", + " import mlx.core as mx\n", + " from mlx_vlm import load, generate\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "\n", + "def load_image(img_ref: str | Path | Image.Image) -> Image.Image:\n", + " return image_utils.load_image(str(img_ref) if isinstance(img_ref, Path) else img_ref)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "\n", + "def get_gpu_vram(total=True):\n", + " if total:\n", + " if IN_MAC:\n", + " return mx.metal.device_info()['memory_size']//1024//1024\n", + " else:\n", + " command = \"nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits\"\n", + " else:\n", + " if IN_MAC:\n", + " return mx.metal.get_active_memory()//1024//1024\n", + " else:\n", + " command = \"nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits\"\n", + " try:\n", + " vram = subprocess.check_output(command, shell=True).decode('utf-8').strip()\n", + " return vram\n", + " except subprocess.CalledProcessError:\n", + " return \"Failed to get VRAM\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'mps'" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DEVICE = default_device()\n", + "DEVICE\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
          metal.is_available(): True\n",
+       "           metal.device_info(): {'memory_size': 68719476736, 'max_recommended_working_set_size': \n",
+       "51539607552, 'max_buffer_length': 38654705664, 'architecture': 'applegpu_g13s'}\n",
+       "     metal.get_active_memory(): 8\n",
+       "       metal.get_peak_memory(): 8\n",
+       "      metal.get_cache_memory(): 0\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + " \u001b[1;35mmetal.is_available\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[3;92mTrue\u001b[0m\n", + " \u001b[1;35mmetal.device_info\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'memory_size'\u001b[0m: \u001b[1;36m68719476736\u001b[0m, \u001b[32m'max_recommended_working_set_size'\u001b[0m: \n", + "\u001b[1;36m51539607552\u001b[0m, \u001b[32m'max_buffer_length'\u001b[0m: \u001b[1;36m38654705664\u001b[0m, \u001b[32m'architecture'\u001b[0m: \u001b[32m'applegpu_g13s'\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[1;35mmetal.get_active_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m8\u001b[0m\n", + " \u001b[1;35mmetal.get_peak_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m8\u001b[0m\n", + " \u001b[1;35mmetal.get_cache_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
     total VRAM: 65536 MiB\n",
+       "    active VRAM: 0 MiB\n",
+       "
\n" + ], + "text/plain": [ + " total VRAM: \u001b[1;36m65536\u001b[0m MiB\n", + " active VRAM: \u001b[1;36m0\u001b[0m MiB\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "if IN_MAC:\n", + " cprint(\n", + " f\"{'metal.is_available()':>30}: {mx.metal.is_available()}\\n\"\n", + " f\"{'metal.device_info()':>30}: {mx.metal.device_info()}\\n\"\n", + " f\"{'metal.get_active_memory()':>30}: {mx.metal.get_active_memory()}\\n\"\n", + " f\"{'metal.get_peak_memory()':>30}: {mx.metal.get_peak_memory()}\\n\"\n", + " f\"{'metal.get_cache_memory()':>30}: {mx.metal.get_cache_memory()}\\n\"\n", + " )\n", + "else:\n", + " !nvidia-smi\n", + "\n", + "cprint( f\"{'total VRAM':>15}: {get_gpu_vram()} MiB\\n\"\n", + " f\"{'active VRAM':>15}: {get_gpu_vram(False)} MiB\")\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "# PaliGemma basic usage\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### [Model card](https://huggingface.co/google/paligemma-3b-mix-448) examples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "User is already logged in.\n" + ] + } + ], + "source": [ + "from huggingface_hub import notebook_login\n", + "notebook_login(False, True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "url = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true'\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + "image\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### CPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.\n", + "Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use\n", + "`config.hidden_activation` if you want to override this behaviour.\n", + "See https://github.com/huggingface/transformers/pull/29402 for more details.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "27cb8013eba248a6bfa08aeb6be0f83e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/3 [00:00Un auto azul estacionado frente a un edificio.\n", + "
\n" + ], + "text/plain": [ + "Un auto azul estacionado frente a un edificio.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Instruct the model to create a caption in Spanish\n", + "prompt = 'caption es'\n", + "model_inputs = processor(text=prompt, images=image, return_tensors='pt')\n", + "input_len = model_inputs['input_ids'].shape[-1]\n", + "\n", + "with torch.inference_mode():\n", + " generation = model.generate(**model_inputs, max_new_tokens=100, do_sample=False)\n", + " generation = generation[0][input_len:]\n", + " decoded = processor.decode(generation, skip_special_tokens=True)\n", + " cprint(decoded)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "del model\n", + "import gc\n", + "gc.collect();\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "549d83079f7544c9a517f12efb9b7af2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/2 [00:00'OCR the text in the image'\n", + "
\n" + ], + "text/plain": [ + "\u001b[32m'OCR the text in the image'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "prompt = 'What does the text say?'\n", + "prompt = 'OCR the text in the image'\n", + "cprint(repr(prompt))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_url = Path('../experiment/cache/Strange_Tales_172005/_crop/Strange_Tales_172005_7_Default, grey pad.png')\n", + "image1 = Image.open(image_url)\n", + "display(image1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "==========\n", + "Image: \n", + "\n", + "Prompt: OCR the text in the image\n", + "THE ECHO OF THE OLD MAN'S FOOTSTEPS FADES DOWN THE HALL AS...\n", + "==========\n", + "Prompt: 20.769 tokens-per-sec\n", + "Generation: 62.188 tokens-per-sec\n" + ] + } + ], + "source": [ + "if IN_MAC:\n", + " output = generate(\n", + " model, \n", + " processor, # type: ignore\n", + " image1, # type: ignore\n", + " prompt, # type: ignore\n", + " max_tokens=100,\n", + " temp=0.0,\n", + " top_p=1.0,\n", + " # repetition_penalty=1.2,\n", + " verbose=True\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_url = Path('../experiment/cache/Strange_Tales_172005/_crop/Strange_Tales_172005_0_Default, grey pad.png')\n", + "image2 = Image.open(image_url)\n", + "display(image2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "==========\n", + 
"Image: \n", + "\n", + "Prompt: OCR the text in the image\n", + "EMBOWERED BY GREAT CHARLED CYPRESS TREES, THE ANCIENT MAYOR STANDS ALONE ON THE OUTSKIRTS OF NEW ORLEANS, KEPT TIDY BY A WHITE-HAIRED OLD MAN KNOWN ONLY AS BAMBU.\n", + "==========\n", + "Prompt: 23.426 tokens-per-sec\n", + "Generation: 62.838 tokens-per-sec\n" + ] + } + ], + "source": [ + "if IN_MAC:\n", + " output = generate(\n", + " model, \n", + " processor, # type: ignore\n", + " image2, # type: ignore\n", + " prompt, # type: ignore\n", + " max_tokens=100,\n", + " temp=0.0,\n", + " top_p=1.0,\n", + " # repetition_penalty=1.2,\n", + " # repetition_context_size=40,\n", + " verbose=True\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "image_url = Path('../experiment/cache/Strange_Tales_172005/_crop/Strange_Tales_172005_1_Default, grey pad.png')\n", + "image3 = Image.open(image_url)\n", + "display(image3)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "==========\n", + "Image: \n", + "\n", + "Prompt: OCR the text in the image\n", + "THE HOUSE AND THE OLD MAN ARE ALIKE IN MANY WAYS; TALL, PROUD, PATIENT, CONTENTED ALWAYS TO WAIT UNTIL THEIR MASTER COMES HOME--\n", + "==========\n", + "Prompt: 23.533 tokens-per-sec\n", + "Generation: 62.826 tokens-per-sec\n" + ] + } + ], + "source": [ + "if IN_MAC:\n", + " output = generate(\n", + " model, \n", + " processor, # type: ignore\n", + " image3, # type: ignore\n", + " prompt, # type: ignore\n", + " max_tokens=100,\n", + " temp=0.0,\n", + " top_p=1.0,\n", + " # repetition_penalty=1.2,\n", + " # repetition_context_size=40,\n", + " verbose=True\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "if IN_MAC:\n", + " del processor\n", + " del model\n", + " mx.metal.clear_cache()\n", + " import gc\n", + " gc.collect()\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Linux" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### [Model card](https://huggingface.co/google/paligemma-3b-mix-448) examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4-bit/8-bit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if IN_LINUX:\n", + " model_id = \"google/paligemma-3b-mix-224\"\n", + " device = \"cuda:0\"\n", + " dtype = torch.bfloat16\n", + "\n", + " quantization_config = BitsAndBytesConfig(load_in_8bit=True)\n", + "\n", + " model = PaliGemmaForConditionalGeneration.from_pretrained(\n", + " model_id, quantization_config=quantization_config\n", + " ).eval()\n", + " model.device\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if IN_LINUX:\n", + " # Instruct the model to create a caption in Spanish\n", + " prompt = \"caption es\"\n", + " model_inputs = processor(text=prompt, images=image, return_tensors=\"pt\").to(model.device)\n", + " input_len = model_inputs[\"input_ids\"].shape[-1]\n", + "\n", + " with torch.inference_mode():\n", + " generation = model.generate(**model_inputs, max_new_tokens=100, do_sample=False)\n", + " generation = generation[0][input_len:]\n", + " decoded = processor.decode(generation, skip_special_tokens=True)\n", + " print(decoded)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if IN_LINUX:\n", + " del model\n", + " torch.cuda.empty_cache()\n", + " import gc\n", + " gc.collect();\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### OCR" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "processor = AutoProcessor.from_pretrained(model_id)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "756c1e6c2222451aad5de4391e2e8627", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/2 [00:00 bool:\n", + " return PaliGemmaOCR.PROCESSOR is not None and PaliGemmaOCR.MODEL is not None\n", + " is_model_ready = is_paligemma_available\n", + "\n", + " def setup_paligemma(self):\n", + " if self.PROCESSOR is None:\n", + " type(self).setup_processor(size=self.size, quant=self.quant)\n", + " if self.MODEL is None:\n", + " type(self).setup_model(self.device, self.size, self.quant)\n", + " setup = setup_paligemma\n", + " \n", + " def cleanup(self):\n", + " try: del self.PROCESSOR\n", + " except Exception: pass\n", + " try: del self.MODEL\n", + " except Exception: pass\n", + " if IN_MAC:\n", + " mx.metal.clear_cache()\n", + " else:\n", + " torch.cuda.empty_cache()\n", + " import gc\n", + " gc.collect()\n", + " self.MODEL = self.PROCESSOR = None\n", + "\n", + " def _generation_args_mac(self, image: Image.Image, prompt_text: str):\n", + " prompt = prompt_text\n", + " max_new_tokens = 100\n", + " temperature = 0.0\n", + " top_p = 1.0\n", + " # repetition_penalty = 1.2\n", + " # repetition_context_size = 20\n", + " generation_args: dict = {\n", + " 'model': self.MODEL,\n", + " 'processor': self.PROCESSOR,\n", + " \"image\": image,\n", + " 'prompt': prompt,\n", + " \"max_tokens\": max_new_tokens,\n", + " 'temp': temperature,\n", + " 'top_p': top_p,\n", + " # 'repetition_penalty': repetition_penalty,\n", + " # 'repetition_context_size': repetition_context_size,\n", + " }\n", + " return prompt, generation_args\n", + "\n", + " def _generation_args(self, image: Image.Image, prompt: str):\n", + " model_inputs = 
self.PROCESSOR(text=prompt, images=image, return_tensors=\"pt\").to(self.MODEL.device)\n", + " max_new_tokens=100\n", + " do_sample = False\n", + " generation_args: dict = {\n", + " 'max_new_tokens': max_new_tokens,\n", + " 'do_sample': do_sample,\n", + " }\n", + " return prompt, model_inputs, generation_args\n", + "\n", + " def _generate_mac(self, image: Image.Image, prompt_text: str):\n", + " prompt, generation_args = self._generation_args_mac(image, prompt_text)\n", + " output = generate(\n", + " **generation_args, \n", + " verbose=False#True\n", + " )\n", + " return prompt, output.strip('').strip(' ')\n", + "\n", + " def _generate(self, image: Image.Image, prompt: str):\n", + " prompt, model_inputs, generation_args = self._generation_args(image, prompt)\n", + " input_len = model_inputs[\"input_ids\"].shape[-1]\n", + " with torch.inference_mode():\n", + " generation = self.MODEL.generate(**model_inputs, **generation_args)\n", + " generation = generation[0][input_len:]\n", + " decoded = self.PROCESSOR.decode(generation, skip_special_tokens=True)\n", + " return prompt, decoded\n", + "\n", + " def postprocess_ocr(self, text):\n", + " return ' '.join(remove_multiple_whitespaces(text).splitlines())\n", + " \n", + " def show_info(self):\n", + " quant = self.quant\n", + " size = self.size\n", + " cfg = PaliGemmaOCR.MODEL.config if PaliGemmaOCR.MODEL is not None else None\n", + " if cfg is not None:\n", + " if hasattr(cfg, 'quantization_config'):\n", + " qcfg = cfg.quantization_config\n", + " quant = '4bit' if qcfg.load_in_4bit else '8bit'\n", + " cprint(\n", + " f\"{'Size':>17}: {size!r}\\n\"\n", + " f\"{'Quantization':>17}: {quant!r}\\n\"\n", + " f\"{'Device':>17}: {self.device!r}\\n\"\n", + " f\"{'VRAM':>17}: {get_gpu_vram(False)}/{get_gpu_vram()} MiB\\n\"\n", + " )\n", + "\n", + "\n", + " def __call__(\n", + " self,\n", + " img_or_path: Image.Image | Path | str,\n", + " lang: str | None = None,\n", + " prompt_text: str | None = None,\n", + " config: str | None = 
None,\n", + " show_prompt: bool = False,\n", + " **kwargs,\n", + " ) -> str:\n", + " self.setup_paligemma()\n", + " if not self.is_paligemma_available():\n", + " raise RuntimeError(\"PaliGemma is not installed or not found.\")\n", + " prompt_text = prompt_text or self.prompt_text_tmpl.format(lang or self.lang)\n", + " image = load_image(img_or_path)\n", + " gen_func = self._generate_mac if IN_MAC and self.quant != 'bfloat16' else self._generate\n", + " prompt, generated_text = gen_func(image, prompt_text)\n", + " if show_prompt:\n", + " cprint(\"INPUT:\", prompt, \"\\nOUTPUT:\", generated_text)\n", + " return generated_text#.strip('\"')\n", + "\n", + "\n", + " def __init__(self, \n", + " lang: str | None = None, \n", + " size: SizeT | None = None,\n", + " quant: QuantT | None = None,\n", + " device: str | None = None, \n", + " *, \n", + " prompt_text_tmpl: str | None = None, \n", + " lazy: bool | None = False,\n", + " **_\n", + " ):\n", + " self.lang = lang\n", + " self.prompt_text_tmpl = prompt_text_tmpl or self.prompt_text_tmpl\n", + " self.size: SizeT = size or '224'\n", + " self.quant: QuantT = quant or 'bfloat16'#'8bits'\n", + " self.device = device or default_device()\n", + " if not lazy and not self.is_paligemma_available():\n", + " self.setup_paligemma()\n", + "\n", + "\n", + "OCRExperimentContext.register_model('PaliGemma', PaliGemmaOCR, {\n", + " \"size\": '224',\n", + " \"quant\": 'bfloat16',\n", + " })\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Context" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "| quant \\ platform | Mem | Mac | Linux | Windows | Colab T4 | Colab L4/H100 |\n", + "| --- | --- | --- | --- | --- | --- | --- |\n", + "| **bfloat16** | 6 GB | ✅ | ✅ | ? | ✅ | ✅ |\n", + "| **8bit** | 3 GB | ✅ | ✅ | ? | ✅ | ✅ |\n", + "| **4bit** | 2 GB | ✅ | ✅ | ? 
| ✅ | ✅ |\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current Configuration:\n", + "\n", + "Locale: System default\n", + "Default Profile: Built-in\n", + "Saved Profiles:\n", + "- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n", + "- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n", + "\n", + "Profile Editor: cursor\n", + "Cache Directory: System default\n", + "Default Torch Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n", + "Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n", + "GUI Theme: System default\n", + "\n", + "--------------------\n", + "\n", + "Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n", + "System default cache directory: /Users/vic/Library/Caches/pcleaner\n" + ] + }, + { + "data": { + "text/html": [ + "
 config cache_dir: None\n",
+       "       model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n",
+       "           device: 'mps'\n",
+       "
\n" + ], + "text/plain": [ + " config cache_dir: \u001b[3;35mNone\u001b[0m\n", + " model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n", + " device: \u001b[32m'mps'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
        force_PIL: False\n",
+       "       use_tunnel: False\n",
+       "       server_url: \n",
+       "   experiment dir: ../experiment\n",
+       "       source_dir: ../experiment/source\n",
+       "        cache_dir: ../experiment/cache\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + " force_PIL: \u001b[3;91mFalse\u001b[0m\n", + " use_tunnel: \u001b[3;91mFalse\u001b[0m\n", + " server_url: \n", + " experiment dir: ..\u001b[35m/\u001b[0m\u001b[95mexperiment\u001b[0m\n", + " source_dir: ..\u001b[35m/experiment/\u001b[0m\u001b[95msource\u001b[0m\n", + " cache_dir: ..\u001b[35m/experiment/\u001b[0m\u001b[95mcache\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Experiment runs:\n",
+       "PaliGemma-crop-post: 0\n",
+       "
\n" + ], + "text/plain": [ + "Experiment runs:\n", + "PaliGemma-crop-post: \u001b[1;36m0\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "try: CONTEXT.cleanup_model() # type: ignore\n", + "except: pass\n", + "\n", + "quant = '4bit'\n", + "# quant = '8bit'\n", + "# quant = 'bfloat16'\n", + "CONTEXT = OCRExperimentContext('PaliGemma', EXP_DIR, quant=quant, load=False)\n", + "CONTEXT.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "401f1ae17c0941cea4755e18e8738ebe", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fetching 9 files: 0%| | 0/9 [00:00 Size: '224'\n", + " Quantization: '4bit'\n", + " Device: 'mps'\n", + " VRAM: 2986/65536 MiB\n", + "\n", + "
\n" + ], + "text/plain": [ + " Size: \u001b[32m'224'\u001b[0m\n", + " Quantization: \u001b[32m'4bit'\u001b[0m\n", + " Device: \u001b[32m'mps'\u001b[0m\n", + " VRAM: \u001b[1;36m2986\u001b[0m/\u001b[1;36m65536\u001b[0m MiB\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ocr_model = CONTEXT.setup_ocr_model(False)\n", + "ocr_model.show_info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Embowered by great charled cypress trees, the ancient mayor stands alone on the outskirts of new orleans, kept tidy by a white-haired old man known only as bambu.
0.96
\n", + "
\n", + "
Embowered by great gnarled cypress trees, the ancient manor stands alone on the outskirts of New Orleans, kept tidy by a white-haired old man known only as Bambu.

Embowered by great charled cypress trees, the ancient mayor stands alone on the outskirts of new orleans, kept tidy by a white-haired old man known only as bambu.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "CONTEXT.reset_results(image_idx=20)\n", + "result = CONTEXT.result('PaliGemma-crop', 20, 0, CropMethod.DEFAULT_GREY_PAD)\n", + "result\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
The echo of the old man's footsteps fades down the hall as...
1.00
\n", + "
\n", + "
The echo of the old man's footsteps fades down the hall as...

The echo of the old man's footsteps fades down the hall as...
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "CONTEXT.result('Paligemma', 20, 7, CropMethod.INITIAL_BOX)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
The house and the old man are alike in many ways, tall, proud, patient, contented always to wait until their master comes home--
0.99
\n", + "
\n", + "
The house and the old man are alike in many ways; tall, proud, patient, contented always to wait until their master comes home--

The house and the old man are alike in many ways, tall, proud, patient, contented always to wait until their master comes home--
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "CONTEXT.result('Paligemma', 20, 1, CropMethod.DEFAULT)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test images\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['00: Action_Comics_1960-01-00_(262).JPG',\n", + " '01: Adolf_Cap_01_008.jpg',\n", + " '02: Barnaby_v1-028.png',\n", + " '03: Barnaby_v1-029.png',\n", + " '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n", + " '05: Cannon-292.jpg',\n", + " '06: Contrato_con_Dios_028.jpg',\n", + " '07: Erase_una_vez_en_Francia_02_88.jpg',\n", + " '08: FOX_CHILLINTALES_T17_012.jpg',\n", + " '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n", + " '10: Galactus_12.jpg',\n", + " '11: INOUE_KYOUMEN_002.png',\n", + " '12: MCCALL_ROBINHOOD_T31_010.jpg',\n", + " '13: MCCAY_LITTLENEMO_090.jpg',\n", + " '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n", + " '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n", + " '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n", + " '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n", + " '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n", + " '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n", + " '20: Strange_Tales_172005.jpg',\n", + " '21: Strange_Tales_172021.jpg',\n", + " '22: Tarzan_014-21.JPG',\n", + " '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n", + " '24: Transformers_-_Unicron_000-004.jpg',\n", + " '25: Transformers_-_Unicron_000-016.jpg',\n", + " '26: WARE_ACME_024.jpg',\n", + " '27: Yoko_Tsuno_T01_1972-10.jpg',\n", + " '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n", + " '29: manga_0033.jpg',\n", + " '30: ronson-031.jpg',\n", + " '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + 
], + "source": [ + "IMAGE_PATHS = CONTEXT.image_paths\n", + "\n", + "[f\"{i:02}: {_.name}\" for i,_ in enumerate(IMAGE_PATHS)]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# EXP_RUN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'PaliGemma-crop-post'" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "EXP_RUN = CONTEXT.experiment_run()\n", + "assert EXP_RUN is not None\n", + "RUN_NAME = EXP_RUN.name\n", + "RUN_NAME\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Base image\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a44c162848d04dc69562869f2d272c93", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HTML(value=\""}},"cd3f6165ed144369a54e4b784cac2a99":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"cdfdd7698432492586d0d19783b20b79":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,
"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ce3896c172d4497b805ba27a6464c0da":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cefeb098a7b04f2782b00e28cd809833":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ac247fdb754c460dac3d2a731a04f74b","placeholder":"​",
"style":"IPY_MODEL_47000f0cc4ce485b9f249ac32a0dc75b","value":" 74.4k/74.4k [00:00<00:00, 5.22MB/s]"}},"d19ccb7ec79a40a483fe40e712c74c8c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d20d3a3712f64dceaeea4b6aeb435b8d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,
"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d338945414dc4b23bb1dc1b81cc17be0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d468d37a817f44ebb5c8b844fc98182a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Action_Comics_1960-01-00_(262)","Adolf_Cap_01_008","Barnaby_v1-028","Barnaby_v1-029","Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013","Cannon-292","Contrato_con_Dios_028","Erase_una_vez_en_Francia_02_88","FOX_CHILLINTALES_T17_012","Furari_-_Jiro_Taniguchi_selma_056","Galactus_12","INOUE_KYOUMEN_002","MCCALL_ROBINHOOD_T31_010","MCCAY_LITTLENEMO_090","Mary_Perkins_On_Stage_v2006_1_-_P00068","PIKE_BOYLOVEGIRLS_T41_012","Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1","Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K","Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2","Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024","Strange_Tales_172005","Strange_Tales_172021","Tarzan_014-21","Tintin_21_Les_Bijoux_de_la_Castafiore_page_39","Transformers_-_Unicron_000-004","Transformers_-_Unicron_000-016","WARE_ACME_024","Yoko_Tsuno_T01_1972-10","Your_Name_Another_Side_Earthbound_T02_084","manga_0033","ronson-031","哀心迷図のバベル 第01巻 - 
22002_00_059"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":20,"layout":"IPY_MODEL_8c8083f3c88c493bb2c86be2ee831cc9","style":"IPY_MODEL_2cf68b7c35634bb684f1a9b5364eebff"}},"d47ae1fd98304f549bb15af1885ba182":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"0px"}},"d6f64705e9da407f9959f1b124e8e47e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2305ed3b768e4930af09fa8b2809df1a","IPY_MODEL_16fcee1797ec48fdba6c556d996eb0c9","IPY_MODEL_397f4e0b79e84838ba3640db134d7b0d"],"layout":"IPY_MODEL_0e7f53ea18fb47bba3f007b7116b8994"}},"d753fc74422941cfa7031efe2c996f50":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6d0fe13087244d38b7e2c81442467ad0","placeholder":"​","style":"IPY_MODEL_4cda5365a57e453e9bbdcc0de548f1e3","value":"model.safetensors.index.json: 
100%"}},"d7f7174351c44565a4778ae02f7b2d73":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d8416277ca174bdebc879a5eeb559203":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d991113b7e924a9ba14b2ee034bc58bb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d9c15d9a41f24f1a86084b0c863d3af8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","
state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_69faa9295aa143629f8ba0ce018454d4","IPY_MODEL_14e4633237104f15850409f4ffcfa30a","IPY_MODEL_413317a231404aa9a2a6580775f06fcb"],"layout":"IPY_MODEL_10442533920f484dbf1974e528c6ec1f"}},"d9fdbddf880c4a0cb12ae568c7c3bc02":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dac4186c487e46c6b56ba7c292629a41":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area"
:null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dedf69f896794fdab878d78a44db1fcd":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"df002956627b496a8fe33a02bcec05a8":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border
":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e2f25159f7b249b9a2d820201413e190":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9c002b71e1f144998d8f5ce683766c2a","placeholder":"​","style":"IPY_MODEL_304ef8bb5c894f3fb9fbb9309fccb2db","value":"tokenizer_config.json: 
100%"}},"e3b557d743624c2680fedc10edc679fe":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50px"}},"e41cf0472cbf40f9b8422faef0f7df6a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e4494cbb70ef4e2fb04a138e12c84e34":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"VBoxModel","state":{"_dom_classes":["context-visor","134608384422912"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_7f5e49d03e4e4dc699479b28c9861e86","IPY_MODEL_e498874bec694db7930da19e87551331"],"layout":"I
PY_MODEL_2ab7e8393c274604a8a33d5f84efcd58"}},"e498874bec694db7930da19e87551331":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","134608506364032"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_33d7a2f0e1f74398b7fa7b5f24ca8b2d","IPY_MODEL_1e23724ddd3341c4970f0f084c32c11f","IPY_MODEL_1a36a60101aa405d96f154646e1b04fa"],"layout":"IPY_MODEL_0951a0674c6b416d888f8cc62a0a0bd7"}},"e49c2a376b28457fab3517524e5841d9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b50cb561a7444f488f477b8a1dedbf6a","placeholder":"​","style":"IPY_MODEL_700c364b9ed742e39d7d066f38456853","value":"model-00007-of-00007.safetensors: 
100%"}},"e5d23d25fb4646d48d863ba26b592273":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e71b8879e55848479fa6f20a4f5010ea":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e7bc0e0e5fed4f6ebf068100805c2439":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e7f41c7ef7b8464899301192fed93749":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","st
ate":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e8a8f2864e2346e9abd9ea7e6371901f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e98b093c952c4e3293abdf5b736b3bd7":{"model_module":"@jupyter
-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ea5a56efc11845e2bf38a65d8e93af7c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"eabc054fcd6b447da92f402662469c8e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_71a15c3988114259961d1943dd4e931c","max":185,"min":0,"orientation":"horizontal","style":"IPY_MODEL_43115512206d4717a02ccd0ef4
133555","value":185}},"ed19a655de0e42529a3eca5b105a8e92":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"eed1b76b9a824cca8f7ddb490bcec2ff":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"ef92abc3751b4b03a673a92e817d39de":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f0d73ff6d04040f1962fd50d2352f23d":{"model_module":"@jupyter-widgets/base","model_module_version":"1
.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f10a031bce3948e2bccca61bbb6f101c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e2f25159f7b249b9a2d820201413e190","IPY_MODEL_56a8217adf3944b980a92b2aaea2bd39","IPY_MODEL_5090734da55b47d5a2a6087e51891e0d"],"layout":"IPY_MODEL_6aceed8edb8e4105b1f807e57bb25d5e"}},"f52b8b4144bd414b97fddf1e63f114b4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"fle
x":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"initial"}},"f54949799fe747e2b4757d73d40960e6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f56c9aac9b874cb79be45bb93af2519d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":"lightblue","font_weight":"bold"}},"f66a7d2bb62d4ef9aec868904a60d8f7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"fb0037405add4963a01046b1ce4d84da":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyte
r-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fc58444ff3d545fcb1714ac56106734c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}}}}},"nbformat":4,"nbformat_minor":0} 
+{"cells":[{"cell_type":"markdown","metadata":{"id":"FO-koL4wUMUg"},"source":["# Testing `Idefics` OCR for Comics\n","> Accuracy Enhancements for OCR in `PanelCleaner`\n"]},{"cell_type":"markdown","metadata":{"id":"WDMB4HFe39wf"},"source":["## Settings for Google Colab"]},{"cell_type":"markdown","metadata":{"id":"Mox-OHw539wf"},"source":["To efficiently manage the image sources for our experiments, we recommend mounting your Google Drive and storing the experiment files there. If you are not familiar with Colab or Jupyter environments, it's best to leave these settings at their default values to ensure smooth operation.\n","\n","- Set `MOUNT_DRIVE` to `True` to enable mounting Google Drive in the Colab environment.\n","This allows the notebook to access files stored in your Google Drive.\n","\n","- `GDRIVE_MOUNT_POINT` specifies the local directory in Colab where your Google Drive will be mounted.\n","This acts as the root directory for accessing any files within your Google Drive from the notebook.\n","\n","- `PANELCLEANER_IN_GDRIVE` specifies the path within your Google Drive where the PanelCleaner project is located.\n","This path is used to access or store any files related to the PanelCleaner project directly from Google Drive.\n"]},{"cell_type":"code","execution_count":1,"metadata":{"executionInfo":{"elapsed":740,"status":"ok","timestamp":1717163941428,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"_ItAMB1U39wg"},"outputs":[],"source":["MOUNT_DRIVE = True\n","GDRIVE_MOUNT_POINT = 'drive'\n","PANELCLEANER_IN_GDRIVE = 'MyDrive/Shared/PanelCleaner'"]},{"cell_type":"markdown","metadata":{"id":"OcFWv_IN39wg"},"source":["# install (Colab)\n"]},{"cell_type":"code","execution_count":2,"metadata":{"executionInfo":{"elapsed":361,"status":"ok","timestamp":1717163944019,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"FsMCNzHZ39wg"},"outputs":[],"source":["import fastcore.all as 
FC\n","import os\n","import re\n","import sys\n","from pathlib import Path\n","\n","from rich import print as cprint\n","from rich.text import Text\n","\n","def info(msg: str):\n"," (t := Text(msg)).stylize(\"bold red\", 0, 6)\n"," cprint(\"_\" * 10, t, \"_\" * 10)\n"]},{"cell_type":"markdown","metadata":{"id":"RkLIZZbo39wg"},"source":["Mount Google Drive"]},{"cell_type":"code","execution_count":3,"metadata":{"executionInfo":{"elapsed":2,"status":"ok","timestamp":1717163947918,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"qoUI5zQe39wg"},"outputs":[],"source":["mnt_point = Path(f\"/content/{GDRIVE_MOUNT_POINT}\")\n","if FC.IN_COLAB:\n"," if MOUNT_DRIVE:\n"," if not mnt_point.exists():\n"," info(\"Mounting Google Drive\")\n"," from google.colab import drive\n"," drive.mount(str(mnt_point), force_remount=True)\n"]},{"cell_type":"markdown","metadata":{"id":"AWZnedVS39wg"},"source":["### Install **PanelCleaner**\n","\n","> We will attempt to use the version of **PanelCleaner** stored in your Google Drive. If it's not available, we'll install it from GitHub.\n","\n","Note that we specifically require the `testbed` branch of the **PanelCleaner** repository, not the main trunk. 
This branch contains necessary configurations and experimental features that are crucial for the tests conducted in this notebook."]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":224},"executionInfo":{"elapsed":12781,"status":"ok","timestamp":1717163977869,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"5VSxqsyK39wg","outputId":"049bf207-b965-499b-8932-ac3044a01621"},"outputs":[],"source":["if FC.IN_COLAB:\n"," pc_path = mnt_point/PANELCLEANER_IN_GDRIVE\n"," tb_path = pc_path/'pcleaner/_testbed'\n"," if tb_path.exists():\n"," info('Installing PanelCleaner from your Google Drive')\n"," else:\n"," info('Installing PanelCleaner from GitHub')\n"," !git clone -b testbed https://github.com/civvic/PanelCleaner.git\n"," tb_path = Path('PanelCleaner/pcleaner/_testbed')\n"," assert tb_path.exists(), \"PanelCleaner not found\"\n"," os.chdir(tb_path)\n"," sys.path.append(f\"{pc_path}\")\n"," sys.path.append(f\"{tb_path}\")\n"," !pip install -q -r requirements-colab.txt\n"]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":244},"executionInfo":{"elapsed":88801,"status":"ok","timestamp":1717162885976,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"lxwhxBydCiDj","outputId":"48d07921-9afe-46df-a468-e3e43b96f303"},"outputs":[{"data":{"text/plain":["'4.42.0.dev0'"]},"execution_count":5,"metadata":{},"output_type":"execute_result"}],"source":["if FC.IN_COLAB:\n"," !pip install -q flash-attn --no-build-isolation\n"," !pip install -q transformers accelerate datasets peft bitsandbytes\n","\n","import transformers\n","transformers.__version__"]},{"cell_type":"markdown","metadata":{"id":"gLER7Z0u39wh"},"source":["# Prologue"]},{"cell_type":"code","execution_count":6,"metadata":{"executionInfo":{"elapsed":28634,"status":"ok","timestamp":1717162918596,"user":{"displayName":"Vicente 
Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"q_7GYpnWUMUl"},"outputs":[],"source":["from testbed.experiments import ExperimentsVisor, CropMethod, OCRExperimentContext\n","from testbed.ocr_idefics import IdeficsOCR, get_gpu_vram\n","from testbed.helpers import IN_MAC, IN_LINUX\n"]},{"cell_type":"code","execution_count":7,"metadata":{"executionInfo":{"elapsed":13,"status":"ok","timestamp":1717162918597,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"cQ0R9v8r39wh"},"outputs":[],"source":["if IN_MAC:\n"," !pip install -q mlx_vlm\n","\n"," import mlx.core as mx\n"]},{"cell_type":"markdown","metadata":{"id":"CAHWMrX339wh"},"source":["# GPU"]},{"cell_type":"code","execution_count":8,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":396},"executionInfo":{"elapsed":12,"status":"ok","timestamp":1717162918597,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"f8XHRVRG39wh","outputId":"5f2d295c-1a32-40af-b964-20b73d546458"},"outputs":[{"data":{"text/html":["
          metal.is_available(): True\n","           metal.device_info(): {'memory_size': 68719476736, 'max_recommended_working_set_size': 51539607552, \n","'max_buffer_length': 38654705664, 'architecture': 'applegpu_g13s'}\n","     metal.get_active_memory(): 0\n","       metal.get_peak_memory(): 0\n","      metal.get_cache_memory(): 0\n","\n","
\n"],"text/plain":[" \u001b[1;35mmetal.is_available\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[3;92mTrue\u001b[0m\n"," \u001b[1;35mmetal.device_info\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'memory_size'\u001b[0m: \u001b[1;36m68719476736\u001b[0m, \u001b[32m'max_recommended_working_set_size'\u001b[0m: \u001b[1;36m51539607552\u001b[0m, \n","\u001b[32m'max_buffer_length'\u001b[0m: \u001b[1;36m38654705664\u001b[0m, \u001b[32m'architecture'\u001b[0m: \u001b[32m'applegpu_g13s'\u001b[0m\u001b[1m}\u001b[0m\n"," \u001b[1;35mmetal.get_active_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n"," \u001b[1;35mmetal.get_peak_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n"," \u001b[1;35mmetal.get_cache_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n","\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
     total VRAM: 65536 MiB\n","    active VRAM: 0 MiB\n","
\n"],"text/plain":[" total VRAM: \u001b[1;36m65536\u001b[0m MiB\n"," active VRAM: \u001b[1;36m0\u001b[0m MiB\n"]},"metadata":{},"output_type":"display_data"}],"source":["if IN_MAC:\n"," gpu_name = mx.metal.device_info()['architecture']\n"," cprint(\n"," f\"{'metal.is_available()':>30}: {mx.metal.is_available()}\\n\"\n"," f\"{'metal.device_info()':>30}: {mx.metal.device_info()}\\n\"\n"," f\"{'metal.get_active_memory()':>30}: {mx.metal.get_active_memory()//1024//1024}\\n\"\n"," f\"{'metal.get_peak_memory()':>30}: {mx.metal.get_peak_memory()//1024//1024}\\n\"\n"," f\"{'metal.get_cache_memory()':>30}: {mx.metal.get_cache_memory()//1024//1024}\\n\"\n"," )\n","else:\n"," !nvidia-smi\n"," import subprocess\n"," gpu_name = subprocess.check_output(\n"," \"nvidia-smi --query-gpu=gpu_name --format=csv,noheader\", shell=True\n"," ).decode('utf-8').strip()\n"," \n","\n","cprint( f\"{'GPU':>15}: {gpu_name}\\n\"\n"," f\"{'total VRAM':>15}: {get_gpu_vram()} MiB\\n\"\n"," f\"{'active VRAM':>15}: {get_gpu_vram(False)} MiB\")\n","\n"]},{"cell_type":"markdown","metadata":{"id":"Op4kiDaAUMUl"},"source":["----\n","# Idefics experiments"]},{"cell_type":"markdown","metadata":{"id":"q8q3vnLH39wh"},"source":["## Experiment directory"]},{"cell_type":"markdown","metadata":{"id":"6AB4w9C-39wh"},"source":["Directory where the images reside (`EXP_DIR/source/`), the auxiliary images will be cached (`EXP_DIR/cache/`), and the experiment results will be saved. You can change the default location here.\n","\n","NOTE: the default value assumes we are currently inside `PanelCleaner/pcleaner/_testbed` directory. 
You can check that this is the case with `Path('.').resolve()`."]},{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":49},"executionInfo":{"elapsed":652,"status":"ok","timestamp":1717162939430,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"poPksk6739wh","outputId":"1452cde6-a74a-447c-eec2-3049bd2f8f19"},"outputs":[{"data":{"text/html":["
    Working dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed\n","Experiments dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment\n","
\n"],"text/plain":[" Working dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/\u001b[0m\u001b[95m_testbed\u001b[0m\n","Experiments dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/\u001b[0m\u001b[95mexperiment\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["EXP_DIR = Path('./experiment')\n","cprint(f\"{'Working dir':>15}: {Path('.').resolve()}\\nExperiments dir: {EXP_DIR.resolve()}\")\n"]},{"cell_type":"markdown","metadata":{},"source":["# Test images\n"]},{"cell_type":"markdown","metadata":{},"source":["Copy your images to the source directory:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"data":{"text/html":["
/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/source\n","
\n"],"text/plain":["\u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/\u001b[0m\u001b[95msource\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["cprint((EXP_DIR/'source').resolve())"]},{"cell_type":"markdown","metadata":{},"source":["or download the standard set:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !gdown --id 1MCqUImwFS5iQ271CD9_t2FSugJXdYj0a -O experiment.zip"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !unzip -qn experiment.zip -d ."]},{"cell_type":"markdown","metadata":{"id":"8Iondm2oUMUl"},"source":["# Setup ngrok (Colab)"]},{"cell_type":"markdown","metadata":{"id":"nuHPp1U7UMUl"},"source":["The experiments can generate hundreds of images, and maintaining the **PIL** images in memory is not efficient. All the generated images are cached and visualized on demand through a URL pointing to the local cache. This approach prevents the kernel from being overloaded with **PIL** images, with the front-end responsible for fetching the image and the backend web server (not the kernel) for serving the image in another process. This method is quick and efficient. As an added bonus, the saved notebook remains lean and fit; it doesn't store the Base64 versions of all the output cell images.\n","\n","Unfortunately, this approach does not work as is in **Colab**. Google Colab runs on an older Ubuntu 18.04 VM, so all the usual networking challenges with Docker, or whatever VMs Google is using, apply. Google also goes to great lengths to avoid exposing its internal architecture. We have two options:\n","- Let the Jupyter kernel serve the images itself, which is slow and memory-consuming.\n","- Use a tunnel to map localhost (server) to whatever IP and port the front-end (the browser you're currently using) is running on. 
We can use **ngrok** for this, but *ngrok* is a commercial service that has been abused and now requires confirmation the first time the tunnel connects, which can be inconvenient for the user. It also requires the user to open a free account and obtain an auth token.\n","\n","You choose.\n","\n","If the notebook is running in Colab and ngrok has been successfully installed and the tunnel has been created, the default setting is USE_PIL=False. You can set the environment variable USE_PIL=True to force the use of PIL images, but note that in certain circumstances, Colab will complain because the free tiers are usually memory constrained.\n","\n","If you don't change the default settings and\n","- the notebook is running locally, it'll serve the images directly without any additional setup.\n","- the notebook is running in Colab, it'll serve the images through a web server and ngrok.\n"]},{"cell_type":"code","execution_count":10,"metadata":{"executionInfo":{"elapsed":425,"status":"ok","timestamp":1717162947685,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"nVAga31l39wh"},"outputs":[],"source":["if FC.IN_COLAB:\n"," os.environ['USE_TUNNEL'] = 'True'\n"," os.environ['USE_PIL'] = 'False'\n"]},{"cell_type":"code","execution_count":11,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":184},"executionInfo":{"elapsed":8015,"status":"ok","timestamp":1717162983857,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"tG8xtrBb39wh","outputId":"c2e625b8-0f78-44d0-af3a-ca6895a7a340"},"outputs":[],"source":["SERVER = None\n","if os.environ['USE_PIL'].lower() == 'false' and os.environ['USE_TUNNEL'].lower() == 'true':\n"," import testbed.web_server as web_server\n"," SERVER = web_server.setup_ngrok(web_server.WebServerBottle, Path(EXP_DIR))\n"]},{"cell_type":"markdown","metadata":{"id":"p5YiBbJ739wh"},"source":["# 
CONTEXT"]},{"cell_type":"markdown","metadata":{"id":"1yjjtdjJ39wi"},"source":["| quant, attn \\ platform | Mem | Mac | Linux | Windows | Colab T4 | Colab L4/H100 |\n","| --- | --- | --- | --- | --- | --- | --- |\n","| **float16** | 17 GB | ✅ | ✅ | ? | ❌ | ✅ |\n","| **float16 + attn** | 17 GB | ❌ | ✅ | ? | ❌ | ✅ |\n","| **8bit** | 10 GB | ✅ | ✅ | ? | ✅ | ✅ |\n","| **8bit + attn** | 10 GB | ❌ | ✅ | ? | ❌ | ✅ |\n","| **4bit** | 6 GB | ✅ | ✅ | ? | ✅ | ✅ |\n","| **4bit + attn** | 6 GB | ❌ | ✅ | ? | ❌ | ✅ |\n"]},{"cell_type":"markdown","metadata":{"id":"AUgSvi6CUMUm"},"source":["Creates the `IdeficsExperimentContext` object we'll use to manage the experiments.\n"]},{"cell_type":"code","execution_count":15,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":779},"executionInfo":{"elapsed":43567,"status":"ok","timestamp":1717163091060,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"wroznCt0CQdT","outputId":"0d259b2d-b20a-44cc-a0be-1198ad570b46"},"outputs":[{"name":"stdout","output_type":"stream","text":["Current Configuration:\n","\n","Locale: System default\n","Default Profile: Built-in\n","Saved Profiles:\n","- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n","- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n","\n","Profile Editor: cursor\n","Cache Directory: System default\n","Default Torch Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n","Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n","GUI Theme: System default\n","\n","--------------------\n","\n","Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n","System default cache directory: /Users/vic/Library/Caches/pcleaner\n"]},{"data":{"text/html":["
 config cache_dir: None\n","       model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n","           device: 'mps'\n","
\n"],"text/plain":[" config cache_dir: \u001b[3;35mNone\u001b[0m\n"," model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n"," device: \u001b[32m'mps'\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
        force_PIL: False\n","       use_tunnel: False\n","       server_url: \n","   experiment dir: experiment\n","       source_dir: experiment/source\n","        cache_dir: experiment/cache\n","\n","
\n"],"text/plain":[" force_PIL: \u001b[3;91mFalse\u001b[0m\n"," use_tunnel: \u001b[3;91mFalse\u001b[0m\n"," server_url: \n"," experiment dir: experiment\n"," source_dir: experiment/source\n"," cache_dir: experiment/cache\n","\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
Experiment runs:\n","Idefics-crop-post: 16\n","
\n"],"text/plain":["Experiment runs:\n","Idefics-crop-post: \u001b[1;36m16\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
     Quantization: '4bit'\n","Flash attention 2: N/A\n","             VRAM: 0/65536 MiB\n","\n","
\n"],"text/plain":[" Quantization: \u001b[32m'4bit'\u001b[0m\n","Flash attention \u001b[1;36m2\u001b[0m: N/A\n"," VRAM: \u001b[1;36m0\u001b[0m/\u001b[1;36m65536\u001b[0m MiB\n","\n"]},"metadata":{},"output_type":"display_data"}],"source":["quant = '4bit' if IN_MAC or FC.IN_COLAB else 'float16'\n","flashattn = True if not FC.IN_COLAB else False\n","CONTEXT = OCRExperimentContext('Idefics', EXP_DIR, \n"," quant=quant, flashattn=flashattn, \n"," server=SERVER, run_name='Idefics-crop-post', load=True)\n","CONTEXT.show()\n"]},{"cell_type":"code","execution_count":16,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"a93af477ed4c4af8aba4d97397009adc","version_major":2,"version_minor":0},"text/plain":["Fetching 11 files: 0%| | 0/11 [00:00. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n","Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"0f89471c58b94c2db7ffab9143b26b6b","version_major":2,"version_minor":0},"text/plain":["Fetching 11 files: 0%| | 0/11 [00:00 Quantization: '4bit'\n","Flash attention 2: N/A\n"," VRAM: 5261/65536 MiB\n","\n","
\n"],"text/plain":[" Quantization: \u001b[32m'4bit'\u001b[0m\n","Flash attention \u001b[1;36m2\u001b[0m: N/A\n"," VRAM: \u001b[1;36m5261\u001b[0m/\u001b[1;36m65536\u001b[0m MiB\n","\n"]},"metadata":{},"output_type":"display_data"}],"source":["ocr_model = CONTEXT.setup_ocr_model(False)\n","ocr_model.show_info()"]},{"cell_type":"markdown","metadata":{"id":"NnKaY8er39wi"},"source":["Check the images are in place"]},{"cell_type":"code","execution_count":17,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":319,"status":"ok","timestamp":1717163095107,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"ZXL84T8dUMUm","outputId":"12d640aa-69d7-46aa-c2c8-9d400cccb701"},"outputs":[{"data":{"text/plain":["['00: Action_Comics_1960-01-00_(262).JPG',\n"," '01: Adolf_Cap_01_008.jpg',\n"," '02: Barnaby_v1-028.png',\n"," '03: Barnaby_v1-029.png',\n"," '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n"," '05: Cannon-292.jpg',\n"," '06: Contrato_con_Dios_028.jpg',\n"," '07: Erase_una_vez_en_Francia_02_88.jpg',\n"," '08: FOX_CHILLINTALES_T17_012.jpg',\n"," '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n"," '10: Galactus_12.jpg',\n"," '11: INOUE_KYOUMEN_002.png',\n"," '12: MCCALL_ROBINHOOD_T31_010.jpg',\n"," '13: MCCAY_LITTLENEMO_090.jpg',\n"," '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n"," '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n"," '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n"," '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n"," '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n"," '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n"," '20: Strange_Tales_172005.jpg',\n"," '21: Strange_Tales_172021.jpg',\n"," '22: Tarzan_014-21.JPG',\n"," '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n"," '24: Transformers_-_Unicron_000-004.jpg',\n"," '25: Transformers_-_Unicron_000-016.jpg',\n"," '26: 
WARE_ACME_024.jpg',\n"," '27: Yoko_Tsuno_T01_1972-10.jpg',\n"," '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n"," '29: manga_0033.jpg',\n"," '30: ronson-031.jpg',\n"," '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']"]},"execution_count":17,"metadata":{},"output_type":"execute_result"}],"source":["[f\"{i:02}: {_.name}\" for i,_ in enumerate(CONTEXT.image_paths)]\n"]},{"cell_type":"code","execution_count":20,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":255,"referenced_widgets":["41e7d3d5da8843bf97d53a5acc2fdd9c","def94ca8125e4128901b038c5e2e135f","a5b8058abf214b46a3dbc121c285cca7","7792b1aed8724a98817331a5610f9909","4c558212f11743be83311ff10e506255","db4a37385bc04a27ba786692ff23f32c","5cc2cd6464854d1996f83b6cccb71be6","86692867f33a4e2885722f8a9efaa144","57b0595135a84b04bf1c040150132fca","f9feda61c229465da90f33c69e7a4398","2b2f189c18e0494294ad7dc5e6b5a7ce","0eb627a4cba9497f8ea6833ac8080cf3","93acb5ead7244f2b8cb7ddb3db658dc1","827cc40c923e4081bfe56231f386d855","022996922e9d40919a086ce15625e4f3","74e995f849ec449fb5fd50166397897a","c2494c8753e9409aa5e6d8862be5c773","0e112f697e7d4a34826d2a46a067c14e","ee91033cfb084d4e83529a11edb48134","f5e02d40775a49a5b895d498233464f9","cd03b8f4a3144660a66bfb8361433973","de48c263fb124df9b7bf72805e1c1baa","3e76e6f2463549ad91832afd9da60672","cc0369009fb04a0b83a10510d7ac0388","44f0c3de011d47ccab46270d16c690b7","b2e63ba8c2f149d38e8c7722bc47ef07","0f9ca91ec321450f9ce53a84f2459f86","113566b1f4b74279a27f0ab51cffb683","4b1fe42380a1464180f548aaccb220ba","44e2a19f9dcb450cbca016e7b3e9c79d","7a75478806bf465cabc8d1429030f615","fb224135f5a14ba4872e0e64428d60b6","3fb8a4f0b2444c28a082fcbb278979cc","23f2673b6b2f40f19f0d4b1e1d5c6435","c54baefce7804a0eb8729ba88a22f713","af23aff11ddd45b9abf16daa53884c5f","029cc314dfba464b8c91194ee41bb604","76c165d73f574ab39d3357e87b401ac9","68d09a9f2fc84ead952160fe05c5dc90","74d438026f47463f8aa440eb39a58369","a80659636b7f4aa4aeaea4c1149620ea","86315ef030234020a49cfb2275e4ee53","a863d10b9ee947338b73f542c3
50efe4","ffeb080904984371bb8b715963c00069","f8bc3fb84517404b94502e7498d5c55a","3e1245f557d54e999d58c858f664a0a7","74d88ac2166a407ca2fb1aaf57006018","27e1a3f6033f4ee0aea9bf5726eb78cc","2348e08910b149048c90269942321d57","e6fac459206848898c206456b9dc8b1a","bfc1517ef24c459aae0a536c315ddad1","b9b462859a1846e2b2fdc625f6904506","167be8a944f0447c9c66fe8a485d309e","11c9edbf48f445d09d85f849915496e2","9bb8a92858144190b6dd506673234315","d0f9bc72d3054c45ba0c12d7839e994b","00120cae44354b1397eabc117243f609","5bdcbdc274d14afa91406ca177fe66b6","a478421e36a344eca8cf732ea07241dd","8470bcc137ba4295996a4ef0a2e8bf9c","5cc12b33d27a4715a995173135478c9c","fd00985eb51141799c4ad95c99461926","4fd269b8015f4be783474e835ff8a52e","decf6497acee49faae554f3db39be3ce","8aef4716537e417ea4203a68711cb4e9","bd64ee547bd049398c6d37f1ca476461","5da238d5b8514352a70b6b16cc9d944f","e74be1cd463d42a783ddea46072f4499","a6072dc7d6294061b840144104e07538","f1513571ca224451a3809b56a5739525","e7caf68af3c84c6195483d0a27225164","f6c25f96ead14b13a1a716a6de4e3f5c","4a51933821d045cd881418ff2a2b30e7","1d1a5dd1cc1d44be948464159187e856","d418846006b8496989e86276d8538b50","2ad60e460fe64f0087cf3052f06451ba","6c0b5365fd624af793b4d37f7449a7a5","9590087714614e948f3a8e4dd537a895","24551ce1bc1c4cdab44c25ffd783216e","6aa5fe05ab024eb0bd2b3da7881949f2","8758c30979a2414aac0d1583ab856acf","5768ef94421043d4b87c3a8f36b2fc88","950f956a453648ddbe66fa8b7f484115","1d40ec2dce6d43ae9ed7b02ca6ee5ade","c160c32bb63b405c81788dd2c49ed74a","d142fa6d7f1f452185080e55b24fef2c","67a75a653d0948ac92b0e30a8057f97f","08e41a3312ec4dc698c9677f13493dec","b8b0508e598c461195fdff01e45cefea","dc4134ab8bc54c9084238bce0d63dedd","d18de743a82d421b95ef67cce0cf8300","65d59de757ae4440b71dafd4fbe1ef0f","d265b8b4b76641599b70674b3b6843c2","f58a0d2e63aa456db54e8b34185a9e19","5bee9b83cf53437da9f86ca3e8c28ec8","feb6497eb22b418b89260242009018b1","fcebd41dea6a49d786827eb30e6202f0","cf9218e035054fe49188325d9173124a","ca76a74d6b204b4cae1f1d15a928782f","4fe39d4b2ce74d529df2c59c9b02b46
8","14ff55af3b4b4f19be5ecbdd7fdbb4b3","f937e91a44ba4ba29379c6cd98c7b06e","69c93310236e407c9ec47efba5949c2c","a9292503813741b096f1627ac629afcf","31b1c0b2c88244b6a1144b5b9bfd6ff2","4377ad0c4af348689bf257dbc1e45d0f","15beb9c45e974b8b9c7928f7dee85721","00d90919857a4182b41f71e4e029b8c9","a0bfaef233fc485088fd5f1069f0e7b5","91f9cb7da8554114a0fdccf7125323f8","afa6b2be45404268bbeae05757c38ea7","c707c0173ba64f278d29d8f91c5ebe72","f84e4be232c7471e8d7c32463d1e69f4","0ec87ae9355747b381f3a3c4759e30a1","7ff52331ff05443c885358a2bc91e45a","2e7dde21eddb461d94120cc0bc8b635d","acd43b8610784095ba13c79a9a8b66b3","94107ffa62c74aa9a7c1c6e9e0926534","aa8727521af944b990cd47bd7f624875","c49d62b351924bac9b758ed566642bc9","03ca2146c0a249e98891c826dc0c7166","3e67ecba11ed494eb5d04ac09aaf2982","cda2a733038b4a2d8d9126478f7c0ae2","856b386e9e354da6985976453f3146df","ae912f1a78a541d58364872d548fb113","1d92eaf91d064d7799cdbdce29cb4a80","8a4eec25ed2f4d888751714385bffb48","23683387f91d412ebcc1c6bda48f36c4","72ed4329ca9140078197aee662b9fe9c","cdbd26ce916d4aefa6bc2de33451b894","d81b6b93bf85468da26dd410d1fbf9ae","926cfc439c6f42c7bc7874e2abd27b00","925d430a70474c2bb645ab1c9635a3d8","1696aa6d01824c8da87bd6c3c99d9338","28e884495ed9480dbf42901aee98c8da","5cb8ef2c547741c782e0b87b44e88883","dfc87068738d4a51aa4582d66655ed5d","65c5547ef83346a3a7fe89350fd0498c","cc4dafcc559147f39d31c93d7bacead0","5ebed15f4ad74010a9dcc1aee2884acb","1d243bb88a024f5cad553638cb66b58f","00358206e8e44b7f93e33a68c77ed88e","1d01c3b7a45443219a40b6fc175bea93","e9638ccc83e64995b8d08950b5d4177c","a38abadac68d4e909589ede57054c9b3","9f407f26d4494b81acf8a2b3d22a4db0","047b24fa06104e88a4649b55d0704bba","e2ba9bdc71e342eea3fe5f0b2d50cad7","768d6cdbca5b4fd081c3c0b665af29e6","6d67906818de434193c0d07dffc16ca3","2d8e2453d52148de9e6f0af80025d277","beae34e9850d41f2928135a17bb7e24e","9ab61a0b2be94ab4b07712847603099f","18ff5098c2b0481e8122bafbb6773254","0adde2a598044d99b27e88632aa79204","18cd166a13cd40409789425e6ad8b1cf","262c89785cf747f8b2fabb01df958f46","4
81aca32d2314b71b8e2fe34fd5c2752","52bdbb92c8da4412944a3130a6739ff6","277c284a9a6f469285a8cd4adafeac48","c279c338d2ac48d1a905625818533a46","9e8c07a31ffe42f99b3c9234a53fa6dd","0a19274b68ec450895fd7f92941a2d90","4598f6ae49f94d639503175f378f011e","41fe52cfcadb4fcfb708f7f5e7c027ff","330cc931269846f2b6dc82c67fb2251b","c5d36dc5c318484db7e0c8a078aa175b","c159c8f334a64512a6d28efecaea2a10","479dc71849a4477183258d7ef9c0d4c2","d601d39013c24a5db643a09fe3fbf3af","4356a69037274d4288ab9cb2aaf72aef","747ff338df744bb2b4ea31ec325461bb","62416330d58b40d4b5d7cef51dff2245","bbdba0ab16184329bd1479a315c5946e","66b382b44b994abd9279d5edb90f86a1","a4016511fd92466e999483f4314d0a86","262eb29055f44a6dad1a9ad9d2d06ea0","57677aa2aee34f8b831622091eccdc42","6fe70dc2c58c4f34be8f25e507a203fa","cfcf8add8ce44707b89b97b291819b2e","da875aa1f4af4128b1c42bc30c79a8a8","d8d9489d104747d0aeaceab091a5dbba","b0bf5147c56b4a9e829fe1d7016a8971","06127d2f58d14e9b834f9c42fb6ccdee","1491cc4eedf14d788cc7a1750c84021e","ebae562970a84eac8ba32466a405aa06","acae4f3d583f4bb3bc7dc118ce63ae52","fc6764a48f134c7c863b83ac060a7c4e","b916a63767b44a7f9af4c4fd2d9f9118","44e7eca8403f42eab30a12cb6c4ec177","cad18185ae304fda8d114efce29f8b2b","022a6ebb208c40a38d991a7f48cf8199","5a12c2fbe4f949ec931de9e0ccb035ee","1cac8099d5a84d24bd968b0df0293e72","61e8b3559eb8464b9d3544921f9f716d","04fb26ceffe54dffb6b0f9864a3ada8a","83e6807d435d458ab54317a00543458a","383311a4464e4759972acb80200e05f3","aa8959e5f31a4e2a917849ff38e2d54a","cfad9960dbe7451db558e5d23edd4f87","7c0dbe672a904b7486a3274e87b42588","6f57b2e373e84323a7a0a2052c8a37db","418e999ec09c434ab6ac89e3b4b11365","4359515725fb4bfc93a8ac6ca824c6bd","7a6ff42ed0954523a58a579968191cca","f4f7b2ccfbd94a30a1f7a413c159ef07","7048ae3da65443a1a6996d937af1fec8","b74df493ed504a6aa9e2dd4c4bc48c56","0606d53650544db098d23fdf417584ad","fc986695bc55477099e8df7f9a0022cd","bf2ea05486a644e8ad8166ba0a9ff0a4","8c77254960fc4ed59a0431bb9f43b9dd","adda7298580c4bffb8d3c5e272e8eb7f","b9972d7b5b614566be7408889f1893bf","e36439
1a3e1d41d899735458a2b01436","7c30a1a41d3747e0b87625488e29c1b3","25cebb1840ea4239983838fdf2e5c84d","26deaa27cdea4addb812a67b4a0aae7a","bd6f1373744c49aa9ad9947714998e5d","83c2db21b673401fb11b45dde165fa14","96f3c7a52a37470a98de481a7ea343ca","eae063c3c5f54c86aee0b68d31f721ff","2472a63aefde4dc1bb199ab9847ca993","c4c988f7315a4734b4b4efcd378d2fe0","a05a89a8727a486792504d208aa43034","2c62ea060f2448ab928eb2b75897a227","b67a72677c674923b15e568e6fcdeb5c","9875ff389f3d46268417e9b53fbceaf5","d6a71cec5ffe4ba5992636efad52b8fe","62ec88a9e4e549729c5531c2c84ae21d","2c3264065ce141ab9f4c02ca29538962","37675716244d46fcb115ba1d8c500d3f","e37b7be566694f66bd5aa6e82572a2e7","b9fa62d6dd5e4e67921be98817cb00e2","9d210efdd32a4aa8a2b084cb9790ed48","3041f272c57c4038b11806de84232840","613b52321ad543d3b1f67c25642edf54","61a3f504ff5645db9d499f8f2ce0cb37","e1dcbdbabc40469793a494f59318b088","b5fe8a048b9a4db3bcfa77c81dfa8b0c","a4a7b41a2d6d4836b1d5b7c46d214a27","ab1faf852db24d418b647d44305f9998","f9b21842783e489d99ad9d25cd7d0914","5674ed00bf624d4fa0180ca9d855a797","66d2a279e9704c4cb03e51791814387e","a24e83fa73e24808a1b39f3f3d29e7ac","2d058a65cfdf4c628a38bd34eed8a8a8","934d301cd7d94a7597047efd861ceb66","113f7abf92c04d61aeb27a53796dddde","357d2c73ff2346a2a04ec63f11796048","7b065f9d0a454070b32e2f539f94769e","6cd377ac03004bf3b12fed2c9ec72aa9","010d7b561b1541e680b2a93007ec8bc5","73b6e040d91845c8ba2d220b53f93c09","ff88a16ea32e4df4b3abce055f6e2bc2","d71eb0b4940b4708a51b90af9e746db7","66f62676485e444db8fb34d870047708","3eb2ad0ee975499691c2655bdb9d90e5","1a7e467e1e7d4adf9c04217b09a54173","4e5a20cc1ed24d5d89305329da2b48e1","6f67bfe2f79c424fafbe0f85e40c1642","f17c2e4e8b2045e9930d9f2d73bf2ac7","cce621f14c99412ea7b92441fd4f3223","2c3a7c4e59e64433be763bcabd340df3","ae6d816055bd43a4a01cb54b8fb84486","598b5756615241c298f8f44d1e3b3f4f","d0567212f6824f5daec8a14d4c7bb0c7","aafbd97ba12d40e4affc97c378ae165e","798fe48e8b184a4ab35f56fca750dcbe","4c7342561d52489581e643f1d3121eb8","777aaee7cfdf4afd974983fdbd37ac8a","95af7b96533
b47a7be57deb9cd5b0fb8","49cf9e00eb8b44cd9168574abe3df2c5","b7156a8bbb104cef9ce2ebbcf458f2cb","eb03b5ac6feb452a86ab17129481d3cb","ed6bd84830a642fd832ea549372056ad","6d69bd7081d44f748463fe8252ce675d","8d52617983b442e7b02d3d2ef885376c"],"resources":{"http://localhost:8080/experiment/cache/Action_Comics_1960-01-00_(262)/_crop/Action_Comics_1960-01-00_(262)_13_Default,%20grey%20pad.png":{"data":"CjwhRE9DVFlQRSBodG1sPgo8aHRtbCBsYW5nPWVuPgogIDxtZXRhIGNoYXJzZXQ9dXRmLTg+CiAgPG1ldGEgbmFtZT12aWV3cG9ydCBjb250ZW50PSJpbml0aWFsLXNjYWxlPTEsIG1pbmltdW0tc2NhbGU9MSwgd2lkdGg9ZGV2aWNlLXdpZHRoIj4KICA8dGl0bGU+RXJyb3IgNDA0IChOb3QgRm91bmQpISExPC90aXRsZT4KICA8c3R5bGU+CiAgICAqe21hcmdpbjowO3BhZGRpbmc6MH1odG1sLGNvZGV7Zm9udDoxNXB4LzIycHggYXJpYWwsc2Fucy1zZXJpZn1odG1se2JhY2tncm91bmQ6I2ZmZjtjb2xvcjojMjIyO3BhZGRpbmc6MTVweH1ib2R5e21hcmdpbjo3JSBhdXRvIDA7bWF4LXdpZHRoOjM5MHB4O21pbi1oZWlnaHQ6MTgwcHg7cGFkZGluZzozMHB4IDAgMTVweH0qID4gYm9keXtiYWNrZ3JvdW5kOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9lcnJvcnMvcm9ib3QucG5nKSAxMDAlIDVweCBuby1yZXBlYXQ7cGFkZGluZy1yaWdodDoyMDVweH1we21hcmdpbjoxMXB4IDAgMjJweDtvdmVyZmxvdzpoaWRkZW59aW5ze2NvbG9yOiM3Nzc7dGV4dC1kZWNvcmF0aW9uOm5vbmV9YSBpbWd7Ym9yZGVyOjB9QG1lZGlhIHNjcmVlbiBhbmQgKG1heC13aWR0aDo3NzJweCl7Ym9keXtiYWNrZ3JvdW5kOm5vbmU7bWFyZ2luLXRvcDowO21heC13aWR0aDpub25lO3BhZGRpbmctcmlnaHQ6MH19I2xvZ297YmFja2dyb3VuZDp1cmwoLy93d3cuZ29vZ2xlLmNvbS9pbWFnZXMvbG9nb3MvZXJyb3JwYWdlL2Vycm9yX2xvZ28tMTUweDU0LnBuZykgbm8tcmVwZWF0O21hcmdpbi1sZWZ0Oi01cHh9QG1lZGlhIG9ubHkgc2NyZWVuIGFuZCAobWluLXJlc29sdXRpb246MTkyZHBpKXsjbG9nb3tiYWNrZ3JvdW5kOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9sb2dvcy9lcnJvcnBhZ2UvZXJyb3JfbG9nby0xNTB4NTQtMngucG5nKSBuby1yZXBlYXQgMCUgMCUvMTAwJSAxMDAlOy1tb3otYm9yZGVyLWltYWdlOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9sb2dvcy9lcnJvcnBhZ2UvZXJyb3JfbG9nby0xNTB4NTQtMngucG5nKSAwfX1AbWVkaWEgb25seSBzY3JlZW4gYW5kICgtd2Via2l0LW1pbi1kZXZpY2UtcGl4ZWwtcmF0aW86Mil7I2xvZ297YmFja2dyb3VuZDp1cmwoLy93d3cuZ29vZ2xlLmNvbS9pbWFnZXMvbG9nb3MvZXJyb3JwYWdlL2Vycm9yX2xvZ28tMTUweDU0LTJ4LnBuZykgbm8tcmVwZWF0Oy13ZWJraXQtYmFja2
dyb3VuZC1zaXplOjEwMCUgMTAwJX19I2xvZ297ZGlzcGxheTppbmxpbmUtYmxvY2s7aGVpZ2h0OjU0cHg7d2lkdGg6MTUwcHh9CiAgPC9zdHlsZT4KICA8YSBocmVmPS8vd3d3Lmdvb2dsZS5jb20vPjxzcGFuIGlkPWxvZ28gYXJpYS1sYWJlbD1Hb29nbGU+PC9zcGFuPjwvYT4KICA8cD48Yj40MDQuPC9iPiA8aW5zPlRoYXTigJlzIGFuIGVycm9yLjwvaW5zPgogIDxwPiAgPGlucz5UaGF04oCZcyBhbGwgd2Uga25vdy48L2lucz4K","headers":[["content-length","1449"],["content-type","text/html; charset=utf-8"]],"ok":false,"status":404,"status_text":""},"http://localhost:8080/experiment/cache/Strange_Tales_172005/_crop/Strange_Tales_172005_13_Default,%20grey%20pad.png":{"data":"CjwhRE9DVFlQRSBodG1sPgo8aHRtbCBsYW5nPWVuPgogIDxtZXRhIGNoYXJzZXQ9dXRmLTg+CiAgPG1ldGEgbmFtZT12aWV3cG9ydCBjb250ZW50PSJpbml0aWFsLXNjYWxlPTEsIG1pbmltdW0tc2NhbGU9MSwgd2lkdGg9ZGV2aWNlLXdpZHRoIj4KICA8dGl0bGU+RXJyb3IgNDA0IChOb3QgRm91bmQpISExPC90aXRsZT4KICA8c3R5bGU+CiAgICAqe21hcmdpbjowO3BhZGRpbmc6MH1odG1sLGNvZGV7Zm9udDoxNXB4LzIycHggYXJpYWwsc2Fucy1zZXJpZn1odG1se2JhY2tncm91bmQ6I2ZmZjtjb2xvcjojMjIyO3BhZGRpbmc6MTVweH1ib2R5e21hcmdpbjo3JSBhdXRvIDA7bWF4LXdpZHRoOjM5MHB4O21pbi1oZWlnaHQ6MTgwcHg7cGFkZGluZzozMHB4IDAgMTVweH0qID4gYm9keXtiYWNrZ3JvdW5kOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9lcnJvcnMvcm9ib3QucG5nKSAxMDAlIDVweCBuby1yZXBlYXQ7cGFkZGluZy1yaWdodDoyMDVweH1we21hcmdpbjoxMXB4IDAgMjJweDtvdmVyZmxvdzpoaWRkZW59aW5ze2NvbG9yOiM3Nzc7dGV4dC1kZWNvcmF0aW9uOm5vbmV9YSBpbWd7Ym9yZGVyOjB9QG1lZGlhIHNjcmVlbiBhbmQgKG1heC13aWR0aDo3NzJweCl7Ym9keXtiYWNrZ3JvdW5kOm5vbmU7bWFyZ2luLXRvcDowO21heC13aWR0aDpub25lO3BhZGRpbmctcmlnaHQ6MH19I2xvZ297YmFja2dyb3VuZDp1cmwoLy93d3cuZ29vZ2xlLmNvbS9pbWFnZXMvbG9nb3MvZXJyb3JwYWdlL2Vycm9yX2xvZ28tMTUweDU0LnBuZykgbm8tcmVwZWF0O21hcmdpbi1sZWZ0Oi01cHh9QG1lZGlhIG9ubHkgc2NyZWVuIGFuZCAobWluLXJlc29sdXRpb246MTkyZHBpKXsjbG9nb3tiYWNrZ3JvdW5kOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9sb2dvcy9lcnJvcnBhZ2UvZXJyb3JfbG9nby0xNTB4NTQtMngucG5nKSBuby1yZXBlYXQgMCUgMCUvMTAwJSAxMDAlOy1tb3otYm9yZGVyLWltYWdlOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9sb2dvcy9lcnJvcnBhZ2UvZXJyb3JfbG9nby0xNTB4NTQtMngucG5nKSAwfX1AbWVkaWEgb25seSBzY3JlZW4gYW5kICgtd2Via
2l0LW1pbi1kZXZpY2UtcGl4ZWwtcmF0aW86Mil7I2xvZ297YmFja2dyb3VuZDp1cmwoLy93d3cuZ29vZ2xlLmNvbS9pbWFnZXMvbG9nb3MvZXJyb3JwYWdlL2Vycm9yX2xvZ28tMTUweDU0LTJ4LnBuZykgbm8tcmVwZWF0Oy13ZWJraXQtYmFja2dyb3VuZC1zaXplOjEwMCUgMTAwJX19I2xvZ297ZGlzcGxheTppbmxpbmUtYmxvY2s7aGVpZ2h0OjU0cHg7d2lkdGg6MTUwcHh9CiAgPC9zdHlsZT4KICA8YSBocmVmPS8vd3d3Lmdvb2dsZS5jb20vPjxzcGFuIGlkPWxvZ28gYXJpYS1sYWJlbD1Hb29nbGU+PC9zcGFuPjwvYT4KICA8cD48Yj40MDQuPC9iPiA8aW5zPlRoYXTigJlzIGFuIGVycm9yLjwvaW5zPgogIDxwPiAgPGlucz5UaGF04oCZcyBhbGwgd2Uga25vdy48L2lucz4K","headers":[["content-length","1449"],["content-type","text/html; charset=utf-8"]],"ok":false,"status":404,"status_text":""}}},"executionInfo":{"elapsed":1557,"status":"ok","timestamp":1717163102674,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"GmAca8fVUMUm","outputId":"407601af-7f59-4bb0-e428-cdde90b5ad8d"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"d4d04b8e8b6048f98d6614d47949e7c0","version_major":2,"version_minor":0},"text/plain":["VBox(children=(HTML(value=\""}},"dfc87068738d4a51aa4582d66655ed5d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1d243bb88a024f5cad553638cb66b58f","placeholder":"​","style":"IPY_MODEL_00358206e8e44b7f93e33a68c77ed88e","value":"special_tokens_map.json: 
100%"}},"e1dcbdbabc40469793a494f59318b088":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e2ba9bdc71e342eea3fe5f0b2d50cad7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_beae34e9850d41f2928135a17bb7e24e","placeholder":"​","style":"IPY_MODEL_9ab61a0b2be94ab4b07712847603099f","value":"config.json: 
100%"}},"e364391a3e1d41d899735458a2b01436":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_83c2db21b673401fb11b45dde165fa14","max":4999813704,"min":0,"orientation":"horizontal","style":"IPY_MODEL_96f3c7a52a37470a98de481a7ea343ca","value":4999813704}},"e37b7be566694f66bd5aa6e82572a2e7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e6fac459206848898c206456b9dc8b1a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_vie
w_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":"0px 0px 0px 10px","right":null,"top":null,"visibility":null,"width":"initial"}},"e74be1cd463d42a783ddea46072f4499":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e7caf68af3c84c6195483d0a27225164":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_mo
del_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d418846006b8496989e86276d8538b50","placeholder":"​","style":"IPY_MODEL_2ad60e460fe64f0087cf3052f06451ba","value":"processor_config.json: 100%"}},"e9638ccc83e64995b8d08950b5d4177c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"eae063c3c5f54c86aee0b68d31f721ff":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eb03b5ac6feb452a86ab17129481d3cb":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@ju
pyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ebae562970a84eac8ba32466a405aa06":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ed6bd84830a642fd832ea549372056ad":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"ee91033cfb084d4e83529a11edb48134":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137212937453776"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel
","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_76c165d73f574ab39d3357e87b401ac9"],"layout":"IPY_MODEL_68d09a9f2fc84ead952160fe05c5dc90"}},"f1513571ca224451a3809b56a5739525":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e7caf68af3c84c6195483d0a27225164","IPY_MODEL_f6c25f96ead14b13a1a716a6de4e3f5c","IPY_MODEL_4a51933821d045cd881418ff2a2b30e7"],"layout":"IPY_MODEL_1d1a5dd1cc1d44be948464159187e856"}},"f17c2e4e8b2045e9930d9f2d73bf2ac7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f4f7b2ccfbd94a30a1f7a413c159ef07":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_n
ame":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f58a0d2e63aa456db54e8b34185a9e19":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ca76a74d6b204b4cae1f1d15a928782f","max":1636,"min":0,"orientation":"horizontal","style":"IPY_MODEL_4fe39d4b2ce74d529df2c59c9b02b468","value":1636}},"f5e02d40775a49a5b895d498233464f9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":n
ull,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":"visible","width":null}},"f6c25f96ead14b13a1a716a6de4e3f5c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_6c0b5365fd624af793b4d37f7449a7a5","max":483,"min":0,"orientation":"horizontal","style":"IPY_MODEL_9590087714614e948f3a8e4dd537a895","value":483}},"f84e4be232c7471e8d7c32463d1e69f4":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f8bc3fb84517404b94502e7498d5c55a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutV
iew","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f937e91a44ba4ba29379c6cd98c7b06e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f9b21842783e489d99ad9d25cd7d0914":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f9feda61c229465da90f33c69e7a4398":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137212937454928"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_c2494c8753e9409aa5e6d8862be5c773"
,"IPY_MODEL_0e112f697e7d4a34826d2a46a067c14e","IPY_MODEL_ee91033cfb084d4e83529a11edb48134"],"layout":"IPY_MODEL_f5e02d40775a49a5b895d498233464f9"}},"fb224135f5a14ba4872e0e64428d60b6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"BoundedIntTextModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"BoundedIntTextModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"IntTextView","continuous_update":false,"description":"","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_11c9edbf48f445d09d85f849915496e2","max":14,"min":0,"step":1,"style":"IPY_MODEL_9bb8a92858144190b6dd506673234315","value":0}},"fc6764a48f134c7c863b83ac060a7c4e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"fc986695bc55477099e8df7f9a0022cd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"fcebd41dea6a49d786827eb30e6202f0":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":nul
l,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fd00985eb51141799c4ad95c99461926":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":"block","flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"feb6497eb22b418b89260242009018b1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/bas
e","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ff88a16ea32e4df4b3abce055f6e2bc2":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ffeb080904984371bb8b715963c00069":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_mod
ule_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Boxes","Image","Mask","Image & Mask","Page data","Ground truth","Image All","Results","Best results","Accuracy","Dataframe","Config"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":7,"layout":"IPY_MODEL_bd64ee547bd049398c6d37f1ca476461","style":"IPY_MODEL_5da238d5b8514352a70b6b16cc9d944f"}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/pcleaner/_testbed/test_paligemma.ipynb b/pcleaner/_testbed/test_paligemma.ipynb new file mode 100644 index 00000000..0275156a --- /dev/null +++ b/pcleaner/_testbed/test_paligemma.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"FO-koL4wUMUg"},"source":["# Testing `PaliGemma` OCR for Comics\n","> Accuracy Enhancements for OCR in `PanelCleaner`\n"]},{"cell_type":"markdown","metadata":{"id":"HKOXOxox_jG0"},"source":["## Settings for Google Colab"]},{"cell_type":"markdown","metadata":{"id":"GakYQssB_jG0"},"source":["To efficiently manage the image sources for our experiments, we recommend mounting your Google Drive and storing the experiment files there. 
If you are not familiar with Colab or Jupyter environments, it's best to leave these settings at their default values to ensure smooth operation.\n","\n","- Set `MOUNT_DRIVE` to `True` to enable mounting Google Drive in the Colab environment.\n","This allows the notebook to access files stored in your Google Drive.\n","\n","- `GDRIVE_MOUNT_POINT` specifies the local directory in Colab where your Google Drive will be mounted.\n","This acts as the root directory for accessing any files within your Google Drive from the notebook.\n","\n","- `PANELCLEANER_IN_GDRIVE` specifies the path within your Google Drive where the PanelCleaner project is located.\n","This path is used to access or store any files related to the PanelCleaner project directly from Google Drive.\n"]},{"cell_type":"code","execution_count":2,"metadata":{"executionInfo":{"elapsed":2,"status":"ok","timestamp":1717165201592,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"uNgZx4vk_jG0"},"outputs":[],"source":["MOUNT_DRIVE = True\n","GDRIVE_MOUNT_POINT = 'drive'\n","PANELCLEANER_IN_GDRIVE = 'MyDrive/Shared/PanelCleaner'"]},{"cell_type":"markdown","metadata":{"id":"1iZhT2Zd_jG1"},"source":["# install (Colab)\n"]},{"cell_type":"code","execution_count":3,"metadata":{"executionInfo":{"elapsed":3,"status":"ok","timestamp":1717165204341,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"RNFlWKSb_jG1"},"outputs":[],"source":["import fastcore.all as FC\n","import os\n","import re\n","import sys\n","from pathlib import Path\n","\n","from rich import print as cprint\n","from rich.text import Text\n","\n","def info(msg: str):\n"," (t := Text(msg)).stylize(\"bold red\", 0, 6)\n"," cprint(\"_\" * 10, t, \"_\" * 10)\n"]},{"cell_type":"markdown","metadata":{"id":"Xrv0xwSP_jG1"},"source":["Mount Google 
Drive"]},{"cell_type":"code","execution_count":4,"metadata":{"executionInfo":{"elapsed":4,"status":"ok","timestamp":1717165208042,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"7h-QzI37_jG1"},"outputs":[],"source":["mnt_point = Path(f\"/content/{GDRIVE_MOUNT_POINT}\")\n","if FC.IN_COLAB:\n"," if MOUNT_DRIVE:\n"," if not mnt_point.exists():\n"," info(\"Mounting Google Drive\")\n"," from google.colab import drive\n"," drive.mount(str(mnt_point), force_remount=True)\n"]},{"cell_type":"markdown","metadata":{"id":"41JG2K1G_jG1"},"source":["### Install **PanelCleaner**\n","\n","> We will attempt to use the version of **PanelCleaner** stored in your Google Drive. If it's not available, we'll install it from GitHub.\n","\n","Note that we specifically require the `testbed` branch of the **PanelCleaner** repository, not the main trunk. This branch contains necessary configurations and experimental features that are crucial for the tests conducted in this notebook."]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":33},"executionInfo":{"elapsed":4564,"status":"ok","timestamp":1717165217825,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"SdiVA--u_jG1","outputId":"4c6a00e8-c8d9-4988-c5f2-7010c1233681"},"outputs":[],"source":["if FC.IN_COLAB:\n"," pc_path = mnt_point/PANELCLEANER_IN_GDRIVE\n"," tb_path = pc_path/'pcleaner/_testbed'\n"," if tb_path.exists():\n"," info('Installing PanelCleaner from your Google Drive')\n"," else:\n"," info('Installing PanelCleaner from GitHub')\n"," !git clone -b testbed https://github.com/civvic/PanelCleaner.git\n"," tb_path = Path('PanelCleaner/pcleaner/_testbed')\n"," assert tb_path.exists(), \"PanelCleaner not found\"\n"," os.chdir(tb_path)\n"," sys.path.append(f\"{pc_path}\")\n"," sys.path.append(f\"{tb_path}\")\n"," !pip install -q -r 
requirements-colab.txt\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"data":{"text/plain":["'4.42.0.dev0'"]},"metadata":{},"output_type":"display_data"}],"source":["if FC.IN_COLAB:\n"," !pip install -q accelerate\n","\n","import transformers\n","transformers.__version__"]},{"cell_type":"markdown","metadata":{"id":"8_A18iMfN2-g"},"source":["# Prologue"]},{"cell_type":"code","execution_count":7,"metadata":{"executionInfo":{"elapsed":4223,"status":"ok","timestamp":1717165238956,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"q_7GYpnWUMUl"},"outputs":[],"source":["from testbed.experiments import ExperimentsVisor, CropMethod, OCRExperimentContext\n","from testbed.ocr_paligemma import PaliGemmaOCR, get_gpu_vram\n","from testbed.helpers import IN_MAC, IN_LINUX\n"]},{"cell_type":"code","execution_count":8,"metadata":{"executionInfo":{"elapsed":2,"status":"ok","timestamp":1717165238956,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"CQtkWb9ON2-g"},"outputs":[],"source":["if IN_MAC:\n"," # !pip install -q mlx_vlm\n","\n"," import mlx.core as mx\n"]},{"cell_type":"markdown","metadata":{"id":"DK4iMZ7nN2-g"},"source":["# GPU"]},{"cell_type":"code","execution_count":12,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":396},"executionInfo":{"elapsed":660,"status":"ok","timestamp":1717165239614,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"_qoPb5u3N2-g","outputId":"6dec4a6e-5679-47c5-cc53-17473a6e214b"},"outputs":[{"data":{"text/html":["
          metal.is_available(): True\n","           metal.device_info(): {'memory_size': 68719476736, 'max_recommended_working_set_size': 51539607552, \n","'max_buffer_length': 38654705664, 'architecture': 'applegpu_g13s'}\n","     metal.get_active_memory(): 0\n","       metal.get_peak_memory(): 0\n","      metal.get_cache_memory(): 0\n","\n","
\n"],"text/plain":[" \u001b[1;35mmetal.is_available\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[3;92mTrue\u001b[0m\n"," \u001b[1;35mmetal.device_info\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'memory_size'\u001b[0m: \u001b[1;36m68719476736\u001b[0m, \u001b[32m'max_recommended_working_set_size'\u001b[0m: \u001b[1;36m51539607552\u001b[0m, \n","\u001b[32m'max_buffer_length'\u001b[0m: \u001b[1;36m38654705664\u001b[0m, \u001b[32m'architecture'\u001b[0m: \u001b[32m'applegpu_g13s'\u001b[0m\u001b[1m}\u001b[0m\n"," \u001b[1;35mmetal.get_active_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n"," \u001b[1;35mmetal.get_peak_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n"," \u001b[1;35mmetal.get_cache_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n","\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
            GPU: applegpu_g13s\n","     total VRAM: 65536 MiB\n","    active VRAM: 0 MiB\n","
\n"],"text/plain":[" GPU: applegpu_g13s\n"," total VRAM: \u001b[1;36m65536\u001b[0m MiB\n"," active VRAM: \u001b[1;36m0\u001b[0m MiB\n"]},"metadata":{},"output_type":"display_data"}],"source":["if IN_MAC:\n"," gpu_name = mx.metal.device_info()['architecture']\n"," cprint(\n"," f\"{'metal.is_available()':>30}: {mx.metal.is_available()}\\n\"\n"," f\"{'metal.device_info()':>30}: {mx.metal.device_info()}\\n\"\n"," f\"{'metal.get_active_memory()':>30}: {mx.metal.get_active_memory()//1024//1024}\\n\"\n"," f\"{'metal.get_peak_memory()':>30}: {mx.metal.get_peak_memory()//1024//1024}\\n\"\n"," f\"{'metal.get_cache_memory()':>30}: {mx.metal.get_cache_memory()//1024//1024}\\n\"\n"," )\n","else:\n"," !nvidia-smi\n"," import subprocess\n"," gpu_name = subprocess.check_output(\n"," \"nvidia-smi --query-gpu=gpu_name --format=csv,noheader\", shell=True\n"," ).decode('utf-8').strip()\n"," \n","\n","cprint( f\"{'GPU':>15}: {gpu_name}\\n\"\n"," f\"{'total VRAM':>15}: {get_gpu_vram()} MiB\\n\"\n"," f\"{'active VRAM':>15}: {get_gpu_vram(False)} MiB\")\n","\n"]},{"cell_type":"markdown","metadata":{"id":"Op4kiDaAUMUl"},"source":["----\n","# PaliGemma experiments"]},{"cell_type":"code","execution_count":10,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1932,"status":"ok","timestamp":1717165245172,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"D0LeqL6TZ-GY","outputId":"4b587e05-4de3-469b-cbd4-49b58fc0939d"},"outputs":[{"name":"stdout","output_type":"stream","text":["User is already logged in.\n"]}],"source":["from huggingface_hub import notebook_login\n","notebook_login(False, False)"]},{"cell_type":"markdown","metadata":{"id":"Y3IENuzMN2-h"},"source":["## Experiment directory"]},{"cell_type":"markdown","metadata":{"id":"urj8RO7aN2-h"},"source":["Directory where the images reside (`EXP_DIR/source/`), the auxiliary images will be cached (`EXP_DIR/cache/`), and the experiment results will be saved. 
You can change the default location here.\n","\n","NOTE: the default value assumes we are currently inside the `PanelCleaner/pcleaner/_testbed` directory. You can check that this is the case with `Path('.').resolve()`."]},{"cell_type":"code","execution_count":11,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":49},"executionInfo":{"elapsed":369,"status":"ok","timestamp":1717165248315,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"cbFj63q4N2-h","outputId":"b10db83f-aa18-496d-e16e-834050b8aea7"},"outputs":[{"data":{"text/html":["
    Working dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed\n","Experiments dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment\n","
\n"],"text/plain":[" Working dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/\u001b[0m\u001b[95m_testbed\u001b[0m\n","Experiments dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/\u001b[0m\u001b[95mexperiment\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["EXP_DIR = Path('./experiment')\n","cprint(f\"{'Working dir':>15}: {Path('.').resolve()}\\nExperiments dir: {EXP_DIR.resolve()}\")\n"]},{"cell_type":"markdown","metadata":{},"source":["# Test images\n"]},{"cell_type":"markdown","metadata":{},"source":["Copy your images to the source directory:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"data":{"text/html":["
/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/source\n","
\n"],"text/plain":["\u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/\u001b[0m\u001b[95msource\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["cprint((EXP_DIR/'source').resolve())"]},{"cell_type":"markdown","metadata":{},"source":["or download the standard set:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !gdown --id 1MCqUImwFS5iQ271CD9_t2FSugJXdYj0a -O experiment.zip"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !unzip -qn experiment.zip -d ."]},{"cell_type":"markdown","metadata":{"id":"8Iondm2oUMUl"},"source":["# Setup ngrok (Colab)"]},{"cell_type":"markdown","metadata":{"id":"nuHPp1U7UMUl"},"source":["The experiments can generate hundreds of images, and maintaining the **PIL** images in memory is not efficient. All the generated images are cached and visualized on demand through a URL pointing to the local cache. This approach prevents the kernel from being overloaded with **PIL** images, with the front-end responsible for fetching the image and the backend web server (not the kernel) for serving the image in another process. This method is quick and efficient. As an added bonus, the saved notebook remains lean and fit; it doesn't store the Base64 versions of all the output cell images.\n","\n","Unfortunately, this approach does not work as is in **Colab**. Google Colab runs on an older Ubuntu 18.04 VM, so all the usual networking challenges with Docker, or whatever VMs Google is using, apply. Google also goes to great lengths to avoid exposing its internal architecture. We have two options:\n","- Let the Jupyter kernel serve the images itself, which is slow and memory-consuming.\n","- Use a tunnel to map localhost (server) to whatever IP and port the front-end (the browser you're currently using) is running on. 
We can use **ngrok** for this, but *ngrok* is a commercial service that has been abused and now requires confirmation the first time the tunnel connects, which can be inconvenient for the user. It also requires the user to open a free account and obtain an auth token.\n","\n","You choose.\n","\n","If the notebook is running in Colab and ngrok has been successfully installed and the tunnel has been created, the default setting is USE_PIL=False. You can set the environment variable USE_PIL=True to force the use of PIL images, but note that in certain circumstances, Colab will complain because the free tiers are usually memory constrained.\n","\n","If you don't change the default settings:\n","- the notebook is running locally, it'll serve the images directly without any additional setup.\n","- the notebook is running in Colab, it'll serve the images through a web server and ngrok.\n"]},{"cell_type":"code","execution_count":12,"metadata":{"executionInfo":{"elapsed":559,"status":"ok","timestamp":1717165257647,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"vaf4PGLlUMUm"},"outputs":[],"source":["if FC.IN_COLAB:\n"," os.environ['USE_TUNNEL'] = 'True'\n"," os.environ['USE_PIL'] = 'False'\n"]},{"cell_type":"code","execution_count":13,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":184},"executionInfo":{"elapsed":47202,"status":"ok","timestamp":1717165306667,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"kivkfTrcUMUm","outputId":"6547dd4b-3747-4c99-e393-a422d1a85798"},"outputs":[],"source":["SERVER = None\n","if os.environ['USE_PIL'].lower() == 'false' and os.environ['USE_TUNNEL'].lower() == 'true':\n"," import testbed.web_server as web_server\n"," SERVER = web_server.setup_ngrok(web_server.WebServerBottle, Path(EXP_DIR))\n"]},{"cell_type":"markdown","metadata":{"id":"B0UlRbJ4N2-h"},"source":["# 
CONTEXT"]},{"cell_type":"markdown","metadata":{"id":"gBET0jZMN2-h"},"source":["| quant \\ platform | Mem | Mac | Linux | Windows | Colab T4 | Colab L4/H100 |\n","| --- | --- | --- | --- | --- | --- | --- |\n","| **bfloat16** | 6 GB | ✅ | ✅ | ? | ✅ | ✅ |\n","| **8bit** | 3 GB | ✅ | ✅ | ? | ✅ | ✅ |\n","| **4bit** | 2 GB | ✅ | ✅ | ? | ✅ | ✅ |\n"]},{"cell_type":"markdown","metadata":{"id":"AUgSvi6CUMUm"},"source":["Creates the `ExperimentContext` object we'll use to manage the experiments.\n"]},{"cell_type":"code","execution_count":18,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":504},"executionInfo":{"elapsed":434,"status":"ok","timestamp":1717165321066,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"wroznCt0CQdT","outputId":"ec50236e-dddc-4d60-b3cd-b6fdcb0a584f"},"outputs":[{"name":"stdout","output_type":"stream","text":["Current Configuration:\n","\n","Locale: System default\n","Default Profile: Built-in\n","Saved Profiles:\n","- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n","- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n","\n","Profile Editor: cursor\n","Cache Directory: System default\n","Default Torch Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n","Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n","GUI Theme: System default\n","\n","--------------------\n","\n","Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n","System default cache directory: /Users/vic/Library/Caches/pcleaner\n"]},{"data":{"text/html":["
 config cache_dir: None\n","       model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n","           device: 'mps'\n","
\n"],"text/plain":[" config cache_dir: \u001b[3;35mNone\u001b[0m\n"," model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n"," device: \u001b[32m'mps'\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
        force_PIL: False\n","       use_tunnel: False\n","       server_url: \n","   experiment dir: experiment\n","       source_dir: experiment/source\n","        cache_dir: experiment/cache\n","\n","
\n"],"text/plain":[" force_PIL: \u001b[3;91mFalse\u001b[0m\n"," use_tunnel: \u001b[3;91mFalse\u001b[0m\n"," server_url: \n"," experiment dir: experiment\n"," source_dir: experiment/source\n"," cache_dir: experiment/cache\n","\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
Experiment runs:\n","PaliGemma-crop-post: 0\n","
\n"],"text/plain":["Experiment runs:\n","PaliGemma-crop-post: \u001b[1;36m0\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["quant = '8bit' if IN_MAC else 'bfloat16'\n","CONTEXT = OCRExperimentContext('PaliGemma', EXP_DIR, quant=quant, \n"," server=SERVER, run_name='PaliGemma-crop-post', load=True)\n","CONTEXT.show()\n"]},{"cell_type":"code","execution_count":19,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":358,"referenced_widgets":["d086b7598b4848aaa4eb0ffede5da0f7","4856c138483c4bc59c165c6f8c81777a","1bffa03a8dfa4d4ba6e0b2cf76f5f0fd","88ee0bfb700a4f369333729d755fbc91","f2b7ff5971bd43b986d810b9261abd1c","c5431bc93486497faa74330f37d582aa","aec0a793b589469d957a83b767724cc4","8bb2d35ebdbc48dda1f1f365389ac92e","2d8a14b167c943a888fa9a313f6166eb","2930ae9cab9e421f9c6191bdb86d37a4","cde5c9e129e443fb9c1c642156bab294","5d09637e4ba34f14a879dbe44c675285","4b9e45c5e109429da8a7ed75633cdcfb","11d2466d2ced4c69afc62bda1f3baf27","725a2c78a0d94db19517c9f03fdbd76f","635032f84bbc4b0a8b1e4bcfb9d5318d","0e6515360d65481c8b398c02b0b6f437","9ac3df27ca5f442398b86df5d5849fab","6683113218e44183a6a5a1cd1392b452","5c0be55008d045108e2a91a819adfd58","ad5ddbcf9642427ca162e9cfdefe8ded","2682f4f1f76b498091a8bb16c97205e6","513155104caa4340900c728a570728e9","b64f35e753334071ba92cc9c23472f5f","5d772e124d1c4286bc83b0fdfd50b737","3a40dc732037425cb1491bb68f7d7178","4306183c004747b897acb4be6577f46d","78f02d7219cb4e5bbde518ac5e96a341","06d9f190506c40fb9689977c2a2b2757","daabe05918774871ba2f9cba38ca14ff","e72b3672869743e7b8fbbe178a93698a","86e80efa2af4416b9f465e7cb3873c3b","64d05881c560431b86dec6c27c01bfbb","a49fa2c6c92c4191a79f659b114eae0e","2db177bd125f4e0789363306784c51fa","b75c3b4c04a9416ca5e93bdf3b049c12","5d1fd260fdee46d69e8f81f1e146b787","5541585cb2714d82a3fdb9f70a161533","7fa1b8a86fe846c29568b89135748bc2","7694626eeb3b45598fa74862fb5b95a0","4cc8f4e7c0dd442199d94880ca6b718f","7efe7d6323ef4a00a4c4239643ef1d7d","b51706f5a3c14f20a642a64912e068ff","f1cf213ea11e431
c8aded42b1af2536b","a26dbf2c5fc647e89984f08d89e11cb4","fd31aabd8e4c4aa8add50c95e55e9d6f","22121cd9a6954bc2908e5a44c6bdd6a9","54abbc4b157c41cba0f0c663bba5eb25","0d416a34a58745ec9558443d076d3d55","64b5967a0be04a05aeddda4a1b977531","aeb9f955725443f5b2c2f0c54ffb1fc8","1c60de0ca3b8455a900affc4690fca1f","06de6316d026491bbd9e7c18efa3a351","f4fd6b82478a444d91242587c0a5045f","8f5f4df805114835b66872e2520edf70","bbf4c62d0b674b2594e455696e6e427a","377aea61c8d441afad1456b08681c7f2","b037f85089e24e5ba970bef2f46e568b","f3863102525f488fb74877a54b447c63","79d600bfe6204618af8ba7a7cb1577a8","0251403980314014b84ae3dc63cc5455","84ab0a402fe54b0f9f8791321c8d9276","4bf8171d649f4a819275590070b734bd","7a2075bd25ac42c984f2e12c2f18b578","0b9d514d2d9249f09095065a9e302c49","4acbacc0ed4a4c1581583f428b69aff7"]},"executionInfo":{"elapsed":31187,"status":"ok","timestamp":1717165357281,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"cSuBWT4YN2-i","outputId":"00cdca4b-4568-4a97-c646-42ab70541d1d"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"6701b0fefb114810a52c202ffb6a4b4d","version_major":2,"version_minor":0},"text/plain":["Fetching 9 files: 0%| | 0/9 [00:00 Size: '224'\n"," Quantization: '8bit'\n"," Device: 'mps'\n"," VRAM: 2986/65536 MiB\n","\n","\n"],"text/plain":[" Size: \u001b[32m'224'\u001b[0m\n"," Quantization: \u001b[32m'8bit'\u001b[0m\n"," Device: \u001b[32m'mps'\u001b[0m\n"," VRAM: \u001b[1;36m2986\u001b[0m/\u001b[1;36m65536\u001b[0m MiB\n","\n"]},"metadata":{},"output_type":"display_data"}],"source":["ocr_model = CONTEXT.setup_ocr_model(False)\n","ocr_model.show_info()"]},{"cell_type":"markdown","metadata":{"id":"rgSiXSTyN2-l"},"source":["Check the images are in place"]},{"cell_type":"code","execution_count":20,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":431,"status":"ok","timestamp":1717165362762,"user":{"displayName":"Vicente 
Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"ZXL84T8dUMUm","outputId":"92365c79-9c3d-4645-9638-b9f56b3131b7"},"outputs":[{"data":{"text/plain":["['00: Action_Comics_1960-01-00_(262).JPG',\n"," '01: Adolf_Cap_01_008.jpg',\n"," '02: Barnaby_v1-028.png',\n"," '03: Barnaby_v1-029.png',\n"," '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n"," '05: Cannon-292.jpg',\n"," '06: Contrato_con_Dios_028.jpg',\n"," '07: Erase_una_vez_en_Francia_02_88.jpg',\n"," '08: FOX_CHILLINTALES_T17_012.jpg',\n"," '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n"," '10: Galactus_12.jpg',\n"," '11: INOUE_KYOUMEN_002.png',\n"," '12: MCCALL_ROBINHOOD_T31_010.jpg',\n"," '13: MCCAY_LITTLENEMO_090.jpg',\n"," '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n"," '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n"," '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n"," '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n"," '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n"," '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n"," '20: Strange_Tales_172005.jpg',\n"," '21: Strange_Tales_172021.jpg',\n"," '22: Tarzan_014-21.JPG',\n"," '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n"," '24: Transformers_-_Unicron_000-004.jpg',\n"," '25: Transformers_-_Unicron_000-016.jpg',\n"," '26: WARE_ACME_024.jpg',\n"," '27: Yoko_Tsuno_T01_1972-10.jpg',\n"," '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n"," '29: manga_0033.jpg',\n"," '30: ronson-031.jpg',\n"," '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']"]},"execution_count":20,"metadata":{},"output_type":"execute_result"}],"source":["[f\"{i:02}: {_.name}\" for i,_ in 
enumerate(CONTEXT.image_paths)]\n"]},{"cell_type":"code","execution_count":22,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":200,"referenced_widgets":["59886ba6cd1c4fe8951884a5c340808b","bd58f503314643659cf9e8a1cef488c9","dda9ef398b524535a68fd6ea11fad818","eca634da59354f1caa467fba3c58c5fc","4b450035de2f401b9e3554a023a3ec65","53d92d965e984ed7afc3496af7d61d70","3d496f12c55645aead4dfe7d0d07dcf0","91b0f76df87a4c56aa5a02943cb28839","c24dd55347c44ee586bab8ee277db7da","3cb6adfc2d764b97b6673765860d47bb","f77c96169250417aad96b5c1d58222f8","f68accd919884dd09190b80513b9646c","e30127ffa4834f609af7939fffe106e8","e6f1a30a76264f7e84e93559d196cb5f","2daa7d0e57fd4e4682ce1adc15b9f84d","4cb1d38e7aec4391b8cd7527ca9febee","d3913b280fba46c4b9053b0e1b7f76fe","c610dd153a364592b786fcdf898c3da3","14fe1f23cc4b4887a5a7d8b987af49eb","7f56d139cd774438be2902f1310d55b6","b24e5e348f604c2fb1359e12f9c1dde1","3b259cdb3ddd4432bd07c36f6145a4de","941510f7665d47f3bda727843f6684ae","85198e7d3abb4492b0cca02083f68a24","143e6c3613d8483d91833a2e13f86fb6","212c95d82afc43a9b849a07cd0193ad6","b3691f5cb09041b5aa88e1caf3a29b20","9ad1e5cabb0d4636a08d306513e63c16","d03fbf56a2934588892fd1bb43739564","491fa386a15f4719bf44fa8e302fd011","fc0380f14a854a6cb040442f98d41000","a3aff0a8b5f4459e872c8a2c05f0226d","d5541144ffea4c418ef5c6c3db1e519e","385f8f44444540af87117059cfec0f5a","816479b548ed4dd99880c862929898fa","ec3cca3523514dc1b21d8d79773cb988","577f895e829e4f9098f600795aa18d08","58e62210fd1c482cb848c00704ee496c","9ca29ee5af0b4f888d2f6c5c011cad7f","a340e132d43a4178b902d6e18cdaa42c","14457502346c40069dda383db7503d76","0f90ecefed3b4b30bff078f97f4a67d7","15a4d0683ce04296ae0426bff451cc06","460853ad40bc474e900544e27d4d8440","041565754c1d4b8188ac80ac296974a3","2567a327e02648e29b2410d406d45217","e892ff1ff8244fb5b4dd1617e7b67bcb","3215c940fe7e485c97a3af8c5dbf8e08","9932bf3222934434baca1a23e649f118","a2ed35e40a0342f1b5209cf2368ec9b8","d35bec09dbe849f9b64a211fd45b80c0","0915b9e3ce76421a8761cc5eaf25e2a7","b6c5b7f1
05e74dd09000d038b57066e8","d033341173d24e19a5f413ca0a53ce6c","358dfaf8072f4c77aabc4e267e48d833","ea37bd0802504828a90c1d3625e2d39e","b54c98e9b45d4a6391fce48e463488aa","9a1befb039dd4021af786a7cefc7e7fe","d3dc27978259400187c814165ce20091","1a20a2ace0234eca9b4b8f2fc68358b0","215b89c0f4304fb4b2a4642b763f3b8e","d58f6d5a301e4389b4e73b7ae2ef595f","3655dd45e31845cb9cf4bcc1a7572db0","3cd7144832b1489aa0dcdc2eb74da6b4","0f41b180ba3f4e139d00d1d4e41fa808","db7189bed4d04523bafa9267fd5ed194","8ee17913c9124a8f8136f87de059f86f","65f9a631473a4ce989995cfcc84c1388","dd4f665280c1428ea9cb3073d95bc02e"],"resources":{"http://localhost:8080/experiment/cache/Strange_Tales_172005/_crop/Strange_Tales_172005_6_Default,%20grey%20pad.png":{"data":"CjwhRE9DVFlQRSBodG1sPgo8aHRtbCBsYW5nPWVuPgogIDxtZXRhIGNoYXJzZXQ9dXRmLTg+CiAgPG1ldGEgbmFtZT12aWV3cG9ydCBjb250ZW50PSJpbml0aWFsLXNjYWxlPTEsIG1pbmltdW0tc2NhbGU9MSwgd2lkdGg9ZGV2aWNlLXdpZHRoIj4KICA8dGl0bGU+RXJyb3IgNDA0IChOb3QgRm91bmQpISExPC90aXRsZT4KICA8c3R5bGU+CiAgICAqe21hcmdpbjowO3BhZGRpbmc6MH1odG1sLGNvZGV7Zm9udDoxNXB4LzIycHggYXJpYWwsc2Fucy1zZXJpZn1odG1se2JhY2tncm91bmQ6I2ZmZjtjb2xvcjojMjIyO3BhZGRpbmc6MTVweH1ib2R5e21hcmdpbjo3JSBhdXRvIDA7bWF4LXdpZHRoOjM5MHB4O21pbi1oZWlnaHQ6MTgwcHg7cGFkZGluZzozMHB4IDAgMTVweH0qID4gYm9keXtiYWNrZ3JvdW5kOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9lcnJvcnMvcm9ib3QucG5nKSAxMDAlIDVweCBuby1yZXBlYXQ7cGFkZGluZy1yaWdodDoyMDVweH1we21hcmdpbjoxMXB4IDAgMjJweDtvdmVyZmxvdzpoaWRkZW59aW5ze2NvbG9yOiM3Nzc7dGV4dC1kZWNvcmF0aW9uOm5vbmV9YSBpbWd7Ym9yZGVyOjB9QG1lZGlhIHNjcmVlbiBhbmQgKG1heC13aWR0aDo3NzJweCl7Ym9keXtiYWNrZ3JvdW5kOm5vbmU7bWFyZ2luLXRvcDowO21heC13aWR0aDpub25lO3BhZGRpbmctcmlnaHQ6MH19I2xvZ297YmFja2dyb3VuZDp1cmwoLy93d3cuZ29vZ2xlLmNvbS9pbWFnZXMvbG9nb3MvZXJyb3JwYWdlL2Vycm9yX2xvZ28tMTUweDU0LnBuZykgbm8tcmVwZWF0O21hcmdpbi1sZWZ0Oi01cHh9QG1lZGlhIG9ubHkgc2NyZWVuIGFuZCAobWluLXJlc29sdXRpb246MTkyZHBpKXsjbG9nb3tiYWNrZ3JvdW5kOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9sb2dvcy9lcnJvcnBhZ2UvZXJyb3JfbG9nby0xNTB4NTQtMngucG5nKSBuby1yZXBlYXQgMCUgMCUvMTAwJSAxMDAlOy1tb3otYm9yZG
VyLWltYWdlOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9sb2dvcy9lcnJvcnBhZ2UvZXJyb3JfbG9nby0xNTB4NTQtMngucG5nKSAwfX1AbWVkaWEgb25seSBzY3JlZW4gYW5kICgtd2Via2l0LW1pbi1kZXZpY2UtcGl4ZWwtcmF0aW86Mil7I2xvZ297YmFja2dyb3VuZDp1cmwoLy93d3cuZ29vZ2xlLmNvbS9pbWFnZXMvbG9nb3MvZXJyb3JwYWdlL2Vycm9yX2xvZ28tMTUweDU0LTJ4LnBuZykgbm8tcmVwZWF0Oy13ZWJraXQtYmFja2dyb3VuZC1zaXplOjEwMCUgMTAwJX19I2xvZ297ZGlzcGxheTppbmxpbmUtYmxvY2s7aGVpZ2h0OjU0cHg7d2lkdGg6MTUwcHh9CiAgPC9zdHlsZT4KICA8YSBocmVmPS8vd3d3Lmdvb2dsZS5jb20vPjxzcGFuIGlkPWxvZ28gYXJpYS1sYWJlbD1Hb29nbGU+PC9zcGFuPjwvYT4KICA8cD48Yj40MDQuPC9iPiA8aW5zPlRoYXTigJlzIGFuIGVycm9yLjwvaW5zPgogIDxwPiAgPGlucz5UaGF04oCZcyBhbGwgd2Uga25vdy48L2lucz4K","headers":[["content-length","1449"],["content-type","text/html; charset=utf-8"]],"ok":false,"status":404,"status_text":""}}},"executionInfo":{"elapsed":639,"status":"ok","timestamp":1717165542401,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"GmAca8fVUMUm","outputId":"4042b144-9cfe-43f0-b391-ac8d44b612f8"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"ed7e79b69996433b87d46e466cb48fb6","version_major":2,"version_minor":0},"text/plain":["VBox(children=(HTML(value=\""}},"c24dd55347c44ee586bab8ee277db7da":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_f68accd919884dd09190b80513b9646c","IPY_MODEL_e30127ffa4834f609af7939fffe106e8","IPY_MODEL_e6f1a30a76264f7e84e93559d196cb5f","IPY_MODEL_2daa7d0e57fd4e4682ce1adc15b9f84d"],"layout":"IPY_MODEL_4cb1d38e7aec4391b8cd7527ca9febee"}},"c5431bc93486497faa74330f37d582aa":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_mod
el_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c610dd153a364592b786fcdf898c3da3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["method_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_385f8f44444540af87117059cfec0f5a","IPY_MODEL_816479b548ed4dd99880c862929898fa","IPY_MODEL_ec3cca3523514dc1b21d8d79773cb988"],"layout":"IPY_MODEL_577f895e829e4f9098f600795aa18d08"}},"cde5c9e129e443fb9c1c642156bab294":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d033341173d24e19a5f413ca0a53ce6c":{"model_module":"@jupyter-widgets/base","model_module
_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50px"}},"d03fbf56a2934588892fd1bb43739564":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d086b7598
b4848aaa4eb0ffede5da0f7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4856c138483c4bc59c165c6f8c81777a","IPY_MODEL_1bffa03a8dfa4d4ba6e0b2cf76f5f0fd","IPY_MODEL_88ee0bfb700a4f369333729d755fbc91"],"layout":"IPY_MODEL_f2b7ff5971bd43b986d810b9261abd1c"}},"d35bec09dbe849f9b64a211fd45b80c0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d3913b280fba46c4b9053b0e1b7f76fe":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["box_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_491fa386a15f4719bf44fa8e302fd011","IPY_MODEL_fc0380f14a854a6cb040442f98d41000","IPY_MODEL_a3aff0a8b5f4459e872c8a2c05f0226d"],"layout":"IPY_MODEL_d5541144ffea4c418ef5c6c3db1e519e"}},"d3dc27978259400187c814165ce20091":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","descrip
tion_width":"0px"}},"d5541144ffea4c418ef5c6c3db1e519e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d58f6d5a301e4389b4e73b7ae2ef595f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":"none","flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_
x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"daabe05918774871ba2f9cba38ca14ff":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"db7189bed4d04523bafa9267fd5ed194":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"obj
ect_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"120px"}},"dd4f665280c1428ea9cb3073d95bc02e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dda9ef398b524535a68fd6ea11fad818":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"VBoxModel","state":{"_dom_classes":["context-visor","140192597210928"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_c24dd55347c44ee586bab8ee277db7da","IPY_MODEL_3cb6adfc2d764b97b6673765860d47bb"],"layout":"IPY_MODEL_f77c96169250417aad96b5c1d58222f8"}},"e30127ffa4834f609af7939fffe106e8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","140192597212656"],"_
model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_941510f7665d47f3bda727843f6684ae"],"layout":"IPY_MODEL_85198e7d3abb4492b0cca02083f68a24"}},"e6f1a30a76264f7e84e93559d196cb5f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","140192597212608"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_143e6c3613d8483d91833a2e13f86fb6"],"layout":"IPY_MODEL_212c95d82afc43a9b849a07cd0193ad6"}},"e72b3672869743e7b8fbbe178a93698a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e892ff1ff8244fb5b4dd1617e7b67bcb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":"lightblue","font_weight":"bold"}},"ea37bd0802504828a90c1d3625e2d39e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view
_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":"0px 0px 0px 10px","right":null,"top":null,"visibility":null,"width":"initial"}},"ec3cca3523514dc1b21d8d79773cb988":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Initial box","Default","Default, grey pad","Padded 4px","Padded 8px","Extracted, init box","Padded 4, extracted","Padded 8, extracted","Padded 8, dilation 1","Pad 8, fract. 0.5","Pad 8, fract. 
0.2"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":2,"layout":"IPY_MODEL_1a20a2ace0234eca9b4b8f2fc68358b0","style":"IPY_MODEL_215b89c0f4304fb4b2a4642b763f3b8e"}},"eca634da59354f1caa467fba3c58c5fc":{"model_module":"@jupyter-widgets/output","model_module_version":"1.0.0","model_name":"OutputModel","state":{"_dom_classes":["message_visor-yXy"],"_model_module":"@jupyter-widgets/output","_model_module_version":"1.0.0","_model_name":"OutputModel","_view_count":null,"_view_module":"@jupyter-widgets/output","_view_module_version":"1.0.0","_view_name":"OutputView","layout":"IPY_MODEL_65f9a631473a4ce989995cfcc84c1388","msg_id":"","outputs":[]}},"f1cf213ea11e431c8aded42b1af2536b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f2b7ff5971bd43b986d810b9261abd1c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":
null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f3863102525f488fb74877a54b447c63":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0b9d514d2d9249f09095065a9e302c49","placeholder":"​","style":"IPY_MODEL_4acbacc0ed4a4c1581583f428b69aff7","value":" 137/137 [00:00<00:00, 13.4kB/s]"}},"f4fd6b82478a444d91242587c0a5045f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f68accd919884dd09190b80513b9646c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxMode
l","state":{"_dom_classes":["context-visor","140192597212368"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_b24e5e348f604c2fb1359e12f9c1dde1"],"layout":"IPY_MODEL_3b259cdb3ddd4432bd07c36f6145a4de"}},"f77c96169250417aad96b5c1d58222f8":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fc0380f14a854a6cb040442f98d41000":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"CheckboxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"all","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_0915b9e3ce76421a8761cc5eaf25e2a7","style":"IPY_MODEL_b6c5b7f
105e74dd09000d038b57066e8","value":false}},"fd31aabd8e4c4aa8add50c95e55e9d6f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_64b5967a0be04a05aeddda4a1b977531","placeholder":"​","style":"IPY_MODEL_aeb9f955725443f5b2c2f0c54ffb1fc8","value":"Loading checkpoint shards: 100%"}}}}},"nbformat":4,"nbformat_minor":0} diff --git a/pcleaner/_testbed/test_tesseract.ipynb b/pcleaner/_testbed/test_tesseract.ipynb index 24ca973d..4218723c 100644 --- a/pcleaner/_testbed/test_tesseract.ipynb +++ b/pcleaner/_testbed/test_tesseract.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"markdown","metadata":{"id":"XLuXdPExZlIq"},"source":["# Testing `Tesseract` OCR for Comics\n","> Accuracy Enhancements for OCR in `PanelCleaner`\n"]},{"cell_type":"markdown","metadata":{"id":"O02O4qXwZlIr"},"source":["## Settings for Google Colab"]},{"cell_type":"markdown","metadata":{"id":"dD_MtS4cZlIr"},"source":["We will install the more up-to-date version of PanelCleaner from GitHub. 
Only affects Colab notebooks."]},{"cell_type":"code","execution_count":1,"metadata":{"id":"V3ABUVumZlIr"},"outputs":[],"source":["DEV_INSTALL = True"]},{"cell_type":"markdown","metadata":{"id":"jgisVUTBZlIs"},"source":["The best way to get the images source of the experiments is to mount your Google Drive.\n"]},{"cell_type":"code","execution_count":2,"metadata":{"id":"fFZBHyWLZlIs"},"outputs":[],"source":["MOUNT_DRIVE = DEV_INSTALL\n","GDRIVE_MOUNT_POINT = 'drive'\n"]},{"cell_type":"markdown","metadata":{"id":"X8I28oA1ZlIs"},"source":["# install (Colab)"]},{"cell_type":"code","execution_count":3,"metadata":{"id":"nT4E7IXbcUbp"},"outputs":[],"source":["import fastcore.all as FC\n"]},{"cell_type":"code","execution_count":4,"metadata":{"id":"jpnPDDFIcFh0"},"outputs":[],"source":["if FC.IN_COLAB:\n"," !pip install -q pyngrok\n"]},{"cell_type":"markdown","metadata":{"id":"gqYSM7iBcZsH"},"source":["Mount Google Drive"]},{"cell_type":"code","execution_count":5,"metadata":{"id":"bPF0CtUaZlIs"},"outputs":[],"source":["import os\n","import re\n","from pathlib import Path\n","\n","from rich import print as cprint\n","from rich.text import Text\n","\n","def info(msg: str):\n"," text = Text(msg)\n"," text.stylize(\"bold red\", 0, 6)\n"," cprint(\"_\" * 10, text, \"_\" * 10)\n","\n","\n","if FC.IN_COLAB:\n"," if MOUNT_DRIVE:\n"," mnt_point = f\"/content/{GDRIVE_MOUNT_POINT}\"\n"," if not Path(mnt_point).exists():\n"," info(\"Mounting Google Drive\")\n"," from google.colab import drive\n","\n"," drive.mount(mnt_point, force_remount=True)\n"]},{"cell_type":"markdown","metadata":{"id":"UjPTi7MfcdxU"},"source":["Install **PanelCleaner**"]},{"cell_type":"code","execution_count":6,"metadata":{"id":"7y3qqqy6ZlIt"},"outputs":[],"source":["if FC.IN_COLAB:\n"," info('Installing PanelCleaner')\n"," if DEV_INSTALL:\n"," assert MOUNT_DRIVE, \"DEV_INSTALL need a mounted google drive\"\n"," info('Installing PanelCleaner from Google Drive')\n"," 
os.chdir(f\"/content/{GDRIVE_MOUNT_POINT}/MyDrive/Shared/PanelCleaner/\")\n"," !pip install -e .\n"," else:\n"," info('Installing PanelCleaner from Github')\n"," !pip install -q git+https://github.com/civvic/PanelCleaner.git@testbed\n"]},{"cell_type":"markdown","metadata":{"id":"6ylyum-ibQdL"},"source":["**PanelCleaner** is a heavy-weight and sometimes **Colab** refuses (*silently*) to install it. If the cell below gives an error, re-run the cell above. That usually fixes the problem."]},{"cell_type":"code","execution_count":7,"metadata":{"id":"oE3PogewbUQO"},"outputs":[],"source":["import importlib.resources\n","package_path = importlib.resources.files('pcleaner')\n","assert package_path.name == 'pcleaner'\n","\n","os.chdir(package_path/'_testbed')"]},{"cell_type":"code","execution_count":8,"metadata":{"id":"18HJJTQebWoV"},"outputs":[],"source":["from pcleaner._testbed.testbed.experiments import ExperimentsVisor, CropMethod, OCRExperimentContext\n"]},{"cell_type":"markdown","metadata":{},"source":["## Tesseract setup"]},{"cell_type":"markdown","metadata":{},"source":["Get current version of Tesseract"]},{"cell_type":"code","execution_count":9,"metadata":{},"outputs":[{"data":{"text/html":["
[\n","    'tesseract 5.3.4',\n","    ' leptonica-1.84.1',\n","    '  libgif 5.2.1 : libjpeg 8d (libjpeg-turbo 3.0.0) : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : libwebp \n","1.4.0 : libopenjp2 2.5.2',\n","    ' Found NEON',\n","    ' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6',\n","    ' Found libcurl/8.6.0 SecureTransport (LibreSSL/3.3.6) zlib/1.2.12 nghttp2/1.61.0'\n","]\n","
\n"],"text/plain":["\u001b[1m[\u001b[0m\n"," \u001b[32m'tesseract 5.3.4'\u001b[0m,\n"," \u001b[32m' leptonica-1.84.1'\u001b[0m,\n"," \u001b[32m' libgif 5.2.1 : libjpeg 8d \u001b[0m\u001b[32m(\u001b[0m\u001b[32mlibjpeg-turbo 3.0.0\u001b[0m\u001b[32m)\u001b[0m\u001b[32m : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : libwebp \u001b[0m\n","\u001b[32m1.4.0 : libopenjp2 2.5.2'\u001b[0m,\n"," \u001b[32m' Found NEON'\u001b[0m,\n"," \u001b[32m' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6'\u001b[0m,\n"," \u001b[32m' Found libcurl/8.6.0 SecureTransport \u001b[0m\u001b[32m(\u001b[0m\u001b[32mLibreSSL/3.3.6\u001b[0m\u001b[32m)\u001b[0m\u001b[32m zlib/1.2.12 nghttp2/1.61.0'\u001b[0m\n","\u001b[1m]\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["out = !tesseract --version # type: ignore\n","cprint(out)\n","if 'tesseract 5.' not in out[0]:\n"," if 'tesseractd 4.' in out[0]:\n"," cprint('Old Tesseract 4.x is installed. You should uninstall it and install Tesseract 5.x')\n"," else:\n"," cprint('You should install Tesseract 5.x')\n"]},{"cell_type":"markdown","metadata":{},"source":["> **NOTE: in the cells below, when you encounter lines starting with an exclamation mark `!` (`bang`), uncomment them if you want to execute the shell commands**\n"]},{"cell_type":"markdown","metadata":{},"source":["### Remove Tesseract installation\n","> If you have the old 4.x version, you should consider removing the installation with the following commands.\n"]},{"cell_type":"markdown","metadata":{},"source":["#### Mac (TBD)"]},{"cell_type":"markdown","metadata":{},"source":["#### Windows (TBD)"]},{"cell_type":"markdown","metadata":{},"source":["#### Ubuntu"]},{"cell_type":"code","execution_count":10,"metadata":{},"outputs":[],"source":["# !sudo apt-get remove tesseract-ocr\n"]},{"cell_type":"markdown","metadata":{},"source":["### Tesseract installation"]},{"cell_type":"markdown","metadata":{},"source":["#### Mac 
(TBD)"]},{"cell_type":"markdown","metadata":{},"source":["#### Windows (TBD)"]},{"cell_type":"markdown","metadata":{},"source":["#### Ubuntu"]},{"cell_type":"markdown","metadata":{},"source":["The **5.x** release series is available in [another PPA](https://launchpad.net/~alex-p/+archive/ubuntu/tesseract-ocr5) for Ubuntu **18.04**, **20.04**, and **22.04**.\n"]},{"cell_type":"code","execution_count":11,"metadata":{},"outputs":[],"source":["# !sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5"]},{"cell_type":"markdown","metadata":{},"source":["Refresh the system package cache in case you’re still running the old Ubuntu 18.04"]},{"cell_type":"code","execution_count":12,"metadata":{},"outputs":[],"source":["# !sudo apt update"]},{"cell_type":"markdown","metadata":{},"source":["Install the software engine"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[],"source":["# !sudo apt install -y tesseract-ocr"]},{"cell_type":"markdown","metadata":{},"source":["And check the version:"]},{"cell_type":"code","execution_count":14,"metadata":{},"outputs":[{"data":{"text/html":["
[\n","    'tesseract 5.3.4',\n","    ' leptonica-1.84.1',\n","    '  libgif 5.2.1 : libjpeg 8d (libjpeg-turbo 3.0.0) : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : libwebp \n","1.4.0 : libopenjp2 2.5.2',\n","    ' Found NEON',\n","    ' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6',\n","    ' Found libcurl/8.6.0 SecureTransport (LibreSSL/3.3.6) zlib/1.2.12 nghttp2/1.61.0'\n","]\n","
\n"],"text/plain":["\u001b[1m[\u001b[0m\n"," \u001b[32m'tesseract 5.3.4'\u001b[0m,\n"," \u001b[32m' leptonica-1.84.1'\u001b[0m,\n"," \u001b[32m' libgif 5.2.1 : libjpeg 8d \u001b[0m\u001b[32m(\u001b[0m\u001b[32mlibjpeg-turbo 3.0.0\u001b[0m\u001b[32m)\u001b[0m\u001b[32m : libpng 1.6.43 : libtiff 4.6.0 : zlib 1.2.11 : libwebp \u001b[0m\n","\u001b[32m1.4.0 : libopenjp2 2.5.2'\u001b[0m,\n"," \u001b[32m' Found NEON'\u001b[0m,\n"," \u001b[32m' Found libarchive 3.7.2 zlib/1.2.11 liblzma/5.4.6 bz2lib/1.0.8 liblz4/1.9.4 libzstd/1.5.6'\u001b[0m,\n"," \u001b[32m' Found libcurl/8.6.0 SecureTransport \u001b[0m\u001b[32m(\u001b[0m\u001b[32mLibreSSL/3.3.6\u001b[0m\u001b[32m)\u001b[0m\u001b[32m zlib/1.2.12 nghttp2/1.61.0'\u001b[0m\n","\u001b[1m]\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["out = !tesseract --version # type: ignore\n","cprint(out)"]},{"cell_type":"markdown","metadata":{},"source":["### Install Tesseract languages"]},{"cell_type":"code","execution_count":15,"metadata":{},"outputs":[{"data":{"text/html":["
tessdata path: /opt/homebrew/share/tessdata\n","
\n"],"text/plain":["tessdata path: \u001b[35m/opt/homebrew/share/\u001b[0m\u001b[95mtessdata\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
Installed languages:\n","[\n","    'afr, amh, ara, asm, aze, aze_cyrl, bel, ben, bod, bos, bre, bul, cat, ceb, ces',\n","    'chi_sim, chi_sim_vert, chi_tra, chi_tra_vert, chr, cos, cym, dan, deu, div, dzo, ell, eng, enm, epo',\n","    'equ, est, eus, fao, fas, fil, fin, fra, frk, frm, fry, gla, gle, glg, grc',\n","    'guj, hat, heb, hin, hrv, hun, hye, iku, ind, isl, ita, ita_old, jav, jpn, jpn_vert',\n","    'kan, kat, kat_old, kaz, khm, kir, kmr, kor, kor_vert, lao, lat, lav, lit, ltz, mal',\n","    'mar, mkd, mlt, mon, mri, msa, mya, nep, nld, nor, oci, ori, osd, pan, pol',\n","    'por, pus, que, ron, rus, san, script/Arabic, script/Armenian, script/Bengali, script/Canadian_Aboriginal, \n","script/Cherokee, script/Cyrillic, script/Devanagari, script/Ethiopic, script/Fraktur',\n","    'script/Georgian, script/Greek, script/Gujarati, script/Gurmukhi, script/HanS, script/HanS_vert, script/HanT, \n","script/HanT_vert, script/Hangul, script/Hangul_vert, script/Hebrew, script/Japanese, script/Japanese_vert, \n","script/Kannada, script/Khmer',\n","    'script/Lao, script/Latin, script/Malayalam, script/Myanmar, script/Oriya, script/Sinhala, script/Syriac, \n","script/Tamil, script/Telugu, script/Thaana, script/Thai, script/Tibetan, script/Vietnamese, sin, slk',\n","    'slv, snd, snum, spa, spa_old, sqi, srp, srp_latn, sun, swa, swe, syr, tam, tat, tel',\n","    'tgk, tha, tir, ton, tur, uig, ukr, urd, uzb, uzb_cyrl, vie, yid, yor'\n","]\n","
\n"],"text/plain":["Installed languages:\n","\u001b[1m[\u001b[0m\n"," \u001b[32m'afr, amh, ara, asm, aze, aze_cyrl, bel, ben, bod, bos, bre, bul, cat, ceb, ces'\u001b[0m,\n"," \u001b[32m'chi_sim, chi_sim_vert, chi_tra, chi_tra_vert, chr, cos, cym, dan, deu, div, dzo, ell, eng, enm, epo'\u001b[0m,\n"," \u001b[32m'equ, est, eus, fao, fas, fil, fin, fra, frk, frm, fry, gla, gle, glg, grc'\u001b[0m,\n"," \u001b[32m'guj, hat, heb, hin, hrv, hun, hye, iku, ind, isl, ita, ita_old, jav, jpn, jpn_vert'\u001b[0m,\n"," \u001b[32m'kan, kat, kat_old, kaz, khm, kir, kmr, kor, kor_vert, lao, lat, lav, lit, ltz, mal'\u001b[0m,\n"," \u001b[32m'mar, mkd, mlt, mon, mri, msa, mya, nep, nld, nor, oci, ori, osd, pan, pol'\u001b[0m,\n"," \u001b[32m'por, pus, que, ron, rus, san, script/Arabic, script/Armenian, script/Bengali, script/Canadian_Aboriginal, \u001b[0m\n","\u001b[32mscript/Cherokee, script/Cyrillic, script/Devanagari, script/Ethiopic, script/Fraktur'\u001b[0m,\n"," \u001b[32m'script/Georgian, script/Greek, script/Gujarati, script/Gurmukhi, script/HanS, script/HanS_vert, script/HanT, \u001b[0m\n","\u001b[32mscript/HanT_vert, script/Hangul, script/Hangul_vert, script/Hebrew, script/Japanese, script/Japanese_vert, \u001b[0m\n","\u001b[32mscript/Kannada, script/Khmer'\u001b[0m,\n"," \u001b[32m'script/Lao, script/Latin, script/Malayalam, script/Myanmar, script/Oriya, script/Sinhala, script/Syriac, \u001b[0m\n","\u001b[32mscript/Tamil, script/Telugu, script/Thaana, script/Thai, script/Tibetan, script/Vietnamese, sin, slk'\u001b[0m,\n"," \u001b[32m'slv, snd, snum, spa, spa_old, sqi, srp, srp_latn, sun, swa, swe, syr, tam, tat, tel'\u001b[0m,\n"," \u001b[32m'tgk, tha, tir, ton, tur, uig, ukr, urd, uzb, uzb_cyrl, vie, yid, yor'\u001b[0m\n","\u001b[1m]\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["out = !tesseract --list-langs # type: ignore\n","tessdata = Path(out[0].split('\"')[1])\n","cprint(f\"tessdata path: {tessdata}\")\n","cprint(\"Installed languages:\", 
[', '.join(sub) for sub in [out[i:i + 15] for i in range(1, len(out), 15)]])"]},{"cell_type":"markdown","metadata":{},"source":["#### Install **best** languages and **jpn_vert** Tesseract lang\n","> Much better results than default langs and `jpn` language model.\n"]},{"cell_type":"markdown","metadata":{},"source":["Download from [tessdata_best](https://github.com/tesseract-ocr/tessdata_best). \n","Download from [here](https://groups.google.com/g/tesseract-ocr/c/FwjSZzoVgeg/m/u-zyFYQiBgAJ) a model trained for vertical Japanese text as found in manga.\n","\n","See [here](https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html) the language codes.\n","\n","> Note: I haven't played much with `jpn` or `jpn_vert`, `manga-ocr` is surely a much better fit, but it can be educational to compare."]},{"cell_type":"markdown","metadata":{},"source":["Uncomment and execute to download the best language models:\n"]},{"cell_type":"code","execution_count":16,"metadata":{},"outputs":[],"source":["# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/osd.traineddata\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/eng.traineddata\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/jpn.traineddata\n","\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/jpn_vert.traineddata\n","# or\n","# !wget -O jpn_vert.traineddata https://github.com/zodiac3539/jpn_vert/blob/master/jpn_ver5.traineddata\n","\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/spa.traineddata\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/fra.traineddata"]},{"cell_type":"markdown","metadata":{},"source":["Copy downloaded models to tessdata folder (double check that `tessdata` variable points to the right folder):\n"]},{"cell_type":"code","execution_count":17,"metadata":{},"outputs":[{"data":{"text/html":["
tessdata path: /opt/homebrew/share/tessdata\n","
\n"],"text/plain":["tessdata path: \u001b[35m/opt/homebrew/share/\u001b[0m\u001b[95mtessdata\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["cprint(f\"tessdata path: {tessdata}\")"]},{"cell_type":"code","execution_count":18,"metadata":{},"outputs":[],"source":["# !sudo mv *.traineddata $tessdata"]},{"cell_type":"markdown","metadata":{},"source":["and remove the downloaded models:\n"]},{"cell_type":"code","execution_count":19,"metadata":{},"outputs":[],"source":["# !rm *.traineddata"]},{"cell_type":"markdown","metadata":{},"source":["Check installed languages\n"]},{"cell_type":"code","execution_count":20,"metadata":{},"outputs":[{"data":{"text/html":["
[\n","    Path('/opt/homebrew/share/tessdata/spa.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/eng.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/jpn_vert.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/spa_old.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/fra.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/jpn.traineddata')\n","]\n","
\n"],"text/plain":["\u001b[1m[\u001b[0m\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/spa.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/eng.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/jpn_vert.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/spa_old.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/fra.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/jpn.traineddata'\u001b[0m\u001b[1m)\u001b[0m\n","\u001b[1m]\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["cprint(list(filter(lambda x: re.match(r'eng|jpn|jpn_vert|fra|spa', x.name), tessdata.ls()))) # type: ignore\n","# cprint(pytesseract.get_languages())\n"]},{"cell_type":"markdown","metadata":{"id":"fErGl5xSZlI5"},"source":["----\n","# Tesseract experiments"]},{"cell_type":"markdown","metadata":{"id":"op24JaWwfsSv"},"source":["## Experiment directory"]},{"cell_type":"markdown","metadata":{"id":"Av0ceG7efw2L"},"source":["Directory where the images reside (`EXP_DIR/source/`), the auxiliary images will be cached (`EXP_DIR/cache/`), and the experiment results will be saved. You can change the default location here.\n"]},{"cell_type":"code","execution_count":21,"metadata":{"id":"jfMv_sdZfwVY"},"outputs":[{"data":{"text/html":["
    Working dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed\n","Experiments dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment\n","
\n"],"text/plain":[" Working dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/\u001b[0m\u001b[95m_testbed\u001b[0m\n","Experiments dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/\u001b[0m\u001b[95mexperiment\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["EXP_DIR = Path('./experiment')\n","cprint(f\"{'Working dir':>15}: {Path('.').resolve()}\\nExperiments dir: {EXP_DIR.resolve()}\")\n"]},{"cell_type":"markdown","metadata":{"id":"z62cVR57ZlI5"},"source":["# Setup ngrok (Colab)"]},{"cell_type":"markdown","metadata":{"id":"qMAn1mOSZlI5"},"source":["The experiments can generate hundreds of images, and maintaining the **PIL** images in memory is not efficient. All the generated images are cached and visualized on demand through a URL pointing to the local cache. This approach prevents the kernel from being overloaded with **PIL** images, with the front-end responsible for fetching the image and the backend web server (not the kernel) for serving the image in another process. This method is quick and efficient. As an added bonus, the saved notebook remains lean and fit; it doesn't store the Base64 versions of all the output cell images.\n","\n","Unfortunately, this approach does not work as is in **Colab**. Google Colab runs on an older Ubuntu 18.04 VM, so all the usual networking challenges with Docker, or whatever VMs Google is using, apply. Google also goes to great lengths to avoid exposing its internal architecture. We have two options:\n","- Let the Jupyter kernel serve the images itself, which is slow and memory-consuming.\n","- Use a tunnel to map localhost (server) to whatever IP and port the front-end (the browser you're currently using) is running on. We can use **ngrok** for this, but *ngrok* is a commercial service that has been abused and now requires confirmation the first time the tunnel connects, which can be inconvenient for the user. 
It also requires the user to open a free account and obtain an auth token.\n","\n","You choose.\n","\n","If the notebook is running in Colab and ngrok has been successfully installed and the tunnel has been created, the default setting is `USE_PIL=False`. You can set the environment variable `USE_PIL=True` to force the use of PIL images, but note that in certain circumstances, Colab will complain because the free tiers are usually memory constrained.\n","\n","If you don't change the default settings and\n","- the notebook is running locally, it'll serve the images directly without any additional setup.\n","- the notebook is running in Colab, it'll serve the images through a web server and ngrok.\n"]},{"cell_type":"code","execution_count":22,"metadata":{"id":"A0il2cCMZlI5"},"outputs":[],"source":["os.environ['USE_TUNNEL'] = 'True' if FC.IN_COLAB else 'False'\n","os.environ['USE_PIL'] = 'True' if FC.IN_COLAB and os.environ['USE_TUNNEL'] == 'False' else 'False'\n"]},{"cell_type":"code","execution_count":23,"metadata":{"id":"Su0YJikGZlI5"},"outputs":[],"source":["SERVER = None\n","if os.environ['USE_PIL'].lower() == 'false' and os.environ['USE_TUNNEL'].lower() == 'true':\n"," import pcleaner._testbed.testbed.web_server as web_server\n","\n"," SERVER = web_server.setup_ngrok(web_server.WebServerBottle, Path(EXP_DIR))\n"]},{"cell_type":"markdown","metadata":{"id":"2ara95FoZlI5"},"source":["Creates the `OCRExperimentContext` object we'll use to manage the experiments.\n"]},{"cell_type":"code","execution_count":24,"metadata":{"id":"qhQ3nY1OhgdS"},"outputs":[{"name":"stdout","output_type":"stream","text":["Current Configuration:\n","\n","Locale: System default\n","Default Profile: Built-in\n","Saved Profiles:\n","- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n","- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n","\n","Profile Editor: cursor\n","Cache Directory: System default\n","Default Torch Model Path: 
/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n","Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n","GUI Theme: System default\n","\n","--------------------\n","\n","Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n","System default cache directory: /Users/vic/Library/Caches/pcleaner\n"]},{"data":{"text/html":["
 config cache_dir: None\n","       model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n","           device: 'mps'\n","
\n"],"text/plain":[" config cache_dir: \u001b[3;35mNone\u001b[0m\n"," model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n"," device: \u001b[32m'mps'\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
        force_PIL: False\n","       use_tunnel: False\n","       server_url: \n","   experiment dir: experiment\n","       source_dir: experiment/source\n","        cache_dir: experiment/cache\n","\n","
\n"],"text/plain":[" force_PIL: \u001b[3;91mFalse\u001b[0m\n"," use_tunnel: \u001b[3;91mFalse\u001b[0m\n"," server_url: \n"," experiment dir: experiment\n"," source_dir: experiment/source\n"," cache_dir: experiment/cache\n","\n"]},"metadata":{},"output_type":"display_data"}],"source":["CONTEXT = OCRExperimentContext('Tesseract', EXP_DIR, server=SERVER)\n","CONTEXT.show()"]},{"cell_type":"markdown","metadata":{"id":"5gEd0ZYOZlI5"},"source":["# Test images\n"]},{"cell_type":"markdown","metadata":{"id":"8ecqlXd_h2yc"},"source":["Copy your images to the source directory:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["cprint((EXP_DIR/'source').resolve())"]},{"cell_type":"markdown","metadata":{},"source":["or download the standard set:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !gdown --id 18TSXLCYAPxAlUsdHmgAe6FZM5d8K6gcT -O experiment.zip"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !unzip -qn experiment.zip -d ."]},{"cell_type":"markdown","metadata":{},"source":["Check the images are in place"]},{"cell_type":"code","execution_count":25,"metadata":{"id":"Ha8wqfTHZlI5"},"outputs":[{"data":{"text/plain":["['00: Action_Comics_1960-01-00_(262).JPG',\n"," '01: Adolf_Cap_01_008.jpg',\n"," '02: Barnaby_v1-028.png',\n"," '03: Barnaby_v1-029.png',\n"," '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n"," '05: Cannon-292.jpg',\n"," '06: Contrato_con_Dios_028.jpg',\n"," '07: Erase_una_vez_en_Francia_02_88.jpg',\n"," '08: FOX_CHILLINTALES_T17_012.jpg',\n"," '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n"," '10: Galactus_12.jpg',\n"," '11: INOUE_KYOUMEN_002.png',\n"," '12: MCCALL_ROBINHOOD_T31_010.jpg',\n"," '13: MCCAY_LITTLENEMO_090.jpg',\n"," '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n"," '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n"," '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n"," '17: 
Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n"," '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n"," '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n"," '20: Strange_Tales_172005.jpg',\n"," '21: Strange_Tales_172021.jpg',\n"," '22: Tarzan_014-21.JPG',\n"," '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n"," '24: Transformers_-_Unicron_000-004.jpg',\n"," '25: Transformers_-_Unicron_000-016.jpg',\n"," '26: WARE_ACME_024.jpg',\n"," '27: Yoko_Tsuno_T01_1972-10.jpg',\n"," '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n"," '29: manga_0033.jpg',\n"," '30: ronson-031.jpg',\n"," '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']"]},"execution_count":25,"metadata":{},"output_type":"execute_result"}],"source":["[f\"{i:02}: {_.name}\" for i,_ in enumerate(CONTEXT.image_paths)]\n"]},{"cell_type":"markdown","metadata":{"id":"VHtWWQnKj2eU"},"source":["----"]},{"cell_type":"code","execution_count":26,"metadata":{"id":"TZIA3E3jZlI5"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7153dba0227b423f99fe3417327523ed","version_major":2,"version_minor":0},"text/plain":["VBox(children=(HTML(value=\""}},"03b66d0eb8b5487b8ad21382b93cc68d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"044d11e5bc4e4f87ad9d099734418899":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"desc
ription_width":""}},"068b8601d7364d29b9598b80aeb782e0":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0c28619fcebc4c58a9a756888add9140":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["model_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_a2410f8101384beda0114d93ac655edb"],"layout":"IPY_MODEL_24e8764bfd294a629d6c3855b4094f8b"}},"0d4014b56e93430e9782502bf29c7c2e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"borde
r":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"initial"}},"0d5a810240d84cc683b42f56b1f4c2d1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0e3ae88bc86f4a31b5aedc1dfd0b1bf2":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_na
me":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"120px"}},"0f1bb59001314b13a091c7e1b331c58b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"reset","disabled":false,"icon":"","layout":"IPY_MODEL_3ab779d3f50a456980527636aa763dba","style":"IPY_MODEL_bb3adfcd8e4a4af29079953bc9220ac1","tooltip":""}},"101d1fc2b6df471ebd6a32a44c8120ed":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"LabelModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_df77be57fe7943298432a759e6ce8416","placeholder":"​","style":"IPY_MODEL_8b38a0bdf80149bba72324232beb14ed","value":"Method:"}},"175e7409479d4f56ba4a4faf4c8bebaa":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state
":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"","description":"","description_tooltip":null,"layout":"IPY_MODEL_3f8e2f6b025e4fcd90ad34c2e99025ae","max":11,"min":0,"orientation":"horizontal","style":"IPY_MODEL_044d11e5bc4e4f87ad9d099734418899","value":11}},"1807cf3a1770434599e993a087293d91":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"18ccc64fc8504bdeb2fb07043688c1c4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"25em"}},"1da81d9a8d7446838b76ae017e794e78":{"model_module":"@jupyter-widgets/base",
"model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"247e8a63e98441f294c1514a43b35e0d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}}
,"24e8764bfd294a629d6c3855b4094f8b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"25cdb8370f70459886e00eeef0f66388":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_247e8a63e98441f294c1514a43b35e0d","placeholder":"​","style":"IPY_MODEL_7a928ad4bd52422b9544d0d312402a5b","value":"Box #1: 
100%"}},"27c90e0d46934e80a38889a0ae18a726":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"31dcaeca8806453ea52292112bb0a4f3":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3588eeb28e024f1ead6cd21f46d65a6c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["method_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_101d1fc2b6df471ebd6a32a44c8120ed","IPY_MODEL_8ca00801b4e54cbd980c998791664f60","IPY_MODEL_ec3c881e1da4414a9c949949
7bc5de74"],"layout":"IPY_MODEL_31dcaeca8806453ea52292112bb0a4f3"}},"39648bdbc39c4c7c827510141663804c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":"visible","width":null}},"3ab779d3f50a456980527636aa763dba":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":"0px 0px 0px 
3em","max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"5em"}},"3cf7099443124d289495da41bfbc8b58":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":"lightblue","font_weight":"bold"}},"3d0ad04ce06b4314b64dcdde269b38e6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"BoundedIntTextModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"BoundedIntTextModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"IntTextView","continuous_update":false,"description":"","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_4aba84bf7b4847fdb1cac927ba6a7ebf","max":14,"min":0,"step":1,"style":"IPY_MODEL_f542dba5c6de48a0a44e698706d87017","value":2}},"3f8e2f6b025e4fcd90ad34c2e99025ae":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_
template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4064335db7684f2f81ad237513e90c34":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137612175420256"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_878477e8708a47e8ab49b00be715d1cd"],"layout":"IPY_MODEL_00878ffbdb234a48b8917563f3058107"}},"44f79f7ea58742a0a770aab41e876c9c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"150px"}},"47b4dcc7177b41d1aa69d3918a6e8598":{"model_module":"@jupyter-wid
gets/output","model_module_version":"1.0.0","model_name":"OutputModel","state":{"_dom_classes":["message_visor-yXy"],"_model_module":"@jupyter-widgets/output","_model_module_version":"1.0.0","_model_name":"OutputModel","_view_count":null,"_view_module":"@jupyter-widgets/output","_view_module_version":"1.0.0","_view_name":"OutputView","layout":"IPY_MODEL_c193eb8285a740208457bbe7985d356f","msg_id":"","outputs":[]}},"48555e1574db42db86f6572e9c888b58":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"VBoxModel","state":{"_dom_classes":["context-visor","137612216365504"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_60e22ee6a0fc428b8e3709e394e44841","IPY_MODEL_7e8086d63d1b4bbf96e4c70c63bc920a"],"layout":"IPY_MODEL_0d5a810240d84cc683b42f56b1f4c2d1"}},"48cfde671ecf42c7a124a94a2962bb66":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137612175420208"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_ad2bbb4b23a648209da1edab0006adae"],"layout":"IPY_MODEL_d6fed923432746c9b3dff00549de596d"}},"496e8548fc0f41b7b0c15c5881596326":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display"
:null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"initial"}},"4aba84bf7b4847fdb1cac927ba6a7ebf":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50px"}},"4f8b904d91eb44028477a6a61c34af54":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"LabelModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_nam
e":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_919da88097344a689a9afb6fa5977b29","placeholder":"​","style":"IPY_MODEL_d0ed39572cea4c37bb07dc99577258e3","value":"Box # (of 15):"}},"57f7d34ddc2b4a938d7fcb4bf066deb0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"57f9b685011043418d9552b3cb48784e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"save","disabled":false,"icon":"","layout":"IPY_MODEL_fbb4d3433c6f42e094934b6936659b8f","style":"IPY_MODEL_3cf7099443124d289495da41bfbc8b58","tooltip":""}},"58b2ec779ab64a2b9e85d57160b453c9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"0px"}},"60e22ee6a0fc428b8e3709e394e44841":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_
style":"","children":["IPY_MODEL_e1de76477d764776ac5a209f7ebcc8d6","IPY_MODEL_48cfde671ecf42c7a124a94a2962bb66","IPY_MODEL_4064335db7684f2f81ad237513e90c34","IPY_MODEL_99bbf4dd65424e87aab67bcf162a8171"],"layout":"IPY_MODEL_eee81f49e99f4cbb8db65b56c511adbd"}},"6a87fbd3fc6440c5a81a564ac94400cc":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"VBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_01cd556393d643f8bb3fd88c8cd7bb84","IPY_MODEL_48555e1574db42db86f6572e9c888b58","IPY_MODEL_47b4dcc7177b41d1aa69d3918a6e8598","IPY_MODEL_dc75396dcc3c4606a8fe56401d29a4e9"],"layout":"IPY_MODEL_778afca8ec0a431ca8670604c10d8956"}},"6eed00e28fae4570b2348330bb9dcbab":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"72440081608048b1b5fc76e42011ad0c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,
"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":"hidden","width":null}},"778afca8ec0a431ca8670604c10d8956":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7a928ad4bd52422b9544d0d312402a5b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7e8086d63d1b4bbf96e4c70c63bc920a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","13761217542102
4"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_d89bf137dd5e4d698dc7f6187f2e148c","IPY_MODEL_3588eeb28e024f1ead6cd21f46d65a6c","IPY_MODEL_b0d6d88a4b824cf9be7c01d5611c3426"],"layout":"IPY_MODEL_39648bdbc39c4c7c827510141663804c"}},"878477e8708a47e8ab49b00be715d1cd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["display_option_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_bd6e62f7db8c40d695d6ca3d93cf7e94"],"layout":"IPY_MODEL_068b8601d7364d29b9598b80aeb782e0"}},"8b38a0bdf80149bba72324232beb14ed":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8ca00801b4e54cbd980c998791664f60":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"CheckboxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"all","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_0d4014b56e93430e9782502bf29c7c2e","style":"IPY_MODEL_58b2ec779ab64a2b9e85d57160b453c9","value":false}},"90f41682d85f42a6becdca99
a59e9a2c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"CheckboxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"all","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_496e8548fc0f41b7b0c15c5881596326","style":"IPY_MODEL_df9bb4a390fa4e6597b832d09949d24e","value":false}},"919da88097344a689a9afb6fa5977b29":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":"0px 0px 0px 
10px","right":null,"top":null,"visibility":null,"width":"initial"}},"929913d45cc9400087eab703338d00f0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"99bbf4dd65424e87aab67bcf162a8171":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137612175414976"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_57f9b685011043418d9552b3cb48784e","IPY_MODEL_0f1bb59001314b13a091c7e1b331c58b"],"layout":"IPY_MODEL_fb08b9a59d6c4e678b368c915bb97405"}},"9a49e8c567b94a64a19dc3eecdf5a857":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_25cdb8370f70459886e00eeef0f66388","IPY_MODEL_175e7409479d4f56ba4a4faf4c8bebaa","IPY_MODEL_c944b093296248b4a9a75a9139d5003d"],"layout":"IPY_MODEL_72440081608048b1b5fc76e42011ad0c"}},"a2410f8101384beda0114d93ac655edb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Tesseract-crop-post","Tesseract-crop
"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":0,"layout":"IPY_MODEL_bfa2f19eedfa4bd99b59d08365ae4676","style":"IPY_MODEL_a7755ebc78f34335a32f75fa342fcd76"}},"a7755ebc78f34335a32f75fa342fcd76":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"ad2bbb4b23a648209da1edab0006adae":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Action_Comics_1960-01-00_(262)","Adolf_Cap_01_008","Barnaby_v1-028","Barnaby_v1-029","Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013","Cannon-292","Contrato_con_Dios_028","Erase_una_vez_en_Francia_02_88","FOX_CHILLINTALES_T17_012","Furari_-_Jiro_Taniguchi_selma_056","Galactus_12","INOUE_KYOUMEN_002","MCCALL_ROBINHOOD_T31_010","MCCAY_LITTLENEMO_090","Mary_Perkins_On_Stage_v2006_1_-_P00068","PIKE_BOYLOVEGIRLS_T41_012","Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1","Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K","Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2","Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024","Strange_Tales_172005","Strange_Tales_172021","Tarzan_014-21","Tintin_21_Les_Bijoux_de_la_Castafiore_page_39","Transformers_-_Unicron_000-004","Transformers_-_Unicron_000-016","WARE_ACME_024","Yoko_Tsuno_T01_1972-10","Your_Name_Another_Side_Earthbound_T02_084","manga_0033","ronson-031","哀心迷図のバベル 第01巻 - 
22002_00_059"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":14,"layout":"IPY_MODEL_18ccc64fc8504bdeb2fb07043688c1c4","style":"IPY_MODEL_6eed00e28fae4570b2348330bb9dcbab"}},"b0d6d88a4b824cf9be7c01d5611c3426":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137612175420544"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e13c49f50ebd4f81a1dafe940ac13a31"],"layout":"IPY_MODEL_1da81d9a8d7446838b76ae017e794e78"}},"bb3adfcd8e4a4af29079953bc9220ac1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":"bold"}},"bd6e62f7db8c40d695d6ca3d93cf7e94":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Boxes","Image","Mask","Image & Mask","Page data","Ground truth","Image All","Results","Best 
results","Accuracy","Dataframe","Config"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":7,"layout":"IPY_MODEL_0e3ae88bc86f4a31b5aedc1dfd0b1bf2","style":"IPY_MODEL_929913d45cc9400087eab703338d00f0"}},"bfa2f19eedfa4bd99b59d08365ae4676":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"fit-content"}},"c193eb8285a740208457bbe7985d356f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"g
rid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c944b093296248b4a9a75a9139d5003d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_dcc0c0ee6a284ecb8c40f06891919805","placeholder":"​","style":"IPY_MODEL_1807cf3a1770434599e993a087293d91","value":" 11/11 [00:04<00:00,  
2.10it/s]"}},"d0ed39572cea4c37bb07dc99577258e3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d6fed923432746c9b3dff00549de596d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d89bf137dd5e4d698dc7f6187f2e148c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["box_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4f8b904d91eb44028477a6a61c34af54","IPY_MODEL_90f41682d85f42a6becdca99a59e9a2c","IPY_MODEL_3d0ad04ce06b4314b64dcd
de269b38e6"],"layout":"IPY_MODEL_eab60b2fe7a84e4b9a913f1450766452"}},"dc75396dcc3c4606a8fe56401d29a4e9":{"model_module":"@jupyter-widgets/output","model_module_version":"1.0.0","model_name":"OutputModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/output","_model_module_version":"1.0.0","_model_name":"OutputModel","_view_count":null,"_view_module":"@jupyter-widgets/output","_view_module_version":"1.0.0","_view_name":"OutputView","layout":"IPY_MODEL_ee44cc3dae5c42299460702dd280dbce","msg_id":"","outputs":[{"data":{"text/html":"
Maybe... but l tn certainly wouldn't feel right holding back tip money!
0.96
\n
\n
Maybe ... but I⎕⎕⎕ certainly wouldn't feel right holding back tip money!

Maybe... but l tn certainly wouldn't feel right holding back tip money!
","text/plain":""},"metadata":{},"output_type":"display_data"}]}},"dcc0c0ee6a284ecb8c40f06891919805":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"df77be57fe7943298432a759e6ce8416":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":
null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":"0px 0px 0px 10px","right":null,"top":null,"visibility":null,"width":"initial"}},"df9bb4a390fa4e6597b832d09949d24e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"0px"}},"e13c49f50ebd4f81a1dafe940ac13a31":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":["wrapper-spinner"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f40fbbd396ac4528a97962ec911d5837","placeholder":"​","style":"IPY_MODEL_03b66d0eb8b5487b8ad21382b93cc68d","value":"\n
\n
\n
\n "}},"e1de76477d764776ac5a209f7ebcc8d6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137612175420016"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0c28619fcebc4c58a9a756888add9140"],"layout":"IPY_MODEL_fa20ebba2bdb493799524bdf780fa86e"}},"e36c1a1c609c425bb76ea83edf910425":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eab60b2fe7a84e4b9a913f1450766452":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":nu
ll,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ec3c881e1da4414a9c9499497bc5de74":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Initial box","Default","Default, grey pad","Padded 4px","Padded 8px","Extracted, init box","Padded 4, extracted","Padded 8, extracted","Padded 8, dilation 1","Pad 8, fract. 0.5","Pad 8, fract. 
0.2"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":1,"layout":"IPY_MODEL_44f79f7ea58742a0a770aab41e876c9c","style":"IPY_MODEL_57f7d34ddc2b4a938d7fcb4bf066deb0"}},"ee44cc3dae5c42299460702dd280dbce":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eee81f49e99f4cbb8db65b56c511adbd":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"gri
d_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f40fbbd396ac4528a97962ec911d5837":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":"none","flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f542dba5c6de48a0a44e698706d87017":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"fa20ebba2bdb493799524bdf780fa86e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@j
upyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fb08b9a59d6c4e678b368c915bb97405":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fbb4d3433c6f42e094934b6936659b8f":{"model_module":"@jupyter-widgets/base","model_mod
ule_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"4em"}}}}},"nbformat":4,"nbformat_minor":0} From 24412a8a1472714282c97f6dc2aab0c2dec17125 Mon Sep 17 00:00:00 2001 From: Spikey Date: Sat, 1 Jun 2024 16:16:35 +0200 Subject: [PATCH 26/27] fix a bug setting incorrect path --- pcleaner/_testbed/test_idefics.ipynb | 9610 +++++++++++++++++++++++- pcleaner/_testbed/test_paligemma.ipynb | 5151 ++++++++++++- pcleaner/_testbed/test_tesseract.ipynb | 3737 ++++++++- 3 files changed, 18495 insertions(+), 3 deletions(-) diff --git a/pcleaner/_testbed/test_idefics.ipynb b/pcleaner/_testbed/test_idefics.ipynb index 9a6b1ea6..fc16b1d1 100644 --- a/pcleaner/_testbed/test_idefics.ipynb +++ b/pcleaner/_testbed/test_idefics.ipynb @@ -1 +1,9609 @@ -{"cells":[{"cell_type":"markdown","metadata":{"id":"FO-koL4wUMUg"},"source":["# Testing `Idefics` OCR for Comics\n","> Accuracy Enhancements for OCR in `PanelCleaner`\n"]},{"cell_type":"markdown","metadata":{"id":"WDMB4HFe39wf"},"source":["## Settings for Google Colab"]},{"cell_type":"markdown","metadata":{"id":"Mox-OHw539wf"},"source":["To efficiently manage the 
image sources for our experiments, we recommend mounting your Google Drive and storing the experiment files there. If you are not familiar with Colab or Jupyter environments, it's best to leave these settings at their default values to ensure smooth operation.\n","\n","- Set `MOUNT_DRIVE` to `True` to enable mounting Google Drive in the Colab environment.\n","This allows the notebook to access files stored in your Google Drive.\n","\n","- `GDRIVE_MOUNT_POINT` specifies the local directory in Colab where your Google Drive will be mounted.\n","This acts as the root directory for accessing any files within your Google Drive from the notebook.\n","\n","- `PANELCLEANER_IN_GDRIVE` specifies the path within your Google Drive where the PanelCleaner project is located.\n","This path is used to access or store any files related to the PanelCleaner project directly from Google Drive.\n"]},{"cell_type":"code","execution_count":1,"metadata":{"executionInfo":{"elapsed":740,"status":"ok","timestamp":1717163941428,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"_ItAMB1U39wg"},"outputs":[],"source":["MOUNT_DRIVE = True\n","GDRIVE_MOUNT_POINT = 'drive'\n","PANELCLEANER_IN_GDRIVE = 'MyDrive/Shared/PanelCleaner'"]},{"cell_type":"markdown","metadata":{"id":"OcFWv_IN39wg"},"source":["# install (Colab)\n"]},{"cell_type":"code","execution_count":2,"metadata":{"executionInfo":{"elapsed":361,"status":"ok","timestamp":1717163944019,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"FsMCNzHZ39wg"},"outputs":[],"source":["import fastcore.all as FC\n","import os\n","import re\n","import sys\n","from pathlib import Path\n","\n","from rich import print as cprint\n","from rich.text import Text\n","\n","def info(msg: str):\n"," (t := Text(msg)).stylize(\"bold red\", 0, 6)\n"," cprint(\"_\" * 10, t, \"_\" * 10)\n"]},{"cell_type":"markdown","metadata":{"id":"RkLIZZbo39wg"},"source":["Mount Google 
Drive"]},{"cell_type":"code","execution_count":3,"metadata":{"executionInfo":{"elapsed":2,"status":"ok","timestamp":1717163947918,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"qoUI5zQe39wg"},"outputs":[],"source":["mnt_point = Path(f\"/content/{GDRIVE_MOUNT_POINT}\")\n","if FC.IN_COLAB:\n"," if MOUNT_DRIVE:\n"," if not mnt_point.exists():\n"," info(\"Mounting Google Drive\")\n"," from google.colab import drive\n"," drive.mount(str(mnt_point), force_remount=True)\n"]},{"cell_type":"markdown","metadata":{"id":"AWZnedVS39wg"},"source":["### Install **PanelCleaner**\n","\n","> We will attempt to use the version of **PanelCleaner** stored in your Google Drive. If it's not available, we'll install it from GitHub.\n","\n","Note that we specifically require the `testbed` branch of the **PanelCleaner** repository, not the main trunk. This branch contains necessary configurations and experimental features that are crucial for the tests conducted in this notebook."]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":224},"executionInfo":{"elapsed":12781,"status":"ok","timestamp":1717163977869,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"5VSxqsyK39wg","outputId":"049bf207-b965-499b-8932-ac3044a01621"},"outputs":[],"source":["if FC.IN_COLAB:\n"," pc_path = mnt_point/PANELCLEANER_IN_GDRIVE\n"," tb_path = pc_path/'pcleaner/_testbed'\n"," if tb_path.exists():\n"," info('Installing PanelCleaner from your Google Drive')\n"," else:\n"," info('Installing PanelCleaner from GitHub')\n"," !git clone -b testbed https://github.com/civvic/PanelCleaner.git\n"," tb_path = Path('PanelCleaner/pcleaner/_testbed')\n"," assert tb_path.exists(), \"PanelCleaner not found\"\n"," os.chdir(tb_path)\n"," sys.path.append(f\"{pc_path}\")\n"," sys.path.append(f\"{tb_path}\")\n"," !pip install -q -r 
requirements-colab.txt\n"]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":244},"executionInfo":{"elapsed":88801,"status":"ok","timestamp":1717162885976,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"lxwhxBydCiDj","outputId":"48d07921-9afe-46df-a468-e3e43b96f303"},"outputs":[{"data":{"text/plain":["'4.42.0.dev0'"]},"execution_count":5,"metadata":{},"output_type":"execute_result"}],"source":["if FC.IN_COLAB:\n"," !pip install -q flash-attn --no-build-isolation\n"," !pip install -q transformers accelerate datasets peft bitsandbytes\n","\n","import transformers\n","transformers.__version__"]},{"cell_type":"markdown","metadata":{"id":"gLER7Z0u39wh"},"source":["# Prologue"]},{"cell_type":"code","execution_count":6,"metadata":{"executionInfo":{"elapsed":28634,"status":"ok","timestamp":1717162918596,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"q_7GYpnWUMUl"},"outputs":[],"source":["from testbed.experiments import ExperimentsVisor, CropMethod, OCRExperimentContext\n","from testbed.ocr_idefics import IdeficsOCR, get_gpu_vram\n","from testbed.helpers import IN_MAC, IN_LINUX\n"]},{"cell_type":"code","execution_count":7,"metadata":{"executionInfo":{"elapsed":13,"status":"ok","timestamp":1717162918597,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"cQ0R9v8r39wh"},"outputs":[],"source":["if IN_MAC:\n"," !pip install -q mlx_vlm\n","\n"," import mlx.core as mx\n"]},{"cell_type":"markdown","metadata":{"id":"CAHWMrX339wh"},"source":["# GPU"]},{"cell_type":"code","execution_count":8,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":396},"executionInfo":{"elapsed":12,"status":"ok","timestamp":1717162918597,"user":{"displayName":"Vicente 
Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"f8XHRVRG39wh","outputId":"5f2d295c-1a32-40af-b964-20b73d546458"},"outputs":[{"data":{"text/html":["
          metal.is_available(): True\n","           metal.device_info(): {'memory_size': 68719476736, 'max_recommended_working_set_size': 51539607552, \n","'max_buffer_length': 38654705664, 'architecture': 'applegpu_g13s'}\n","     metal.get_active_memory(): 0\n","       metal.get_peak_memory(): 0\n","      metal.get_cache_memory(): 0\n","\n","
\n"],"text/plain":[" \u001b[1;35mmetal.is_available\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[3;92mTrue\u001b[0m\n"," \u001b[1;35mmetal.device_info\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'memory_size'\u001b[0m: \u001b[1;36m68719476736\u001b[0m, \u001b[32m'max_recommended_working_set_size'\u001b[0m: \u001b[1;36m51539607552\u001b[0m, \n","\u001b[32m'max_buffer_length'\u001b[0m: \u001b[1;36m38654705664\u001b[0m, \u001b[32m'architecture'\u001b[0m: \u001b[32m'applegpu_g13s'\u001b[0m\u001b[1m}\u001b[0m\n"," \u001b[1;35mmetal.get_active_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n"," \u001b[1;35mmetal.get_peak_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n"," \u001b[1;35mmetal.get_cache_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n","\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
     total VRAM: 65536 MiB\n","    active VRAM: 0 MiB\n","
\n"],"text/plain":[" total VRAM: \u001b[1;36m65536\u001b[0m MiB\n"," active VRAM: \u001b[1;36m0\u001b[0m MiB\n"]},"metadata":{},"output_type":"display_data"}],"source":["if IN_MAC:\n"," gpu_name = mx.metal.device_info()['architecture']\n"," cprint(\n"," f\"{'metal.is_available()':>30}: {mx.metal.is_available()}\\n\"\n"," f\"{'metal.device_info()':>30}: {mx.metal.device_info()}\\n\"\n"," f\"{'metal.get_active_memory()':>30}: {mx.metal.get_active_memory()//1024//1024}\\n\"\n"," f\"{'metal.get_peak_memory()':>30}: {mx.metal.get_peak_memory()//1024//1024}\\n\"\n"," f\"{'metal.get_cache_memory()':>30}: {mx.metal.get_cache_memory()//1024//1024}\\n\"\n"," )\n","else:\n"," !nvidia-smi\n"," import subprocess\n"," gpu_name = subprocess.check_output(\n"," \"nvidia-smi --query-gpu=gpu_name --format=csv,noheader\", shell=True\n"," ).decode('utf-8').strip()\n"," \n","\n","cprint( f\"{'GPU':>15}: {gpu_name}\\n\"\n"," f\"{'total VRAM':>15}: {get_gpu_vram()} MiB\\n\"\n"," f\"{'active VRAM':>15}: {get_gpu_vram(False)} MiB\")\n","\n"]},{"cell_type":"markdown","metadata":{"id":"Op4kiDaAUMUl"},"source":["----\n","# Idefics experiments"]},{"cell_type":"markdown","metadata":{"id":"q8q3vnLH39wh"},"source":["## Experiment directory"]},{"cell_type":"markdown","metadata":{"id":"6AB4w9C-39wh"},"source":["Directory where the images reside (`EXP_DIR/source/`), the auxiliary images will be cached (`EXP_DIR/cache/`), and the experiment results will be saved. You can change the default location here.\n","\n","NOTE: the default value assumes we are currently inside `PanelCleaner/pcleaner/_testbed` directory. 
You can check that this is the case with `Path('.').resolve()`."]},{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":49},"executionInfo":{"elapsed":652,"status":"ok","timestamp":1717162939430,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"poPksk6739wh","outputId":"1452cde6-a74a-447c-eec2-3049bd2f8f19"},"outputs":[{"data":{"text/html":["
    Working dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed\n","Experiments dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment\n","
\n"],"text/plain":[" Working dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/\u001b[0m\u001b[95m_testbed\u001b[0m\n","Experiments dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/\u001b[0m\u001b[95mexperiment\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["EXP_DIR = Path('./experiment')\n","cprint(f\"{'Working dir':>15}: {Path('.').resolve()}\\nExperiments dir: {EXP_DIR.resolve()}\")\n"]},{"cell_type":"markdown","metadata":{},"source":["# Test images\n"]},{"cell_type":"markdown","metadata":{},"source":["Copy your images to the source directory:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"data":{"text/html":["
/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/source\n","
\n"],"text/plain":["\u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/\u001b[0m\u001b[95msource\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["cprint((EXP_DIR/'source').resolve())"]},{"cell_type":"markdown","metadata":{},"source":["or download the standard set:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !gdown --id 1MCqUImwFS5iQ271CD9_t2FSugJXdYj0a -O experiment.zip"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !unzip -qn experiment.zip -d ."]},{"cell_type":"markdown","metadata":{"id":"8Iondm2oUMUl"},"source":["# Setup ngrok (Colab)"]},{"cell_type":"markdown","metadata":{"id":"nuHPp1U7UMUl"},"source":["The experiments can generate hundreds of images, and maintaining the **PIL** images in memory is not efficient. All the generated images are cached and visualized on demand through a URL pointing to the local cache. This approach prevents the kernel from being overloaded with **PIL** images, with the front-end responsible for fetching the image and the backend web server (not the kernel) for serving the image in another process. This method is quick and efficient. As an added bonus, the saved notebook remains lean and fit; it doesn't store the Base64 versions of all the output cell images.\n","\n","Unfortunately, this approach does not work as is in **Colab**. Google Colab runs on an older Ubuntu 18.04 VM, so all the usual networking challenges with Docker, or whatever VMs Google is using, apply. Google also goes to great lengths to avoid exposing its internal architecture. We have two options:\n","- Let the Jupyter kernel serve the images itself, which is slow and memory-consuming.\n","- Use a tunnel to map localhost (server) to whatever IP and port the front-end (the browser you're currently using) is running on. 
We can use **ngrok** for this, but *ngrok* is a commercial service that has been abused and now requires confirmation the first time the tunnel connects, which can be inconvenient for the user. It also requires the user to open a free account and obtain an auth token.\n","\n","You choose.\n","\n","If the notebook is running in Colab and ngrok has been successfully installed and the tunnel has been created, the default setting is USE_PIL=False. You can set the environment variable USE_PIL=True to force the use of PIL images, but note that in certain circumstances, Colab will complain because the free tiers are usually memory constrained.\n","\n","If you don't change the default settings:\n","- the notebook is running locally, it'll serve the images directly without any additional setup.\n","- the notebook is running in Colab, it'll serve the images through a web server and ngrok.\n"]},{"cell_type":"code","execution_count":10,"metadata":{"executionInfo":{"elapsed":425,"status":"ok","timestamp":1717162947685,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"nVAga31l39wh"},"outputs":[],"source":["if FC.IN_COLAB:\n"," os.environ['USE_TUNNEL'] = 'True'\n"," os.environ['USE_PIL'] = 'False'\n"]},{"cell_type":"code","execution_count":11,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":184},"executionInfo":{"elapsed":8015,"status":"ok","timestamp":1717162983857,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"tG8xtrBb39wh","outputId":"c2e625b8-0f78-44d0-af3a-ca6895a7a340"},"outputs":[],"source":["SERVER = None\n","if os.environ['USE_PIL'].lower() == 'false' and os.environ['USE_TUNNEL'].lower() == 'true':\n"," import testbed.web_server as web_server\n"," SERVER = web_server.setup_ngrok(web_server.WebServerBottle, Path(EXP_DIR))\n"]},{"cell_type":"markdown","metadata":{"id":"p5YiBbJ739wh"},"source":["# 
CONTEXT"]},{"cell_type":"markdown","metadata":{"id":"1yjjtdjJ39wi"},"source":["| quant, attn \\ platform | Mem | Mac | Linux | Windows | Colab T4 | Colab L4/H100 |\n","| --- | --- | --- | --- | --- | --- | --- |\n","| **float16** | 17 GB | ✅ | ✅ | ? | ❌ | ✅ |\n","| **float16 + attn** | 17 GB | ❌ | ✅ | ? | ❌ | ✅ |\n","| **8bit** | 10 GB | ✅ | ✅ | ? | ✅ | ✅ |\n","| **8bit + attn** | 10 GB | ❌ | ✅ | ? | ❌ | ✅ |\n","| **4bit** | 6 GB | ✅ | ✅ | ? | ✅ | ✅ |\n","| **4bit + attn** | 6 GB | ❌ | ✅ | ? | ❌ | ✅ |\n"]},{"cell_type":"markdown","metadata":{"id":"AUgSvi6CUMUm"},"source":["Creates the `IdeficsExperimentContext` object we'll use to manage the experiments.\n"]},{"cell_type":"code","execution_count":15,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":779},"executionInfo":{"elapsed":43567,"status":"ok","timestamp":1717163091060,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"wroznCt0CQdT","outputId":"0d259b2d-b20a-44cc-a0be-1198ad570b46"},"outputs":[{"name":"stdout","output_type":"stream","text":["Current Configuration:\n","\n","Locale: System default\n","Default Profile: Built-in\n","Saved Profiles:\n","- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n","- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n","\n","Profile Editor: cursor\n","Cache Directory: System default\n","Default Torch Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n","Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n","GUI Theme: System default\n","\n","--------------------\n","\n","Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n","System default cache directory: /Users/vic/Library/Caches/pcleaner\n"]},{"data":{"text/html":["
 config cache_dir: None\n","       model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n","           device: 'mps'\n","
\n"],"text/plain":[" config cache_dir: \u001b[3;35mNone\u001b[0m\n"," model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n"," device: \u001b[32m'mps'\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
        force_PIL: False\n","       use_tunnel: False\n","       server_url: \n","   experiment dir: experiment\n","       source_dir: experiment/source\n","        cache_dir: experiment/cache\n","\n","
\n"],"text/plain":[" force_PIL: \u001b[3;91mFalse\u001b[0m\n"," use_tunnel: \u001b[3;91mFalse\u001b[0m\n"," server_url: \n"," experiment dir: experiment\n"," source_dir: experiment/source\n"," cache_dir: experiment/cache\n","\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
Experiment runs:\n","Idefics-crop-post: 16\n","
\n"],"text/plain":["Experiment runs:\n","Idefics-crop-post: \u001b[1;36m16\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
     Quantization: '4bit'\n","Flash attention 2: N/A\n","             VRAM: 0/65536 MiB\n","\n","
\n"],"text/plain":[" Quantization: \u001b[32m'4bit'\u001b[0m\n","Flash attention \u001b[1;36m2\u001b[0m: N/A\n"," VRAM: \u001b[1;36m0\u001b[0m/\u001b[1;36m65536\u001b[0m MiB\n","\n"]},"metadata":{},"output_type":"display_data"}],"source":["quant = '4bit' if IN_MAC or FC.IN_COLAB else 'float16'\n","flashattn = True if not FC.IN_COLAB else False\n","CONTEXT = OCRExperimentContext('Idefics', EXP_DIR, \n"," quant=quant, flashattn=flashattn, \n"," server=SERVER, run_name='Idefics-crop-post', load=True)\n","CONTEXT.show()\n"]},{"cell_type":"code","execution_count":16,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"a93af477ed4c4af8aba4d97397009adc","version_major":2,"version_minor":0},"text/plain":["Fetching 11 files: 0%| | 0/11 [00:00. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n","Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"0f89471c58b94c2db7ffab9143b26b6b","version_major":2,"version_minor":0},"text/plain":["Fetching 11 files: 0%| | 0/11 [00:00 Quantization: '4bit'\n","Flash attention 2: N/A\n"," VRAM: 5261/65536 MiB\n","\n","\n"],"text/plain":[" Quantization: \u001b[32m'4bit'\u001b[0m\n","Flash attention \u001b[1;36m2\u001b[0m: N/A\n"," VRAM: \u001b[1;36m5261\u001b[0m/\u001b[1;36m65536\u001b[0m MiB\n","\n"]},"metadata":{},"output_type":"display_data"}],"source":["ocr_model = CONTEXT.setup_ocr_model(False)\n","ocr_model.show_info()"]},{"cell_type":"markdown","metadata":{"id":"NnKaY8er39wi"},"source":["Check the images are in 
place"]},{"cell_type":"code","execution_count":17,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":319,"status":"ok","timestamp":1717163095107,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"ZXL84T8dUMUm","outputId":"12d640aa-69d7-46aa-c2c8-9d400cccb701"},"outputs":[{"data":{"text/plain":["['00: Action_Comics_1960-01-00_(262).JPG',\n"," '01: Adolf_Cap_01_008.jpg',\n"," '02: Barnaby_v1-028.png',\n"," '03: Barnaby_v1-029.png',\n"," '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n"," '05: Cannon-292.jpg',\n"," '06: Contrato_con_Dios_028.jpg',\n"," '07: Erase_una_vez_en_Francia_02_88.jpg',\n"," '08: FOX_CHILLINTALES_T17_012.jpg',\n"," '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n"," '10: Galactus_12.jpg',\n"," '11: INOUE_KYOUMEN_002.png',\n"," '12: MCCALL_ROBINHOOD_T31_010.jpg',\n"," '13: MCCAY_LITTLENEMO_090.jpg',\n"," '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n"," '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n"," '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n"," '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n"," '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n"," '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n"," '20: Strange_Tales_172005.jpg',\n"," '21: Strange_Tales_172021.jpg',\n"," '22: Tarzan_014-21.JPG',\n"," '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n"," '24: Transformers_-_Unicron_000-004.jpg',\n"," '25: Transformers_-_Unicron_000-016.jpg',\n"," '26: WARE_ACME_024.jpg',\n"," '27: Yoko_Tsuno_T01_1972-10.jpg',\n"," '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n"," '29: manga_0033.jpg',\n"," '30: ronson-031.jpg',\n"," '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']"]},"execution_count":17,"metadata":{},"output_type":"execute_result"}],"source":["[f\"{i:02}: {_.name}\" for i,_ in 
enumerate(CONTEXT.image_paths)]\n"]},{"cell_type":"code","execution_count":20,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":255,"referenced_widgets":["41e7d3d5da8843bf97d53a5acc2fdd9c","def94ca8125e4128901b038c5e2e135f","a5b8058abf214b46a3dbc121c285cca7","7792b1aed8724a98817331a5610f9909","4c558212f11743be83311ff10e506255","db4a37385bc04a27ba786692ff23f32c","5cc2cd6464854d1996f83b6cccb71be6","86692867f33a4e2885722f8a9efaa144","57b0595135a84b04bf1c040150132fca","f9feda61c229465da90f33c69e7a4398","2b2f189c18e0494294ad7dc5e6b5a7ce","0eb627a4cba9497f8ea6833ac8080cf3","93acb5ead7244f2b8cb7ddb3db658dc1","827cc40c923e4081bfe56231f386d855","022996922e9d40919a086ce15625e4f3","74e995f849ec449fb5fd50166397897a","c2494c8753e9409aa5e6d8862be5c773","0e112f697e7d4a34826d2a46a067c14e","ee91033cfb084d4e83529a11edb48134","f5e02d40775a49a5b895d498233464f9","cd03b8f4a3144660a66bfb8361433973","de48c263fb124df9b7bf72805e1c1baa","3e76e6f2463549ad91832afd9da60672","cc0369009fb04a0b83a10510d7ac0388","44f0c3de011d47ccab46270d16c690b7","b2e63ba8c2f149d38e8c7722bc47ef07","0f9ca91ec321450f9ce53a84f2459f86","113566b1f4b74279a27f0ab51cffb683","4b1fe42380a1464180f548aaccb220ba","44e2a19f9dcb450cbca016e7b3e9c79d","7a75478806bf465cabc8d1429030f615","fb224135f5a14ba4872e0e64428d60b6","3fb8a4f0b2444c28a082fcbb278979cc","23f2673b6b2f40f19f0d4b1e1d5c6435","c54baefce7804a0eb8729ba88a22f713","af23aff11ddd45b9abf16daa53884c5f","029cc314dfba464b8c91194ee41bb604","76c165d73f574ab39d3357e87b401ac9","68d09a9f2fc84ead952160fe05c5dc90","74d438026f47463f8aa440eb39a58369","a80659636b7f4aa4aeaea4c1149620ea","86315ef030234020a49cfb2275e4ee53","a863d10b9ee947338b73f542c350efe4","ffeb080904984371bb8b715963c00069","f8bc3fb84517404b94502e7498d5c55a","3e1245f557d54e999d58c858f664a0a7","74d88ac2166a407ca2fb1aaf57006018","27e1a3f6033f4ee0aea9bf5726eb78cc","2348e08910b149048c90269942321d57","e6fac459206848898c206456b9dc8b1a","bfc1517ef24c459aae0a536c315ddad1","b9b462859a1846e2b2fdc625f6904506","167be8a9
44f0447c9c66fe8a485d309e","11c9edbf48f445d09d85f849915496e2","9bb8a92858144190b6dd506673234315","d0f9bc72d3054c45ba0c12d7839e994b","00120cae44354b1397eabc117243f609","5bdcbdc274d14afa91406ca177fe66b6","a478421e36a344eca8cf732ea07241dd","8470bcc137ba4295996a4ef0a2e8bf9c","5cc12b33d27a4715a995173135478c9c","fd00985eb51141799c4ad95c99461926","4fd269b8015f4be783474e835ff8a52e","decf6497acee49faae554f3db39be3ce","8aef4716537e417ea4203a68711cb4e9","bd64ee547bd049398c6d37f1ca476461","5da238d5b8514352a70b6b16cc9d944f","e74be1cd463d42a783ddea46072f4499","a6072dc7d6294061b840144104e07538","f1513571ca224451a3809b56a5739525","e7caf68af3c84c6195483d0a27225164","f6c25f96ead14b13a1a716a6de4e3f5c","4a51933821d045cd881418ff2a2b30e7","1d1a5dd1cc1d44be948464159187e856","d418846006b8496989e86276d8538b50","2ad60e460fe64f0087cf3052f06451ba","6c0b5365fd624af793b4d37f7449a7a5","9590087714614e948f3a8e4dd537a895","24551ce1bc1c4cdab44c25ffd783216e","6aa5fe05ab024eb0bd2b3da7881949f2","8758c30979a2414aac0d1583ab856acf","5768ef94421043d4b87c3a8f36b2fc88","950f956a453648ddbe66fa8b7f484115","1d40ec2dce6d43ae9ed7b02ca6ee5ade","c160c32bb63b405c81788dd2c49ed74a","d142fa6d7f1f452185080e55b24fef2c","67a75a653d0948ac92b0e30a8057f97f","08e41a3312ec4dc698c9677f13493dec","b8b0508e598c461195fdff01e45cefea","dc4134ab8bc54c9084238bce0d63dedd","d18de743a82d421b95ef67cce0cf8300","65d59de757ae4440b71dafd4fbe1ef0f","d265b8b4b76641599b70674b3b6843c2","f58a0d2e63aa456db54e8b34185a9e19","5bee9b83cf53437da9f86ca3e8c28ec8","feb6497eb22b418b89260242009018b1","fcebd41dea6a49d786827eb30e6202f0","cf9218e035054fe49188325d9173124a","ca76a74d6b204b4cae1f1d15a928782f","4fe39d4b2ce74d529df2c59c9b02b468","14ff55af3b4b4f19be5ecbdd7fdbb4b3","f937e91a44ba4ba29379c6cd98c7b06e","69c93310236e407c9ec47efba5949c2c","a9292503813741b096f1627ac629afcf","31b1c0b2c88244b6a1144b5b9bfd6ff2","4377ad0c4af348689bf257dbc1e45d0f","15beb9c45e974b8b9c7928f7dee85721","00d90919857a4182b41f71e4e029b8c9","a0bfaef233fc485088fd5f1069f0e7b5","91f9cb7da8554
114a0fdccf7125323f8","afa6b2be45404268bbeae05757c38ea7","c707c0173ba64f278d29d8f91c5ebe72","f84e4be232c7471e8d7c32463d1e69f4","0ec87ae9355747b381f3a3c4759e30a1","7ff52331ff05443c885358a2bc91e45a","2e7dde21eddb461d94120cc0bc8b635d","acd43b8610784095ba13c79a9a8b66b3","94107ffa62c74aa9a7c1c6e9e0926534","aa8727521af944b990cd47bd7f624875","c49d62b351924bac9b758ed566642bc9","03ca2146c0a249e98891c826dc0c7166","3e67ecba11ed494eb5d04ac09aaf2982","cda2a733038b4a2d8d9126478f7c0ae2","856b386e9e354da6985976453f3146df","ae912f1a78a541d58364872d548fb113","1d92eaf91d064d7799cdbdce29cb4a80","8a4eec25ed2f4d888751714385bffb48","23683387f91d412ebcc1c6bda48f36c4","72ed4329ca9140078197aee662b9fe9c","cdbd26ce916d4aefa6bc2de33451b894","d81b6b93bf85468da26dd410d1fbf9ae","926cfc439c6f42c7bc7874e2abd27b00","925d430a70474c2bb645ab1c9635a3d8","1696aa6d01824c8da87bd6c3c99d9338","28e884495ed9480dbf42901aee98c8da","5cb8ef2c547741c782e0b87b44e88883","dfc87068738d4a51aa4582d66655ed5d","65c5547ef83346a3a7fe89350fd0498c","cc4dafcc559147f39d31c93d7bacead0","5ebed15f4ad74010a9dcc1aee2884acb","1d243bb88a024f5cad553638cb66b58f","00358206e8e44b7f93e33a68c77ed88e","1d01c3b7a45443219a40b6fc175bea93","e9638ccc83e64995b8d08950b5d4177c","a38abadac68d4e909589ede57054c9b3","9f407f26d4494b81acf8a2b3d22a4db0","047b24fa06104e88a4649b55d0704bba","e2ba9bdc71e342eea3fe5f0b2d50cad7","768d6cdbca5b4fd081c3c0b665af29e6","6d67906818de434193c0d07dffc16ca3","2d8e2453d52148de9e6f0af80025d277","beae34e9850d41f2928135a17bb7e24e","9ab61a0b2be94ab4b07712847603099f","18ff5098c2b0481e8122bafbb6773254","0adde2a598044d99b27e88632aa79204","18cd166a13cd40409789425e6ad8b1cf","262c89785cf747f8b2fabb01df958f46","481aca32d2314b71b8e2fe34fd5c2752","52bdbb92c8da4412944a3130a6739ff6","277c284a9a6f469285a8cd4adafeac48","c279c338d2ac48d1a905625818533a46","9e8c07a31ffe42f99b3c9234a53fa6dd","0a19274b68ec450895fd7f92941a2d90","4598f6ae49f94d639503175f378f011e","41fe52cfcadb4fcfb708f7f5e7c027ff","330cc931269846f2b6dc82c67fb2251b","c5d36dc5c318484db7
e0c8a078aa175b","c159c8f334a64512a6d28efecaea2a10","479dc71849a4477183258d7ef9c0d4c2","d601d39013c24a5db643a09fe3fbf3af","4356a69037274d4288ab9cb2aaf72aef","747ff338df744bb2b4ea31ec325461bb","62416330d58b40d4b5d7cef51dff2245","bbdba0ab16184329bd1479a315c5946e","66b382b44b994abd9279d5edb90f86a1","a4016511fd92466e999483f4314d0a86","262eb29055f44a6dad1a9ad9d2d06ea0","57677aa2aee34f8b831622091eccdc42","6fe70dc2c58c4f34be8f25e507a203fa","cfcf8add8ce44707b89b97b291819b2e","da875aa1f4af4128b1c42bc30c79a8a8","d8d9489d104747d0aeaceab091a5dbba","b0bf5147c56b4a9e829fe1d7016a8971","06127d2f58d14e9b834f9c42fb6ccdee","1491cc4eedf14d788cc7a1750c84021e","ebae562970a84eac8ba32466a405aa06","acae4f3d583f4bb3bc7dc118ce63ae52","fc6764a48f134c7c863b83ac060a7c4e","b916a63767b44a7f9af4c4fd2d9f9118","44e7eca8403f42eab30a12cb6c4ec177","cad18185ae304fda8d114efce29f8b2b","022a6ebb208c40a38d991a7f48cf8199","5a12c2fbe4f949ec931de9e0ccb035ee","1cac8099d5a84d24bd968b0df0293e72","61e8b3559eb8464b9d3544921f9f716d","04fb26ceffe54dffb6b0f9864a3ada8a","83e6807d435d458ab54317a00543458a","383311a4464e4759972acb80200e05f3","aa8959e5f31a4e2a917849ff38e2d54a","cfad9960dbe7451db558e5d23edd4f87","7c0dbe672a904b7486a3274e87b42588","6f57b2e373e84323a7a0a2052c8a37db","418e999ec09c434ab6ac89e3b4b11365","4359515725fb4bfc93a8ac6ca824c6bd","7a6ff42ed0954523a58a579968191cca","f4f7b2ccfbd94a30a1f7a413c159ef07","7048ae3da65443a1a6996d937af1fec8","b74df493ed504a6aa9e2dd4c4bc48c56","0606d53650544db098d23fdf417584ad","fc986695bc55477099e8df7f9a0022cd","bf2ea05486a644e8ad8166ba0a9ff0a4","8c77254960fc4ed59a0431bb9f43b9dd","adda7298580c4bffb8d3c5e272e8eb7f","b9972d7b5b614566be7408889f1893bf","e364391a3e1d41d899735458a2b01436","7c30a1a41d3747e0b87625488e29c1b3","25cebb1840ea4239983838fdf2e5c84d","26deaa27cdea4addb812a67b4a0aae7a","bd6f1373744c49aa9ad9947714998e5d","83c2db21b673401fb11b45dde165fa14","96f3c7a52a37470a98de481a7ea343ca","eae063c3c5f54c86aee0b68d31f721ff","2472a63aefde4dc1bb199ab9847ca993","c4c988f7315a4734b4b4efc
d378d2fe0","a05a89a8727a486792504d208aa43034","2c62ea060f2448ab928eb2b75897a227","b67a72677c674923b15e568e6fcdeb5c","9875ff389f3d46268417e9b53fbceaf5","d6a71cec5ffe4ba5992636efad52b8fe","62ec88a9e4e549729c5531c2c84ae21d","2c3264065ce141ab9f4c02ca29538962","37675716244d46fcb115ba1d8c500d3f","e37b7be566694f66bd5aa6e82572a2e7","b9fa62d6dd5e4e67921be98817cb00e2","9d210efdd32a4aa8a2b084cb9790ed48","3041f272c57c4038b11806de84232840","613b52321ad543d3b1f67c25642edf54","61a3f504ff5645db9d499f8f2ce0cb37","e1dcbdbabc40469793a494f59318b088","b5fe8a048b9a4db3bcfa77c81dfa8b0c","a4a7b41a2d6d4836b1d5b7c46d214a27","ab1faf852db24d418b647d44305f9998","f9b21842783e489d99ad9d25cd7d0914","5674ed00bf624d4fa0180ca9d855a797","66d2a279e9704c4cb03e51791814387e","a24e83fa73e24808a1b39f3f3d29e7ac","2d058a65cfdf4c628a38bd34eed8a8a8","934d301cd7d94a7597047efd861ceb66","113f7abf92c04d61aeb27a53796dddde","357d2c73ff2346a2a04ec63f11796048","7b065f9d0a454070b32e2f539f94769e","6cd377ac03004bf3b12fed2c9ec72aa9","010d7b561b1541e680b2a93007ec8bc5","73b6e040d91845c8ba2d220b53f93c09","ff88a16ea32e4df4b3abce055f6e2bc2","d71eb0b4940b4708a51b90af9e746db7","66f62676485e444db8fb34d870047708","3eb2ad0ee975499691c2655bdb9d90e5","1a7e467e1e7d4adf9c04217b09a54173","4e5a20cc1ed24d5d89305329da2b48e1","6f67bfe2f79c424fafbe0f85e40c1642","f17c2e4e8b2045e9930d9f2d73bf2ac7","cce621f14c99412ea7b92441fd4f3223","2c3a7c4e59e64433be763bcabd340df3","ae6d816055bd43a4a01cb54b8fb84486","598b5756615241c298f8f44d1e3b3f4f","d0567212f6824f5daec8a14d4c7bb0c7","aafbd97ba12d40e4affc97c378ae165e","798fe48e8b184a4ab35f56fca750dcbe","4c7342561d52489581e643f1d3121eb8","777aaee7cfdf4afd974983fdbd37ac8a","95af7b96533b47a7be57deb9cd5b0fb8","49cf9e00eb8b44cd9168574abe3df2c5","b7156a8bbb104cef9ce2ebbcf458f2cb","eb03b5ac6feb452a86ab17129481d3cb","ed6bd84830a642fd832ea549372056ad","6d69bd7081d44f748463fe8252ce675d","8d52617983b442e7b02d3d2ef885376c"],"resources":{"http://localhost:8080/experiment/cache/Action_Comics_1960-01-00_(262)/_crop/Action_C
omics_1960-01-00_(262)_13_Default,%20grey%20pad.png":{"data":"CjwhRE9DVFlQRSBodG1sPgo8aHRtbCBsYW5nPWVuPgogIDxtZXRhIGNoYXJzZXQ9dXRmLTg+CiAgPG1ldGEgbmFtZT12aWV3cG9ydCBjb250ZW50PSJpbml0aWFsLXNjYWxlPTEsIG1pbmltdW0tc2NhbGU9MSwgd2lkdGg9ZGV2aWNlLXdpZHRoIj4KICA8dGl0bGU+RXJyb3IgNDA0IChOb3QgRm91bmQpISExPC90aXRsZT4KICA8c3R5bGU+CiAgICAqe21hcmdpbjowO3BhZGRpbmc6MH1odG1sLGNvZGV7Zm9udDoxNXB4LzIycHggYXJpYWwsc2Fucy1zZXJpZn1odG1se2JhY2tncm91bmQ6I2ZmZjtjb2xvcjojMjIyO3BhZGRpbmc6MTVweH1ib2R5e21hcmdpbjo3JSBhdXRvIDA7bWF4LXdpZHRoOjM5MHB4O21pbi1oZWlnaHQ6MTgwcHg7cGFkZGluZzozMHB4IDAgMTVweH0qID4gYm9keXtiYWNrZ3JvdW5kOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9lcnJvcnMvcm9ib3QucG5nKSAxMDAlIDVweCBuby1yZXBlYXQ7cGFkZGluZy1yaWdodDoyMDVweH1we21hcmdpbjoxMXB4IDAgMjJweDtvdmVyZmxvdzpoaWRkZW59aW5ze2NvbG9yOiM3Nzc7dGV4dC1kZWNvcmF0aW9uOm5vbmV9YSBpbWd7Ym9yZGVyOjB9QG1lZGlhIHNjcmVlbiBhbmQgKG1heC13aWR0aDo3NzJweCl7Ym9keXtiYWNrZ3JvdW5kOm5vbmU7bWFyZ2luLXRvcDowO21heC13aWR0aDpub25lO3BhZGRpbmctcmlnaHQ6MH19I2xvZ297YmFja2dyb3VuZDp1cmwoLy93d3cuZ29vZ2xlLmNvbS9pbWFnZXMvbG9nb3MvZXJyb3JwYWdlL2Vycm9yX2xvZ28tMTUweDU0LnBuZykgbm8tcmVwZWF0O21hcmdpbi1sZWZ0Oi01cHh9QG1lZGlhIG9ubHkgc2NyZWVuIGFuZCAobWluLXJlc29sdXRpb246MTkyZHBpKXsjbG9nb3tiYWNrZ3JvdW5kOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9sb2dvcy9lcnJvcnBhZ2UvZXJyb3JfbG9nby0xNTB4NTQtMngucG5nKSBuby1yZXBlYXQgMCUgMCUvMTAwJSAxMDAlOy1tb3otYm9yZGVyLWltYWdlOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9sb2dvcy9lcnJvcnBhZ2UvZXJyb3JfbG9nby0xNTB4NTQtMngucG5nKSAwfX1AbWVkaWEgb25seSBzY3JlZW4gYW5kICgtd2Via2l0LW1pbi1kZXZpY2UtcGl4ZWwtcmF0aW86Mil7I2xvZ297YmFja2dyb3VuZDp1cmwoLy93d3cuZ29vZ2xlLmNvbS9pbWFnZXMvbG9nb3MvZXJyb3JwYWdlL2Vycm9yX2xvZ28tMTUweDU0LTJ4LnBuZykgbm8tcmVwZWF0Oy13ZWJraXQtYmFja2dyb3VuZC1zaXplOjEwMCUgMTAwJX19I2xvZ297ZGlzcGxheTppbmxpbmUtYmxvY2s7aGVpZ2h0OjU0cHg7d2lkdGg6MTUwcHh9CiAgPC9zdHlsZT4KICA8YSBocmVmPS8vd3d3Lmdvb2dsZS5jb20vPjxzcGFuIGlkPWxvZ28gYXJpYS1sYWJlbD1Hb29nbGU+PC9zcGFuPjwvYT4KICA8cD48Yj40MDQuPC9iPiA8aW5zPlRoYXTigJlzIGFuIGVycm9yLjwvaW5zPgogIDxwPiAgPGlucz5UaGF04oCZcyBhbGwgd2Uga25vdy48L2lucz4K","hea
ders":[["content-length","1449"],["content-type","text/html; charset=utf-8"]],"ok":false,"status":404,"status_text":""},"http://localhost:8080/experiment/cache/Strange_Tales_172005/_crop/Strange_Tales_172005_13_Default,%20grey%20pad.png":{"data":"CjwhRE9DVFlQRSBodG1sPgo8aHRtbCBsYW5nPWVuPgogIDxtZXRhIGNoYXJzZXQ9dXRmLTg+CiAgPG1ldGEgbmFtZT12aWV3cG9ydCBjb250ZW50PSJpbml0aWFsLXNjYWxlPTEsIG1pbmltdW0tc2NhbGU9MSwgd2lkdGg9ZGV2aWNlLXdpZHRoIj4KICA8dGl0bGU+RXJyb3IgNDA0IChOb3QgRm91bmQpISExPC90aXRsZT4KICA8c3R5bGU+CiAgICAqe21hcmdpbjowO3BhZGRpbmc6MH1odG1sLGNvZGV7Zm9udDoxNXB4LzIycHggYXJpYWwsc2Fucy1zZXJpZn1odG1se2JhY2tncm91bmQ6I2ZmZjtjb2xvcjojMjIyO3BhZGRpbmc6MTVweH1ib2R5e21hcmdpbjo3JSBhdXRvIDA7bWF4LXdpZHRoOjM5MHB4O21pbi1oZWlnaHQ6MTgwcHg7cGFkZGluZzozMHB4IDAgMTVweH0qID4gYm9keXtiYWNrZ3JvdW5kOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9lcnJvcnMvcm9ib3QucG5nKSAxMDAlIDVweCBuby1yZXBlYXQ7cGFkZGluZy1yaWdodDoyMDVweH1we21hcmdpbjoxMXB4IDAgMjJweDtvdmVyZmxvdzpoaWRkZW59aW5ze2NvbG9yOiM3Nzc7dGV4dC1kZWNvcmF0aW9uOm5vbmV9YSBpbWd7Ym9yZGVyOjB9QG1lZGlhIHNjcmVlbiBhbmQgKG1heC13aWR0aDo3NzJweCl7Ym9keXtiYWNrZ3JvdW5kOm5vbmU7bWFyZ2luLXRvcDowO21heC13aWR0aDpub25lO3BhZGRpbmctcmlnaHQ6MH19I2xvZ297YmFja2dyb3VuZDp1cmwoLy93d3cuZ29vZ2xlLmNvbS9pbWFnZXMvbG9nb3MvZXJyb3JwYWdlL2Vycm9yX2xvZ28tMTUweDU0LnBuZykgbm8tcmVwZWF0O21hcmdpbi1sZWZ0Oi01cHh9QG1lZGlhIG9ubHkgc2NyZWVuIGFuZCAobWluLXJlc29sdXRpb246MTkyZHBpKXsjbG9nb3tiYWNrZ3JvdW5kOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9sb2dvcy9lcnJvcnBhZ2UvZXJyb3JfbG9nby0xNTB4NTQtMngucG5nKSBuby1yZXBlYXQgMCUgMCUvMTAwJSAxMDAlOy1tb3otYm9yZGVyLWltYWdlOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9sb2dvcy9lcnJvcnBhZ2UvZXJyb3JfbG9nby0xNTB4NTQtMngucG5nKSAwfX1AbWVkaWEgb25seSBzY3JlZW4gYW5kICgtd2Via2l0LW1pbi1kZXZpY2UtcGl4ZWwtcmF0aW86Mil7I2xvZ297YmFja2dyb3VuZDp1cmwoLy93d3cuZ29vZ2xlLmNvbS9pbWFnZXMvbG9nb3MvZXJyb3JwYWdlL2Vycm9yX2xvZ28tMTUweDU0LTJ4LnBuZykgbm8tcmVwZWF0Oy13ZWJraXQtYmFja2dyb3VuZC1zaXplOjEwMCUgMTAwJX19I2xvZ297ZGlzcGxheTppbmxpbmUtYmxvY2s7aGVpZ2h0OjU0cHg7d2lkdGg6MTUwcHh9CiAgPC9zdHlsZT4KICA8YSBocmVmPS8vd3d3Lmdvb2dsZS5jb
20vPjxzcGFuIGlkPWxvZ28gYXJpYS1sYWJlbD1Hb29nbGU+PC9zcGFuPjwvYT4KICA8cD48Yj40MDQuPC9iPiA8aW5zPlRoYXTigJlzIGFuIGVycm9yLjwvaW5zPgogIDxwPiAgPGlucz5UaGF04oCZcyBhbGwgd2Uga25vdy48L2lucz4K","headers":[["content-length","1449"],["content-type","text/html; charset=utf-8"]],"ok":false,"status":404,"status_text":""}}},"executionInfo":{"elapsed":1557,"status":"ok","timestamp":1717163102674,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"GmAca8fVUMUm","outputId":"407601af-7f59-4bb0-e428-cdde90b5ad8d"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"d4d04b8e8b6048f98d6614d47949e7c0","version_major":2,"version_minor":0},"text/plain":["VBox(children=(HTML(value=\""}},"dfc87068738d4a51aa4582d66655ed5d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1d243bb88a024f5cad553638cb66b58f","placeholder":"​","style":"IPY_MODEL_00358206e8e44b7f93e33a68c77ed88e","value":"special_tokens_map.json: 
100%"}},"e1dcbdbabc40469793a494f59318b088":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e2ba9bdc71e342eea3fe5f0b2d50cad7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_beae34e9850d41f2928135a17bb7e24e","placeholder":"​","style":"IPY_MODEL_9ab61a0b2be94ab4b07712847603099f","value":"config.json: 
100%"}},"e364391a3e1d41d899735458a2b01436":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_83c2db21b673401fb11b45dde165fa14","max":4999813704,"min":0,"orientation":"horizontal","style":"IPY_MODEL_96f3c7a52a37470a98de481a7ea343ca","value":4999813704}},"e37b7be566694f66bd5aa6e82572a2e7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e6fac459206848898c206456b9dc8b1a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_vie
w_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":"0px 0px 0px 10px","right":null,"top":null,"visibility":null,"width":"initial"}},"e74be1cd463d42a783ddea46072f4499":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e7caf68af3c84c6195483d0a27225164":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_mo
del_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d418846006b8496989e86276d8538b50","placeholder":"​","style":"IPY_MODEL_2ad60e460fe64f0087cf3052f06451ba","value":"processor_config.json: 100%"}},"e9638ccc83e64995b8d08950b5d4177c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"eae063c3c5f54c86aee0b68d31f721ff":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eb03b5ac6feb452a86ab17129481d3cb":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@ju
pyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ebae562970a84eac8ba32466a405aa06":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ed6bd84830a642fd832ea549372056ad":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"ee91033cfb084d4e83529a11edb48134":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137212937453776"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel
","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_76c165d73f574ab39d3357e87b401ac9"],"layout":"IPY_MODEL_68d09a9f2fc84ead952160fe05c5dc90"}},"f1513571ca224451a3809b56a5739525":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e7caf68af3c84c6195483d0a27225164","IPY_MODEL_f6c25f96ead14b13a1a716a6de4e3f5c","IPY_MODEL_4a51933821d045cd881418ff2a2b30e7"],"layout":"IPY_MODEL_1d1a5dd1cc1d44be948464159187e856"}},"f17c2e4e8b2045e9930d9f2d73bf2ac7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f4f7b2ccfbd94a30a1f7a413c159ef07":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_n
ame":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f58a0d2e63aa456db54e8b34185a9e19":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ca76a74d6b204b4cae1f1d15a928782f","max":1636,"min":0,"orientation":"horizontal","style":"IPY_MODEL_4fe39d4b2ce74d529df2c59c9b02b468","value":1636}},"f5e02d40775a49a5b895d498233464f9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":n
ull,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":"visible","width":null}},"f6c25f96ead14b13a1a716a6de4e3f5c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_6c0b5365fd624af793b4d37f7449a7a5","max":483,"min":0,"orientation":"horizontal","style":"IPY_MODEL_9590087714614e948f3a8e4dd537a895","value":483}},"f84e4be232c7471e8d7c32463d1e69f4":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f8bc3fb84517404b94502e7498d5c55a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutV
iew","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f937e91a44ba4ba29379c6cd98c7b06e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f9b21842783e489d99ad9d25cd7d0914":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f9feda61c229465da90f33c69e7a4398":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137212937454928"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_c2494c8753e9409aa5e6d8862be5c773"
,"IPY_MODEL_0e112f697e7d4a34826d2a46a067c14e","IPY_MODEL_ee91033cfb084d4e83529a11edb48134"],"layout":"IPY_MODEL_f5e02d40775a49a5b895d498233464f9"}},"fb224135f5a14ba4872e0e64428d60b6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"BoundedIntTextModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"BoundedIntTextModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"IntTextView","continuous_update":false,"description":"","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_11c9edbf48f445d09d85f849915496e2","max":14,"min":0,"step":1,"style":"IPY_MODEL_9bb8a92858144190b6dd506673234315","value":0}},"fc6764a48f134c7c863b83ac060a7c4e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"fc986695bc55477099e8df7f9a0022cd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"fcebd41dea6a49d786827eb30e6202f0":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":nul
l,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fd00985eb51141799c4ad95c99461926":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":"block","flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"feb6497eb22b418b89260242009018b1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/bas
e","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ff88a16ea32e4df4b3abce055f6e2bc2":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ffeb080904984371bb8b715963c00069":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_mod
ule_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Boxes","Image","Mask","Image & Mask","Page data","Ground truth","Image All","Results","Best results","Accuracy","Dataframe","Config"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":7,"layout":"IPY_MODEL_bd64ee547bd049398c6d37f1ca476461","style":"IPY_MODEL_5da238d5b8514352a70b6b16cc9d944f"}}}}},"nbformat":4,"nbformat_minor":0} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing `Idefics` OCR for Comics\n", + "> Accuracy Enhancements for OCR in `PanelCleaner`\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Settings for Google Colab" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To efficiently manage the image sources for our experiments, we recommend mounting your Google Drive and storing the experiment files there. 
If you are not familiar with Colab or Jupyter environments, it's best to leave these settings at their default values to ensure smooth operation.\n", + "\n", + "- Set `MOUNT_DRIVE` to `True` to enable mounting Google Drive in the Colab environment.\n", + "This allows the notebook to access files stored in your Google Drive.\n", + "\n", + "- `GDRIVE_MOUNT_POINT` specifies the local directory in Colab where your Google Drive will be mounted.\n", + "This acts as the root directory for accessing any files within your Google Drive from the notebook.\n", + "\n", + "- `PANELCLEANER_IN_GDRIVE` specifies the path within your Google Drive where the PanelCleaner project is located.\n", + "This path is used to access or store any files related to the PanelCleaner project directly from Google Drive.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "MOUNT_DRIVE = True\n", + "GDRIVE_MOUNT_POINT = 'drive'\n", + "PANELCLEANER_IN_GDRIVE = 'MyDrive/Shared/PanelCleaner'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# install (Colab)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import fastcore.all as FC\n", + "import os\n", + "import re\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "from rich import print as cprint\n", + "from rich.text import Text\n", + "\n", + "def info(msg: str):\n", + " (t := Text(msg)).stylize(\"bold red\", 0, 6)\n", + " cprint(\"_\" * 10, t, \"_\" * 10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mount Google Drive" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mnt_point = Path(f\"/content/{GDRIVE_MOUNT_POINT}\")\n", + "if FC.IN_COLAB:\n", + " if MOUNT_DRIVE:\n", + " if not mnt_point.exists():\n", + " info(\"Mounting Google Drive\")\n", + " from google.colab import drive\n", + 
" drive.mount(str(mnt_point), force_remount=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install **PanelCleaner**\n", + "\n", + "> We will attempt to use the version of **PanelCleaner** stored in your Google Drive. If it's not available, we'll install it from GitHub.\n", + "\n", + "Note that we specifically require the `testbed` branch of the **PanelCleaner** repository, not the main trunk. This branch contains necessary configurations and experimental features that are crucial for the tests conducted in this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if FC.IN_COLAB:\n", + " pc_path = mnt_point/PANELCLEANER_IN_GDRIVE\n", + " tb_path = pc_path/'pcleaner/_testbed'\n", + " if tb_path.exists():\n", + " info('Installing PanelCleaner from your Google Drive')\n", + " else:\n", + " info('Installing PanelCleaner from GitHub')\n", + " !rm -rf PanelCleaner\n", + " !git clone -b testbed https://github.com/civvic/PanelCleaner.git\n", + " pc_path = Path('PanelCleaner').absolute()\n", + " tb_path = pc_path/'pcleaner/_testbed'\n", + " assert tb_path.exists(), \"PanelCleaner not found\"\n", + " os.chdir(tb_path)\n", + " sys.path.insert(0, f\"{tb_path}\")\n", + " sys.path.insert(0, f\"{pc_path}\")\n", + " !pip install -q -r requirements-colab.txt\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'4.42.0.dev0'" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "if FC.IN_COLAB:\n", + " !pip install -q flash-attn --no-build-isolation\n", + " !pip install -q transformers accelerate datasets peft bitsandbytes\n", + "\n", + "import transformers\n", + "transformers.__version__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prologue" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "from testbed.experiments import ExperimentsVisor, CropMethod, OCRExperimentContext\n", + "from testbed.ocr_idefics import IdeficsOCR, get_gpu_vram\n", + "from testbed.helpers import IN_MAC, IN_LINUX\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if IN_MAC:\n", + " !pip install -q mlx_vlm\n", + "\n", + " import mlx.core as mx\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
          metal.is_available(): True\n",
+       "           metal.device_info(): {'memory_size': 68719476736, 'max_recommended_working_set_size': 51539607552, \n",
+       "'max_buffer_length': 38654705664, 'architecture': 'applegpu_g13s'}\n",
+       "     metal.get_active_memory(): 0\n",
+       "       metal.get_peak_memory(): 0\n",
+       "      metal.get_cache_memory(): 0\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + " \u001b[1;35mmetal.is_available\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[3;92mTrue\u001b[0m\n", + " \u001b[1;35mmetal.device_info\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'memory_size'\u001b[0m: \u001b[1;36m68719476736\u001b[0m, \u001b[32m'max_recommended_working_set_size'\u001b[0m: \u001b[1;36m51539607552\u001b[0m, \n", + "\u001b[32m'max_buffer_length'\u001b[0m: \u001b[1;36m38654705664\u001b[0m, \u001b[32m'architecture'\u001b[0m: \u001b[32m'applegpu_g13s'\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[1;35mmetal.get_active_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n", + " \u001b[1;35mmetal.get_peak_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n", + " \u001b[1;35mmetal.get_cache_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
     total VRAM: 65536 MiB\n",
+       "    active VRAM: 0 MiB\n",
+       "
\n" + ], + "text/plain": [ + " total VRAM: \u001b[1;36m65536\u001b[0m MiB\n", + " active VRAM: \u001b[1;36m0\u001b[0m MiB\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "if IN_MAC:\n", + " gpu_name = mx.metal.device_info()['architecture']\n", + " cprint(\n", + " f\"{'metal.is_available()':>30}: {mx.metal.is_available()}\\n\"\n", + " f\"{'metal.device_info()':>30}: {mx.metal.device_info()}\\n\"\n", + " f\"{'metal.get_active_memory()':>30}: {mx.metal.get_active_memory()//1024//1024}\\n\"\n", + " f\"{'metal.get_peak_memory()':>30}: {mx.metal.get_peak_memory()//1024//1024}\\n\"\n", + " f\"{'metal.get_cache_memory()':>30}: {mx.metal.get_cache_memory()//1024//1024}\\n\"\n", + " )\n", + "else:\n", + " !nvidia-smi\n", + " import subprocess\n", + " gpu_name = subprocess.check_output(\n", + " \"nvidia-smi --query-gpu=gpu_name --format=csv,noheader\", shell=True\n", + " ).decode('utf-8').strip()\n", + " \n", + "\n", + "cprint( f\"{'GPU':>15}: {gpu_name}\\n\"\n", + " f\"{'total VRAM':>15}: {get_gpu_vram()} MiB\\n\"\n", + " f\"{'active VRAM':>15}: {get_gpu_vram(False)} MiB\")\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "# Idefics experiments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment directory" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Directory where the images reside (`EXP_DIR/source/`), the auxiliary images will be cached (`EXP_DIR/cache/`), and the experiment results will be saved. You can change the default location here.\n", + "\n", + "NOTE: the default value assumes we are currently inside `PanelCleaner/pcleaner/_testbed` directory. You can check that this is the case with `Path('.').resolve()`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    Working dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed\n",
+       "Experiments dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment\n",
+       "
\n" + ], + "text/plain": [ + " Working dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/\u001b[0m\u001b[95m_testbed\u001b[0m\n", + "Experiments dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/\u001b[0m\u001b[95mexperiment\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "EXP_DIR = Path('./experiment')\n", + "cprint(f\"{'Working dir':>15}: {Path('.').resolve()}\\nExperiments dir: {EXP_DIR.resolve()}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test images\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copy your images to the source directory:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/source\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/\u001b[0m\u001b[95msource\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cprint((EXP_DIR/'source').resolve())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "or download the standard set:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !gdown --id 1MCqUImwFS5iQ271CD9_t2FSugJXdYj0a -O experiment.zip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !unzip -qn experiment.zip -d ." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup ngrok (Colab)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The experiments can generate hundreds of images, and maintaining the **PIL** images in memory is not efficient. All the generated images are cached and visualized on demand through a URL pointing to the local cache. This approach prevents the kernel from being overloaded with **PIL** images, with the front-end responsible for fetching the image and the backend web server (not the kernel) for serving the image in another process. This method is quick and efficient. As an added bonus, the saved notebook remains lean and fit; it doesn't store the Base64 versions of all the output cell images.\n", + "\n", + "Unfortunately, this approach does not work as is in **Colab**. Google Colab runs on an older Ubuntu 18.04 VM, so all the usual networking challenges with Docker, or whatever VMs Google is using, apply. Google also goes to great lengths to avoid exposing its internal architecture. 
We have two options:\n", + "- Let the Jupyter kernel serve the images itself, which is slow and memory-consuming.\n", + "- Use a tunnel to map localhost (server) to whatever IP and port the front-end (the browser you're currently using) is running on. We can use **ngrok** for this, but *ngrok* is a commercial service that has been abused and now requires confirmation the first time the tunnel connects, which can be inconvenient for the user. It also requires the user to open a free account and obtain an auth token.\n", + "\n", + "You choose.\n", + "\n", + "If the notebook is running in Colab and ngrok has been successfully installed and the tunnel has been created, the default setting is USE_PIL=False. You can set the environment variable USE_PIL=True to force the use of PIL images, but note that in certain circumstances, Colab will complain because the free tiers are usually memory constrained.\n", + "\n", + "If you don't change the default settings and\n", + "- the notebook is running locally, it'll serve the images directly without any additional setup.\n", + "- the notebook is running in Colab, it'll serve the images through a web server and ngrok.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if FC.IN_COLAB:\n", + " os.environ['USE_TUNNEL'] = 'True'\n", + " os.environ['USE_PIL'] = 'False'\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "SERVER = None\n", + "if os.environ['USE_PIL'].lower() == 'false' and os.environ['USE_TUNNEL'].lower() == 'true':\n", + " import testbed.web_server as web_server\n", + " SERVER = web_server.setup_ngrok(web_server.WebServerBottle, Path(EXP_DIR))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CONTEXT" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "| quant, attn \ platform | Mem | Mac | Linux | Windows | Colab T4 | Colab L4/H100 
|\n", + "| --- | --- | --- | --- | --- | --- | --- |\n", + "| **float16** | 17 GB | ✅ | ✅ | ? | ❌ | ✅ |\n", + "| **float16 + attn** | 17 GB | ❌ | ✅ | ? | ❌ | ✅ |\n", + "| **8bit** | 10 GB | ✅ | ✅ | ? | ✅ | ✅ |\n", + "| **8bit + attn** | 10 GB | ❌ | ✅ | ? | ❌ | ✅ |\n", + "| **4bit** | 6 GB | ✅ | ✅ | ? | ✅ | ✅ |\n", + "| **4bit + attn** | 6 GB | ❌ | ✅ | ? | ❌ | ✅ |\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Creates the `OCRExperimentContext` object we'll use to manage the experiments.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current Configuration:\n", + "\n", + "Locale: System default\n", + "Default Profile: Built-in\n", + "Saved Profiles:\n", + "- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n", + "- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n", + "\n", + "Profile Editor: cursor\n", + "Cache Directory: System default\n", + "Default Torch Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n", + "Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n", + "GUI Theme: System default\n", + "\n", + "--------------------\n", + "\n", + "Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n", + "System default cache directory: /Users/vic/Library/Caches/pcleaner\n" + ] + }, + { + "data": { + "text/html": [ + "
 config cache_dir: None\n",
+       "       model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n",
+       "           device: 'mps'\n",
+       "
\n" + ], + "text/plain": [ + " config cache_dir: \u001b[3;35mNone\u001b[0m\n", + " model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n", + " device: \u001b[32m'mps'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
        force_PIL: False\n",
+       "       use_tunnel: False\n",
+       "       server_url: \n",
+       "   experiment dir: experiment\n",
+       "       source_dir: experiment/source\n",
+       "        cache_dir: experiment/cache\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + " force_PIL: \u001b[3;91mFalse\u001b[0m\n", + " use_tunnel: \u001b[3;91mFalse\u001b[0m\n", + " server_url: \n", + " experiment dir: experiment\n", + " source_dir: experiment/source\n", + " cache_dir: experiment/cache\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Experiment runs:\n",
+       "Idefics-crop-post: 16\n",
+       "
\n" + ], + "text/plain": [ + "Experiment runs:\n", + "Idefics-crop-post: \u001b[1;36m16\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
     Quantization: '4bit'\n",
+       "Flash attention 2: N/A\n",
+       "             VRAM: 0/65536 MiB\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + " Quantization: \u001b[32m'4bit'\u001b[0m\n", + "Flash attention \u001b[1;36m2\u001b[0m: N/A\n", + " VRAM: \u001b[1;36m0\u001b[0m/\u001b[1;36m65536\u001b[0m MiB\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "quant = '4bit' if IN_MAC or FC.IN_COLAB else 'float16'\n", + "flashattn = True if not FC.IN_COLAB else False\n", + "CONTEXT = OCRExperimentContext('Idefics', EXP_DIR, \n", + " quant=quant, flashattn=flashattn, \n", + " server=SERVER, run_name='Idefics-crop-post', load=True)\n", + "CONTEXT.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a93af477ed4c4af8aba4d97397009adc", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fetching 11 files: 0%| | 0/11 [00:00. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. 
This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n", + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0f89471c58b94c2db7ffab9143b26b6b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fetching 11 files: 0%| | 0/11 [00:00 Quantization: '4bit'\n", + "Flash attention 2: N/A\n", + " VRAM: 5261/65536 MiB\n", + "\n", + "\n" + ], + "text/plain": [ + " Quantization: \u001b[32m'4bit'\u001b[0m\n", + "Flash attention \u001b[1;36m2\u001b[0m: N/A\n", + " VRAM: \u001b[1;36m5261\u001b[0m/\u001b[1;36m65536\u001b[0m MiB\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ocr_model = CONTEXT.setup_ocr_model(False)\n", + "ocr_model.show_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check the images are in place" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['00: Action_Comics_1960-01-00_(262).JPG',\n", + " '01: Adolf_Cap_01_008.jpg',\n", + " '02: Barnaby_v1-028.png',\n", + " '03: Barnaby_v1-029.png',\n", + " '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n", + " '05: Cannon-292.jpg',\n", + " '06: Contrato_con_Dios_028.jpg',\n", + " '07: Erase_una_vez_en_Francia_02_88.jpg',\n", + " '08: FOX_CHILLINTALES_T17_012.jpg',\n", + " '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n", + " '10: Galactus_12.jpg',\n", + " '11: INOUE_KYOUMEN_002.png',\n", + " '12: MCCALL_ROBINHOOD_T31_010.jpg',\n", + " '13: MCCAY_LITTLENEMO_090.jpg',\n", + " '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n", + " '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n", + " '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n", + " 
'17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n", + " '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n", + " '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n", + " '20: Strange_Tales_172005.jpg',\n", + " '21: Strange_Tales_172021.jpg',\n", + " '22: Tarzan_014-21.JPG',\n", + " '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n", + " '24: Transformers_-_Unicron_000-004.jpg',\n", + " '25: Transformers_-_Unicron_000-016.jpg',\n", + " '26: WARE_ACME_024.jpg',\n", + " '27: Yoko_Tsuno_T01_1972-10.jpg',\n", + " '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n", + " '29: manga_0033.jpg',\n", + " '30: ronson-031.jpg',\n", + " '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[f\"{i:02}: {_.name}\" for i,_ in enumerate(CONTEXT.image_paths)]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d4d04b8e8b6048f98d6614d47949e7c0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HTML(value=\"" + } + }, + "dfc87068738d4a51aa4582d66655ed5d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1d243bb88a024f5cad553638cb66b58f", + "placeholder": "​", + "style": "IPY_MODEL_00358206e8e44b7f93e33a68c77ed88e", + "value": "special_tokens_map.json: 100%" + } + }, + "e1dcbdbabc40469793a494f59318b088": { + "model_module": "@jupyter-widgets/base", 
+ "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e2ba9bdc71e342eea3fe5f0b2d50cad7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_beae34e9850d41f2928135a17bb7e24e", + "placeholder": "​", + "style": "IPY_MODEL_9ab61a0b2be94ab4b07712847603099f", + "value": "config.json: 100%" + } + }, + "e364391a3e1d41d899735458a2b01436": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + 
"_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_83c2db21b673401fb11b45dde165fa14", + "max": 4999813704, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_96f3c7a52a37470a98de481a7ea343ca", + "value": 4999813704 + } + }, + "e37b7be566694f66bd5aa6e82572a2e7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e6fac459206848898c206456b9dc8b1a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": 
"@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": "0px 0px 0px 10px", + "right": null, + "top": null, + "visibility": null, + "width": "initial" + } + }, + "e74be1cd463d42a783ddea46072f4499": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e7caf68af3c84c6195483d0a27225164": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d418846006b8496989e86276d8538b50", + "placeholder": "​", + "style": "IPY_MODEL_2ad60e460fe64f0087cf3052f06451ba", + "value": "processor_config.json: 100%" + } + }, + "e9638ccc83e64995b8d08950b5d4177c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "eae063c3c5f54c86aee0b68d31f721ff": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "eb03b5ac6feb452a86ab17129481d3cb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"ebae562970a84eac8ba32466a405aa06": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ed6bd84830a642fd832ea549372056ad": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ee91033cfb084d4e83529a11edb48134": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "137212937453776" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_76c165d73f574ab39d3357e87b401ac9" + ], + "layout": "IPY_MODEL_68d09a9f2fc84ead952160fe05c5dc90" + } + }, + "f1513571ca224451a3809b56a5739525": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e7caf68af3c84c6195483d0a27225164", + "IPY_MODEL_f6c25f96ead14b13a1a716a6de4e3f5c", + "IPY_MODEL_4a51933821d045cd881418ff2a2b30e7" + ], + "layout": "IPY_MODEL_1d1a5dd1cc1d44be948464159187e856" + } + }, + "f17c2e4e8b2045e9930d9f2d73bf2ac7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f4f7b2ccfbd94a30a1f7a413c159ef07": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + 
"bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f58a0d2e63aa456db54e8b34185a9e19": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ca76a74d6b204b4cae1f1d15a928782f", + "max": 1636, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4fe39d4b2ce74d529df2c59c9b02b468", + "value": 1636 + } + }, + "f5e02d40775a49a5b895d498233464f9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "visible", + "width": null + } + }, + "f6c25f96ead14b13a1a716a6de4e3f5c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6c0b5365fd624af793b4d37f7449a7a5", + "max": 483, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9590087714614e948f3a8e4dd537a895", + "value": 483 + } + }, + "f84e4be232c7471e8d7c32463d1e69f4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f8bc3fb84517404b94502e7498d5c55a": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f937e91a44ba4ba29379c6cd98c7b06e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f9b21842783e489d99ad9d25cd7d0914": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f9feda61c229465da90f33c69e7a4398": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "137212937454928" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c2494c8753e9409aa5e6d8862be5c773", + "IPY_MODEL_0e112f697e7d4a34826d2a46a067c14e", + "IPY_MODEL_ee91033cfb084d4e83529a11edb48134" + ], + "layout": "IPY_MODEL_f5e02d40775a49a5b895d498233464f9" + } + }, + "fb224135f5a14ba4872e0e64428d60b6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "BoundedIntTextModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "BoundedIntTextModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "IntTextView", + "continuous_update": false, + "description": "", + "description_tooltip": null, + "disabled": false, + "layout": "IPY_MODEL_11c9edbf48f445d09d85f849915496e2", + "max": 14, + "min": 0, + "step": 1, + "style": "IPY_MODEL_9bb8a92858144190b6dd506673234315", + "value": 0 + } + }, + "fc6764a48f134c7c863b83ac060a7c4e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", 
+ "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "fc986695bc55477099e8df7f9a0022cd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "fcebd41dea6a49d786827eb30e6202f0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fd00985eb51141799c4ad95c99461926": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": 
"@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": "block", + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "feb6497eb22b418b89260242009018b1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ff88a16ea32e4df4b3abce055f6e2bc2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ffeb080904984371bb8b715963c00069": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DropdownModel", + "_options_labels": [ + "Boxes", + "Image", + "Mask", + "Image & Mask", + "Page data", + "Ground truth", + "Image All", + "Results", + 
"Best results", + "Accuracy", + "Dataframe", + "Config" + ], + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "DropdownView", + "description": "", + "description_tooltip": null, + "disabled": false, + "index": 7, + "layout": "IPY_MODEL_bd64ee547bd049398c6d37f1ca476461", + "style": "IPY_MODEL_5da238d5b8514352a70b6b16cc9d944f" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/pcleaner/_testbed/test_paligemma.ipynb b/pcleaner/_testbed/test_paligemma.ipynb index 0275156a..95268903 100644 --- a/pcleaner/_testbed/test_paligemma.ipynb +++ b/pcleaner/_testbed/test_paligemma.ipynb @@ -1 +1,5150 @@ -{"cells":[{"cell_type":"markdown","metadata":{"id":"FO-koL4wUMUg"},"source":["# Testing `PaliGemma` OCR for Comics\n","> Accuracy Enhancements for OCR in `PanelCleaner`\n"]},{"cell_type":"markdown","metadata":{"id":"HKOXOxox_jG0"},"source":["## Settings for Google Colab"]},{"cell_type":"markdown","metadata":{"id":"GakYQssB_jG0"},"source":["To efficiently manage the image sources for our experiments, we recommend mounting your Google Drive and storing the experiment files there. 
If you are not familiar with Colab or Jupyter environments, it's best to leave these settings at their default values to ensure smooth operation.\n","\n","- Set `MOUNT_DRIVE` to `True` to enable mounting Google Drive in the Colab environment.\n","This allows the notebook to access files stored in your Google Drive.\n","\n","- `GDRIVE_MOUNT_POINT` specifies the local directory in Colab where your Google Drive will be mounted.\n","This acts as the root directory for accessing any files within your Google Drive from the notebook.\n","\n","- `PANELCLEANER_IN_GDRIVE` specifies the path within your Google Drive where the PanelCleaner project is located.\n","This path is used to access or store any files related to the PanelCleaner project directly from Google Drive.\n"]},{"cell_type":"code","execution_count":2,"metadata":{"executionInfo":{"elapsed":2,"status":"ok","timestamp":1717165201592,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"uNgZx4vk_jG0"},"outputs":[],"source":["MOUNT_DRIVE = True\n","GDRIVE_MOUNT_POINT = 'drive'\n","PANELCLEANER_IN_GDRIVE = 'MyDrive/Shared/PanelCleaner'"]},{"cell_type":"markdown","metadata":{"id":"1iZhT2Zd_jG1"},"source":["# install (Colab)\n"]},{"cell_type":"code","execution_count":3,"metadata":{"executionInfo":{"elapsed":3,"status":"ok","timestamp":1717165204341,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"RNFlWKSb_jG1"},"outputs":[],"source":["import fastcore.all as FC\n","import os\n","import re\n","import sys\n","from pathlib import Path\n","\n","from rich import print as cprint\n","from rich.text import Text\n","\n","def info(msg: str):\n"," (t := Text(msg)).stylize(\"bold red\", 0, 6)\n"," cprint(\"_\" * 10, t, \"_\" * 10)\n"]},{"cell_type":"markdown","metadata":{"id":"Xrv0xwSP_jG1"},"source":["Mount Google 
Drive"]},{"cell_type":"code","execution_count":4,"metadata":{"executionInfo":{"elapsed":4,"status":"ok","timestamp":1717165208042,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"7h-QzI37_jG1"},"outputs":[],"source":["mnt_point = Path(f\"/content/{GDRIVE_MOUNT_POINT}\")\n","if FC.IN_COLAB:\n"," if MOUNT_DRIVE:\n"," if not mnt_point.exists():\n"," info(\"Mounting Google Drive\")\n"," from google.colab import drive\n"," drive.mount(str(mnt_point), force_remount=True)\n"]},{"cell_type":"markdown","metadata":{"id":"41JG2K1G_jG1"},"source":["### Install **PanelCleaner**\n","\n","> We will attempt to use the version of **PanelCleaner** stored in your Google Drive. If it's not available, we'll install it from GitHub.\n","\n","Note that we specifically require the `testbed` branch of the **PanelCleaner** repository, not the main trunk. This branch contains necessary configurations and experimental features that are crucial for the tests conducted in this notebook."]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":33},"executionInfo":{"elapsed":4564,"status":"ok","timestamp":1717165217825,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"SdiVA--u_jG1","outputId":"4c6a00e8-c8d9-4988-c5f2-7010c1233681"},"outputs":[],"source":["if FC.IN_COLAB:\n"," pc_path = mnt_point/PANELCLEANER_IN_GDRIVE\n"," tb_path = pc_path/'pcleaner/_testbed'\n"," if tb_path.exists():\n"," info('Installing PanelCleaner from your Google Drive')\n"," else:\n"," info('Installing PanelCleaner from GitHub')\n"," !git clone -b testbed https://github.com/civvic/PanelCleaner.git\n"," tb_path = Path('PanelCleaner/pcleaner/_testbed')\n"," assert tb_path.exists(), \"PanelCleaner not found\"\n"," os.chdir(tb_path)\n"," sys.path.append(f\"{pc_path}\")\n"," sys.path.append(f\"{tb_path}\")\n"," !pip install -q -r 
requirements-colab.txt\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"data":{"text/plain":["'4.42.0.dev0'"]},"metadata":{},"output_type":"display_data"}],"source":["if FC.IN_COLAB:\n"," !pip install -q accelerate\n","\n","import transformers\n","transformers.__version__"]},{"cell_type":"markdown","metadata":{"id":"8_A18iMfN2-g"},"source":["# Prologue"]},{"cell_type":"code","execution_count":7,"metadata":{"executionInfo":{"elapsed":4223,"status":"ok","timestamp":1717165238956,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"q_7GYpnWUMUl"},"outputs":[],"source":["from testbed.experiments import ExperimentsVisor, CropMethod, OCRExperimentContext\n","from testbed.ocr_paligemma import PaliGemmaOCR, get_gpu_vram\n","from testbed.helpers import IN_MAC, IN_LINUX\n"]},{"cell_type":"code","execution_count":8,"metadata":{"executionInfo":{"elapsed":2,"status":"ok","timestamp":1717165238956,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"CQtkWb9ON2-g"},"outputs":[],"source":["if IN_MAC:\n"," # !pip install -q mlx_vlm\n","\n"," import mlx.core as mx\n"]},{"cell_type":"markdown","metadata":{"id":"DK4iMZ7nN2-g"},"source":["# GPU"]},{"cell_type":"code","execution_count":12,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":396},"executionInfo":{"elapsed":660,"status":"ok","timestamp":1717165239614,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"_qoPb5u3N2-g","outputId":"6dec4a6e-5679-47c5-cc53-17473a6e214b"},"outputs":[{"data":{"text/html":["
          metal.is_available(): True\n","           metal.device_info(): {'memory_size': 68719476736, 'max_recommended_working_set_size': 51539607552, \n","'max_buffer_length': 38654705664, 'architecture': 'applegpu_g13s'}\n","     metal.get_active_memory(): 0\n","       metal.get_peak_memory(): 0\n","      metal.get_cache_memory(): 0\n","\n","
\n"],"text/plain":[" \u001b[1;35mmetal.is_available\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[3;92mTrue\u001b[0m\n"," \u001b[1;35mmetal.device_info\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'memory_size'\u001b[0m: \u001b[1;36m68719476736\u001b[0m, \u001b[32m'max_recommended_working_set_size'\u001b[0m: \u001b[1;36m51539607552\u001b[0m, \n","\u001b[32m'max_buffer_length'\u001b[0m: \u001b[1;36m38654705664\u001b[0m, \u001b[32m'architecture'\u001b[0m: \u001b[32m'applegpu_g13s'\u001b[0m\u001b[1m}\u001b[0m\n"," \u001b[1;35mmetal.get_active_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n"," \u001b[1;35mmetal.get_peak_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n"," \u001b[1;35mmetal.get_cache_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n","\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
            GPU: applegpu_g13s\n","     total VRAM: 65536 MiB\n","    active VRAM: 0 MiB\n","
\n"],"text/plain":[" GPU: applegpu_g13s\n"," total VRAM: \u001b[1;36m65536\u001b[0m MiB\n"," active VRAM: \u001b[1;36m0\u001b[0m MiB\n"]},"metadata":{},"output_type":"display_data"}],"source":["if IN_MAC:\n"," gpu_name = mx.metal.device_info()['architecture']\n"," cprint(\n"," f\"{'metal.is_available()':>30}: {mx.metal.is_available()}\\n\"\n"," f\"{'metal.device_info()':>30}: {mx.metal.device_info()}\\n\"\n"," f\"{'metal.get_active_memory()':>30}: {mx.metal.get_active_memory()//1024//1024}\\n\"\n"," f\"{'metal.get_peak_memory()':>30}: {mx.metal.get_peak_memory()//1024//1024}\\n\"\n"," f\"{'metal.get_cache_memory()':>30}: {mx.metal.get_cache_memory()//1024//1024}\\n\"\n"," )\n","else:\n"," !nvidia-smi\n"," import subprocess\n"," gpu_name = subprocess.check_output(\n"," \"nvidia-smi --query-gpu=gpu_name --format=csv,noheader\", shell=True\n"," ).decode('utf-8').strip()\n"," \n","\n","cprint( f\"{'GPU':>15}: {gpu_name}\\n\"\n"," f\"{'total VRAM':>15}: {get_gpu_vram()} MiB\\n\"\n"," f\"{'active VRAM':>15}: {get_gpu_vram(False)} MiB\")\n","\n"]},{"cell_type":"markdown","metadata":{"id":"Op4kiDaAUMUl"},"source":["----\n","# PaliGemma experiments"]},{"cell_type":"code","execution_count":10,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1932,"status":"ok","timestamp":1717165245172,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"D0LeqL6TZ-GY","outputId":"4b587e05-4de3-469b-cbd4-49b58fc0939d"},"outputs":[{"name":"stdout","output_type":"stream","text":["User is already logged in.\n"]}],"source":["from huggingface_hub import notebook_login\n","notebook_login(False, False)"]},{"cell_type":"markdown","metadata":{"id":"Y3IENuzMN2-h"},"source":["## Experiment directory"]},{"cell_type":"markdown","metadata":{"id":"urj8RO7aN2-h"},"source":["Directory where the images reside (`EXP_DIR/source/`), the auxiliary images will be cached (`EXP_DIR/cache/`), and the experiment results will be saved. 
You can change the default location here.\n","\n","NOTE: the default value assumes we are currently inside `PanelCleaner/pcleaner/_testbed` directory. You can check that is the case with `Path('.').resolve()`."]},{"cell_type":"code","execution_count":11,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":49},"executionInfo":{"elapsed":369,"status":"ok","timestamp":1717165248315,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"cbFj63q4N2-h","outputId":"b10db83f-aa18-496d-e16e-834050b8aea7"},"outputs":[{"data":{"text/html":["
    Working dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed\n","Experiments dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment\n","
\n"],"text/plain":[" Working dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/\u001b[0m\u001b[95m_testbed\u001b[0m\n","Experiments dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/\u001b[0m\u001b[95mexperiment\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["EXP_DIR = Path('./experiment')\n","cprint(f\"{'Working dir':>15}: {Path('.').resolve()}\\nExperiments dir: {EXP_DIR.resolve()}\")\n"]},{"cell_type":"markdown","metadata":{},"source":["# Test images\n"]},{"cell_type":"markdown","metadata":{},"source":["Copy your images to the source directory:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"data":{"text/html":["
/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/source\n","
\n"],"text/plain":["\u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/\u001b[0m\u001b[95msource\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["cprint((EXP_DIR/'source').resolve())"]},{"cell_type":"markdown","metadata":{},"source":["or download the standard set:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !gdown --id 1MCqUImwFS5iQ271CD9_t2FSugJXdYj0a -O experiment.zip"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !unzip -qn experiment.zip -d ."]},{"cell_type":"markdown","metadata":{"id":"8Iondm2oUMUl"},"source":["# Setup ngrok (Colab)"]},{"cell_type":"markdown","metadata":{"id":"nuHPp1U7UMUl"},"source":["The experiments can generate hundreds of images, and maintaining the **PIL** images in memory is not efficient. All the generated images are cached and visualized on demand through a URL pointing to the local cache. This approach prevents the kernel from being overloaded with **PIL** images, with the front-end responsible for fetching the image and the backend web server (not the kernel) for serving the image in another process. This method is quick and efficient. As an added bonus, the saved notebook remains lean and fit; it doesn't store the Base64 versions of all the output cell images.\n","\n","Unfortunately, this approach does not work as is in **Colab**. Google Colab runs on an older Ubuntu 18.04 VM, so all the usual networking challenges with Docker, or whatever VMs Google is using, apply. Google also goes to great lengths to avoid exposing its internal architecture. We have two options:\n","- Let the Jupyter kernel serve the images itself, which is slow and memory-consuming.\n","- Use a tunnel to map localhost (server) to whatever IP and port the front-end (the browser you're currently using) is running on. 
We can use **ngrok** for this, but *ngrok* is a commercial service that has been abused and now requires confirmation the first time the tunnel connects, which can be inconvenient for the user. It also requires the user to open a free account and obtain an auth token.\n","\n","You choose.\n","\n","If the notebook is running in Colab and ngrok has been successfully installed and the tunnel has been created, the default setting is USE_PIL=False. You can set the environment variable USE_PIL=True to force the use of PIL images, but note that in certain circumstances, Colab will complain because the free tiers are usually memory constrained.\n","\n","If you don't change the default settings and\n","- the notebook is running locally, it'll serve the images directly without any additional setup.\n","- the notebook is running in Colab, it'll serve the images through a web server and ngrok.\n"]},{"cell_type":"code","execution_count":12,"metadata":{"executionInfo":{"elapsed":559,"status":"ok","timestamp":1717165257647,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"vaf4PGLlUMUm"},"outputs":[],"source":["if FC.IN_COLAB:\n"," os.environ['USE_TUNNEL'] = 'True'\n"," os.environ['USE_PIL'] = 'False'\n"]},{"cell_type":"code","execution_count":13,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":184},"executionInfo":{"elapsed":47202,"status":"ok","timestamp":1717165306667,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"kivkfTrcUMUm","outputId":"6547dd4b-3747-4c99-e393-a422d1a85798"},"outputs":[],"source":["SERVER = None\n","if os.environ['USE_PIL'].lower() == 'false' and os.environ['USE_TUNNEL'].lower() == 'true':\n"," import testbed.web_server as web_server\n"," SERVER = web_server.setup_ngrok(web_server.WebServerBottle, Path(EXP_DIR))\n"]},{"cell_type":"markdown","metadata":{"id":"B0UlRbJ4N2-h"},"source":["# 
CONTEXT"]},{"cell_type":"markdown","metadata":{"id":"gBET0jZMN2-h"},"source":["| quant \\ platform | Mem | Mac | Linux | Windows | Colab T4 | Colab L4/H100 |\n","| --- | --- | --- | --- | --- | --- | --- |\n","| **bfloat16** | 6 GB | ✅ | ✅ | ? | ✅ | ✅ |\n","| **8bit** | 3 GB | ✅ | ✅ | ? | ✅ | ✅ |\n","| **4bit** | 2 GB | ✅ | ✅ | ? | ✅ | ✅ |\n"]},{"cell_type":"markdown","metadata":{"id":"AUgSvi6CUMUm"},"source":["Creates the `ExperimentContext` object we'll use to manage the experiments.\n"]},{"cell_type":"code","execution_count":18,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":504},"executionInfo":{"elapsed":434,"status":"ok","timestamp":1717165321066,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"wroznCt0CQdT","outputId":"ec50236e-dddc-4d60-b3cd-b6fdcb0a584f"},"outputs":[{"name":"stdout","output_type":"stream","text":["Current Configuration:\n","\n","Locale: System default\n","Default Profile: Built-in\n","Saved Profiles:\n","- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n","- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n","\n","Profile Editor: cursor\n","Cache Directory: System default\n","Default Torch Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n","Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n","GUI Theme: System default\n","\n","--------------------\n","\n","Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n","System default cache directory: /Users/vic/Library/Caches/pcleaner\n"]},{"data":{"text/html":["
 config cache_dir: None\n","       model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n","           device: 'mps'\n","
\n"],"text/plain":[" config cache_dir: \u001b[3;35mNone\u001b[0m\n"," model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n"," device: \u001b[32m'mps'\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
        force_PIL: False\n","       use_tunnel: False\n","       server_url: \n","   experiment dir: experiment\n","       source_dir: experiment/source\n","        cache_dir: experiment/cache\n","\n","
\n"],"text/plain":[" force_PIL: \u001b[3;91mFalse\u001b[0m\n"," use_tunnel: \u001b[3;91mFalse\u001b[0m\n"," server_url: \n"," experiment dir: experiment\n"," source_dir: experiment/source\n"," cache_dir: experiment/cache\n","\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
Experiment runs:\n","PaliGemma-crop-post: 0\n","
\n"],"text/plain":["Experiment runs:\n","PaliGemma-crop-post: \u001b[1;36m0\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["quant = '8bit' if IN_MAC else 'bfloat16'\n","CONTEXT = OCRExperimentContext('PaliGemma', EXP_DIR, quant=quant, \n"," server=SERVER, run_name='PaliGemma-crop-post', load=True)\n","CONTEXT.show()\n"]},{"cell_type":"code","execution_count":19,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":358,"referenced_widgets":["d086b7598b4848aaa4eb0ffede5da0f7","4856c138483c4bc59c165c6f8c81777a","1bffa03a8dfa4d4ba6e0b2cf76f5f0fd","88ee0bfb700a4f369333729d755fbc91","f2b7ff5971bd43b986d810b9261abd1c","c5431bc93486497faa74330f37d582aa","aec0a793b589469d957a83b767724cc4","8bb2d35ebdbc48dda1f1f365389ac92e","2d8a14b167c943a888fa9a313f6166eb","2930ae9cab9e421f9c6191bdb86d37a4","cde5c9e129e443fb9c1c642156bab294","5d09637e4ba34f14a879dbe44c675285","4b9e45c5e109429da8a7ed75633cdcfb","11d2466d2ced4c69afc62bda1f3baf27","725a2c78a0d94db19517c9f03fdbd76f","635032f84bbc4b0a8b1e4bcfb9d5318d","0e6515360d65481c8b398c02b0b6f437","9ac3df27ca5f442398b86df5d5849fab","6683113218e44183a6a5a1cd1392b452","5c0be55008d045108e2a91a819adfd58","ad5ddbcf9642427ca162e9cfdefe8ded","2682f4f1f76b498091a8bb16c97205e6","513155104caa4340900c728a570728e9","b64f35e753334071ba92cc9c23472f5f","5d772e124d1c4286bc83b0fdfd50b737","3a40dc732037425cb1491bb68f7d7178","4306183c004747b897acb4be6577f46d","78f02d7219cb4e5bbde518ac5e96a341","06d9f190506c40fb9689977c2a2b2757","daabe05918774871ba2f9cba38ca14ff","e72b3672869743e7b8fbbe178a93698a","86e80efa2af4416b9f465e7cb3873c3b","64d05881c560431b86dec6c27c01bfbb","a49fa2c6c92c4191a79f659b114eae0e","2db177bd125f4e0789363306784c51fa","b75c3b4c04a9416ca5e93bdf3b049c12","5d1fd260fdee46d69e8f81f1e146b787","5541585cb2714d82a3fdb9f70a161533","7fa1b8a86fe846c29568b89135748bc2","7694626eeb3b45598fa74862fb5b95a0","4cc8f4e7c0dd442199d94880ca6b718f","7efe7d6323ef4a00a4c4239643ef1d7d","b51706f5a3c14f20a642a64912e068ff","f1cf213ea11e431
c8aded42b1af2536b","a26dbf2c5fc647e89984f08d89e11cb4","fd31aabd8e4c4aa8add50c95e55e9d6f","22121cd9a6954bc2908e5a44c6bdd6a9","54abbc4b157c41cba0f0c663bba5eb25","0d416a34a58745ec9558443d076d3d55","64b5967a0be04a05aeddda4a1b977531","aeb9f955725443f5b2c2f0c54ffb1fc8","1c60de0ca3b8455a900affc4690fca1f","06de6316d026491bbd9e7c18efa3a351","f4fd6b82478a444d91242587c0a5045f","8f5f4df805114835b66872e2520edf70","bbf4c62d0b674b2594e455696e6e427a","377aea61c8d441afad1456b08681c7f2","b037f85089e24e5ba970bef2f46e568b","f3863102525f488fb74877a54b447c63","79d600bfe6204618af8ba7a7cb1577a8","0251403980314014b84ae3dc63cc5455","84ab0a402fe54b0f9f8791321c8d9276","4bf8171d649f4a819275590070b734bd","7a2075bd25ac42c984f2e12c2f18b578","0b9d514d2d9249f09095065a9e302c49","4acbacc0ed4a4c1581583f428b69aff7"]},"executionInfo":{"elapsed":31187,"status":"ok","timestamp":1717165357281,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"cSuBWT4YN2-i","outputId":"00cdca4b-4568-4a97-c646-42ab70541d1d"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"6701b0fefb114810a52c202ffb6a4b4d","version_major":2,"version_minor":0},"text/plain":["Fetching 9 files: 0%| | 0/9 [00:00 Size: '224'\n"," Quantization: '8bit'\n"," Device: 'mps'\n"," VRAM: 2986/65536 MiB\n","\n","\n"],"text/plain":[" Size: \u001b[32m'224'\u001b[0m\n"," Quantization: \u001b[32m'8bit'\u001b[0m\n"," Device: \u001b[32m'mps'\u001b[0m\n"," VRAM: \u001b[1;36m2986\u001b[0m/\u001b[1;36m65536\u001b[0m MiB\n","\n"]},"metadata":{},"output_type":"display_data"}],"source":["ocr_model = CONTEXT.setup_ocr_model(False)\n","ocr_model.show_info()"]},{"cell_type":"markdown","metadata":{"id":"rgSiXSTyN2-l"},"source":["Check the images are in place"]},{"cell_type":"code","execution_count":20,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":431,"status":"ok","timestamp":1717165362762,"user":{"displayName":"Vicente 
Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"ZXL84T8dUMUm","outputId":"92365c79-9c3d-4645-9638-b9f56b3131b7"},"outputs":[{"data":{"text/plain":["['00: Action_Comics_1960-01-00_(262).JPG',\n"," '01: Adolf_Cap_01_008.jpg',\n"," '02: Barnaby_v1-028.png',\n"," '03: Barnaby_v1-029.png',\n"," '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n"," '05: Cannon-292.jpg',\n"," '06: Contrato_con_Dios_028.jpg',\n"," '07: Erase_una_vez_en_Francia_02_88.jpg',\n"," '08: FOX_CHILLINTALES_T17_012.jpg',\n"," '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n"," '10: Galactus_12.jpg',\n"," '11: INOUE_KYOUMEN_002.png',\n"," '12: MCCALL_ROBINHOOD_T31_010.jpg',\n"," '13: MCCAY_LITTLENEMO_090.jpg',\n"," '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n"," '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n"," '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n"," '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n"," '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n"," '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n"," '20: Strange_Tales_172005.jpg',\n"," '21: Strange_Tales_172021.jpg',\n"," '22: Tarzan_014-21.JPG',\n"," '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n"," '24: Transformers_-_Unicron_000-004.jpg',\n"," '25: Transformers_-_Unicron_000-016.jpg',\n"," '26: WARE_ACME_024.jpg',\n"," '27: Yoko_Tsuno_T01_1972-10.jpg',\n"," '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n"," '29: manga_0033.jpg',\n"," '30: ronson-031.jpg',\n"," '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']"]},"execution_count":20,"metadata":{},"output_type":"execute_result"}],"source":["[f\"{i:02}: {_.name}\" for i,_ in 
enumerate(CONTEXT.image_paths)]\n"]},{"cell_type":"code","execution_count":22,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":200,"referenced_widgets":["59886ba6cd1c4fe8951884a5c340808b","bd58f503314643659cf9e8a1cef488c9","dda9ef398b524535a68fd6ea11fad818","eca634da59354f1caa467fba3c58c5fc","4b450035de2f401b9e3554a023a3ec65","53d92d965e984ed7afc3496af7d61d70","3d496f12c55645aead4dfe7d0d07dcf0","91b0f76df87a4c56aa5a02943cb28839","c24dd55347c44ee586bab8ee277db7da","3cb6adfc2d764b97b6673765860d47bb","f77c96169250417aad96b5c1d58222f8","f68accd919884dd09190b80513b9646c","e30127ffa4834f609af7939fffe106e8","e6f1a30a76264f7e84e93559d196cb5f","2daa7d0e57fd4e4682ce1adc15b9f84d","4cb1d38e7aec4391b8cd7527ca9febee","d3913b280fba46c4b9053b0e1b7f76fe","c610dd153a364592b786fcdf898c3da3","14fe1f23cc4b4887a5a7d8b987af49eb","7f56d139cd774438be2902f1310d55b6","b24e5e348f604c2fb1359e12f9c1dde1","3b259cdb3ddd4432bd07c36f6145a4de","941510f7665d47f3bda727843f6684ae","85198e7d3abb4492b0cca02083f68a24","143e6c3613d8483d91833a2e13f86fb6","212c95d82afc43a9b849a07cd0193ad6","b3691f5cb09041b5aa88e1caf3a29b20","9ad1e5cabb0d4636a08d306513e63c16","d03fbf56a2934588892fd1bb43739564","491fa386a15f4719bf44fa8e302fd011","fc0380f14a854a6cb040442f98d41000","a3aff0a8b5f4459e872c8a2c05f0226d","d5541144ffea4c418ef5c6c3db1e519e","385f8f44444540af87117059cfec0f5a","816479b548ed4dd99880c862929898fa","ec3cca3523514dc1b21d8d79773cb988","577f895e829e4f9098f600795aa18d08","58e62210fd1c482cb848c00704ee496c","9ca29ee5af0b4f888d2f6c5c011cad7f","a340e132d43a4178b902d6e18cdaa42c","14457502346c40069dda383db7503d76","0f90ecefed3b4b30bff078f97f4a67d7","15a4d0683ce04296ae0426bff451cc06","460853ad40bc474e900544e27d4d8440","041565754c1d4b8188ac80ac296974a3","2567a327e02648e29b2410d406d45217","e892ff1ff8244fb5b4dd1617e7b67bcb","3215c940fe7e485c97a3af8c5dbf8e08","9932bf3222934434baca1a23e649f118","a2ed35e40a0342f1b5209cf2368ec9b8","d35bec09dbe849f9b64a211fd45b80c0","0915b9e3ce76421a8761cc5eaf25e2a7","b6c5b7f1
05e74dd09000d038b57066e8","d033341173d24e19a5f413ca0a53ce6c","358dfaf8072f4c77aabc4e267e48d833","ea37bd0802504828a90c1d3625e2d39e","b54c98e9b45d4a6391fce48e463488aa","9a1befb039dd4021af786a7cefc7e7fe","d3dc27978259400187c814165ce20091","1a20a2ace0234eca9b4b8f2fc68358b0","215b89c0f4304fb4b2a4642b763f3b8e","d58f6d5a301e4389b4e73b7ae2ef595f","3655dd45e31845cb9cf4bcc1a7572db0","3cd7144832b1489aa0dcdc2eb74da6b4","0f41b180ba3f4e139d00d1d4e41fa808","db7189bed4d04523bafa9267fd5ed194","8ee17913c9124a8f8136f87de059f86f","65f9a631473a4ce989995cfcc84c1388","dd4f665280c1428ea9cb3073d95bc02e"],"resources":{"http://localhost:8080/experiment/cache/Strange_Tales_172005/_crop/Strange_Tales_172005_6_Default,%20grey%20pad.png":{"data":"CjwhRE9DVFlQRSBodG1sPgo8aHRtbCBsYW5nPWVuPgogIDxtZXRhIGNoYXJzZXQ9dXRmLTg+CiAgPG1ldGEgbmFtZT12aWV3cG9ydCBjb250ZW50PSJpbml0aWFsLXNjYWxlPTEsIG1pbmltdW0tc2NhbGU9MSwgd2lkdGg9ZGV2aWNlLXdpZHRoIj4KICA8dGl0bGU+RXJyb3IgNDA0IChOb3QgRm91bmQpISExPC90aXRsZT4KICA8c3R5bGU+CiAgICAqe21hcmdpbjowO3BhZGRpbmc6MH1odG1sLGNvZGV7Zm9udDoxNXB4LzIycHggYXJpYWwsc2Fucy1zZXJpZn1odG1se2JhY2tncm91bmQ6I2ZmZjtjb2xvcjojMjIyO3BhZGRpbmc6MTVweH1ib2R5e21hcmdpbjo3JSBhdXRvIDA7bWF4LXdpZHRoOjM5MHB4O21pbi1oZWlnaHQ6MTgwcHg7cGFkZGluZzozMHB4IDAgMTVweH0qID4gYm9keXtiYWNrZ3JvdW5kOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9lcnJvcnMvcm9ib3QucG5nKSAxMDAlIDVweCBuby1yZXBlYXQ7cGFkZGluZy1yaWdodDoyMDVweH1we21hcmdpbjoxMXB4IDAgMjJweDtvdmVyZmxvdzpoaWRkZW59aW5ze2NvbG9yOiM3Nzc7dGV4dC1kZWNvcmF0aW9uOm5vbmV9YSBpbWd7Ym9yZGVyOjB9QG1lZGlhIHNjcmVlbiBhbmQgKG1heC13aWR0aDo3NzJweCl7Ym9keXtiYWNrZ3JvdW5kOm5vbmU7bWFyZ2luLXRvcDowO21heC13aWR0aDpub25lO3BhZGRpbmctcmlnaHQ6MH19I2xvZ297YmFja2dyb3VuZDp1cmwoLy93d3cuZ29vZ2xlLmNvbS9pbWFnZXMvbG9nb3MvZXJyb3JwYWdlL2Vycm9yX2xvZ28tMTUweDU0LnBuZykgbm8tcmVwZWF0O21hcmdpbi1sZWZ0Oi01cHh9QG1lZGlhIG9ubHkgc2NyZWVuIGFuZCAobWluLXJlc29sdXRpb246MTkyZHBpKXsjbG9nb3tiYWNrZ3JvdW5kOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9sb2dvcy9lcnJvcnBhZ2UvZXJyb3JfbG9nby0xNTB4NTQtMngucG5nKSBuby1yZXBlYXQgMCUgMCUvMTAwJSAxMDAlOy1tb3otYm9yZG
VyLWltYWdlOnVybCgvL3d3dy5nb29nbGUuY29tL2ltYWdlcy9sb2dvcy9lcnJvcnBhZ2UvZXJyb3JfbG9nby0xNTB4NTQtMngucG5nKSAwfX1AbWVkaWEgb25seSBzY3JlZW4gYW5kICgtd2Via2l0LW1pbi1kZXZpY2UtcGl4ZWwtcmF0aW86Mil7I2xvZ297YmFja2dyb3VuZDp1cmwoLy93d3cuZ29vZ2xlLmNvbS9pbWFnZXMvbG9nb3MvZXJyb3JwYWdlL2Vycm9yX2xvZ28tMTUweDU0LTJ4LnBuZykgbm8tcmVwZWF0Oy13ZWJraXQtYmFja2dyb3VuZC1zaXplOjEwMCUgMTAwJX19I2xvZ297ZGlzcGxheTppbmxpbmUtYmxvY2s7aGVpZ2h0OjU0cHg7d2lkdGg6MTUwcHh9CiAgPC9zdHlsZT4KICA8YSBocmVmPS8vd3d3Lmdvb2dsZS5jb20vPjxzcGFuIGlkPWxvZ28gYXJpYS1sYWJlbD1Hb29nbGU+PC9zcGFuPjwvYT4KICA8cD48Yj40MDQuPC9iPiA8aW5zPlRoYXTigJlzIGFuIGVycm9yLjwvaW5zPgogIDxwPiAgPGlucz5UaGF04oCZcyBhbGwgd2Uga25vdy48L2lucz4K","headers":[["content-length","1449"],["content-type","text/html; charset=utf-8"]],"ok":false,"status":404,"status_text":""}}},"executionInfo":{"elapsed":639,"status":"ok","timestamp":1717165542401,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"GmAca8fVUMUm","outputId":"4042b144-9cfe-43f0-b391-ac8d44b612f8"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"ed7e79b69996433b87d46e466cb48fb6","version_major":2,"version_minor":0},"text/plain":["VBox(children=(HTML(value=\""}},"c24dd55347c44ee586bab8ee277db7da":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_f68accd919884dd09190b80513b9646c","IPY_MODEL_e30127ffa4834f609af7939fffe106e8","IPY_MODEL_e6f1a30a76264f7e84e93559d196cb5f","IPY_MODEL_2daa7d0e57fd4e4682ce1adc15b9f84d"],"layout":"IPY_MODEL_4cb1d38e7aec4391b8cd7527ca9febee"}},"c5431bc93486497faa74330f37d582aa":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_mod
el_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c610dd153a364592b786fcdf898c3da3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["method_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_385f8f44444540af87117059cfec0f5a","IPY_MODEL_816479b548ed4dd99880c862929898fa","IPY_MODEL_ec3cca3523514dc1b21d8d79773cb988"],"layout":"IPY_MODEL_577f895e829e4f9098f600795aa18d08"}},"cde5c9e129e443fb9c1c642156bab294":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d033341173d24e19a5f413ca0a53ce6c":{"model_module":"@jupyter-widgets/base","model_module
_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50px"}},"d03fbf56a2934588892fd1bb43739564":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d086b7598
b4848aaa4eb0ffede5da0f7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4856c138483c4bc59c165c6f8c81777a","IPY_MODEL_1bffa03a8dfa4d4ba6e0b2cf76f5f0fd","IPY_MODEL_88ee0bfb700a4f369333729d755fbc91"],"layout":"IPY_MODEL_f2b7ff5971bd43b986d810b9261abd1c"}},"d35bec09dbe849f9b64a211fd45b80c0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d3913b280fba46c4b9053b0e1b7f76fe":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["box_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_491fa386a15f4719bf44fa8e302fd011","IPY_MODEL_fc0380f14a854a6cb040442f98d41000","IPY_MODEL_a3aff0a8b5f4459e872c8a2c05f0226d"],"layout":"IPY_MODEL_d5541144ffea4c418ef5c6c3db1e519e"}},"d3dc27978259400187c814165ce20091":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","descrip
tion_width":"0px"}},"d5541144ffea4c418ef5c6c3db1e519e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d58f6d5a301e4389b4e73b7ae2ef595f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":"none","flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_
x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"daabe05918774871ba2f9cba38ca14ff":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"db7189bed4d04523bafa9267fd5ed194":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"obj
ect_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"120px"}},"dd4f665280c1428ea9cb3073d95bc02e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dda9ef398b524535a68fd6ea11fad818":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"VBoxModel","state":{"_dom_classes":["context-visor","140192597210928"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_c24dd55347c44ee586bab8ee277db7da","IPY_MODEL_3cb6adfc2d764b97b6673765860d47bb"],"layout":"IPY_MODEL_f77c96169250417aad96b5c1d58222f8"}},"e30127ffa4834f609af7939fffe106e8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","140192597212656"],"_
model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_941510f7665d47f3bda727843f6684ae"],"layout":"IPY_MODEL_85198e7d3abb4492b0cca02083f68a24"}},"e6f1a30a76264f7e84e93559d196cb5f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","140192597212608"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_143e6c3613d8483d91833a2e13f86fb6"],"layout":"IPY_MODEL_212c95d82afc43a9b849a07cd0193ad6"}},"e72b3672869743e7b8fbbe178a93698a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e892ff1ff8244fb5b4dd1617e7b67bcb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":"lightblue","font_weight":"bold"}},"ea37bd0802504828a90c1d3625e2d39e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view
_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":"0px 0px 0px 10px","right":null,"top":null,"visibility":null,"width":"initial"}},"ec3cca3523514dc1b21d8d79773cb988":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Initial box","Default","Default, grey pad","Padded 4px","Padded 8px","Extracted, init box","Padded 4, extracted","Padded 8, extracted","Padded 8, dilation 1","Pad 8, fract. 0.5","Pad 8, fract. 
0.2"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":2,"layout":"IPY_MODEL_1a20a2ace0234eca9b4b8f2fc68358b0","style":"IPY_MODEL_215b89c0f4304fb4b2a4642b763f3b8e"}},"eca634da59354f1caa467fba3c58c5fc":{"model_module":"@jupyter-widgets/output","model_module_version":"1.0.0","model_name":"OutputModel","state":{"_dom_classes":["message_visor-yXy"],"_model_module":"@jupyter-widgets/output","_model_module_version":"1.0.0","_model_name":"OutputModel","_view_count":null,"_view_module":"@jupyter-widgets/output","_view_module_version":"1.0.0","_view_name":"OutputView","layout":"IPY_MODEL_65f9a631473a4ce989995cfcc84c1388","msg_id":"","outputs":[]}},"f1cf213ea11e431c8aded42b1af2536b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f2b7ff5971bd43b986d810b9261abd1c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":
null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f3863102525f488fb74877a54b447c63":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0b9d514d2d9249f09095065a9e302c49","placeholder":"​","style":"IPY_MODEL_4acbacc0ed4a4c1581583f428b69aff7","value":" 137/137 [00:00<00:00, 13.4kB/s]"}},"f4fd6b82478a444d91242587c0a5045f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f68accd919884dd09190b80513b9646c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxMode
l","state":{"_dom_classes":["context-visor","140192597212368"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_b24e5e348f604c2fb1359e12f9c1dde1"],"layout":"IPY_MODEL_3b259cdb3ddd4432bd07c36f6145a4de"}},"f77c96169250417aad96b5c1d58222f8":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fc0380f14a854a6cb040442f98d41000":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"CheckboxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"all","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_0915b9e3ce76421a8761cc5eaf25e2a7","style":"IPY_MODEL_b6c5b7f
105e74dd09000d038b57066e8","value":false}},"fd31aabd8e4c4aa8add50c95e55e9d6f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_64b5967a0be04a05aeddda4a1b977531","placeholder":"​","style":"IPY_MODEL_aeb9f955725443f5b2c2f0c54ffb1fc8","value":"Loading checkpoint shards: 100%"}}}}},"nbformat":4,"nbformat_minor":0} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing `PaliGemma` OCR for Comics\n", + "> Accuracy Enhancements for OCR in `PanelCleaner`\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Settings for Google Colab" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To efficiently manage the image sources for our experiments, we recommend mounting your Google Drive and storing the experiment files there. 
If you are not familiar with Colab or Jupyter environments, it's best to leave these settings at their default values to ensure smooth operation.\n", + "\n", + "- Set `MOUNT_DRIVE` to `True` to enable mounting Google Drive in the Colab environment.\n", + "This allows the notebook to access files stored in your Google Drive.\n", + "\n", + "- `GDRIVE_MOUNT_POINT` specifies the local directory in Colab where your Google Drive will be mounted.\n", + "This acts as the root directory for accessing any files within your Google Drive from the notebook.\n", + "\n", + "- `PANELCLEANER_IN_GDRIVE` specifies the path within your Google Drive where the PanelCleaner project is located.\n", + "This path is used to access or store any files related to the PanelCleaner project directly from Google Drive.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "MOUNT_DRIVE = True\n", + "GDRIVE_MOUNT_POINT = 'drive'\n", + "PANELCLEANER_IN_GDRIVE = 'MyDrive/Shared/PanelCleaner'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# install (Colab)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import fastcore.all as FC\n", + "import os\n", + "import re\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "from rich import print as cprint\n", + "from rich.text import Text\n", + "\n", + "def info(msg: str):\n", + " (t := Text(msg)).stylize(\"bold red\", 0, 6)\n", + " cprint(\"_\" * 10, t, \"_\" * 10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mount Google Drive" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mnt_point = Path(f\"/content/{GDRIVE_MOUNT_POINT}\")\n", + "if FC.IN_COLAB:\n", + " if MOUNT_DRIVE:\n", + " if not mnt_point.exists():\n", + " info(\"Mounting Google Drive\")\n", + " from google.colab import drive\n", + 
" drive.mount(str(mnt_point), force_remount=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install **PanelCleaner**\n", + "\n", + "> We will attempt to use the version of **PanelCleaner** stored in your Google Drive. If it's not available, we'll install it from GitHub.\n", + "\n", + "Note that we specifically require the `testbed` branch of the **PanelCleaner** repository, not the main trunk. This branch contains necessary configurations and experimental features that are crucial for the tests conducted in this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if FC.IN_COLAB:\n", + " pc_path = mnt_point/PANELCLEANER_IN_GDRIVE\n", + " tb_path = pc_path/'pcleaner/_testbed'\n", + " if tb_path.exists():\n", + " info('Installing PanelCleaner from your Google Drive')\n", + " else:\n", + " info('Installing PanelCleaner from GitHub')\n", + " !rm -rf PanelCleaner\n", + " !git clone -b testbed https://github.com/civvic/PanelCleaner.git\n", + " pc_path = Path('PanelCleaner').absolute()\n", + " tb_path = pc_path/'pcleaner/_testbed'\n", + " assert tb_path.exists(), \"PanelCleaner not found\"\n", + " os.chdir(tb_path)\n", + " sys.path.insert(0, f\"{tb_path}\")\n", + " sys.path.insert(0, f\"{pc_path}\")\n", + " !pip install -q -r requirements-colab.txt\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'4.42.0.dev0'" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "if FC.IN_COLAB:\n", + " !pip install -q accelerate\n", + "\n", + "import transformers\n", + "transformers.__version__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prologue" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from testbed.experiments import ExperimentsVisor, CropMethod, 
OCRExperimentContext\n", + "from testbed.ocr_paligemma import PaliGemmaOCR, get_gpu_vram\n", + "from testbed.helpers import IN_MAC, IN_LINUX\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if IN_MAC:\n", + " # !pip install -q mlx_vlm\n", + "\n", + " import mlx.core as mx\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
          metal.is_available(): True\n",
+       "           metal.device_info(): {'memory_size': 68719476736, 'max_recommended_working_set_size': 51539607552, \n",
+       "'max_buffer_length': 38654705664, 'architecture': 'applegpu_g13s'}\n",
+       "     metal.get_active_memory(): 0\n",
+       "       metal.get_peak_memory(): 0\n",
+       "      metal.get_cache_memory(): 0\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + " \u001b[1;35mmetal.is_available\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[3;92mTrue\u001b[0m\n", + " \u001b[1;35mmetal.device_info\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'memory_size'\u001b[0m: \u001b[1;36m68719476736\u001b[0m, \u001b[32m'max_recommended_working_set_size'\u001b[0m: \u001b[1;36m51539607552\u001b[0m, \n", + "\u001b[32m'max_buffer_length'\u001b[0m: \u001b[1;36m38654705664\u001b[0m, \u001b[32m'architecture'\u001b[0m: \u001b[32m'applegpu_g13s'\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[1;35mmetal.get_active_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n", + " \u001b[1;35mmetal.get_peak_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n", + " \u001b[1;35mmetal.get_cache_memory\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m: \u001b[1;36m0\u001b[0m\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
            GPU: applegpu_g13s\n",
+       "     total VRAM: 65536 MiB\n",
+       "    active VRAM: 0 MiB\n",
+       "
\n" + ], + "text/plain": [ + " GPU: applegpu_g13s\n", + " total VRAM: \u001b[1;36m65536\u001b[0m MiB\n", + " active VRAM: \u001b[1;36m0\u001b[0m MiB\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "if IN_MAC:\n", + " gpu_name = mx.metal.device_info()['architecture']\n", + " cprint(\n", + " f\"{'metal.is_available()':>30}: {mx.metal.is_available()}\\n\"\n", + " f\"{'metal.device_info()':>30}: {mx.metal.device_info()}\\n\"\n", + " f\"{'metal.get_active_memory()':>30}: {mx.metal.get_active_memory()//1024//1024}\\n\"\n", + " f\"{'metal.get_peak_memory()':>30}: {mx.metal.get_peak_memory()//1024//1024}\\n\"\n", + " f\"{'metal.get_cache_memory()':>30}: {mx.metal.get_cache_memory()//1024//1024}\\n\"\n", + " )\n", + "else:\n", + " !nvidia-smi\n", + " import subprocess\n", + " gpu_name = subprocess.check_output(\n", + " \"nvidia-smi --query-gpu=gpu_name --format=csv,noheader\", shell=True\n", + " ).decode('utf-8').strip()\n", + " \n", + "\n", + "cprint( f\"{'GPU':>15}: {gpu_name}\\n\"\n", + " f\"{'total VRAM':>15}: {get_gpu_vram()} MiB\\n\"\n", + " f\"{'active VRAM':>15}: {get_gpu_vram(False)} MiB\")\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "# PaliGemma experiments" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "User is already logged in.\n" + ] + } + ], + "source": [ + "from huggingface_hub import notebook_login\n", + "notebook_login(False, False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment directory" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Directory where the images reside (`EXP_DIR/source/`), the auxiliary images will be cached (`EXP_DIR/cache/`), and the experiment results will be saved. 
You can change the default location here.\n", + "\n", + "NOTE: the default value assumes we are currently inside the `PanelCleaner/pcleaner/_testbed` directory. You can check that this is the case with `Path('.').resolve()`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    Working dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed\n",
+       "Experiments dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment\n",
+       "
\n" + ], + "text/plain": [ + " Working dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/\u001b[0m\u001b[95m_testbed\u001b[0m\n", + "Experiments dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/\u001b[0m\u001b[95mexperiment\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "EXP_DIR = Path('./experiment')\n", + "cprint(f\"{'Working dir':>15}: {Path('.').resolve()}\\nExperiments dir: {EXP_DIR.resolve()}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test images\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copy your images to the source directory:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/source\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/\u001b[0m\u001b[95msource\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cprint((EXP_DIR/'source').resolve())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "or download the standard set:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !gdown --id 1MCqUImwFS5iQ271CD9_t2FSugJXdYj0a -O experiment.zip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !unzip -qn experiment.zip -d ." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup ngrok (Colab)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The experiments can generate hundreds of images, and maintaining the **PIL** images in memory is not efficient. All the generated images are cached and visualized on demand through a URL pointing to the local cache. This approach prevents the kernel from being overloaded with **PIL** images, with the front-end responsible for fetching the image and the backend web server (not the kernel) for serving the image in another process. This method is quick and efficient. As an added bonus, the saved notebook remains lean and fit; it doesn't store the Base64 versions of all the output cell images.\n", + "\n", + "Unfortunately, this approach does not work as is in **Colab**. Google Colab runs on an older Ubuntu 18.04 VM, so all the usual networking challenges with Docker, or whatever VMs Google is using, apply. Google also goes to great lengths to avoid exposing its internal architecture. 
We have two options:\n", + "- Let the Jupyter kernel serve the images itself, which is slow and memory-consuming.\n", + "- Use a tunnel to map localhost (server) to whatever IP and port the front-end (the browser you're currently using) is running on. We can use **ngrok** for this, but *ngrok* is a commercial service that has been abused and now requires confirmation the first time the tunnel connects, which can be inconvenient for the user. It also requires the user to open a free account and obtain an auth token.\n", + "\n", + "You choose.\n", + "\n", + "If the notebook is running in Colab and ngrok has been successfully installed and the tunnel has been created, the default setting is USE_PIL=False. You can set the environment variable USE_PIL=True to force the use of PIL images, but note that in certain circumstances, Colab will complain because the free tiers are usually memory constrained.\n", + "\n", + "If you don't change the default settings and\n", + "- the notebook is running locally, it'll serve the images directly without any additional setup.\n", + "- the notebook is running in Colab, it'll serve the images through a web server and ngrok.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if FC.IN_COLAB:\n", + " os.environ['USE_TUNNEL'] = 'True'\n", + " os.environ['USE_PIL'] = 'False'\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "SERVER = None\n", + "if os.environ['USE_PIL'].lower() == 'false' and os.environ['USE_TUNNEL'].lower() == 'true':\n", + " import testbed.web_server as web_server\n", + " SERVER = web_server.setup_ngrok(web_server.WebServerBottle, Path(EXP_DIR))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CONTEXT" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "| quant \\ platform | Mem | Mac | Linux | Windows | Colab T4 | Colab L4/H100 |\n", + "| 
--- | --- | --- | --- | --- | --- | --- |\n", + "| **bfloat16** | 6 GB | ✅ | ✅ | ? | ✅ | ✅ |\n", + "| **8bit** | 3 GB | ✅ | ✅ | ? | ✅ | ✅ |\n", + "| **4bit** | 2 GB | ✅ | ✅ | ? | ✅ | ✅ |\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Creates the `ExperimentContext` object we'll use to manage the experiments.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current Configuration:\n", + "\n", + "Locale: System default\n", + "Default Profile: Built-in\n", + "Saved Profiles:\n", + "- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n", + "- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n", + "\n", + "Profile Editor: cursor\n", + "Cache Directory: System default\n", + "Default Torch Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n", + "Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n", + "GUI Theme: System default\n", + "\n", + "--------------------\n", + "\n", + "Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n", + "System default cache directory: /Users/vic/Library/Caches/pcleaner\n" + ] + }, + { + "data": { + "text/html": [ + "
 config cache_dir: None\n",
+       "       model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n",
+       "           device: 'mps'\n",
+       "
\n" + ], + "text/plain": [ + " config cache_dir: \u001b[3;35mNone\u001b[0m\n", + " model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n", + " device: \u001b[32m'mps'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
        force_PIL: False\n",
+       "       use_tunnel: False\n",
+       "       server_url: \n",
+       "   experiment dir: experiment\n",
+       "       source_dir: experiment/source\n",
+       "        cache_dir: experiment/cache\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + " force_PIL: \u001b[3;91mFalse\u001b[0m\n", + " use_tunnel: \u001b[3;91mFalse\u001b[0m\n", + " server_url: \n", + " experiment dir: experiment\n", + " source_dir: experiment/source\n", + " cache_dir: experiment/cache\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Experiment runs:\n",
+       "PaliGemma-crop-post: 0\n",
+       "
\n" + ], + "text/plain": [ + "Experiment runs:\n", + "PaliGemma-crop-post: \u001b[1;36m0\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "quant = '8bit' if IN_MAC else 'bfloat16'\n", + "CONTEXT = OCRExperimentContext('PaliGemma', EXP_DIR, quant=quant, \n", + " server=SERVER, run_name='PaliGemma-crop-post', load=True)\n", + "CONTEXT.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6701b0fefb114810a52c202ffb6a4b4d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fetching 9 files: 0%| | 0/9 [00:00 Size: '224'\n", + " Quantization: '8bit'\n", + " Device: 'mps'\n", + " VRAM: 2986/65536 MiB\n", + "\n", + "\n" + ], + "text/plain": [ + " Size: \u001b[32m'224'\u001b[0m\n", + " Quantization: \u001b[32m'8bit'\u001b[0m\n", + " Device: \u001b[32m'mps'\u001b[0m\n", + " VRAM: \u001b[1;36m2986\u001b[0m/\u001b[1;36m65536\u001b[0m MiB\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ocr_model = CONTEXT.setup_ocr_model(False)\n", + "ocr_model.show_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check the images are in place" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['00: Action_Comics_1960-01-00_(262).JPG',\n", + " '01: Adolf_Cap_01_008.jpg',\n", + " '02: Barnaby_v1-028.png',\n", + " '03: Barnaby_v1-029.png',\n", + " '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n", + " '05: Cannon-292.jpg',\n", + " '06: Contrato_con_Dios_028.jpg',\n", + " '07: Erase_una_vez_en_Francia_02_88.jpg',\n", + " '08: FOX_CHILLINTALES_T17_012.jpg',\n", + " '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n", + " '10: Galactus_12.jpg',\n", + " '11: INOUE_KYOUMEN_002.png',\n", + " '12: MCCALL_ROBINHOOD_T31_010.jpg',\n", + " '13: 
MCCAY_LITTLENEMO_090.jpg',\n", + " '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n", + " '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n", + " '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n", + " '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n", + " '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n", + " '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n", + " '20: Strange_Tales_172005.jpg',\n", + " '21: Strange_Tales_172021.jpg',\n", + " '22: Tarzan_014-21.JPG',\n", + " '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n", + " '24: Transformers_-_Unicron_000-004.jpg',\n", + " '25: Transformers_-_Unicron_000-016.jpg',\n", + " '26: WARE_ACME_024.jpg',\n", + " '27: Yoko_Tsuno_T01_1972-10.jpg',\n", + " '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n", + " '29: manga_0033.jpg',\n", + " '30: ronson-031.jpg',\n", + " '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[f\"{i:02}: {_.name}\" for i,_ in enumerate(CONTEXT.image_paths)]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ed7e79b69996433b87d46e466cb48fb6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HTML(value=\"" + } + }, + "c24dd55347c44ee586bab8ee277db7da": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f68accd919884dd09190b80513b9646c", + 
"IPY_MODEL_e30127ffa4834f609af7939fffe106e8", + "IPY_MODEL_e6f1a30a76264f7e84e93559d196cb5f", + "IPY_MODEL_2daa7d0e57fd4e4682ce1adc15b9f84d" + ], + "layout": "IPY_MODEL_4cb1d38e7aec4391b8cd7527ca9febee" + } + }, + "c5431bc93486497faa74330f37d582aa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c610dd153a364592b786fcdf898c3da3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "method_grp" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_385f8f44444540af87117059cfec0f5a", + 
"IPY_MODEL_816479b548ed4dd99880c862929898fa", + "IPY_MODEL_ec3cca3523514dc1b21d8d79773cb988" + ], + "layout": "IPY_MODEL_577f895e829e4f9098f600795aa18d08" + } + }, + "cde5c9e129e443fb9c1c642156bab294": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d033341173d24e19a5f413ca0a53ce6c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "50px" + } + }, + "d03fbf56a2934588892fd1bb43739564": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + 
"model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d086b7598b4848aaa4eb0ffede5da0f7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4856c138483c4bc59c165c6f8c81777a", + "IPY_MODEL_1bffa03a8dfa4d4ba6e0b2cf76f5f0fd", + "IPY_MODEL_88ee0bfb700a4f369333729d755fbc91" + ], + "layout": "IPY_MODEL_f2b7ff5971bd43b986d810b9261abd1c" + } + }, + "d35bec09dbe849f9b64a211fd45b80c0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d3913b280fba46c4b9053b0e1b7f76fe": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "box_grp" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_491fa386a15f4719bf44fa8e302fd011", + "IPY_MODEL_fc0380f14a854a6cb040442f98d41000", + "IPY_MODEL_a3aff0a8b5f4459e872c8a2c05f0226d" + ], + "layout": "IPY_MODEL_d5541144ffea4c418ef5c6c3db1e519e" + } + }, + "d3dc27978259400187c814165ce20091": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "0px" + } + }, + "d5541144ffea4c418ef5c6c3db1e519e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": 
null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d58f6d5a301e4389b4e73b7ae2ef595f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": "none", + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "daabe05918774871ba2f9cba38ca14ff": { + 
"model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "db7189bed4d04523bafa9267fd5ed194": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "120px" + } + }, + "dd4f665280c1428ea9cb3073d95bc02e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dda9ef398b524535a68fd6ea11fad818": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "140192597210928" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": 
"1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c24dd55347c44ee586bab8ee277db7da", + "IPY_MODEL_3cb6adfc2d764b97b6673765860d47bb" + ], + "layout": "IPY_MODEL_f77c96169250417aad96b5c1d58222f8" + } + }, + "e30127ffa4834f609af7939fffe106e8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "140192597212656" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_941510f7665d47f3bda727843f6684ae" + ], + "layout": "IPY_MODEL_85198e7d3abb4492b0cca02083f68a24" + } + }, + "e6f1a30a76264f7e84e93559d196cb5f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "140192597212608" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_143e6c3613d8483d91833a2e13f86fb6" + ], + "layout": "IPY_MODEL_212c95d82afc43a9b849a07cd0193ad6" + } + }, + "e72b3672869743e7b8fbbe178a93698a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e892ff1ff8244fb5b4dd1617e7b67bcb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ButtonStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ButtonStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "button_color": "lightblue", + "font_weight": "bold" + } + }, + "ea37bd0802504828a90c1d3625e2d39e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": "0px 0px 0px 10px", + "right": null, + "top": null, + "visibility": null, + "width": "initial" + } + }, + "ec3cca3523514dc1b21d8d79773cb988": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "DropdownModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DropdownModel", + "_options_labels": [ + "Initial box", + "Default", + "Default, grey pad", + "Padded 4px", + "Padded 8px", + "Extracted, init box", + "Padded 4, extracted", + "Padded 8, extracted", + "Padded 8, dilation 1", + "Pad 8, fract. 0.5", + "Pad 8, fract. 0.2" + ], + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "DropdownView", + "description": "", + "description_tooltip": null, + "disabled": false, + "index": 2, + "layout": "IPY_MODEL_1a20a2ace0234eca9b4b8f2fc68358b0", + "style": "IPY_MODEL_215b89c0f4304fb4b2a4642b763f3b8e" + } + }, + "eca634da59354f1caa467fba3c58c5fc": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [ + "message_visor-yXy" + ], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_65f9a631473a4ce989995cfcc84c1388", + "msg_id": "", + "outputs": [] + } + }, + "f1cf213ea11e431c8aded42b1af2536b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f2b7ff5971bd43b986d810b9261abd1c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": 
"@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f3863102525f488fb74877a54b447c63": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0b9d514d2d9249f09095065a9e302c49", + "placeholder": "​", + "style": "IPY_MODEL_4acbacc0ed4a4c1581583f428b69aff7", + "value": " 137/137 [00:00<00:00, 13.4kB/s]" + } + }, + "f4fd6b82478a444d91242587c0a5045f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f68accd919884dd09190b80513b9646c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "140192597212368" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b24e5e348f604c2fb1359e12f9c1dde1" + ], + "layout": "IPY_MODEL_3b259cdb3ddd4432bd07c36f6145a4de" + } + }, + "f77c96169250417aad96b5c1d58222f8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fc0380f14a854a6cb040442f98d41000": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "CheckboxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "CheckboxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "CheckboxView", + "description": "all", + "description_tooltip": null, + "disabled": false, + "indent": true, + "layout": "IPY_MODEL_0915b9e3ce76421a8761cc5eaf25e2a7", + "style": "IPY_MODEL_b6c5b7f105e74dd09000d038b57066e8", + "value": false + } + }, + "fd31aabd8e4c4aa8add50c95e55e9d6f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": 
"", + "description_tooltip": null, + "layout": "IPY_MODEL_64b5967a0be04a05aeddda4a1b977531", + "placeholder": "​", + "style": "IPY_MODEL_aeb9f955725443f5b2c2f0c54ffb1fc8", + "value": "Loading checkpoint shards: 100%" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/pcleaner/_testbed/test_tesseract.ipynb b/pcleaner/_testbed/test_tesseract.ipynb index 4218723c..dd558977 100644 --- a/pcleaner/_testbed/test_tesseract.ipynb +++ b/pcleaner/_testbed/test_tesseract.ipynb @@ -1 +1,3736 @@ -{"cells":[{"cell_type":"markdown","metadata":{"id":"XLuXdPExZlIq"},"source":["# Testing `Tesseract` OCR for comics\n","> Accuracy Enhancements for OCR in `PanelCleaner`\n"]},{"cell_type":"markdown","metadata":{"id":"1fXVD50hsqi-"},"source":["In this notebook, you can test how Tesseract performs with texts from a diverse array of comics, manga, languages, and styles. You can run this notebook locally using Jupyter Lab/Notebook or on any Jupyter-compatible platform like Google Colab or VSCode.\n","\n","We'll begin by setting up [PanelCleaner](https://github.com/VoxelCubes/PanelCleaner) and the [testbed](https://github.com/civvic/PanelCleaner/tree/testbed) in Colab, though the instructions are applicable to other platforms such as [Kaggle](https://www.kaggle.com/code). We will then verify the Tesseract installation, prime an `ExperimentContext`, and create a visor to experiment with different parameters and configurations. \n","\n","**New to Jupyter Notebooks?** If you are not familiar with Jupyter environments, consider exploring the [Introduction to Colab](https://colab.research.google.com/notebooks/intro.ipynb) and the others provided by Google. It offers a quick and comprehensive guide to using Jupyter Notebooks effectively. 
The Jupyter project is a great way to learn about the notebook interface and the [Jupyter ecosystem](https://jupyter.org/)."]},{"cell_type":"markdown","metadata":{"id":"3Vz9SEk9sqi-"},"source":["# Settings for Google Colab"]},{"cell_type":"markdown","metadata":{"id":"xCL1cU7csqi-"},"source":["To efficiently manage the image sources for our experiments, we recommend mounting your Google Drive and storing the experiment files there. If you are not familiar with Colab or Jupyter environments, it's best to leave these settings at their default values to ensure smooth operation.\n","\n","- Set `MOUNT_DRIVE` to `True` to enable mounting Google Drive in the Colab environment.\n","This allows the notebook to access files stored in your Google Drive.\n","\n","- `GDRIVE_MOUNT_POINT` specifies the local directory in Colab where your Google Drive will be mounted.\n","This acts as the root directory for accessing any files within your Google Drive from the notebook.\n","\n","- `PANELCLEANER_IN_GDRIVE` specifies the path within your Google Drive where the PanelCleaner project is located.\n","This path is used to access or store any files related to the PanelCleaner project directly from Google Drive.\n"]},{"cell_type":"code","execution_count":1,"metadata":{"executionInfo":{"elapsed":1,"status":"ok","timestamp":1717161960886,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"NGD253mFsqi-"},"outputs":[],"source":["MOUNT_DRIVE = True\n","GDRIVE_MOUNT_POINT = 'drive'\n","PANELCLEANER_IN_GDRIVE = 'MyDrive/Shared/PanelCleaner'"]},{"cell_type":"markdown","metadata":{"id":"O3mFm2CEsqi_"},"source":["## install (Colab)\n"]},{"cell_type":"code","execution_count":2,"metadata":{"executionInfo":{"elapsed":601,"status":"ok","timestamp":1717161965022,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"YHVrkf__sqi_"},"outputs":[],"source":["import fastcore.all as FC\n","import os\n","import re\n","import 
sys\n","from pathlib import Path\n","\n","from rich import print as cprint\n","from rich.text import Text\n","\n","def info(msg: str):\n"," (t := Text(msg)).stylize(\"bold red\", 0, 6)\n"," cprint(\"_\" * 10, t, \"_\" * 10)\n"]},{"cell_type":"markdown","metadata":{"id":"G3BUsMaHsqi_"},"source":["Mount Google Drive"]},{"cell_type":"code","execution_count":3,"metadata":{"executionInfo":{"elapsed":775,"status":"ok","timestamp":1717161972478,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"B7II6iVwsqi_"},"outputs":[],"source":["mnt_point = Path(f\"/content/{GDRIVE_MOUNT_POINT}\")\n","if FC.IN_COLAB:\n"," if MOUNT_DRIVE:\n"," if not mnt_point.exists():\n"," info(\"Mounting Google Drive\")\n"," from google.colab import drive\n"," drive.mount(str(mnt_point), force_remount=True)\n"]},{"cell_type":"markdown","metadata":{"id":"S5FZkSJTsqi_"},"source":["### Install **PanelCleaner**\n","\n","> We will attempt to use the version of **PanelCleaner** stored in your Google Drive. If it's not available, we'll install it from GitHub.\n","\n","Note that we specifically require the `testbed` branch of the **PanelCleaner** repository, not the main trunk. 
This branch contains necessary configurations and experimental features that are crucial for the tests conducted in this notebook."]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":33},"executionInfo":{"elapsed":6546,"status":"ok","timestamp":1717161987090,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"B579RzhEsqi_","outputId":"bc78b076-6852-426b-c8dc-00db6d663ce6"},"outputs":[],"source":["if FC.IN_COLAB:\n"," pc_path = mnt_point/PANELCLEANER_IN_GDRIVE\n"," tb_path = pc_path/'pcleaner/_testbed'\n"," if tb_path.exists():\n"," info('Installing PanelCleaner from your Google Drive')\n"," else:\n"," info('Installing PanelCleaner from GitHub')\n"," !git clone -b testbed https://github.com/civvic/PanelCleaner.git\n"," tb_path = Path('PanelCleaner/pcleaner/_testbed')\n"," assert tb_path.exists(), \"PanelCleaner not found\"\n"," os.chdir(tb_path)\n"," sys.path.append(f\"{pc_path}\")\n"," sys.path.append(f\"{tb_path}\")\n"," !pip install -q -r requirements-colab.txt\n"]},{"cell_type":"markdown","metadata":{"id":"ii6CcMw_sqjA"},"source":["# Prologue\n","\n","In this section, we import essential components from the `PanelCleaner` testbed. `ExperimentsVisor` is used to manage and visualize the experiments, `CropMethod` defines the cropping strategies for image preprocessing, and `OCRExperimentContext` sets up the context for OCR experiments.\n","\n","If you're curious about the inner workings of these components, you can explore the notebooks that develop them in the `nbs` folder, or check out the source code they generate in the `testbed` directory. 
For instance, see [experiments.ipynb](nbs/experiments.ipynb) and [`_testbed/testbed/experiments.py`](testbed/experiments.py) for more details."]},{"cell_type":"code","execution_count":5,"metadata":{"executionInfo":{"elapsed":4572,"status":"ok","timestamp":1717162002282,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"18HJJTQebWoV"},"outputs":[],"source":["from testbed.experiments import ExperimentsVisor, CropMethod, OCRExperimentContext\n"]},{"cell_type":"markdown","metadata":{"id":"rehyLeMbsqjA"},"source":["## Tesseract setup\n","> This section ensures that Tesseract OCR is correctly installed and configured for our experiments. We require Tesseract version 5.x due to its improved accuracy and features.\n"]},{"cell_type":"markdown","metadata":{"id":"bYfWu4DlsqjA"},"source":["> **NOTE:** In the following cells, lines starting with an exclamation mark `!` (also known as a \"bang\") are shell commands. Uncomment these lines if you wish to execute the commands directly from this notebook."]},{"cell_type":"markdown","metadata":{"id":"lbgjGH-FsqjA"},"source":["### Check Current Tesseract Version"]},{"cell_type":"code","execution_count":6,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":33},"executionInfo":{"elapsed":552,"status":"ok","timestamp":1717162009373,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"maVw84ptsqjA","outputId":"574df29a-3c03-491a-b829-3b279ab9d828"},"outputs":[{"data":{"text/html":["
Correct version of Tesseract is installed.\n","
\n"],"text/plain":["Correct version of Tesseract is installed.\n"]},"metadata":{},"output_type":"display_data"}],"source":["import subprocess\n","\n","def check_tesseract_version():\n"," version_output = subprocess.run([\"tesseract\", \"--version\"], capture_output=True, text=True)\n"," if 'tesseract 5.' in version_output.stdout:\n"," cprint(\"Correct version of Tesseract is installed.\")\n"," else:\n"," cprint(\"No version or Incorrect version of Tesseract is installed. Please install Tesseract 5.x.\")\n","\n","check_tesseract_version()"]},{"cell_type":"markdown","metadata":{"id":"G1wnq7N7sqjA"},"source":["### Remove Tesseract installation\n","> I you have the old 4.x version, you should consider removing the installation with the following commands.\n"]},{"cell_type":"markdown","metadata":{"id":"QM9oOtf2sqjA"},"source":["#### Mac (TBD)"]},{"cell_type":"markdown","metadata":{"id":"Qodfe6UIsqjA"},"source":["#### WIndows (TBD)"]},{"cell_type":"markdown","metadata":{"id":"1F-oys3dsqjA"},"source":["#### Ubuntu"]},{"cell_type":"code","execution_count":7,"metadata":{"id":"teXKLm6gsqjA"},"outputs":[],"source":["# !sudo apt-get remove tesseract-ocr\n"]},{"cell_type":"markdown","metadata":{"id":"6KuqPXBrsqjB"},"source":["### Install Tesseract 5.x (if necessary)"]},{"cell_type":"markdown","metadata":{"id":"6-GVVIUvsqjB"},"source":["#### Mac (TBD)"]},{"cell_type":"markdown","metadata":{"id":"xZO3cIMDsqjB"},"source":["#### WIndows (TBD)"]},{"cell_type":"markdown","metadata":{"id":"JKNbHfjBsqjB"},"source":["#### Linux (Ubuntu)"]},{"cell_type":"markdown","metadata":{"id":"u8zjEp0vsqjB"},"source":["The **5.x** release series is available in the [another PPA](https://launchpad.net/~alex-p/+archive/ubuntu/tesseract-ocr5) for Ubuntu **18.04**, **20.04**, and **22.04**.\n"]},{"cell_type":"code","execution_count":8,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":7127,"status":"ok","timestamp":1717159937349,"user":{"displayName":"Vicente 
Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"mB6zqY-csqjB","outputId":"d7f93490-f50c-4a58-b267-49d44315e143"},"outputs":[],"source":["# !sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5"]},{"cell_type":"markdown","metadata":{"id":"E50Mub5ssqjB"},"source":["refresh system package cache in case you’re still running old Ubuntu 18.04"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"m3Mrt-23sqjB"},"outputs":[],"source":["# !sudo apt update"]},{"cell_type":"markdown","metadata":{"id":"h65xnzb0sqjB"},"source":["install the software engine"]},{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":7877,"status":"ok","timestamp":1717159957612,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"_IAlFgRzsqjB","outputId":"2c813cc8-2192-425c-dd65-fb390a23b629"},"outputs":[],"source":["# !sudo apt install -y tesseract-ocr"]},{"cell_type":"markdown","metadata":{"id":"RZf_GC5BsqjB"},"source":["\n","### Re-check version after installation"]},{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":33},"executionInfo":{"elapsed":287,"status":"ok","timestamp":1717159961205,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"YsuiKSX3sqjB","outputId":"ada6dc36-1b3d-4a68-e951-f3f513e07552"},"outputs":[{"data":{"text/html":["
Correct version of Tesseract is installed.\n","
\n"],"text/plain":["Correct version of Tesseract is installed.\n"]},"metadata":{},"output_type":"display_data"}],"source":["check_tesseract_version()\n"]},{"cell_type":"markdown","metadata":{"id":"26wuuPsRsqjB"},"source":["### Install Tesseract languages"]},{"cell_type":"code","execution_count":12,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":65},"executionInfo":{"elapsed":864,"status":"ok","timestamp":1717162041268,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"EcR2B--FsqjB","outputId":"e2bdd940-3c12-4876-de2f-532e85fbf332"},"outputs":[{"data":{"text/html":["
tessdata path: /opt/homebrew/share/tessdata\n","
\n"],"text/plain":["tessdata path: \u001b[35m/opt/homebrew/share/\u001b[0m\u001b[95mtessdata\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
Installed languages:\n","[\n","    'afr, amh, ara, asm, aze, aze_cyrl, bel, ben, bod, bos, bre, bul, cat, ceb, ces',\n","    'chi_sim, chi_sim_vert, chi_tra, chi_tra_vert, chr, cos, cym, dan, deu, div, dzo, ell, eng, enm, epo',\n","    'equ, est, eus, fao, fas, fil, fin, fra, frk, frm, fry, gla, gle, glg, grc',\n","    'guj, hat, heb, hin, hrv, hun, hye, iku, ind, isl, ita, ita_old, jav, jpn, jpn_vert',\n","    'kan, kat, kat_old, kaz, khm, kir, kmr, kor, kor_vert, lao, lat, lav, lit, ltz, mal',\n","    'mar, mkd, mlt, mon, mri, msa, mya, nep, nld, nor, oci, ori, osd, pan, pol',\n","    'por, pus, que, ron, rus, san, script/Arabic, script/Armenian, script/Bengali, script/Canadian_Aboriginal, \n","script/Cherokee, script/Cyrillic, script/Devanagari, script/Ethiopic, script/Fraktur',\n","    'script/Georgian, script/Greek, script/Gujarati, script/Gurmukhi, script/HanS, script/HanS_vert, script/HanT, \n","script/HanT_vert, script/Hangul, script/Hangul_vert, script/Hebrew, script/Japanese, script/Japanese_vert, \n","script/Kannada, script/Khmer',\n","    'script/Lao, script/Latin, script/Malayalam, script/Myanmar, script/Oriya, script/Sinhala, script/Syriac, \n","script/Tamil, script/Telugu, script/Thaana, script/Thai, script/Tibetan, script/Vietnamese, sin, slk',\n","    'slv, snd, snum, spa, spa_old, sqi, srp, srp_latn, sun, swa, swe, syr, tam, tat, tel',\n","    'tgk, tha, tir, ton, tur, uig, ukr, urd, uzb, uzb_cyrl, vie, yid, yor'\n","]\n","
\n"],"text/plain":["Installed languages:\n","\u001b[1m[\u001b[0m\n"," \u001b[32m'afr, amh, ara, asm, aze, aze_cyrl, bel, ben, bod, bos, bre, bul, cat, ceb, ces'\u001b[0m,\n"," \u001b[32m'chi_sim, chi_sim_vert, chi_tra, chi_tra_vert, chr, cos, cym, dan, deu, div, dzo, ell, eng, enm, epo'\u001b[0m,\n"," \u001b[32m'equ, est, eus, fao, fas, fil, fin, fra, frk, frm, fry, gla, gle, glg, grc'\u001b[0m,\n"," \u001b[32m'guj, hat, heb, hin, hrv, hun, hye, iku, ind, isl, ita, ita_old, jav, jpn, jpn_vert'\u001b[0m,\n"," \u001b[32m'kan, kat, kat_old, kaz, khm, kir, kmr, kor, kor_vert, lao, lat, lav, lit, ltz, mal'\u001b[0m,\n"," \u001b[32m'mar, mkd, mlt, mon, mri, msa, mya, nep, nld, nor, oci, ori, osd, pan, pol'\u001b[0m,\n"," \u001b[32m'por, pus, que, ron, rus, san, script/Arabic, script/Armenian, script/Bengali, script/Canadian_Aboriginal, \u001b[0m\n","\u001b[32mscript/Cherokee, script/Cyrillic, script/Devanagari, script/Ethiopic, script/Fraktur'\u001b[0m,\n"," \u001b[32m'script/Georgian, script/Greek, script/Gujarati, script/Gurmukhi, script/HanS, script/HanS_vert, script/HanT, \u001b[0m\n","\u001b[32mscript/HanT_vert, script/Hangul, script/Hangul_vert, script/Hebrew, script/Japanese, script/Japanese_vert, \u001b[0m\n","\u001b[32mscript/Kannada, script/Khmer'\u001b[0m,\n"," \u001b[32m'script/Lao, script/Latin, script/Malayalam, script/Myanmar, script/Oriya, script/Sinhala, script/Syriac, \u001b[0m\n","\u001b[32mscript/Tamil, script/Telugu, script/Thaana, script/Thai, script/Tibetan, script/Vietnamese, sin, slk'\u001b[0m,\n"," \u001b[32m'slv, snd, snum, spa, spa_old, sqi, srp, srp_latn, sun, swa, swe, syr, tam, tat, tel'\u001b[0m,\n"," \u001b[32m'tgk, tha, tir, ton, tur, uig, ukr, urd, uzb, uzb_cyrl, vie, yid, yor'\u001b[0m\n","\u001b[1m]\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["out = !tesseract --list-langs # type: ignore\n","tessdata = Path(out[0].split('\"')[1])\n","cprint(f\"tessdata path: {tessdata}\")\n","cprint(\"Installed languages:\", 
[', '.join(sub) for sub in [out[i:i + 15] for i in range(1, len(out), 15)]])"]},{"cell_type":"markdown","metadata":{"id":"s_ylP5uTsqjB"},"source":["#### Install **best** languages and **jpn_ver** Tesseract lang\n","> to get better results than default langs and `jpn` language model.\n"]},{"cell_type":"markdown","metadata":{"id":"h1kl60ArsqjB"},"source":["Download from [tessdata_best](https://github.com/tesseract-ocr/tessdata_best). \n","Download from [here](https://groups.google.com/g/tesseract-ocr/c/FwjSZzoVgeg/m/u-zyFYQiBgAJ) a model trained for vertical Japanese text as found in manga.\n","\n","See [here](https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html) the language codes.\n","\n","**Note:** While the `jpn` and `jpn_vert` language models are available, the `manga-ocr` model used by `PanelCleaner` is generally more suited for manga text recognition. However, comparing these models can provide educational insights into their respective strengths and limitations."]},{"cell_type":"markdown","metadata":{"id":"Q7WvG7QOsqjB"},"source":["Uncomment and execute to download the best language models:\n"]},{"cell_type":"code","execution_count":10,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5012,"status":"ok","timestamp":1717159997755,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"PGLGCL-_sqjC","outputId":"75f7bdff-c146-4d18-cf00-ae0f962170cc"},"outputs":[],"source":["# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/osd.traineddata\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/eng.traineddata\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/jpn.traineddata\n","\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/jpn_vert.traineddata\n","# or\n","# !wget -O jpn_vert.traineddata 
https://github.com/zodiac3539/jpn_vert/blob/master/jpn_ver5.traineddata\n","\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/spa.traineddata\n","# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/fra.traineddata"]},{"cell_type":"markdown","metadata":{"id":"q6_QS0sisqjC"},"source":["Copy downloaded models to tessdata folder (double check that `tessdata` variable points to the right folder):\n"]},{"cell_type":"code","execution_count":13,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":33},"executionInfo":{"elapsed":306,"status":"ok","timestamp":1717160002438,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"AqMUxfFZsqjC","outputId":"4677359f-7bc5-4b9d-b668-2efa3c26366f"},"outputs":[{"data":{"text/html":["
tessdata path: /opt/homebrew/share/tessdata\n","
\n"],"text/plain":["tessdata path: \u001b[35m/opt/homebrew/share/\u001b[0m\u001b[95mtessdata\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["cprint(f\"tessdata path: {tessdata}\")"]},{"cell_type":"code","execution_count":13,"metadata":{"executionInfo":{"elapsed":297,"status":"ok","timestamp":1717160012026,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"jgMImPcMsqjC"},"outputs":[],"source":["# !sudo mv *.traineddata $tessdata"]},{"cell_type":"markdown","metadata":{"id":"UWpjzsOVsqjC"},"source":["and remove the downloaded models:\n"]},{"cell_type":"code","execution_count":14,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":329,"status":"ok","timestamp":1717160017466,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"NgXafsP-sqjC","outputId":"08109c93-77a1-4c25-f558-77cd48ecd158"},"outputs":[{"name":"stdout","output_type":"stream","text":["rm: cannot remove '*.traineddata': No such file or directory\n"]}],"source":["# !rm *.traineddata"]},{"cell_type":"markdown","metadata":{"id":"M8wh0Uv9sqjC"},"source":["Check installed languages\n"]},{"cell_type":"code","execution_count":14,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":129},"executionInfo":{"elapsed":703,"status":"ok","timestamp":1717162047846,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"6XYS33DLsqjC","outputId":"ffdce0a8-8344-450f-b835-65fcec5eb11d"},"outputs":[{"data":{"text/html":["
[\n","    Path('/opt/homebrew/share/tessdata/spa.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/eng.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/jpn_vert.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/spa_old.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/fra.traineddata'),\n","    Path('/opt/homebrew/share/tessdata/jpn.traineddata')\n","]\n","
\n"],"text/plain":["\u001b[1m[\u001b[0m\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/spa.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/eng.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/jpn_vert.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/spa_old.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/fra.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n"," \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/jpn.traineddata'\u001b[0m\u001b[1m)\u001b[0m\n","\u001b[1m]\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["cprint(list(filter(lambda x: re.match(r'eng|jpn|jpn_vert|fra|spa', x.name), tessdata.ls()))) # type: ignore\n","# cprint(pytesseract.get_languages())\n"]},{"cell_type":"markdown","metadata":{"id":"fErGl5xSZlI5"},"source":["----\n","# Tesseract Experiments\n","\n","In this notebook, we focus on applying Tesseract OCR to a variety of comic book images to evaluate its performance across different text styles, languages, and image qualities. \n","\n","The experiments are specifically designed to explore how different cropping methods affect Tesseract's ability to recognize text in complex visual contexts typical of comic panels. 
By experimenting with various cropping strategies, we want to determine whether feeding Tesseract single cropped boxes, as opposed to whole pages, can enhance OCR accuracy.\n","\n","## Objectives\n","\n","- **Evaluate basic OCR performance:** Assess how well Tesseract recognizes text across a diverse set of comic book images.\n","- **Test different cropping methods:** Systematically vary the way images are cropped to isolate text boxes and see if this improves the accuracy of text recognition.\n","- **Optimize OCR settings (TBD):** Adjust Tesseract's configuration settings based on the results of the cropping experiments to optimize performance for comic texts.\n"]},{"cell_type":"markdown","metadata":{"id":"op24JaWwfsSv"},"source":["## Experiment directory\n","\n","Defines the directory structure for storing images, caching auxiliary data, and saving experiment results.\n","\n","- **Source Directory (`EXP_DIR/source/`):** This is where the original images for the experiments are stored.\n","- **Cache Directory (`EXP_DIR/cache/`):** This directory is used for caching processed images or other auxiliary files that are generated during the experiments.\n","\n","You can modify the default locations of these directories as needed. The default setup assumes that you are working within the `PanelCleaner/pcleaner/_testbed` directory. 
Use the following code to verify your current working directory and to set up the experiment directory:\n"]},{"cell_type":"code","execution_count":15,"metadata":{"executionInfo":{"elapsed":560,"status":"ok","timestamp":1717162052233,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"jfMv_sdZfwVY"},"outputs":[],"source":["EXP_DIR = Path('./experiment')\n"]},{"cell_type":"code","execution_count":16,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":49},"executionInfo":{"elapsed":2,"status":"ok","timestamp":1717162052658,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"0_2xxaRBsqjC","outputId":"e35faa83-ebe3-4e65-a12c-3acd4c8276c6"},"outputs":[{"data":{"text/html":["
    Working dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed\n","Experiments dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment\n","
\n"],"text/plain":[" Working dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/\u001b[0m\u001b[95m_testbed\u001b[0m\n","Experiments dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/\u001b[0m\u001b[95mexperiment\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["cprint(f\"{'Working dir':>15}: {Path('.').resolve()}\\nExperiments dir: {EXP_DIR.resolve()}\")\n"]},{"cell_type":"markdown","metadata":{},"source":["# Test images\n","\n","Prepare and manage the comic book images for OCR testing.\n","\n","If you have specific comic book images you want to test, upload them to the `EXP_DIR/source/` directory. Ensure that each image file is accompanied by a corresponding text file containing the ground truth data. The text file should have the same name as the image but with a `.txt` extension. Each line in the text file should represent one text box as detected and processed by PanelCleaner.\n","\n","For those who prefer to use a standardized set of images for comparison purposes, we provide a link to download a pre-selected set of comic book images. After downloading, be sure to place these images in the `EXP_DIR/source/` directory.\n","\n","Optionally, you can include a `.json` file for each image, specifying the language of the text on the page. This file should have the same name as the image and a `.json` extension. Here is an example of the content for a language specification file:\n","\n","```json\n","{\n","\"lang\": \"Spanish\"\n","}\n","```"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"data":{"text/html":["
/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/source\n","
\n"],"text/plain":["\u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/\u001b[0m\u001b[95msource\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["cprint((EXP_DIR/'source').resolve())"]},{"cell_type":"markdown","metadata":{},"source":["or download the standard set:\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !gdown --id 1MCqUImwFS5iQ271CD9_t2FSugJXdYj0a -O experiment.zip"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# !unzip -qn experiment.zip -d ."]},{"cell_type":"markdown","metadata":{"id":"z62cVR57ZlI5"},"source":["# Setup ngrok (Colab)"]},{"cell_type":"markdown","metadata":{"id":"qMAn1mOSZlI5"},"source":["The experiments can generate hundreds of images, and maintaining the **PIL** images in memory is not efficient. All the generated images are cached and visualized on demand through a URL pointing to the local cache. This approach prevents the kernel from being overloaded with **PIL** images, with the front-end responsible for fetching the image and the backend web server (not the kernel) for serving the image in another process. This method is quick and efficient. As an added bonus, the saved notebook remains lean and fit; it doesn't store the Base64 versions of all the output cell images.\n","\n","Unfortunately, this approach does not work as is in **Colab**. Google Colab runs on an older Ubuntu 18.04 VM, so all the usual networking challenges with Docker, or whatever VMs Google is using, apply. Google also goes to great lengths to avoid exposing its internal architecture. We have two options:\n","- Let the Jupyter kernel serve the images itself, which is slow and memory-consuming.\n","- Use a tunnel to map localhost (server) to whatever IP and port the front-end (the browser you're currently using) is running on. 
We can use **ngrok** for this, but *ngrok* is a commercial service that has been abused and now requires confirmation the first time the tunnel connects, which can be inconvenient for the user. It also requires the user to open a free account and obtain an auth token.\n","\n","You choose.\n","\n","If the notebook is running in Colab and ngrok has been successfully installed and the tunnel has been created, the default setting is `USE_PIL=False`. You can set the environment variable `USE_PIL=True` to force the use of PIL images, but note that in certain circumstances, Colab will complain because the free tiers are usually memory constrained.\n","\n","If you don't change the default settings and\n","- the notebook is running locally, it'll serve the images directly without any additional setup.\n","- the notebook is running in Colab, it'll serve the images through a web server and ngrok.\n"]},{"cell_type":"code","execution_count":17,"metadata":{"executionInfo":{"elapsed":1,"status":"ok","timestamp":1717162054215,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"A0il2cCMZlI5"},"outputs":[],"source":["if FC.IN_COLAB:\n"," os.environ['USE_TUNNEL'] = 'True'\n"," os.environ['USE_PIL'] = 'False'\n"]},{"cell_type":"code","execution_count":18,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":184},"executionInfo":{"elapsed":7100,"status":"ok","timestamp":1717162062654,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"Su0YJikGZlI5","outputId":"39d5db0a-d544-4e84-9e85-c338d94fc2af"},"outputs":[],"source":["SERVER = None\n","if os.environ['USE_PIL'].lower() == 'false' and os.environ['USE_TUNNEL'].lower() == 'true':\n"," import testbed.web_server as web_server\n"," SERVER = web_server.setup_ngrok(web_server.WebServerBottle, Path(EXP_DIR))\n"]},{"cell_type":"markdown","metadata":{"id":"GPkPmP7rsqjD"},"source":["# CONTEXT\n","> Creates the `OCRExperimentContext` object we'll use to 
manage the experiments and visualize the configuration.\n"]},{"cell_type":"code","execution_count":22,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":572},"executionInfo":{"elapsed":45996,"status":"ok","timestamp":1717162125752,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"qhQ3nY1OhgdS","outputId":"962a664b-2d3c-443a-c9b4-09a258762b05"},"outputs":[{"name":"stdout","output_type":"stream","text":["Current Configuration:\n","\n","Locale: System default\n","Default Profile: Built-in\n","Saved Profiles:\n","- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n","- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n","\n","Profile Editor: cursor\n","Cache Directory: System default\n","Default Torch Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n","Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n","GUI Theme: System default\n","\n","--------------------\n","\n","Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n","System default cache directory: /Users/vic/Library/Caches/pcleaner\n"]},{"data":{"text/html":["
 config cache_dir: None\n","       model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n","           device: 'mps'\n","
\n"],"text/plain":[" config cache_dir: \u001b[3;35mNone\u001b[0m\n"," model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n"," device: \u001b[32m'mps'\u001b[0m\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
        force_PIL: False\n","       use_tunnel: False\n","       server_url: \n","   experiment dir: experiment\n","       source_dir: experiment/source\n","        cache_dir: experiment/cache\n","\n","
\n"],"text/plain":[" force_PIL: \u001b[3;91mFalse\u001b[0m\n"," use_tunnel: \u001b[3;91mFalse\u001b[0m\n"," server_url: \n"," experiment dir: experiment\n"," source_dir: experiment/source\n"," cache_dir: experiment/cache\n","\n"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["
Experiment runs:\n","Tesseract-crop-post: 17\n","   Tesseract-crop: 0\n","
\n"],"text/plain":["Experiment runs:\n","Tesseract-crop-post: \u001b[1;36m17\u001b[0m\n"," Tesseract-crop: \u001b[1;36m0\u001b[0m\n"]},"metadata":{},"output_type":"display_data"}],"source":["CONTEXT = OCRExperimentContext('Tesseract', EXP_DIR, server=SERVER, load=True)\n","CONTEXT.show()"]},{"cell_type":"markdown","metadata":{"id":"O-M9uxOosqjD"},"source":["## Verify images setup\n","\n","Before visualizing the experiments, verify that all images are correctly recognized and accessible."]},{"cell_type":"code","execution_count":23,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":510,"status":"ok","timestamp":1717162129784,"user":{"displayName":"Vicente Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"Ha8wqfTHZlI5","outputId":"dde8119f-0629-445e-be47-cce08d725ecb"},"outputs":[{"data":{"text/plain":["['00: Action_Comics_1960-01-00_(262).JPG',\n"," '01: Adolf_Cap_01_008.jpg',\n"," '02: Barnaby_v1-028.png',\n"," '03: Barnaby_v1-029.png',\n"," '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n"," '05: Cannon-292.jpg',\n"," '06: Contrato_con_Dios_028.jpg',\n"," '07: Erase_una_vez_en_Francia_02_88.jpg',\n"," '08: FOX_CHILLINTALES_T17_012.jpg',\n"," '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n"," '10: Galactus_12.jpg',\n"," '11: INOUE_KYOUMEN_002.png',\n"," '12: MCCALL_ROBINHOOD_T31_010.jpg',\n"," '13: MCCAY_LITTLENEMO_090.jpg',\n"," '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n"," '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n"," '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n"," '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n"," '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n"," '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n"," '20: Strange_Tales_172005.jpg',\n"," '21: Strange_Tales_172021.jpg',\n"," '22: Tarzan_014-21.JPG',\n"," '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n"," '24: 
Transformers_-_Unicron_000-004.jpg',\n"," '25: Transformers_-_Unicron_000-016.jpg',\n"," '26: WARE_ACME_024.jpg',\n"," '27: Yoko_Tsuno_T01_1972-10.jpg',\n"," '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n"," '29: manga_0033.jpg',\n"," '30: ronson-031.jpg',\n"," '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']"]},"execution_count":23,"metadata":{},"output_type":"execute_result"}],"source":["[f\"{i:02}: {_.name}\" for i,_ in enumerate(CONTEXT.image_paths)]\n"]},{"cell_type":"markdown","metadata":{"id":"sShEU3dZsqjD"},"source":["----\n","# Running an experiment\n","\n","Conduct an OCR experiment using the established context and tools. You will select an image, choose a cropping method, and decide which text box to analyze. The results will be visualized so you can assess the effectiveness of the OCR process.\n","\n","### Selecting and configuring the experiment\n","\n","1. **Choose an image:** Start by selecting an image from the loaded dataset.\n","2. **Specify cropping method:** Choose how the image should be cropped. Different cropping methods can affect OCR accuracy, as they change how the text is presented to the OCR engine.\n","3. **Select text box:** Select the specific text box within the image to focus the OCR process.\n","\n","### Visualizing results\n","\n","The results are visualized immediately. Here, it is crucial to have accurate **ground truth** data to effectively compare and assess the OCR results.\n","\n","You can assess the accuracy of OCR results at various levels: box by box, method by method, and overall. Currently, we use a simplified version of the `edit distance` metric to calculate accuracy. However, we plan to adopt more standardized metrics, such as the `Levenshtein distance`, in future updates.\n","\n","Additionally, we should probably develop a metric specifically tailored to the unique characteristics of comic texts, such as the prevalence of all-caps and handwritten text, to provide more relevant evaluations. 
OCR models are trained with typeset text, synthetic or real-world, and business, forms, news or literary data, and usually don't perform well on handwritten text. We haven't found any OCR dataset that incorporates comics style data.\n","\n","\n","### Managing experiment data\n","\n","- **Save results:** You have the option to save the results of the experiment, useful for documenting performance and changes over time. However, be cautious with this option as it will overwrite existing results without confirmation.\n"]},{"cell_type":"markdown","metadata":{"id":"HLSafK7UsqjD"},"source":["> **Note:** This visor functionality is currently a work in progress (WIP). The interface and options are being refined to enhance the experience and provide more robust data management. This section gives you a preliminary look at what we are aiming for with the `testbed` project.\n"]},{"cell_type":"code","execution_count":24,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":291,"referenced_widgets":["6a87fbd3fc6440c5a81a564ac94400cc","01cd556393d643f8bb3fd88c8cd7bb84","48555e1574db42db86f6572e9c888b58","47b4dcc7177b41d1aa69d3918a6e8598","dc75396dcc3c4606a8fe56401d29a4e9","778afca8ec0a431ca8670604c10d8956","e36c1a1c609c425bb76ea83edf910425","27c90e0d46934e80a38889a0ae18a726","60e22ee6a0fc428b8e3709e394e44841","7e8086d63d1b4bbf96e4c70c63bc920a","0d5a810240d84cc683b42f56b1f4c2d1","e1de76477d764776ac5a209f7ebcc8d6","48cfde671ecf42c7a124a94a2962bb66","4064335db7684f2f81ad237513e90c34","99bbf4dd65424e87aab67bcf162a8171","eee81f49e99f4cbb8db65b56c511adbd","d89bf137dd5e4d698dc7f6187f2e148c","3588eeb28e024f1ead6cd21f46d65a6c","b0d6d88a4b824cf9be7c01d5611c3426","39648bdbc39c4c7c827510141663804c","0c28619fcebc4c58a9a756888add9140","fa20ebba2bdb493799524bdf780fa86e","ad2bbb4b23a648209da1edab0006adae","d6fed923432746c9b3dff00549de596d","878477e8708a47e8ab49b00be715d1cd","00878ffbdb234a48b8917563f3058107","57f9b685011043418d9552b3cb48784e","0f1bb59001314b13a091c7e1b331c58b","fb08
b9a59d6c4e678b368c915bb97405","4f8b904d91eb44028477a6a61c34af54","90f41682d85f42a6becdca99a59e9a2c","3d0ad04ce06b4314b64dcdde269b38e6","eab60b2fe7a84e4b9a913f1450766452","101d1fc2b6df471ebd6a32a44c8120ed","8ca00801b4e54cbd980c998791664f60","ec3c881e1da4414a9c9499497bc5de74","31dcaeca8806453ea52292112bb0a4f3","e13c49f50ebd4f81a1dafe940ac13a31","1da81d9a8d7446838b76ae017e794e78","a2410f8101384beda0114d93ac655edb","24e8764bfd294a629d6c3855b4094f8b","18ccc64fc8504bdeb2fb07043688c1c4","6eed00e28fae4570b2348330bb9dcbab","bd6e62f7db8c40d695d6ca3d93cf7e94","068b8601d7364d29b9598b80aeb782e0","fbb4d3433c6f42e094934b6936659b8f","3cf7099443124d289495da41bfbc8b58","3ab779d3f50a456980527636aa763dba","bb3adfcd8e4a4af29079953bc9220ac1","919da88097344a689a9afb6fa5977b29","d0ed39572cea4c37bb07dc99577258e3","496e8548fc0f41b7b0c15c5881596326","df9bb4a390fa4e6597b832d09949d24e","4aba84bf7b4847fdb1cac927ba6a7ebf","f542dba5c6de48a0a44e698706d87017","df77be57fe7943298432a759e6ce8416","8b38a0bdf80149bba72324232beb14ed","0d4014b56e93430e9782502bf29c7c2e","58b2ec779ab64a2b9e85d57160b453c9","44f79f7ea58742a0a770aab41e876c9c","57f7d34ddc2b4a938d7fcb4bf066deb0","f40fbbd396ac4528a97962ec911d5837","03b66d0eb8b5487b8ad21382b93cc68d","bfa2f19eedfa4bd99b59d08365ae4676","a7755ebc78f34335a32f75fa342fcd76","0e3ae88bc86f4a31b5aedc1dfd0b1bf2","929913d45cc9400087eab703338d00f0","c193eb8285a740208457bbe7985d356f","ee44cc3dae5c42299460702dd280dbce","9a49e8c567b94a64a19dc3eecdf5a857","25cdb8370f70459886e00eeef0f66388","175e7409479d4f56ba4a4faf4c8bebaa","c944b093296248b4a9a75a9139d5003d","72440081608048b1b5fc76e42011ad0c","247e8a63e98441f294c1514a43b35e0d","7a928ad4bd52422b9544d0d312402a5b","3f8e2f6b025e4fcd90ad34c2e99025ae","044d11e5bc4e4f87ad9d099734418899","dcc0c0ee6a284ecb8c40f06891919805","1807cf3a1770434599e993a087293d91"]},"executionInfo":{"elapsed":837,"status":"ok","timestamp":1717162136971,"user":{"displayName":"Vicente 
Sosa","userId":"11884045517174046483"},"user_tz":-120},"id":"TZIA3E3jZlI5","outputId":"734f248f-a720-4a65-e2ca-a8e70a2f20d5"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c3ed154f58e6480abe8993b4a2fc22b7","version_major":2,"version_minor":0},"text/plain":["VBox(children=(HTML(value=\""}},"03b66d0eb8b5487b8ad21382b93cc68d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"044d11e5bc4e4f87ad9d099734418899":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"068b8601d7364d29b9598b80aeb782e0":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_wid
th":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0c28619fcebc4c58a9a756888add9140":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["model_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_a2410f8101384beda0114d93ac655edb"],"layout":"IPY_MODEL_24e8764bfd294a629d6c3855b4094f8b"}},"0d4014b56e93430e9782502bf29c7c2e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"initial"}},"0d5a810240d84cc683b42f56b1f4c2d1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_
count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0e3ae88bc86f4a31b5aedc1dfd0b1bf2":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"120px"}},"0f1bb59001314b13a091c7e1b331c58b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonModel","state":{"_dom_classes":[],"_mode
l_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"reset","disabled":false,"icon":"","layout":"IPY_MODEL_3ab779d3f50a456980527636aa763dba","style":"IPY_MODEL_bb3adfcd8e4a4af29079953bc9220ac1","tooltip":""}},"101d1fc2b6df471ebd6a32a44c8120ed":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"LabelModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_df77be57fe7943298432a759e6ce8416","placeholder":"​","style":"IPY_MODEL_8b38a0bdf80149bba72324232beb14ed","value":"Method:"}},"175e7409479d4f56ba4a4faf4c8bebaa":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"","description":"","description_tooltip":null,"layout":"IPY_MODEL_3f8e2f6b025e4fcd90ad34c2e99025ae","max":11,"min":0,"orientation":"horizontal","style":"IPY_MODEL_044d11e5bc4e4f87ad9d099734418899","value":11}},"1807cf3a1770434599e993a087293d91":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","descriptio
n_width":""}},"18ccc64fc8504bdeb2fb07043688c1c4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"25em"}},"1da81d9a8d7446838b76ae017e794e78":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":nul
l,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"247e8a63e98441f294c1514a43b35e0d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"24e8764bfd294a629d6c3855b4094f8b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fi
t":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"25cdb8370f70459886e00eeef0f66388":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_247e8a63e98441f294c1514a43b35e0d","placeholder":"​","style":"IPY_MODEL_7a928ad4bd52422b9544d0d312402a5b","value":"Box #1: 100%"}},"27c90e0d46934e80a38889a0ae18a726":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"31dcaeca8806453ea52292112bb0a4f3":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_
width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3588eeb28e024f1ead6cd21f46d65a6c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["method_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_101d1fc2b6df471ebd6a32a44c8120ed","IPY_MODEL_8ca00801b4e54cbd980c998791664f60","IPY_MODEL_ec3c881e1da4414a9c9499497bc5de74"],"layout":"IPY_MODEL_31dcaeca8806453ea52292112bb0a4f3"}},"39648bdbc39c4c7c827510141663804c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":"visible","width":null}},"3ab779d3f50a456980527636aa763dba":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_modul
e":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":"0px 0px 0px 3em","max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"5em"}},"3cf7099443124d289495da41bfbc8b58":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":"lightblue","font_weight":"bold"}},"3d0ad04ce06b4314b64dcdde269b38e6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"BoundedIntTextModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"BoundedIntTextModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"IntTextView","continuous_update":false,"description":"","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_4aba84bf7b4847fdb1cac927ba6a7ebf","max":14,"min":0,"step":1,"style":"IPY_MODEL_f542dba5c6de48a0a44e698706d87017","value":2}},"3f8e2f6b025e4fcd90ad34c2e99025ae":{"model_
module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4064335db7684f2f81ad237513e90c34":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137612175420256"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_878477e8708a47e8ab49b00be715d1cd"],"layout":"IPY_MODEL_00878ffbdb234a48b8917563f3058107"}},"44f79f7ea58742a0a770aab41e876c9c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"fle
x":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"150px"}},"47b4dcc7177b41d1aa69d3918a6e8598":{"model_module":"@jupyter-widgets/output","model_module_version":"1.0.0","model_name":"OutputModel","state":{"_dom_classes":["message_visor-yXy"],"_model_module":"@jupyter-widgets/output","_model_module_version":"1.0.0","_model_name":"OutputModel","_view_count":null,"_view_module":"@jupyter-widgets/output","_view_module_version":"1.0.0","_view_name":"OutputView","layout":"IPY_MODEL_c193eb8285a740208457bbe7985d356f","msg_id":"","outputs":[]}},"48555e1574db42db86f6572e9c888b58":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"VBoxModel","state":{"_dom_classes":["context-visor","137612216365504"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_60e22ee6a0fc428b8e3709e394e44841","IPY_MODEL_7e8086d63d1b4bbf96e4c70c63bc920a"],"layout":"IPY_MODEL_0d5a810240d84cc683b42f56b1f4c2d1"}},"48cfde671ecf42c7a124a94a2962bb66":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137612175420208"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/con
trols","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_ad2bbb4b23a648209da1edab0006adae"],"layout":"IPY_MODEL_d6fed923432746c9b3dff00549de596d"}},"496e8548fc0f41b7b0c15c5881596326":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"initial"}},"4aba84bf7b4847fdb1cac927ba6a7ebf":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"l
eft":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50px"}},"4f8b904d91eb44028477a6a61c34af54":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"LabelModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_919da88097344a689a9afb6fa5977b29","placeholder":"​","style":"IPY_MODEL_d0ed39572cea4c37bb07dc99577258e3","value":"Box # (of 15):"}},"57f7d34ddc2b4a938d7fcb4bf066deb0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"57f9b685011043418d9552b3cb48784e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"save","disabled":false,"icon":"","layout":"IPY_MODEL_fbb4d3433c6f42e094934b6936659b8f","style":"IPY_MODEL_3cf7099443124d289495da41bfbc8b58","tooltip":""}},"58b2ec779ab64a2b9e85d57160b453c9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":
{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"0px"}},"60e22ee6a0fc428b8e3709e394e44841":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e1de76477d764776ac5a209f7ebcc8d6","IPY_MODEL_48cfde671ecf42c7a124a94a2962bb66","IPY_MODEL_4064335db7684f2f81ad237513e90c34","IPY_MODEL_99bbf4dd65424e87aab67bcf162a8171"],"layout":"IPY_MODEL_eee81f49e99f4cbb8db65b56c511adbd"}},"6a87fbd3fc6440c5a81a564ac94400cc":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"VBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_01cd556393d643f8bb3fd88c8cd7bb84","IPY_MODEL_48555e1574db42db86f6572e9c888b58","IPY_MODEL_47b4dcc7177b41d1aa69d3918a6e8598","IPY_MODEL_dc75396dcc3c4606a8fe56401d29a4e9"],"layout":"IPY_MODEL_778afca8ec0a431ca8670604c10d8956"}},"6eed00e28fae4570b2348330bb9dcbab":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"72440081608048b1b5fc76e42011ad0c":{"
model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":"hidden","width":null}},"778afca8ec0a431ca8670604c10d8956":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"
top":null,"visibility":null,"width":null}},"7a928ad4bd52422b9544d0d312402a5b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7e8086d63d1b4bbf96e4c70c63bc920a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137612175421024"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_d89bf137dd5e4d698dc7f6187f2e148c","IPY_MODEL_3588eeb28e024f1ead6cd21f46d65a6c","IPY_MODEL_b0d6d88a4b824cf9be7c01d5611c3426"],"layout":"IPY_MODEL_39648bdbc39c4c7c827510141663804c"}},"878477e8708a47e8ab49b00be715d1cd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["display_option_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_bd6e62f7db8c40d695d6ca3d93cf7e94"],"layout":"IPY_MODEL_068b8601d7364d29b9598b80aeb782e0"}},"8b38a0bdf80149bba72324232beb14ed":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","
description_width":""}},"8ca00801b4e54cbd980c998791664f60":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"CheckboxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"all","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_0d4014b56e93430e9782502bf29c7c2e","style":"IPY_MODEL_58b2ec779ab64a2b9e85d57160b453c9","value":false}},"90f41682d85f42a6becdca99a59e9a2c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"CheckboxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"all","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_496e8548fc0f41b7b0c15c5881596326","style":"IPY_MODEL_df9bb4a390fa4e6597b832d09949d24e","value":false}},"919da88097344a689a9afb6fa5977b29":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":nul
l,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":"0px 0px 0px 10px","right":null,"top":null,"visibility":null,"width":"initial"}},"929913d45cc9400087eab703338d00f0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"99bbf4dd65424e87aab67bcf162a8171":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137612175414976"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_57f9b685011043418d9552b3cb48784e","IPY_MODEL_0f1bb59001314b13a091c7e1b331c58b"],"layout":"IPY_MODEL_fb08b9a59d6c4e678b368c915bb97405"}},"9a49e8c567b94a64a19dc3eecdf5a857":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_25cdb8370f70459886e00eeef0f66388","IPY_MODEL_175e7409479d4f56ba4a4faf4c8bebaa","IPY_MODEL_c944b093296248b4a9a75a9139d5003d"],"layout":"IPY_MODEL_72440081608048b1b5fc76e42011ad0c"}},"a2410f8101384beda0114d93ac655edb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"Dro
pdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Tesseract-crop-post","Tesseract-crop"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":0,"layout":"IPY_MODEL_bfa2f19eedfa4bd99b59d08365ae4676","style":"IPY_MODEL_a7755ebc78f34335a32f75fa342fcd76"}},"a7755ebc78f34335a32f75fa342fcd76":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"ad2bbb4b23a648209da1edab0006adae":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Action_Comics_1960-01-00_(262)","Adolf_Cap_01_008","Barnaby_v1-028","Barnaby_v1-029","Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013","Cannon-292","Contrato_con_Dios_028","Erase_una_vez_en_Francia_02_88","FOX_CHILLINTALES_T17_012","Furari_-_Jiro_Taniguchi_selma_056","Galactus_12","INOUE_KYOUMEN_002","MCCALL_ROBINHOOD_T31_010","MCCAY_LITTLENEMO_090","Mary_Perkins_On_Stage_v2006_1_-_P00068","PIKE_BOYLOVEGIRLS_T41_012","Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1","Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K","Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2","Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024","Strange_Tales_172005","Strange_Tales_172021","Tarzan_014-21","Tintin_21_Les_Bijoux_de_la_Castafiore_page_39","Transformers_-_Unicron_
000-004","Transformers_-_Unicron_000-016","WARE_ACME_024","Yoko_Tsuno_T01_1972-10","Your_Name_Another_Side_Earthbound_T02_084","manga_0033","ronson-031","哀心迷図のバベル 第01巻 - 22002_00_059"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":14,"layout":"IPY_MODEL_18ccc64fc8504bdeb2fb07043688c1c4","style":"IPY_MODEL_6eed00e28fae4570b2348330bb9dcbab"}},"b0d6d88a4b824cf9be7c01d5611c3426":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137612175420544"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e13c49f50ebd4f81a1dafe940ac13a31"],"layout":"IPY_MODEL_1da81d9a8d7446838b76ae017e794e78"}},"bb3adfcd8e4a4af29079953bc9220ac1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":"bold"}},"bd6e62f7db8c40d695d6ca3d93cf7e94":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Boxes","Image","Mask","Image & Mask","Page data","Ground truth","Image All","Results","Best 
results","Accuracy","Dataframe","Config"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":7,"layout":"IPY_MODEL_0e3ae88bc86f4a31b5aedc1dfd0b1bf2","style":"IPY_MODEL_929913d45cc9400087eab703338d00f0"}},"bfa2f19eedfa4bd99b59d08365ae4676":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"fit-content"}},"c193eb8285a740208457bbe7985d356f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"g
rid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c944b093296248b4a9a75a9139d5003d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_dcc0c0ee6a284ecb8c40f06891919805","placeholder":"​","style":"IPY_MODEL_1807cf3a1770434599e993a087293d91","value":" 11/11 [00:04<00:00,  
2.10it/s]"}},"d0ed39572cea4c37bb07dc99577258e3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d6fed923432746c9b3dff00549de596d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d89bf137dd5e4d698dc7f6187f2e148c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["box_grp"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4f8b904d91eb44028477a6a61c34af54","IPY_MODEL_90f41682d85f42a6becdca99a59e9a2c","IPY_MODEL_3d0ad04ce06b4314b64dcd
de269b38e6"],"layout":"IPY_MODEL_eab60b2fe7a84e4b9a913f1450766452"}},"dc75396dcc3c4606a8fe56401d29a4e9":{"model_module":"@jupyter-widgets/output","model_module_version":"1.0.0","model_name":"OutputModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/output","_model_module_version":"1.0.0","_model_name":"OutputModel","_view_count":null,"_view_module":"@jupyter-widgets/output","_view_module_version":"1.0.0","_view_name":"OutputView","layout":"IPY_MODEL_ee44cc3dae5c42299460702dd280dbce","msg_id":"","outputs":[{"data":{"text/html":"
Maybe... but l tn certainly wouldn't feel right holding back tip money!
0.96
\n
\n
Maybe ... but I⎕⎕⎕ certainly wouldn't feel right holding back tip money!

Maybe... but l tn certainly wouldn't feel right holding back tip money!
","text/plain":""},"metadata":{},"output_type":"display_data"}]}},"dcc0c0ee6a284ecb8c40f06891919805":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"df77be57fe7943298432a759e6ce8416":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":
null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":"0px 0px 0px 10px","right":null,"top":null,"visibility":null,"width":"initial"}},"df9bb4a390fa4e6597b832d09949d24e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"0px"}},"e13c49f50ebd4f81a1dafe940ac13a31":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":["wrapper-spinner"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f40fbbd396ac4528a97962ec911d5837","placeholder":"​","style":"IPY_MODEL_03b66d0eb8b5487b8ad21382b93cc68d","value":"\n
\n
\n
\n "}},"e1de76477d764776ac5a209f7ebcc8d6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":["context-visor","137612175420016"],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0c28619fcebc4c58a9a756888add9140"],"layout":"IPY_MODEL_fa20ebba2bdb493799524bdf780fa86e"}},"e36c1a1c609c425bb76ea83edf910425":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eab60b2fe7a84e4b9a913f1450766452":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":nu
ll,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ec3c881e1da4414a9c9499497bc5de74":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DropdownModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DropdownModel","_options_labels":["Initial box","Default","Default, grey pad","Padded 4px","Padded 8px","Extracted, init box","Padded 4, extracted","Padded 8, extracted","Padded 8, dilation 1","Pad 8, fract. 0.5","Pad 8, fract. 
0.2"],"_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"DropdownView","description":"","description_tooltip":null,"disabled":false,"index":1,"layout":"IPY_MODEL_44f79f7ea58742a0a770aab41e876c9c","style":"IPY_MODEL_57f7d34ddc2b4a938d7fcb4bf066deb0"}},"ee44cc3dae5c42299460702dd280dbce":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eee81f49e99f4cbb8db65b56c511adbd":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"gri
d_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f40fbbd396ac4528a97962ec911d5837":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":"none","flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f542dba5c6de48a0a44e698706d87017":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":"initial"}},"fa20ebba2bdb493799524bdf780fa86e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@j
upyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fb08b9a59d6c4e678b368c915bb97405":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fbb4d3433c6f42e094934b6936659b8f":{"model_module":"@jupyter-widgets/base","model_mod
ule_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"4em"}}}}},"nbformat":4,"nbformat_minor":0} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing `Tesseract` OCR for comics\n", + "> Accuracy Enhancements for OCR in `PanelCleaner`\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, you can test how Tesseract performs with texts from a diverse array of comics, manga, languages, and styles. You can run this notebook locally using Jupyter Lab/Notebook or on any Jupyter-compatible platform like Google Colab or VSCode.\n", + "\n", + "We'll begin by setting up [PanelCleaner](https://github.com/VoxelCubes/PanelCleaner) and the [testbed](https://github.com/civvic/PanelCleaner/tree/testbed) in Colab, though the instructions are applicable to other platforms such as [Kaggle](https://www.kaggle.com/code). We will then verify the Tesseract installation, prime an `ExperimentContext`, and create a visor to experiment with different parameters and configurations. 
\n", + "\n", + "**New to Jupyter Notebooks?** If you are not familiar with Jupyter environments, consider exploring the [Introduction to Colab](https://colab.research.google.com/notebooks/intro.ipynb) and the others provided by Google. It offers a quick and comprehensive guide to using Jupyter Notebooks effectively. The Jupyter project is a great way to learn about the notebook interface and the [Jupyter ecosystem](https://jupyter.org/)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Settings for Google Colab" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To efficiently manage the image sources for our experiments, we recommend mounting your Google Drive and storing the experiment files there. If you are not familiar with Colab or Jupyter environments, it's best to leave these settings at their default values to ensure smooth operation.\n", + "\n", + "- Set `MOUNT_DRIVE` to `True` to enable mounting Google Drive in the Colab environment.\n", + "This allows the notebook to access files stored in your Google Drive.\n", + "\n", + "- `GDRIVE_MOUNT_POINT` specifies the local directory in Colab where your Google Drive will be mounted.\n", + "This acts as the root directory for accessing any files within your Google Drive from the notebook.\n", + "\n", + "- `PANELCLEANER_IN_GDRIVE` specifies the path within your Google Drive where the PanelCleaner project is located.\n", + "This path is used to access or store any files related to the PanelCleaner project directly from Google Drive.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "MOUNT_DRIVE = True\n", + "GDRIVE_MOUNT_POINT = 'drive'\n", + "PANELCLEANER_IN_GDRIVE = 'MyDrive/Shared/PanelCleaner'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## install (Colab)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "import fastcore.all as FC\n", + "import os\n", + "import re\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "from rich import print as cprint\n", + "from rich.text import Text\n", + "\n", + "def info(msg: str):\n", + " (t := Text(msg)).stylize(\"bold red\", 0, 6)\n", + " cprint(\"_\" * 10, t, \"_\" * 10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mount Google Drive" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mnt_point = Path(f\"/content/{GDRIVE_MOUNT_POINT}\")\n", + "if FC.IN_COLAB:\n", + " if MOUNT_DRIVE:\n", + " if not mnt_point.exists():\n", + " info(\"Mounting Google Drive\")\n", + " from google.colab import drive\n", + " drive.mount(str(mnt_point), force_remount=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install **PanelCleaner**\n", + "\n", + "> We will attempt to use the version of **PanelCleaner** stored in your Google Drive. If it's not available, we'll install it from GitHub.\n", + "\n", + "Note that we specifically require the `testbed` branch of the **PanelCleaner** repository, not the main trunk. This branch contains necessary configurations and experimental features that are crucial for the tests conducted in this notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if FC.IN_COLAB:\n", + " pc_path = mnt_point/PANELCLEANER_IN_GDRIVE\n", + " tb_path = pc_path/'pcleaner/_testbed'\n", + " if tb_path.exists():\n", + " info('Installing PanelCleaner from your Google Drive')\n", + " else:\n", + " info('Installing PanelCleaner from GitHub')\n", + " !rm -rf PanelCleaner\n", + " !git clone -b testbed https://github.com/civvic/PanelCleaner.git\n", + " pc_path = Path('PanelCleaner').absolute()\n", + " tb_path = pc_path/'pcleaner/_testbed'\n", + " assert tb_path.exists(), \"PanelCleaner not found\"\n", + " os.chdir(tb_path)\n", + " sys.path.insert(0, f\"{tb_path}\")\n", + " sys.path.insert(0, f\"{pc_path}\")\n", + " !pip install -q -r requirements-colab.txt\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prologue\n", + "\n", + "In this section, we import essential components from the `PanelCleaner` testbed. `ExperimentsVisor` is used to manage and visualize the experiments, `CropMethod` defines the cropping strategies for image preprocessing, and `OCRExperimentContext` sets up the context for OCR experiments.\n", + "\n", + "If you're curious about the inner workings of these components, you can explore the notebooks that develop them in the `nbs` folder, or check out the source code they generate in the `testbed` directory. For instance, see [experiments.ipynb](nbs/experiments.ipynb) and [`_testbed/testbed/experiments.py`](testbed/experiments.py) for more details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from testbed.experiments import ExperimentsVisor, CropMethod, OCRExperimentContext\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tesseract setup\n", + "> This section ensures that Tesseract OCR is correctly installed and configured for our experiments. 
We require Tesseract version 5.x due to its improved accuracy and features.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **NOTE:** In the following cells, lines starting with an exclamation mark `!` (also known as a \"bang\") are shell commands. Uncomment these lines if you wish to execute the commands directly from this notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check Current Tesseract Version" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Correct version of Tesseract is installed.\n",
+       "
\n" + ], + "text/plain": [ + "Correct version of Tesseract is installed.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import subprocess\n", + "\n", + "def check_tesseract_version():\n", + " version_output = subprocess.run([\"tesseract\", \"--version\"], capture_output=True, text=True)\n", + " if 'tesseract 5.' in version_output.stdout:\n", + " cprint(\"Correct version of Tesseract is installed.\")\n", + " else:\n", + " cprint(\"No version or Incorrect version of Tesseract is installed. Please install Tesseract 5.x.\")\n", + "\n", + "check_tesseract_version()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Remove Tesseract installation\n", + "> If you have the old 4.x version, you should consider removing the installation with the following commands.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Mac (TBD)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Windows (TBD)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Ubuntu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !sudo apt-get remove tesseract-ocr\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install Tesseract 5.x (if necessary)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Mac (TBD)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Windows (TBD)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Linux (Ubuntu)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The **5.x** release series is available in [another PPA](https://launchpad.net/~alex-p/+archive/ubuntu/tesseract-ocr5) for Ubuntu **18.04**, **20.04**, and **22.04**.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, +
"outputs": [], + "source": [ + "# !sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "refresh system package cache in case you’re still running old Ubuntu 18.04" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !sudo apt update" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "install the software engine" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !sudo apt install -y tesseract-ocr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "### Re-check version after installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Correct version of Tesseract is installed.\n",
+       "
\n" + ], + "text/plain": [ + "Correct version of Tesseract is installed.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "check_tesseract_version()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install Tesseract languages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
tessdata path: /opt/homebrew/share/tessdata\n",
+       "
\n" + ], + "text/plain": [ + "tessdata path: \u001b[35m/opt/homebrew/share/\u001b[0m\u001b[95mtessdata\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Installed languages:\n",
+       "[\n",
+       "    'afr, amh, ara, asm, aze, aze_cyrl, bel, ben, bod, bos, bre, bul, cat, ceb, ces',\n",
+       "    'chi_sim, chi_sim_vert, chi_tra, chi_tra_vert, chr, cos, cym, dan, deu, div, dzo, ell, eng, enm, epo',\n",
+       "    'equ, est, eus, fao, fas, fil, fin, fra, frk, frm, fry, gla, gle, glg, grc',\n",
+       "    'guj, hat, heb, hin, hrv, hun, hye, iku, ind, isl, ita, ita_old, jav, jpn, jpn_vert',\n",
+       "    'kan, kat, kat_old, kaz, khm, kir, kmr, kor, kor_vert, lao, lat, lav, lit, ltz, mal',\n",
+       "    'mar, mkd, mlt, mon, mri, msa, mya, nep, nld, nor, oci, ori, osd, pan, pol',\n",
+       "    'por, pus, que, ron, rus, san, script/Arabic, script/Armenian, script/Bengali, script/Canadian_Aboriginal, \n",
+       "script/Cherokee, script/Cyrillic, script/Devanagari, script/Ethiopic, script/Fraktur',\n",
+       "    'script/Georgian, script/Greek, script/Gujarati, script/Gurmukhi, script/HanS, script/HanS_vert, script/HanT, \n",
+       "script/HanT_vert, script/Hangul, script/Hangul_vert, script/Hebrew, script/Japanese, script/Japanese_vert, \n",
+       "script/Kannada, script/Khmer',\n",
+       "    'script/Lao, script/Latin, script/Malayalam, script/Myanmar, script/Oriya, script/Sinhala, script/Syriac, \n",
+       "script/Tamil, script/Telugu, script/Thaana, script/Thai, script/Tibetan, script/Vietnamese, sin, slk',\n",
+       "    'slv, snd, snum, spa, spa_old, sqi, srp, srp_latn, sun, swa, swe, syr, tam, tat, tel',\n",
+       "    'tgk, tha, tir, ton, tur, uig, ukr, urd, uzb, uzb_cyrl, vie, yid, yor'\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "Installed languages:\n", + "\u001b[1m[\u001b[0m\n", + " \u001b[32m'afr, amh, ara, asm, aze, aze_cyrl, bel, ben, bod, bos, bre, bul, cat, ceb, ces'\u001b[0m,\n", + " \u001b[32m'chi_sim, chi_sim_vert, chi_tra, chi_tra_vert, chr, cos, cym, dan, deu, div, dzo, ell, eng, enm, epo'\u001b[0m,\n", + " \u001b[32m'equ, est, eus, fao, fas, fil, fin, fra, frk, frm, fry, gla, gle, glg, grc'\u001b[0m,\n", + " \u001b[32m'guj, hat, heb, hin, hrv, hun, hye, iku, ind, isl, ita, ita_old, jav, jpn, jpn_vert'\u001b[0m,\n", + " \u001b[32m'kan, kat, kat_old, kaz, khm, kir, kmr, kor, kor_vert, lao, lat, lav, lit, ltz, mal'\u001b[0m,\n", + " \u001b[32m'mar, mkd, mlt, mon, mri, msa, mya, nep, nld, nor, oci, ori, osd, pan, pol'\u001b[0m,\n", + " \u001b[32m'por, pus, que, ron, rus, san, script/Arabic, script/Armenian, script/Bengali, script/Canadian_Aboriginal, \u001b[0m\n", + "\u001b[32mscript/Cherokee, script/Cyrillic, script/Devanagari, script/Ethiopic, script/Fraktur'\u001b[0m,\n", + " \u001b[32m'script/Georgian, script/Greek, script/Gujarati, script/Gurmukhi, script/HanS, script/HanS_vert, script/HanT, \u001b[0m\n", + "\u001b[32mscript/HanT_vert, script/Hangul, script/Hangul_vert, script/Hebrew, script/Japanese, script/Japanese_vert, \u001b[0m\n", + "\u001b[32mscript/Kannada, script/Khmer'\u001b[0m,\n", + " \u001b[32m'script/Lao, script/Latin, script/Malayalam, script/Myanmar, script/Oriya, script/Sinhala, script/Syriac, \u001b[0m\n", + "\u001b[32mscript/Tamil, script/Telugu, script/Thaana, script/Thai, script/Tibetan, script/Vietnamese, sin, slk'\u001b[0m,\n", + " \u001b[32m'slv, snd, snum, spa, spa_old, sqi, srp, srp_latn, sun, swa, swe, syr, tam, tat, tel'\u001b[0m,\n", + " \u001b[32m'tgk, tha, tir, ton, tur, uig, ukr, urd, uzb, uzb_cyrl, vie, yid, yor'\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "out = !tesseract --list-langs # type: ignore\n", + "tessdata = 
Path(out[0].split('\"')[1])\n", + "cprint(f\"tessdata path: {tessdata}\")\n", + "cprint(\"Installed languages:\", [', '.join(sub) for sub in [out[i:i + 15] for i in range(1, len(out), 15)]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Install **best** languages and **jpn_ver** Tesseract lang\n", + "> to get better results than default langs and `jpn` language model.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Download from [tessdata_best](https://github.com/tesseract-ocr/tessdata_best). \n", + "Download from [here](https://groups.google.com/g/tesseract-ocr/c/FwjSZzoVgeg/m/u-zyFYQiBgAJ) a model trained for vertical Japanese text as found in manga.\n", + "\n", + "See [here](https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html) for the language codes.\n", + "\n", + "**Note:** While the `jpn` and `jpn_vert` language models are available, the `manga-ocr` model used by `PanelCleaner` is generally better suited for manga text recognition. However, comparing these models can provide educational insights into their respective strengths and limitations."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Uncomment and execute to download the best language models:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/osd.traineddata\n", + "# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/eng.traineddata\n", + "# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/jpn.traineddata\n", + "\n", + "# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/jpn_vert.traineddata\n", + "# or\n", + "# !wget -O jpn_vert.traineddata https://github.com/zodiac3539/jpn_vert/blob/master/jpn_ver5.traineddata\n", + "\n", + "# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/spa.traineddata\n", + "# !wget https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/fra.traineddata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copy downloaded models to tessdata folder (double check that `tessdata` variable points to the right folder):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
tessdata path: /opt/homebrew/share/tessdata\n",
+       "
\n" + ], + "text/plain": [ + "tessdata path: \u001b[35m/opt/homebrew/share/\u001b[0m\u001b[95mtessdata\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cprint(f\"tessdata path: {tessdata}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !sudo mv *.traineddata $tessdata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "and remove the downloaded models:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "rm: cannot remove '*.traineddata': No such file or directory\n" + ] + } + ], + "source": [ + "# !rm *.traineddata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check installed languages\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[\n",
+       "    Path('/opt/homebrew/share/tessdata/spa.traineddata'),\n",
+       "    Path('/opt/homebrew/share/tessdata/eng.traineddata'),\n",
+       "    Path('/opt/homebrew/share/tessdata/jpn_vert.traineddata'),\n",
+       "    Path('/opt/homebrew/share/tessdata/spa_old.traineddata'),\n",
+       "    Path('/opt/homebrew/share/tessdata/fra.traineddata'),\n",
+       "    Path('/opt/homebrew/share/tessdata/jpn.traineddata')\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\n", + " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/spa.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/eng.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/jpn_vert.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/spa_old.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/fra.traineddata'\u001b[0m\u001b[1m)\u001b[0m,\n", + " \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/opt/homebrew/share/tessdata/jpn.traineddata'\u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cprint(list(filter(lambda x: re.match(r'eng|jpn|jpn_vert|fra|spa', x.name), tessdata.ls()))) # type: ignore\n", + "# cprint(pytesseract.get_languages())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "# Tesseract Experiments\n", + "\n", + "In this notebook, we focus on applying Tesseract OCR to a variety of comic book images to evaluate its performance across different text styles, languages, and image qualities. \n", + "\n", + "The experiments are specifically designed to explore how different cropping methods affect Tesseract's ability to recognize text in complex visual contexts typical of comic panels. 
By experimenting with various cropping strategies, we want to determine whether feeding Tesseract single cropped boxes, as opposed to whole pages, can enhance OCR accuracy.\n", + "\n", + "## Objectives\n", + "\n", + "- **Evaluate basic OCR performance:** Assess how well Tesseract recognizes text across a diverse set of comic book images.\n", + "- **Test different cropping methods:** Systematically vary the way images are cropped to isolate text boxes and see if this improves the accuracy of text recognition.\n", + "- **Optimize OCR settings (TBD):** Adjust Tesseract's configuration settings based on the results of the cropping experiments to optimize performance for comic texts.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment directory\n", + "\n", + "Defines the directory structure for storing images, caching auxiliary data, and saving experiment results.\n", + "\n", + "- **Source Directory (`EXP_DIR/source/`):** This is where the original images for the experiments are stored.\n", + "- **Cache Directory (`EXP_DIR/cache/`):** This directory is used for caching processed images or other auxiliary files that are generated during the experiments.\n", + "\n", + "You can modify the default locations of these directories as needed. The default setup assumes that you are working within the `PanelCleaner/pcleaner/_testbed` directory. Use the following code to verify your current working directory and to set up the experiment directory:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "EXP_DIR = Path('./experiment')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    Working dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed\n",
+       "Experiments dir: /Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment\n",
+       "
\n" + ], + "text/plain": [ + " Working dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/\u001b[0m\u001b[95m_testbed\u001b[0m\n", + "Experiments dir: \u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/\u001b[0m\u001b[95mexperiment\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cprint(f\"{'Working dir':>15}: {Path('.').resolve()}\\nExperiments dir: {EXP_DIR.resolve()}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test images\n", + "\n", + "Prepare and manage the comic book images for OCR testing.\n", + "\n", + "If you have specific comic book images you want to test, upload them to the `EXP_DIR/source/` directory. Ensure that each image file is accompanied by a corresponding text file containing the ground truth data. The text file should have the same name as the image but with a `.txt` extension. Each line in the text file should represent one text box as detected and processed by PanelCleaner.\n", + "\n", + "For those who prefer to use a standardized set of images for comparison purposes, we provide a link to download a pre-selected set of comic book images. After downloading, ensure to place these images in the `EXP_DIR/source/` directory.\n", + "\n", + "Optionally, you can include a `.json` file for each image, specifying the language of the text on the page. This file should have the same name as the image and a `.json` extension. Here is an example of the content for a language specification file:\n", + "\n", + "```json\n", + "{\n", + "\"lang\": \"Spanish\"\n", + "}\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/source\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[35m/Users/vic/dev/repo/DL-mac/PanelCleaner/pcleaner/_testbed/experiment/\u001b[0m\u001b[95msource\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cprint((EXP_DIR/'source').resolve())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "or download the standard set:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !gdown --id 1MCqUImwFS5iQ271CD9_t2FSugJXdYj0a -O experiment.zip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !unzip -qn experiment.zip -d ." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup ngrok (Colab)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The experiments can generate hundreds of images, and maintaining the **PIL** images in memory is not efficient. All the generated images are cached and visualized on demand through a URL pointing to the local cache. This approach prevents the kernel from being overloaded with **PIL** images, with the front-end responsible for fetching the image and the backend web server (not the kernel) for serving the image in another process. This method is quick and efficient. As an added bonus, the saved notebook remains lean and fit; it doesn't store the Base64 versions of all the output cell images.\n", + "\n", + "Unfortunately, this approach does not work as is in **Colab**. Google Colab runs on an older Ubuntu 18.04 VM, so all the usual networking challenges with Docker, or whatever VMs Google is using, apply. Google also goes to great lengths to avoid exposing its internal architecture. 
We have two options:\n", + "- Let the Jupyter kernel serve the images itself, which is slow and memory-consuming.\n", + "- Use a tunnel to map localhost (server) to whatever IP and port the front-end (the browser you're currently using) is running on. We can use **ngrok** for this, but *ngrok* is a commercial service that has been abused and now requires confirmation the first time the tunnel connects, which can be inconvenient for the user. It also requires the user to open a free account and obtain an auth token.\n", + "\n", + "You choose.\n", + "\n", + "If the notebook is running in Colab and ngrok has been successfully installed and the tunnel has been created, the default setting is `USE_PIL=False`. You can set the environment variable `USE_PIL=True` to force the use of PIL images, but note that in certain circumstances, Colab will complain because the free tiers are usually memory constrained.\n", + "\n", + "If you don't change the default settings and\n", + "- the notebook is running locally, it'll serve the images directly without any additional setup.\n", + "- the notebook is running in Colab, it'll serve the images through a web server and ngrok.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if FC.IN_COLAB:\n", + " os.environ['USE_TUNNEL'] = 'True'\n", + " os.environ['USE_PIL'] = 'False'\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "SERVER = None\n", + "if os.environ['USE_PIL'].lower() == 'false' and os.environ['USE_TUNNEL'].lower() == 'true':\n", + " import testbed.web_server as web_server\n", + " SERVER = web_server.setup_ngrok(web_server.WebServerBottle, Path(EXP_DIR))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CONTEXT\n", + "> Creates the `OCRExperimentContext` object we'll use to manage the experiments and visualize the configuration.\n" + ] + }, + { + "cell_type": "code",
+ "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current Configuration:\n", + "\n", + "Locale: System default\n", + "Default Profile: Built-in\n", + "Saved Profiles:\n", + "- victess: /Users/vic/dev/repo/DL-mac/cleaned/victess.conf\n", + "- vicmang: /Users/vic/dev/repo/DL-mac/cleaned/vicmang.conf\n", + "\n", + "Profile Editor: cursor\n", + "Cache Directory: System default\n", + "Default Torch Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt\n", + "Default CV2 Model Path: /Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt.onnx\n", + "GUI Theme: System default\n", + "\n", + "--------------------\n", + "\n", + "Config file located at: /Users/vic/Library/Application Support/pcleaner/pcleanerconfig.ini\n", + "System default cache directory: /Users/vic/Library/Caches/pcleaner\n" + ] + }, + { + "data": { + "text/html": [ + "
 config cache_dir: None\n",
+       "       model_path: Path('/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt')\n",
+       "           device: 'mps'\n",
+       "
\n" + ], + "text/plain": [ + " config cache_dir: \u001b[3;35mNone\u001b[0m\n", + " model_path: \u001b[1;35mPath\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'/Users/vic/Library/Caches/pcleaner/model/comictextdetector.pt'\u001b[0m\u001b[1m)\u001b[0m\n", + " device: \u001b[32m'mps'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
        force_PIL: False\n",
+       "       use_tunnel: False\n",
+       "       server_url: \n",
+       "   experiment dir: experiment\n",
+       "       source_dir: experiment/source\n",
+       "        cache_dir: experiment/cache\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + " force_PIL: \u001b[3;91mFalse\u001b[0m\n", + " use_tunnel: \u001b[3;91mFalse\u001b[0m\n", + " server_url: \n", + " experiment dir: experiment\n", + " source_dir: experiment/source\n", + " cache_dir: experiment/cache\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Experiment runs:\n",
+       "Tesseract-crop-post: 17\n",
+       "   Tesseract-crop: 0\n",
+       "
\n" + ], + "text/plain": [ + "Experiment runs:\n", + "Tesseract-crop-post: \u001b[1;36m17\u001b[0m\n", + " Tesseract-crop: \u001b[1;36m0\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "CONTEXT = OCRExperimentContext('Tesseract', EXP_DIR, server=SERVER, load=True)\n", + "CONTEXT.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Verify images setup\n", + "\n", + "Before visualizing the experiments, verify that all images are correctly recognized and accessible." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['00: Action_Comics_1960-01-00_(262).JPG',\n", + " '01: Adolf_Cap_01_008.jpg',\n", + " '02: Barnaby_v1-028.png',\n", + " '03: Barnaby_v1-029.png',\n", + " '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',\n", + " '05: Cannon-292.jpg',\n", + " '06: Contrato_con_Dios_028.jpg',\n", + " '07: Erase_una_vez_en_Francia_02_88.jpg',\n", + " '08: FOX_CHILLINTALES_T17_012.jpg',\n", + " '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',\n", + " '10: Galactus_12.jpg',\n", + " '11: INOUE_KYOUMEN_002.png',\n", + " '12: MCCALL_ROBINHOOD_T31_010.jpg',\n", + " '13: MCCAY_LITTLENEMO_090.jpg',\n", + " '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',\n", + " '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',\n", + " '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',\n", + " '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',\n", + " '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',\n", + " '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',\n", + " '20: Strange_Tales_172005.jpg',\n", + " '21: Strange_Tales_172021.jpg',\n", + " '22: Tarzan_014-21.JPG',\n", + " '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jpg',\n", + " '24: Transformers_-_Unicron_000-004.jpg',\n", + " '25: Transformers_-_Unicron_000-016.jpg',\n", + " '26: WARE_ACME_024.jpg',\n", + " '27: 
Yoko_Tsuno_T01_1972-10.jpg',\n", + " '28: Your_Name_Another_Side_Earthbound_T02_084.jpg',\n", + " '29: manga_0033.jpg',\n", + " '30: ronson-031.jpg',\n", + " '31: 哀心迷図のバベル 第01巻 - 22002_00_059.jpg']" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[f\"{i:02}: {_.name}\" for i,_ in enumerate(CONTEXT.image_paths)]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "# Running an experiment\n", + "\n", + "Conduct an OCR experiment using the established context and tools. You will select an image, choose a cropping method, and decide which text box to analyze. The results will be visualized so you can assess the effectiveness of the OCR process.\n", + "\n", + "### Selecting and configuring the experiment\n", + "\n", + "1. **Choose an image:** Start by selecting an image from the loaded dataset.\n", + "2. **Specify cropping method:** Choose how the image should be cropped. Different cropping methods can affect OCR accuracy, as they change how the text is presented to the OCR engine.\n", + "3. **Select text box:** Select the specific text box within the image to focus the OCR process.\n", + "\n", + "### Visualizing results\n", + "\n", + "The results are visualized immediately. Here, it is crucial to have accurate **ground truth** data to effectively compare and assess the OCR results.\n", + "\n", + "You can assess the accuracy of OCR results at various levels: box by box, method by method, and overall. Currently, we use a simplified version of the `edit distance` metric to calculate accuracy. However, we plan to adopt more standardized metrics, such as the `Levenshtein distance`, in future updates.\n", + "\n", + "Additionally, we should probably develop a metric specifically tailored to the unique characteristics of comic texts, such as the prevalence of all-caps and handwritten text, to provide more relevant evaluations. 
OCR models are trained with typeset text, synthetic or real-world, and business, forms, news or literary data, and usually don't perform well on handwritten text. We haven't found any OCR dataset that incorporates comics style data.\n", + "\n", + "\n", + "### Managing experiment data\n", + "\n", + "- **Save results:** You have the option to save the results of the experiment, useful for documenting performance and changes over time. However, be cautious with this option as it will overwrite existing results without confirmation.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note:** This visor functionality is currently a work in progress (WIP). The interface and options are being refined to enhance the experience and provide more robust data management. This section gives you a preliminary look at what we are aiming for with the `testbed` project.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c3ed154f58e6480abe8993b4a2fc22b7", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HTML(value=\"" + } + }, + "03b66d0eb8b5487b8ad21382b93cc68d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "044d11e5bc4e4f87ad9d099734418899": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": 
null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "068b8601d7364d29b9598b80aeb782e0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0c28619fcebc4c58a9a756888add9140": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "model_grp" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a2410f8101384beda0114d93ac655edb" + ], + "layout": 
"IPY_MODEL_24e8764bfd294a629d6c3855b4094f8b" + } + }, + "0d4014b56e93430e9782502bf29c7c2e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "initial" + } + }, + "0d5a810240d84cc683b42f56b1f4c2d1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0e3ae88bc86f4a31b5aedc1dfd0b1bf2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "120px" + } + }, + "0f1bb59001314b13a091c7e1b331c58b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ButtonModel", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ButtonModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ButtonView", + "button_style": "", + "description": "reset", + "disabled": false, + "icon": "", + "layout": "IPY_MODEL_3ab779d3f50a456980527636aa763dba", + "style": "IPY_MODEL_bb3adfcd8e4a4af29079953bc9220ac1", + "tooltip": "" + } + }, + "101d1fc2b6df471ebd6a32a44c8120ed": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "LabelModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "LabelView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_df77be57fe7943298432a759e6ce8416", + "placeholder": "​", + "style": "IPY_MODEL_8b38a0bdf80149bba72324232beb14ed", + "value": "Method:" + } + }, + "175e7409479d4f56ba4a4faf4c8bebaa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3f8e2f6b025e4fcd90ad34c2e99025ae", + "max": 11, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_044d11e5bc4e4f87ad9d099734418899", + "value": 11 + } + }, + "1807cf3a1770434599e993a087293d91": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "18ccc64fc8504bdeb2fb07043688c1c4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "25em" + } + }, + "1da81d9a8d7446838b76ae017e794e78": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": 
null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "247e8a63e98441f294c1514a43b35e0d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": 
null, + "top": null, + "visibility": null, + "width": null + } + }, + "24e8764bfd294a629d6c3855b4094f8b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "25cdb8370f70459886e00eeef0f66388": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_247e8a63e98441f294c1514a43b35e0d", + "placeholder": "​", + "style": "IPY_MODEL_7a928ad4bd52422b9544d0d312402a5b", + "value": "Box #1: 100%" + } + }, + 
"27c90e0d46934e80a38889a0ae18a726": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "31dcaeca8806453ea52292112bb0a4f3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3588eeb28e024f1ead6cd21f46d65a6c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "method_grp" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_101d1fc2b6df471ebd6a32a44c8120ed", + "IPY_MODEL_8ca00801b4e54cbd980c998791664f60", + "IPY_MODEL_ec3c881e1da4414a9c9499497bc5de74" + ], + "layout": "IPY_MODEL_31dcaeca8806453ea52292112bb0a4f3" + } + }, + "39648bdbc39c4c7c827510141663804c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "visible", + "width": null + } + }, + "3ab779d3f50a456980527636aa763dba": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", 
+ "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": "0px 0px 0px 3em", + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "5em" + } + }, + "3cf7099443124d289495da41bfbc8b58": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ButtonStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ButtonStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "button_color": "lightblue", + "font_weight": "bold" + } + }, + "3d0ad04ce06b4314b64dcdde269b38e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "BoundedIntTextModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "BoundedIntTextModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "IntTextView", + "continuous_update": false, + "description": "", + "description_tooltip": null, + "disabled": false, + "layout": 
"IPY_MODEL_4aba84bf7b4847fdb1cac927ba6a7ebf", + "max": 14, + "min": 0, + "step": 1, + "style": "IPY_MODEL_f542dba5c6de48a0a44e698706d87017", + "value": 2 + } + }, + "3f8e2f6b025e4fcd90ad34c2e99025ae": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4064335db7684f2f81ad237513e90c34": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "137612175420256" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_878477e8708a47e8ab49b00be715d1cd" + ], + "layout": 
"IPY_MODEL_00878ffbdb234a48b8917563f3058107" + } + }, + "44f79f7ea58742a0a770aab41e876c9c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "150px" + } + }, + "47b4dcc7177b41d1aa69d3918a6e8598": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [ + "message_visor-yXy" + ], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_c193eb8285a740208457bbe7985d356f", + "msg_id": "", + "outputs": [] + } + }, + "48555e1574db42db86f6572e9c888b58": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"VBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "137612216365504" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_60e22ee6a0fc428b8e3709e394e44841", + "IPY_MODEL_7e8086d63d1b4bbf96e4c70c63bc920a" + ], + "layout": "IPY_MODEL_0d5a810240d84cc683b42f56b1f4c2d1" + } + }, + "48cfde671ecf42c7a124a94a2962bb66": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "137612175420208" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ad2bbb4b23a648209da1edab0006adae" + ], + "layout": "IPY_MODEL_d6fed923432746c9b3dff00549de596d" + } + }, + "496e8548fc0f41b7b0c15c5881596326": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "initial" + } + }, + "4aba84bf7b4847fdb1cac927ba6a7ebf": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "50px" + } + }, + "4f8b904d91eb44028477a6a61c34af54": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "LabelModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "LabelView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_919da88097344a689a9afb6fa5977b29", + "placeholder": "​", + "style": "IPY_MODEL_d0ed39572cea4c37bb07dc99577258e3", + "value": "Box # (of 15):" + } + }, + "57f7d34ddc2b4a938d7fcb4bf066deb0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "initial" + } + }, + "57f9b685011043418d9552b3cb48784e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ButtonModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ButtonModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ButtonView", + "button_style": "", + "description": "save", + "disabled": false, + "icon": "", + "layout": "IPY_MODEL_fbb4d3433c6f42e094934b6936659b8f", + "style": "IPY_MODEL_3cf7099443124d289495da41bfbc8b58", + "tooltip": "" + } + }, + "58b2ec779ab64a2b9e85d57160b453c9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "0px" + } + }, + "60e22ee6a0fc428b8e3709e394e44841": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e1de76477d764776ac5a209f7ebcc8d6", + "IPY_MODEL_48cfde671ecf42c7a124a94a2962bb66", + "IPY_MODEL_4064335db7684f2f81ad237513e90c34", + "IPY_MODEL_99bbf4dd65424e87aab67bcf162a8171" + ], + "layout": "IPY_MODEL_eee81f49e99f4cbb8db65b56c511adbd" + } + }, + "6a87fbd3fc6440c5a81a564ac94400cc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_01cd556393d643f8bb3fd88c8cd7bb84", + "IPY_MODEL_48555e1574db42db86f6572e9c888b58", + "IPY_MODEL_47b4dcc7177b41d1aa69d3918a6e8598", + "IPY_MODEL_dc75396dcc3c4606a8fe56401d29a4e9" + ], + "layout": "IPY_MODEL_778afca8ec0a431ca8670604c10d8956" + } + }, + "6eed00e28fae4570b2348330bb9dcbab": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "initial" + } + }, + "72440081608048b1b5fc76e42011ad0c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + 
"model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": "hidden", + "width": null + } + }, + "778afca8ec0a431ca8670604c10d8956": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, 
+ "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7a928ad4bd52422b9544d0d312402a5b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7e8086d63d1b4bbf96e4c70c63bc920a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "137612175421024" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d89bf137dd5e4d698dc7f6187f2e148c", + "IPY_MODEL_3588eeb28e024f1ead6cd21f46d65a6c", + "IPY_MODEL_b0d6d88a4b824cf9be7c01d5611c3426" + ], + "layout": "IPY_MODEL_39648bdbc39c4c7c827510141663804c" + } + }, + "878477e8708a47e8ab49b00be715d1cd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "display_option_grp" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + 
"_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_bd6e62f7db8c40d695d6ca3d93cf7e94" + ], + "layout": "IPY_MODEL_068b8601d7364d29b9598b80aeb782e0" + } + }, + "8b38a0bdf80149bba72324232beb14ed": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8ca00801b4e54cbd980c998791664f60": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "CheckboxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "CheckboxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "CheckboxView", + "description": "all", + "description_tooltip": null, + "disabled": false, + "indent": true, + "layout": "IPY_MODEL_0d4014b56e93430e9782502bf29c7c2e", + "style": "IPY_MODEL_58b2ec779ab64a2b9e85d57160b453c9", + "value": false + } + }, + "90f41682d85f42a6becdca99a59e9a2c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "CheckboxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "CheckboxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "CheckboxView", + "description": "all", + "description_tooltip": null, + "disabled": false, + "indent": true, + "layout": "IPY_MODEL_496e8548fc0f41b7b0c15c5881596326", + "style": "IPY_MODEL_df9bb4a390fa4e6597b832d09949d24e", + "value": false + 
} + }, + "919da88097344a689a9afb6fa5977b29": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": "0px 0px 0px 10px", + "right": null, + "top": null, + "visibility": null, + "width": "initial" + } + }, + "929913d45cc9400087eab703338d00f0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "initial" + } + }, + "99bbf4dd65424e87aab67bcf162a8171": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "137612175414976" + ], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_57f9b685011043418d9552b3cb48784e", + "IPY_MODEL_0f1bb59001314b13a091c7e1b331c58b" + ], + "layout": "IPY_MODEL_fb08b9a59d6c4e678b368c915bb97405" + } + }, + "9a49e8c567b94a64a19dc3eecdf5a857": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_25cdb8370f70459886e00eeef0f66388", + "IPY_MODEL_175e7409479d4f56ba4a4faf4c8bebaa", + "IPY_MODEL_c944b093296248b4a9a75a9139d5003d" + ], + "layout": "IPY_MODEL_72440081608048b1b5fc76e42011ad0c" + } + }, + "a2410f8101384beda0114d93ac655edb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DropdownModel", + "_options_labels": [ + "Tesseract-crop-post", + "Tesseract-crop" + ], + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "DropdownView", + "description": "", + "description_tooltip": null, + "disabled": false, + "index": 0, + "layout": "IPY_MODEL_bfa2f19eedfa4bd99b59d08365ae4676", + "style": "IPY_MODEL_a7755ebc78f34335a32f75fa342fcd76" + } + }, + "a7755ebc78f34335a32f75fa342fcd76": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "initial" + } + }, + "ad2bbb4b23a648209da1edab0006adae": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DropdownModel", + "_options_labels": [ + "Action_Comics_1960-01-00_(262)", + "Adolf_Cap_01_008", + "Barnaby_v1-028", + "Barnaby_v1-029", + "Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013", + "Cannon-292", + "Contrato_con_Dios_028", + "Erase_una_vez_en_Francia_02_88", + "FOX_CHILLINTALES_T17_012", + "Furari_-_Jiro_Taniguchi_selma_056", + "Galactus_12", + "INOUE_KYOUMEN_002", + "MCCALL_ROBINHOOD_T31_010", + "MCCAY_LITTLENEMO_090", + "Mary_Perkins_On_Stage_v2006_1_-_P00068", + "PIKE_BOYLOVEGIRLS_T41_012", + "Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1", + "Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K", + "Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2", + "Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024", + "Strange_Tales_172005", + "Strange_Tales_172021", + "Tarzan_014-21", + "Tintin_21_Les_Bijoux_de_la_Castafiore_page_39", + "Transformers_-_Unicron_000-004", + "Transformers_-_Unicron_000-016", + "WARE_ACME_024", + "Yoko_Tsuno_T01_1972-10", + "Your_Name_Another_Side_Earthbound_T02_084", + "manga_0033", + "ronson-031", + "哀心迷図のバベル 第01巻 - 22002_00_059" + ], + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "DropdownView", + "description": "", + "description_tooltip": null, + "disabled": false, + "index": 14, + "layout": 
"IPY_MODEL_18ccc64fc8504bdeb2fb07043688c1c4", + "style": "IPY_MODEL_6eed00e28fae4570b2348330bb9dcbab" + } + }, + "b0d6d88a4b824cf9be7c01d5611c3426": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "137612175420544" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e13c49f50ebd4f81a1dafe940ac13a31" + ], + "layout": "IPY_MODEL_1da81d9a8d7446838b76ae017e794e78" + } + }, + "bb3adfcd8e4a4af29079953bc9220ac1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ButtonStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ButtonStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "button_color": null, + "font_weight": "bold" + } + }, + "bd6e62f7db8c40d695d6ca3d93cf7e94": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DropdownModel", + "_options_labels": [ + "Boxes", + "Image", + "Mask", + "Image & Mask", + "Page data", + "Ground truth", + "Image All", + "Results", + "Best results", + "Accuracy", + "Dataframe", + "Config" + ], + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "DropdownView", + "description": "", + "description_tooltip": null, + "disabled": false, + "index": 7, + "layout": 
"IPY_MODEL_0e3ae88bc86f4a31b5aedc1dfd0b1bf2", + "style": "IPY_MODEL_929913d45cc9400087eab703338d00f0" + } + }, + "bfa2f19eedfa4bd99b59d08365ae4676": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "fit-content" + } + }, + "c193eb8285a740208457bbe7985d356f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c944b093296248b4a9a75a9139d5003d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_dcc0c0ee6a284ecb8c40f06891919805", + "placeholder": "​", + "style": "IPY_MODEL_1807cf3a1770434599e993a087293d91", + "value": " 11/11 [00:04<00:00,  2.10it/s]" + } + }, + "d0ed39572cea4c37bb07dc99577258e3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d6fed923432746c9b3dff00549de596d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d89bf137dd5e4d698dc7f6187f2e148c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "box_grp" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4f8b904d91eb44028477a6a61c34af54", + "IPY_MODEL_90f41682d85f42a6becdca99a59e9a2c", + "IPY_MODEL_3d0ad04ce06b4314b64dcdde269b38e6" + ], + "layout": "IPY_MODEL_eab60b2fe7a84e4b9a913f1450766452" + } + }, + "dc75396dcc3c4606a8fe56401d29a4e9": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": 
"1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_ee44cc3dae5c42299460702dd280dbce", + "msg_id": "", + "outputs": [ + { + "data": { + "text/html": "
Maybe... but l tn certainly wouldn't feel right holding back tip money!
0.96
\n
\n
Maybe ... but I⎕⎕⎕ certainly wouldn't feel right holding back tip money!

Maybe... but l tn certainly wouldn't feel right holding back tip money!
", + "text/plain": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ] + } + }, + "dcc0c0ee6a284ecb8c40f06891919805": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "df77be57fe7943298432a759e6ce8416": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + 
"grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": "0px 0px 0px 10px", + "right": null, + "top": null, + "visibility": null, + "width": "initial" + } + }, + "df9bb4a390fa4e6597b832d09949d24e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "0px" + } + }, + "e13c49f50ebd4f81a1dafe940ac13a31": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [ + "wrapper-spinner" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f40fbbd396ac4528a97962ec911d5837", + "placeholder": "​", + "style": "IPY_MODEL_03b66d0eb8b5487b8ad21382b93cc68d", + "value": "\n
\n
\n
\n " + } + }, + "e1de76477d764776ac5a209f7ebcc8d6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [ + "context-visor", + "137612175420016" + ], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0c28619fcebc4c58a9a756888add9140" + ], + "layout": "IPY_MODEL_fa20ebba2bdb493799524bdf780fa86e" + } + }, + "e36c1a1c609c425bb76ea83edf910425": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "eab60b2fe7a84e4b9a913f1450766452": { + "model_module": "@jupyter-widgets/base", + "model_module_version": 
"1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ec3c881e1da4414a9c9499497bc5de74": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DropdownModel", + "_options_labels": [ + "Initial box", + "Default", + "Default, grey pad", + "Padded 4px", + "Padded 8px", + "Extracted, init box", + "Padded 4, extracted", + "Padded 8, extracted", + "Padded 8, dilation 1", + "Pad 8, fract. 0.5", + "Pad 8, fract. 
0.2" + ], + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "DropdownView", + "description": "", + "description_tooltip": null, + "disabled": false, + "index": 1, + "layout": "IPY_MODEL_44f79f7ea58742a0a770aab41e876c9c", + "style": "IPY_MODEL_57f7d34ddc2b4a938d7fcb4bf066deb0" + } + }, + "ee44cc3dae5c42299460702dd280dbce": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "eee81f49e99f4cbb8db65b56c511adbd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f40fbbd396ac4528a97962ec911d5837": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": "none", + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": 
null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f542dba5c6de48a0a44e698706d87017": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "initial" + } + }, + "fa20ebba2bdb493799524bdf780fa86e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fb08b9a59d6c4e678b368c915bb97405": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fbb4d3433c6f42e094934b6936659b8f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "4em" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 473a47425ec4d6e126303fe44f7566175a9a2329 Mon Sep 17 00:00:00 2001 From: Voxel <41875513+VoxelCubes@users.noreply.github.com> Date: Sun, 2 Jun 2024 18:22:37 +0200 Subject: [PATCH 27/27] Revert "Downgrade to Python 10 as Colab not yet support 11" This reverts commit a2d15f4f3996378a33b4943547a6a0063c5bea7a. --- pcleaner/config.py | 19 +++++++------------ pcleaner/gui/profile_parser.py | 2 +- pcleaner/preprocessor.py | 12 ++++++------ pcleaner/structures.py | 4 ++-- 4 files changed, 16 insertions(+), 21 deletions(-) diff --git a/pcleaner/config.py b/pcleaner/config.py index 4d4add4d..1a3066f3 100644 --- a/pcleaner/config.py +++ b/pcleaner/config.py @@ -72,17 +72,13 @@ Percentage = NewType("Percentage", float) -class EnumStr_(Enum): - @classmethod - def display_names(cls) -> dict[str, 'EnumStr_']: - return {e.value: e for e in cls} - -class ReadingOrder(EnumStr_): +class ReadingOrder(StrEnum): AUTO = "auto" MANGA = "manga" COMIC = "comic" -class OCREngine(EnumStr_): + +class OCREngine(StrEnum): AUTO = "auto" MANGAOCR = "manga-ocr" TESSERACT = "tesseract" @@ -1313,14 +1309,13 @@ def try_to_load( return # check before: `StrEnum` is a `str` - elif isinstance(attr_type, type) and issubclass(attr_type, EnumStr_): - names = attr_type.display_names() - if conf_data in names: # type: ignore - attr_value = names[conf_data] + elif isinstance(attr_type, type) and issubclass(attr_type, StrEnum): + if conf_data in attr_type.__members__.values(): + attr_value = conf_data else: print( f"Option {attr_name} in section {section} should be a one " - f"of {', '.join(_.value for _ in attr_type)}.\n" + f"of 
{', '.join(repr(str(_)) for _ in attr_type.__members__.values())}.\n" f"Failed to parse '{conf_data}'" ) return diff --git a/pcleaner/gui/profile_parser.py b/pcleaner/gui/profile_parser.py index 9aaa48e2..4af9d5bb 100644 --- a/pcleaner/gui/profile_parser.py +++ b/pcleaner/gui/profile_parser.py @@ -206,7 +206,7 @@ def _get_text() -> str | None: enm = {EntryTypes.OCREngine: OCREngine, EntryTypes.ReadingOrder: ReadingOrder}[ entry_type ] - for member in enm: + for member in enm.__members__.values(): self._data_widget.addTextItemLinkedData(member.value, member) self._data_widget.setCurrentIndexByLinkedData(enm.AUTO) self._data_widget.currentIndexChanged.connect(self._value_changed) diff --git a/pcleaner/preprocessor.py b/pcleaner/preprocessor.py index c15cd251..0f43ac3e 100644 --- a/pcleaner/preprocessor.py +++ b/pcleaner/preprocessor.py @@ -130,15 +130,15 @@ def prep_json_file( original_path: str = json_data["original_path"] scale: float = json_data["scale"] boxes: list[st.Box] = [] - page_langs: list[str] = [] + page_langs: list[st.DetectedLang] = [] # Define permitted languages based on strictness. # Since the OCR model is only trained to recognize Japanese, # we need to discard anything that isn't, and if strict, also # those that are unknown (likely a mix). - language_whitelist = [st.DetectedLang.JA.value, st.DetectedLang.ENG.value] + language_whitelist = [st.DetectedLang.JA, st.DetectedLang.ENG] if not preprocessor_conf.ocr_strict_language: - language_whitelist.append(st.DetectedLang.UNKNOWN.value) + language_whitelist.append(st.DetectedLang.UNKNOWN) for data in json_data["blk_list"]: # Check box language. 
@@ -155,9 +155,9 @@ def prep_json_file( page_langs.append(data["language"]) boxes.append(box) - page_lang: st.DetectedLang = st.DetectedLang.display_names()[( - Counter(page_langs).most_common(1)[0][0] if boxes else st.DetectedLang.UNKNOWN.value - )] + page_lang: st.DetectedLang = ( + Counter(page_langs).most_common(1)[0][0] if boxes else st.DetectedLang.UNKNOWN + ) logger.debug(f"Detected lang: {page_lang}") # reading_order = preprocessor_conf.reading_order diff --git a/pcleaner/structures.py b/pcleaner/structures.py index 8b3a1215..02362bf1 100644 --- a/pcleaner/structures.py +++ b/pcleaner/structures.py @@ -1,7 +1,7 @@ import json import re from enum import Enum -# from enum import StrEnum +from enum import StrEnum from importlib import resources from pathlib import Path from typing import Sequence @@ -15,7 +15,7 @@ import pcleaner.data -class DetectedLang(cfg.EnumStr_): +class DetectedLang(StrEnum): JA = "ja" ENG = "eng" UNKNOWN = "unknown"