Skip to content

Commit

Permalink
update tests, add help
Browse files Browse the repository at this point in the history
  • Loading branch information
kha-white committed Feb 18, 2024
1 parent c5b8c83 commit 89a8652
Show file tree
Hide file tree
Showing 77 changed files with 257 additions and 1,800 deletions.
2 changes: 1 addition & 1 deletion mokuro/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.2.0-beta.6'
__version__ = '0.2.0-beta.7'

from mokuro.manga_page_ocr import MangaPageOcr
from mokuro.mokuro_generator import MokuroGenerator
48 changes: 33 additions & 15 deletions mokuro/run.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections import Counter
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Sequence, Optional, Union

import fire
from loguru import logger
Expand All @@ -10,25 +11,42 @@
from mokuro.volume import VolumeCollection


def run(*paths,
parent_dir=None,
pretrained_model_name_or_path='kha-white/manga-ocr-base',
force_cpu=False,
disable_confirmation=False,
ignore_errors=False,
no_cache=False,
unzip=False,
legacy_html=True,
as_one_file=True,
disable_ocr=False,
def run(*paths: Optional[Sequence[Union[str, Path]]],
parent_dir: Optional[Union[str, Path]] = None,
pretrained_model_name_or_path: str = 'kha-white/manga-ocr-base',
force_cpu: bool = False,
disable_confirmation: bool = False,
disable_ocr: bool = False,
ignore_errors: bool = False,
no_cache: bool = False,
unzip: bool = False,
disable_html: bool = False,
as_one_file: bool = True,
):
"""
Process manga volumes with mokuro.
Args:
paths: Paths to manga volumes. Volume can be a directory, a zip file or a cbz file.
parent_dir: Parent directory to scan for volumes. If provided, all volumes inside this directory will be processed.
pretrained_model_name_or_path: Name or path of the manga-ocr model.
force_cpu: Force the use of CPU even if CUDA is available.
disable_confirmation: Disable confirmation prompt. If False, the user will be prompted to confirm the list of volumes to be processed.
disable_ocr: Disable OCR processing. Generate mokuro/HTML files without OCR results.
ignore_errors: Continue processing volumes even if an error occurs.
no_cache: Do not use cached OCR results from previous runs (_ocr directories).
unzip: Extract volumes in zip/cbz format in their original location.
disable_html: Disable legacy HTML output. If False (the default), legacy HTML is generated and the run acts as if --unzip is True.
as_one_file: Applies only to legacy HTML. If False, generate separate CSS and JS files instead of embedding them in the HTML file.
"""

if disable_ocr:
logger.info('Running with OCR disabled')

if legacy_html:
if not disable_html:
logger.warning(
'Legacy HTML output is deprecated and will not be further developed. '
'Use .mokuro format and web reader instead.')
"Legacy HTML output is deprecated and will not be further developed. "
"It's recommended to use .mokuro format and web reader instead.")
# legacy HTML works only with unzipped output
unzip = True

Expand Down Expand Up @@ -105,7 +123,7 @@ def run(*paths,
try:
volume.unzip(tmp_dir)
mg.process_volume(volume, ignore_errors=ignore_errors, no_cache=no_cache)
if legacy_html:
if not disable_html:
generate_legacy_html(volume, as_one_file=as_one_file, ignore_errors=ignore_errors)

except Exception:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name="mokuro",
version='0.2.0-beta.6',
version='0.2.0-beta.7',
description="Browser reader for manga with selectable text",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
2 changes: 1 addition & 1 deletion tests/data/expected_results/test0/_ocr/vol1/000a.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"version": "0.1.8", "img_width": 827, "img_height": 1170, "blocks": [{"box": [37, 0, 863, 235], "vertical": false, "font_size": 137.5, "lines_coords": [[[582.0, 18.0], [785.0, 13.0], [787.0, 53.0], [583.0, 58.0]], [[37.0, 0.0], [863.0, 0.0], [863.0, 235.0], [37.0, 235.0]]], "lines": ["ダイアリー・", "うちの猫ず日記"]}, {"box": [233, 1048, 608, 1170], "vertical": false, "font_size": 115, "lines_coords": [[[240.0, 1038.0], [498.0, 1057.0], [490.0, 1169.0], [232.0, 1163.0]]], "lines": ["がぁさん"]}]}
{"version": "0.2.0-beta.6", "img_width": 827, "img_height": 1170, "blocks": [{"box": [37, 0, 863, 235], "vertical": false, "font_size": 137.5, "lines_coords": [[[582.0, 18.0], [785.0, 13.0], [787.0, 53.0], [583.0, 58.0]], [[37.0, 0.0], [863.0, 0.0], [863.0, 235.0], [37.0, 235.0]]], "lines": ["ダイアリー・", "うちの猫ず日記"]}, {"box": [233, 1048, 608, 1170], "vertical": false, "font_size": 115, "lines_coords": [[[240.0, 1038.0], [498.0, 1057.0], [490.0, 1169.0], [232.0, 1163.0]]], "lines": ["がぁさん"]}]}
2 changes: 1 addition & 1 deletion tests/data/expected_results/test0/_ocr/vol1/000b.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"version": "0.1.8", "img_width": 827, "img_height": 1170, "blocks": [{"box": [708, 87, 771, 276], "vertical": true, "font_size": 26, "lines_coords": [[[740.0, 90.0], [767.0, 90.0], [771.0, 276.0], [743.0, 276.0]], [[708.0, 87.0], [735.0, 87.0], [735.0, 199.0], [708.0, 199.0]]], "lines": ["あたしはナナ!!", "5さい!"]}, {"box": [611, 91, 670, 276], "vertical": true, "font_size": 26, "lines_coords": [[[646.0, 91.0], [670.0, 91.0], [670.0, 247.0], [646.0, 247.0]], [[611.0, 92.0], [638.0, 92.0], [638.0, 276.0], [611.0, 276.0]]], "lines": ["トカゲ狩りの", "エキスパート!"]}, {"box": [178, 98, 260, 331], "vertical": true, "font_size": 25, "lines_coords": [[[235.0, 100.0], [259.0, 100.0], [259.0, 238.0], [235.0, 238.0]], [[205.0, 100.0], [233.0, 100.0], [229.0, 331.0], [202.0, 331.0]], [[178.0, 103.0], [201.0, 103.0], [201.0, 241.0], [178.0, 241.0]]], "lines": ["今日も大猟!", "ぎっそくご主人さまに", "ホーコクよ!"]}, {"box": [102, 131, 147, 181], "vertical": true, "font_size": 45, "lines_coords": [[[102.0, 131.0], [147.0, 131.0], [147.0, 181.0], [102.0, 181.0]]], "lines": ["わ"]}, {"box": [627, 527, 739, 551], "vertical": false, "font_size": 24, "lines_coords": [[[627.0, 527.0], [739.0, 527.0], [739.0, 551.0], [627.0, 551.0]]], "lines": ["..."]}, {"box": [295, 481, 520, 618], "vertical": false, "font_size": 67.5, "lines_coords": [[[296.0, 481.0], [482.0, 487.0], [480.0, 524.0], [295.0, 517.0]], [[407.0, 523.0], [493.0, 498.0], [520.0, 592.0], [435.0, 618.0]]], "lines": ["[がぁさん]", "これは..."]}, {"box": [216, 380, 259, 526], "vertical": true, "font_size": 43, "lines_coords": [[[216.0, 380.0], [259.0, 380.0], [259.0, 526.0], [216.0, 526.0]]], "lines": ["みてみて〜〜♪"]}, {"box": [43, 86, 80, 899], "vertical": true, "font_size": 34, "lines_coords": [[[43.0, 86.0], [77.0, 86.0], [79.0, 899.0], [45.0, 899.0]]], "lines": ["常猫大好き作者『がぁさん』この新連載・猫マンガスタート・"]}, {"box": [702, 720, 755, 813], "vertical": true, "font_size": 43, "lines_coords": [[[712.0, 720.0], [755.0, 725.0], [744.0, 813.0], [702.0, 808.0]]], "lines": ["放流!"]}, {"box": 
[469, 652, 627, 671], "vertical": false, "font_size": 19, "lines_coords": [[[469.0, 652.0], [627.0, 652.0], [627.0, 671.0], [469.0, 671.0]]], "lines": ["トカゲハンター・ナナー"]}, {"box": [693, 868, 777, 1058], "vertical": true, "font_size": 21, "lines_coords": [[[751.0, 868.0], [775.0, 868.0], [775.0, 984.0], [751.0, 984.0]], [[724.0, 868.0], [747.0, 868.0], [747.0, 1054.0], [724.0, 1054.0]], [[693.0, 868.0], [716.0, 868.0], [718.0, 1008.0], [695.0, 1008.0]], [[700.0, 1004.0], [713.0, 1004.0], [713.0, 1031.0], [700.0, 1031.0]]], "lines": ["ご主人さま", "トカゲはおキライ", "なのかしら!?", "..."]}, {"box": [101, 863, 131, 914], "vertical": true, "font_size": 30, "lines_coords": [[[101.0, 863.0], [131.0, 863.0], [131.0, 914.0], [101.0, 914.0]]], "lines": ["!"]}]}
{"version": "0.2.0-beta.6", "img_width": 827, "img_height": 1170, "blocks": [{"box": [708, 87, 771, 276], "vertical": true, "font_size": 26, "lines_coords": [[[740.0, 90.0], [767.0, 90.0], [771.0, 276.0], [743.0, 276.0]], [[708.0, 87.0], [735.0, 87.0], [735.0, 199.0], [708.0, 199.0]]], "lines": ["あたしはナナ!!", "5さい!"]}, {"box": [611, 91, 670, 276], "vertical": true, "font_size": 26, "lines_coords": [[[646.0, 91.0], [670.0, 91.0], [670.0, 247.0], [646.0, 247.0]], [[611.0, 92.0], [638.0, 92.0], [638.0, 276.0], [611.0, 276.0]]], "lines": ["トカゲ狩りの", "エキスパート!"]}, {"box": [178, 98, 260, 331], "vertical": true, "font_size": 25, "lines_coords": [[[235.0, 100.0], [259.0, 100.0], [259.0, 238.0], [235.0, 238.0]], [[205.0, 100.0], [233.0, 100.0], [229.0, 331.0], [202.0, 331.0]], [[178.0, 103.0], [201.0, 103.0], [201.0, 241.0], [178.0, 241.0]]], "lines": ["今日も大猟!", "ぎっそくご主人さまに", "ホーコクよ!"]}, {"box": [102, 131, 147, 181], "vertical": true, "font_size": 45, "lines_coords": [[[102.0, 131.0], [147.0, 131.0], [147.0, 181.0], [102.0, 181.0]]], "lines": ["わ"]}, {"box": [627, 527, 739, 551], "vertical": false, "font_size": 24, "lines_coords": [[[627.0, 527.0], [739.0, 527.0], [739.0, 551.0], [627.0, 551.0]]], "lines": ["..."]}, {"box": [295, 481, 520, 618], "vertical": false, "font_size": 67.5, "lines_coords": [[[296.0, 481.0], [482.0, 487.0], [480.0, 524.0], [295.0, 517.0]], [[407.0, 523.0], [493.0, 498.0], [520.0, 592.0], [435.0, 618.0]]], "lines": ["[がぁさん]", "これは..."]}, {"box": [216, 380, 259, 526], "vertical": true, "font_size": 43, "lines_coords": [[[216.0, 380.0], [259.0, 380.0], [259.0, 526.0], [216.0, 526.0]]], "lines": ["みてみて〜〜♪"]}, {"box": [43, 86, 80, 899], "vertical": true, "font_size": 34, "lines_coords": [[[43.0, 86.0], [77.0, 86.0], [79.0, 899.0], [45.0, 899.0]]], "lines": ["常猫大好き作者『がぁさん』この新連載・猫マンガスタート・"]}, {"box": [702, 720, 755, 813], "vertical": true, "font_size": 43, "lines_coords": [[[712.0, 720.0], [755.0, 725.0], [744.0, 813.0], [702.0, 808.0]]], "lines": ["放流!"]}, 
{"box": [469, 652, 627, 671], "vertical": false, "font_size": 19, "lines_coords": [[[469.0, 652.0], [627.0, 652.0], [627.0, 671.0], [469.0, 671.0]]], "lines": ["トカゲハンター・ナナー"]}, {"box": [693, 868, 777, 1058], "vertical": true, "font_size": 21, "lines_coords": [[[751.0, 868.0], [775.0, 868.0], [775.0, 984.0], [751.0, 984.0]], [[724.0, 868.0], [747.0, 868.0], [747.0, 1054.0], [724.0, 1054.0]], [[693.0, 868.0], [716.0, 868.0], [718.0, 1008.0], [695.0, 1008.0]], [[700.0, 1004.0], [713.0, 1004.0], [713.0, 1031.0], [700.0, 1031.0]]], "lines": ["ご主人さま", "トカゲはおキライ", "なのかしら!?", "..."]}, {"box": [101, 863, 131, 914], "vertical": true, "font_size": 30, "lines_coords": [[[101.0, 863.0], [131.0, 863.0], [131.0, 914.0], [101.0, 914.0]]], "lines": ["!"]}]}
2 changes: 1 addition & 1 deletion tests/data/expected_results/test0/_ocr/vol1/001a.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"version": "0.1.8", "img_width": 827, "img_height": 1170, "blocks": [{"box": [688, 26, 718, 161], "vertical": true, "font_size": 24, "lines_coords": [[[688.0, 26.0], [712.0, 26.0], [715.0, 161.0], [691.0, 161.0]]], "lines": ["すごいのハッ"]}, {"box": [584, 70, 652, 138], "vertical": true, "font_size": 32, "lines_coords": [[[615.0, 71.0], [652.0, 71.0], [652.0, 124.0], [615.0, 124.0]], [[584.0, 71.0], [611.0, 70.0], [614.0, 137.0], [588.0, 138.0]]], "lines": ["あっ", "またっ"]}, {"box": [347, 38, 427, 199], "vertical": true, "font_size": 24, "lines_coords": [[[403.0, 70.0], [427.0, 70.0], [427.0, 199.0], [403.0, 199.0]], [[374.0, 57.0], [397.0, 57.0], [397.0, 197.0], [374.0, 197.0]], [[347.0, 42.0], [371.0, 42.0], [371.0, 174.0], [347.0, 174.0]]], "lines": ["ツドの下なら", "やつされずに", "ゆっくり..."]}, {"box": [257, 119, 305, 246], "vertical": true, "font_size": 22, "lines_coords": [[[284.0, 121.0], [303.0, 121.0], [303.0, 246.0], [284.0, 246.0]], [[257.0, 121.0], [282.0, 121.0], [282.0, 188.0], [257.0, 188.0]]], "lines": ["出てきなさい", "ナナ!"]}, {"box": [605, 545, 630, 612], "vertical": true, "font_size": 25, "lines_coords": [[[605.0, 545.0], [630.0, 545.0], [630.0, 612.0], [605.0, 612.0]]], "lines": ["あが..."]}, {"box": [493, 350, 526, 436], "vertical": true, "font_size": 33, "lines_coords": [[[493.0, 350.0], [526.0, 350.0], [526.0, 436.0], [493.0, 436.0]]], "lines": ["うわ!"]}, {"box": [302, 329, 435, 461], "vertical": true, "font_size": 29, "lines_coords": [[[405.0, 342.0], [435.0, 342.0], [435.0, 422.0], [405.0, 422.0]], [[373.0, 342.0], [402.0, 342.0], [402.0, 422.0], [373.0, 422.0]], [[333.0, 330.0], [362.0, 329.0], [365.0, 460.0], [336.0, 461.0]], [[302.0, 330.0], [331.0, 330.0], [331.0, 411.0], [302.0, 411.0]]], "lines": ["なんだ", "これ!?", "トカゲじゃ", "ない!?"]}, {"box": [213, 327, 272, 460], "vertical": true, "font_size": 26, "lines_coords": [[[244.0, 327.0], [271.0, 327.0], [271.0, 460.0], [244.0, 460.0]], [[213.0, 330.0], [237.0, 330.0], [237.0, 433.0], [213.0, 433.0]]], "lines": ["これって...", 
"ヤモリ!?"]}, {"box": [46, 333, 131, 450], "vertical": true, "font_size": 27, "lines_coords": [[[99.0, 333.0], [131.0, 333.0], [131.0, 386.0], [99.0, 386.0]], [[75.0, 338.0], [99.0, 338.0], [99.0, 450.0], [75.0, 450.0]], [[46.0, 338.0], [70.0, 338.0], [70.0, 406.0], [46.0, 406.0]]], "lines": ["あの", "田舎とかに", "いる!?"]}, {"box": [176, 803, 225, 909], "vertical": true, "font_size": 23, "lines_coords": [[[198.0, 803.0], [225.0, 803.0], [225.0, 851.0], [198.0, 851.0]], [[177.0, 806.0], [196.0, 806.0], [196.0, 908.0], [177.0, 908.0]]], "lines": ["でも", "リリース!"]}, {"box": [676, 798, 724, 968], "vertical": true, "font_size": 20, "lines_coords": [[[701.0, 798.0], [724.0, 798.0], [724.0, 968.0], [701.0, 968.0]], [[676.0, 800.0], [695.0, 800.0], [695.0, 968.0], [676.0, 968.0]]], "lines": ["どっから捕まえて", "きたんだよお前!"]}, {"box": [603, 973, 680, 1155], "vertical": true, "font_size": 20, "lines_coords": [[[661.0, 973.0], [680.0, 973.0], [680.0, 1133.0], [661.0, 1133.0]], [[631.0, 974.0], [651.0, 974.0], [651.0, 1155.0], [631.0, 1155.0]], [[603.0, 975.0], [624.0, 975.0], [624.0, 1088.0], [603.0, 1088.0]]], "lines": ["初めて見たよ!", "こんな住宅地にも", "いるんだ!"]}, {"box": [247, 852, 273, 1011], "vertical": true, "font_size": 24, "lines_coords": [[[247.0, 853.0], [271.0, 853.0], [271.0, 1011.0], [247.0, 1011.0]]], "lines": ["めずらしいな〜"]}]}
{"version": "0.2.0-beta.6", "img_width": 827, "img_height": 1170, "blocks": [{"box": [688, 26, 718, 161], "vertical": true, "font_size": 24, "lines_coords": [[[688.0, 26.0], [712.0, 26.0], [715.0, 161.0], [691.0, 161.0]]], "lines": ["すごいのハッ"]}, {"box": [584, 70, 652, 138], "vertical": true, "font_size": 32, "lines_coords": [[[615.0, 71.0], [652.0, 71.0], [652.0, 124.0], [615.0, 124.0]], [[584.0, 71.0], [611.0, 70.0], [614.0, 137.0], [588.0, 138.0]]], "lines": ["あっ", "またっ"]}, {"box": [347, 38, 427, 199], "vertical": true, "font_size": 24, "lines_coords": [[[403.0, 70.0], [427.0, 70.0], [427.0, 199.0], [403.0, 199.0]], [[374.0, 57.0], [397.0, 57.0], [397.0, 197.0], [374.0, 197.0]], [[347.0, 42.0], [371.0, 42.0], [371.0, 174.0], [347.0, 174.0]]], "lines": ["ツドの下なら", "やつされずに", "ゆっくり..."]}, {"box": [257, 119, 305, 246], "vertical": true, "font_size": 22, "lines_coords": [[[284.0, 121.0], [303.0, 121.0], [303.0, 246.0], [284.0, 246.0]], [[257.0, 121.0], [282.0, 121.0], [282.0, 188.0], [257.0, 188.0]]], "lines": ["出てきなさい", "ナナ!"]}, {"box": [605, 545, 630, 612], "vertical": true, "font_size": 25, "lines_coords": [[[605.0, 545.0], [630.0, 545.0], [630.0, 612.0], [605.0, 612.0]]], "lines": ["あが..."]}, {"box": [493, 350, 526, 436], "vertical": true, "font_size": 33, "lines_coords": [[[493.0, 350.0], [526.0, 350.0], [526.0, 436.0], [493.0, 436.0]]], "lines": ["うわ!"]}, {"box": [302, 329, 435, 461], "vertical": true, "font_size": 29, "lines_coords": [[[405.0, 342.0], [435.0, 342.0], [435.0, 422.0], [405.0, 422.0]], [[373.0, 342.0], [402.0, 342.0], [402.0, 422.0], [373.0, 422.0]], [[333.0, 330.0], [362.0, 329.0], [365.0, 460.0], [336.0, 461.0]], [[302.0, 330.0], [331.0, 330.0], [331.0, 411.0], [302.0, 411.0]]], "lines": ["なんだ", "これ!?", "トカゲじゃ", "ない!?"]}, {"box": [213, 327, 272, 460], "vertical": true, "font_size": 26, "lines_coords": [[[244.0, 327.0], [271.0, 327.0], [271.0, 460.0], [244.0, 460.0]], [[213.0, 330.0], [237.0, 330.0], [237.0, 433.0], [213.0, 433.0]]], "lines": 
["これって...", "ヤモリ!?"]}, {"box": [46, 333, 131, 450], "vertical": true, "font_size": 27, "lines_coords": [[[99.0, 333.0], [131.0, 333.0], [131.0, 386.0], [99.0, 386.0]], [[75.0, 338.0], [99.0, 338.0], [99.0, 450.0], [75.0, 450.0]], [[46.0, 338.0], [70.0, 338.0], [70.0, 406.0], [46.0, 406.0]]], "lines": ["あの", "田舎とかに", "いる!?"]}, {"box": [176, 803, 225, 909], "vertical": true, "font_size": 23, "lines_coords": [[[198.0, 803.0], [225.0, 803.0], [225.0, 851.0], [198.0, 851.0]], [[177.0, 806.0], [196.0, 806.0], [196.0, 908.0], [177.0, 908.0]]], "lines": ["でも", "リリース!"]}, {"box": [676, 798, 724, 968], "vertical": true, "font_size": 20, "lines_coords": [[[701.0, 798.0], [724.0, 798.0], [724.0, 968.0], [701.0, 968.0]], [[676.0, 800.0], [695.0, 800.0], [695.0, 968.0], [676.0, 968.0]]], "lines": ["どっから捕まえて", "きたんだよお前!"]}, {"box": [603, 973, 680, 1155], "vertical": true, "font_size": 20, "lines_coords": [[[661.0, 973.0], [680.0, 973.0], [680.0, 1133.0], [661.0, 1133.0]], [[631.0, 974.0], [651.0, 974.0], [651.0, 1155.0], [631.0, 1155.0]], [[603.0, 975.0], [624.0, 975.0], [624.0, 1088.0], [603.0, 1088.0]]], "lines": ["初めて見たよ!", "こんな住宅地にも", "いるんだ!"]}, {"box": [247, 852, 273, 1011], "vertical": true, "font_size": 24, "lines_coords": [[[247.0, 853.0], [271.0, 853.0], [271.0, 1011.0], [247.0, 1011.0]]], "lines": ["めずらしいな〜"]}]}
Loading

0 comments on commit 89a8652

Please sign in to comment.