From 7303a8debc21ec503cde7a1dadc484a03f1c8034 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 5 Dec 2022 13:57:06 +1000 Subject: [PATCH 01/59] Add tests for chapter sorting --- beetsplug/audible.py | 9 ++- tests/test_audible.py | 124 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 tests/test_audible.py diff --git a/beetsplug/audible.py b/beetsplug/audible.py index efa3348..a047b2d 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -16,6 +16,11 @@ from .goodreads import get_original_date +def sort_items(items): + naturally_sorted_items = os_sorted(items, key=lambda i: util.bytestring_path(i.path)) + return naturally_sorted_items + + class Audible(BeetsPlugin): data_source = "Audible" @@ -187,7 +192,7 @@ def candidates(self, items, artist, album, va_likely, extra_tags=None): # This does work correctly when the album has multiple disks # using the bytestring_path function from Beets is needed for correctness # I was noticing inaccurate sorting if using str to convert paths to strings - naturally_sorted_items = os_sorted(items, key=lambda i: util.bytestring_path(i.path)) + naturally_sorted_items = sort_items(items) a.tracks = [ TrackInfo(**common_track_attributes, title=item.title, length=item.length, index=i + 1) for i, item in enumerate(naturally_sorted_items) @@ -237,7 +242,7 @@ def get_album_from_yaml_metadata(self, data, items): "subtitle": subtitle, } - naturally_sorted_items = os_sorted(items, key=lambda i: util.bytestring_path(i.path)) + naturally_sorted_items = sort_items(items) # populate tracks by using some of the info from the files being imported tracks = [ TrackInfo( diff --git a/tests/test_audible.py b/tests/test_audible.py new file mode 100644 index 0000000..5ec4a6b --- /dev/null +++ b/tests/test_audible.py @@ -0,0 +1,124 @@ +from copy import deepcopy +from pathlib import Path +from random import shuffle +from typing import List, Optional, Sequence, Tuple +from 
unittest.mock import MagicMock + +import pytest +from beets.library import Item + +import beetsplug.audible as audible + + +def create_mock_item(item_name: str, item_index: int, filename: Optional[str] = None) -> MagicMock: + out = MagicMock() + out.item_name = item_name + out.track = item_index + out.path = bytes(Path(".", "test_audiobook", filename if filename else item_name + ".mp3").resolve()) + out.__str__.return_value = f"{item_name} {out.path}" + return out + + +def randomise_lists(lists: Tuple[List, ...], n: int = 5) -> Sequence[List]: + out = [] + for l in lists: + for i in range(1, n): + shuffle(l) + out.append(deepcopy(l)) + return out + + +chapter_lists = ( + [ + create_mock_item("01", 0), + create_mock_item("02", 0), + create_mock_item("03", 0), + create_mock_item("04", 0), + create_mock_item("05", 0), + create_mock_item("06", 0), + create_mock_item("07", 0), + create_mock_item("08", 0), + create_mock_item("09", 0), + create_mock_item("10", 0), + create_mock_item("11", 0), + create_mock_item("12", 0), + create_mock_item("13", 0), + ], + [ + create_mock_item("Chapter 01", 0), + create_mock_item("Chapter 02", 0), + create_mock_item("Chapter 03", 0), + create_mock_item("Chapter 04", 0), + create_mock_item("Chapter 05", 0), + create_mock_item("Chapter 06", 0), + create_mock_item("Chapter 07", 0), + create_mock_item("Chapter 08", 0), + create_mock_item("Chapter 09", 0), + create_mock_item("Chapter 10", 0), + ], + [ + create_mock_item("Chapter - 01", 0), + create_mock_item("Chapter - 02", 0), + create_mock_item("Chapter - 03", 0), + create_mock_item("Chapter - 04", 0), + create_mock_item("Chapter - 05", 0), + create_mock_item("Chapter - 06", 0), + create_mock_item("Chapter - 07", 0), + create_mock_item("Chapter - 08", 0), + create_mock_item("Chapter - 09", 0), + create_mock_item("Chapter - 10", 0), + create_mock_item("Chapter - 11", 0), + create_mock_item("Chapter - 12", 0), + create_mock_item("Chapter - 13", 0), + ], + [ + create_mock_item("Chapter-01", 0), 
+ create_mock_item("Chapter-02", 0), + create_mock_item("Chapter-03", 0), + create_mock_item("Chapter-04", 0), + create_mock_item("Chapter-05", 0), + create_mock_item("Chapter-06", 0), + create_mock_item("Chapter-07", 0), + create_mock_item("Chapter-08", 0), + create_mock_item("Chapter-09", 0), + create_mock_item("Chapter-10", 0), + create_mock_item("Chapter-11", 0), + create_mock_item("Chapter-12", 0), + create_mock_item("Chapter-13", 0), + ], + [ + create_mock_item("Mediocre-Part01", 0), + create_mock_item("Mediocre-Part02", 0), + create_mock_item("Mediocre-Part03", 0), + create_mock_item("Mediocre-Part04", 0), + create_mock_item("Mediocre-Part05", 0), + create_mock_item("Mediocre-Part06", 0), + create_mock_item("Mediocre-Part07", 0), + create_mock_item("Mediocre-Part08", 0), + create_mock_item("Mediocre-Part09", 0), + create_mock_item("Mediocre-Part10", 0), + create_mock_item("Mediocre-Part11", 0), + create_mock_item("Mediocre-Part12", 0), + ], +) + + +@pytest.mark.parametrize("items", chapter_lists) +def test_sort_items(items: List[Item]): + expected = deepcopy(items) + result = audible.sort_items(items) + assert [str(result[i]) == str(e) for i, e in enumerate(expected)] + + +@pytest.mark.parametrize("items", chapter_lists) +def test_sort_items_reversed(items: List[Item]): + expected = deepcopy(items) + result = audible.sort_items(reversed(items)) + assert [str(result[i]) == str(e) for i, e in enumerate(expected)] + + +@pytest.mark.parametrize("items", randomise_lists(chapter_lists, 10)) +def test_sort_items_randomised(items: List[Item]): + expected = deepcopy(items) + result = audible.sort_items(items) + assert [str(result[i]) == str(e) for i, e in enumerate(expected)] From e6588c160c5427a894a4eb00f86cc521a21c3a31 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 5 Dec 2022 16:54:52 +1000 Subject: [PATCH 02/59] Add test for Audnex API call --- beetsplug/api.py | 11 +++++++++-- tests/test_api.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 41 
insertions(+), 2 deletions(-) create mode 100644 tests/test_api.py diff --git a/beetsplug/api.py b/beetsplug/api.py index 04a1683..9a1c53d 100644 --- a/beetsplug/api.py +++ b/beetsplug/api.py @@ -5,6 +5,8 @@ from urllib import parse, request from urllib.error import HTTPError +import requests + from .book import Book, BookChapters AUDIBLE_ENDPOINT = "https://api.audible.com/1.0/catalog/products" @@ -36,13 +38,18 @@ def search_goodreads(api_key: str, keywords: str) -> ET.Element: def get_book_info(asin: str) -> Tuple[Book, BookChapters]: - book_response = json.loads(make_request(f"{AUDNEX_ENDPOINT}/books/{asin}")) - chapter_response = json.loads(make_request(f"{AUDNEX_ENDPOINT}/books/{asin}/chapters")) + book_response, chapter_response = call_audnex_for_book_info(asin) book = Book.from_audnex_book(book_response) book_chapters = BookChapters.from_audnex_chapter_info(chapter_response) return book, book_chapters +def call_audnex_for_book_info(asin: str) -> Tuple[Dict, Dict]: + book_response = json.loads(make_request(f"{AUDNEX_ENDPOINT}/books/{asin}")) + chapter_response = json.loads(make_request(f"{AUDNEX_ENDPOINT}/books/{asin}/chapters")) + return book_response, chapter_response + + def make_request(url: str) -> bytes: """Makes a request to the specified url and returns received response The request will be retried up to 3 times in case of failure. 
diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..d0fcef6 --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,32 @@ +from typing import Dict, Tuple +from xml.etree.ElementTree import Element + +import pytest + +import beetsplug.api as api + + +@pytest.mark.parametrize( + ("test_asin", "expected_dicts", "expected_chapters"), + ( + ( + "1529353823", + ( + { + "asin": "1529353823", + "formatType": "unabridged", + "language": "english", + }, + { + "brandIntroDurationMs": 1625, + }, + ), + 12, + ), + ), +) +def test_call_audnex_for_book_info(test_asin: str, expected_dicts: Tuple[Dict, Dict], expected_chapters: int): + result = api.call_audnex_for_book_info(test_asin) + assert expected_chapters == len(result[1]["chapters"]) + assert all([expected_dicts[0].get(k) == result[0][k] for k in expected_dicts[0].keys()]) + assert all([expected_dicts[1].get(k) == result[1][k] for k in expected_dicts[1].keys()]) From 0b49b90f22555e9628455515c8022bd3d0320d9a Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 11 Dec 2022 16:48:43 +1000 Subject: [PATCH 03/59] Fix tests --- tests/test_audible.py | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/tests/test_audible.py b/tests/test_audible.py index 5ec4a6b..acc0844 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -6,6 +6,7 @@ import pytest from beets.library import Item +from beets.util import bytestring_path import beetsplug.audible as audible @@ -14,17 +15,18 @@ def create_mock_item(item_name: str, item_index: int, filename: Optional[str] = out = MagicMock() out.item_name = item_name out.track = item_index - out.path = bytes(Path(".", "test_audiobook", filename if filename else item_name + ".mp3").resolve()) + out.path = bytestring_path(str(Path(".", "test_audiobook", filename if filename else item_name + ".mp3").resolve())) out.__str__.return_value = f"{item_name} {out.path}" return out -def randomise_lists(lists: Tuple[List, 
...], n: int = 5) -> Sequence[List]: +def randomise_lists(lists: Tuple[List, ...], n: int = 5) -> Sequence[Tuple[List, List]]: out = [] for l in lists: for i in range(1, n): - shuffle(l) - out.append(deepcopy(l)) + copy = deepcopy(l) + shuffle(copy) + out.append((l, copy)) return out @@ -100,6 +102,24 @@ def randomise_lists(lists: Tuple[List, ...], n: int = 5) -> Sequence[List]: create_mock_item("Mediocre-Part11", 0), create_mock_item("Mediocre-Part12", 0), ], + [ + create_mock_item("Chapter 1 The DC Sniper The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 2 Terrorism The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 3 Brothers in the Arena The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 4 Call Me God The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 5 Close to Home The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 6 A Local Case The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 7 Demands The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 8 The Profile The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 9 Suspects The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 10 Prelude The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 11 The Arrest The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 12 Revenge The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item( + "Chapter 13 The Trials of a Teenager The Untold Story of the DC Sniper Investigation - 1.m4b", 0 + ), + create_mock_item("Chapter 14 Last Words The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + ], ) @@ -107,18 +127,17 @@ def 
randomise_lists(lists: Tuple[List, ...], n: int = 5) -> Sequence[List]: def test_sort_items(items: List[Item]): expected = deepcopy(items) result = audible.sort_items(items) - assert [str(result[i]) == str(e) for i, e in enumerate(expected)] + assert all([str(result[i]) == str(e) for i, e in enumerate(expected)]) @pytest.mark.parametrize("items", chapter_lists) def test_sort_items_reversed(items: List[Item]): expected = deepcopy(items) result = audible.sort_items(reversed(items)) - assert [str(result[i]) == str(e) for i, e in enumerate(expected)] + assert all([str(result[i]) == str(e) for i, e in enumerate(expected)]) -@pytest.mark.parametrize("items", randomise_lists(chapter_lists, 10)) -def test_sort_items_randomised(items: List[Item]): - expected = deepcopy(items) +@pytest.mark.parametrize("correct, items", randomise_lists(chapter_lists, 10)) +def test_sort_items_randomised(correct: List[Item], items: List[Item]): result = audible.sort_items(items) - assert [str(result[i]) == str(e) for i, e in enumerate(expected)] + assert all([str(result[i]) == str(e) for i, e in enumerate(correct)]) From 1bc63fdc9fb01e5e1f7e9cb521c49344d1c78781 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 11 Dec 2022 17:07:34 +1000 Subject: [PATCH 04/59] Remove import --- beetsplug/api.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/beetsplug/api.py b/beetsplug/api.py index 9a1c53d..1af4c98 100644 --- a/beetsplug/api.py +++ b/beetsplug/api.py @@ -5,8 +5,6 @@ from urllib import parse, request from urllib.error import HTTPError -import requests - from .book import Book, BookChapters AUDIBLE_ENDPOINT = "https://api.audible.com/1.0/catalog/products" From 32e140a3bda2e98d35d58621c3bbc7e0fcc850f5 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 11 Dec 2022 17:14:12 +1000 Subject: [PATCH 05/59] Fix reference name --- beetsplug/audible.py | 2 +- tests/test_audible.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py 
index a047b2d..7dca755 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -17,7 +17,7 @@ def sort_items(items): - naturally_sorted_items = os_sorted(items, key=lambda i: util.bytestring_path(i.path)) + naturally_sorted_items = natsorted(items, key=lambda i: i.title) return naturally_sorted_items diff --git a/tests/test_audible.py b/tests/test_audible.py index acc0844..af05929 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -13,7 +13,7 @@ def create_mock_item(item_name: str, item_index: int, filename: Optional[str] = None) -> MagicMock: out = MagicMock() - out.item_name = item_name + out.title = item_name out.track = item_index out.path = bytestring_path(str(Path(".", "test_audiobook", filename if filename else item_name + ".mp3").resolve())) out.__str__.return_value = f"{item_name} {out.path}" From 5a8dbe65a086e8fdddfbe6081bb40daa1b8481a9 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 11 Dec 2022 17:15:21 +1000 Subject: [PATCH 06/59] Add additional test --- tests/test_api.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_api.py b/tests/test_api.py index d0fcef6..064e82a 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -30,3 +30,9 @@ def test_call_audnex_for_book_info(test_asin: str, expected_dicts: Tuple[Dict, D assert expected_chapters == len(result[1]["chapters"]) assert all([expected_dicts[0].get(k) == result[0][k] for k in expected_dicts[0].keys()]) assert all([expected_dicts[1].get(k) == result[1][k] for k in expected_dicts[1].keys()]) + + +@pytest.mark.parametrize("test_asin", ("1529353823", "B00KDKSKFO", "1529063094", "B0B3PL1HQL")) +def test_get_book_info(test_asin: str): + # Just checking to make sure that there are no exceptions thrown + _, _ = api.get_book_info(test_asin) From 2d90c5f39462d2c45fb0fa9ce9cd8414af598dbf Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 11 Dec 2022 17:28:59 +1000 Subject: [PATCH 07/59] Fix import --- beetsplug/audible.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 7dca755..050ab7e 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -10,7 +10,7 @@ from beets import importer, util from beets.autotag.hooks import AlbumInfo, TrackInfo from beets.plugins import BeetsPlugin, get_distance -from natsort import os_sorted +from natsort import natsorted from .api import get_book_info, make_request, search_audible from .goodreads import get_original_date From 94cce6cef9e7d69a25639e839f34785a2cdd3682 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 11 Dec 2022 17:30:17 +1000 Subject: [PATCH 08/59] Add test cases --- tests/test_audible.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/test_audible.py b/tests/test_audible.py index af05929..668bc29 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -46,6 +46,18 @@ def randomise_lists(lists: Tuple[List, ...], n: int = 5) -> Sequence[Tuple[List, create_mock_item("12", 0), create_mock_item("13", 0), ], + [ + create_mock_item("Chapter 1", 0), + create_mock_item("Chapter 2", 0), + create_mock_item("Chapter 3", 0), + create_mock_item("Chapter 4", 0), + create_mock_item("Chapter 5", 0), + create_mock_item("Chapter 6", 0), + create_mock_item("Chapter 7", 0), + create_mock_item("Chapter 8", 0), + create_mock_item("Chapter 9", 0), + create_mock_item("Chapter 10", 0), + ], [ create_mock_item("Chapter 01", 0), create_mock_item("Chapter 02", 0), @@ -120,6 +132,21 @@ def randomise_lists(lists: Tuple[List, ...], n: int = 5) -> Sequence[Tuple[List, ), create_mock_item("Chapter 14 Last Words The Untold Story of the DC Sniper Investigation - 1.m4b", 0), ], + [ + create_mock_item("Prologue", 0), + create_mock_item("Chapter 1", 0), + create_mock_item("Chapter 2", 0), + create_mock_item("Chapter 3", 0), + create_mock_item("Chapter 4", 0), + create_mock_item("Chapter 5", 0), + create_mock_item("Chapter 6", 0), + create_mock_item("Chapter 7", 0), + 
create_mock_item("Chapter 8", 0), + create_mock_item("Chapter 9", 0), + create_mock_item("Chapter 10", 0), + create_mock_item("End", 0), + create_mock_item("Author's Note", 0), + ], ) From 0205ebdce60381fd3c60812e1c3f840a02d83fb5 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 11 Dec 2022 17:32:52 +1000 Subject: [PATCH 09/59] Add typing for method --- beetsplug/audible.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 050ab7e..3f39c30 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -4,11 +4,13 @@ import re import urllib.error from tempfile import NamedTemporaryFile +from typing import List import mediafile import yaml from beets import importer, util from beets.autotag.hooks import AlbumInfo, TrackInfo +from beets.library import Item from beets.plugins import BeetsPlugin, get_distance from natsort import natsorted @@ -16,7 +18,7 @@ from .goodreads import get_original_date -def sort_items(items): +def sort_items(items: List[Item]): naturally_sorted_items = natsorted(items, key=lambda i: i.title) return naturally_sorted_items From cb9c74464167ad5998047d7147c4142e68f6516c Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 17 Dec 2022 16:48:39 +1000 Subject: [PATCH 10/59] Add alternative method of sorting chapters --- beetsplug/audible.py | 99 ++++++++++++++++++++++++++++++++++++------- requirements.txt | 3 +- tests/test_audible.py | 59 ++++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 16 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 3f39c30..fdb3fbf 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -3,15 +3,18 @@ import pathlib import re import urllib.error +from copy import deepcopy from tempfile import NamedTemporaryFile -from typing import List +from typing import List, Optional, Tuple +import beets.autotag.hooks +import Levenshtein import mediafile import yaml from beets import importer, util from 
beets.autotag.hooks import AlbumInfo, TrackInfo from beets.library import Item -from beets.plugins import BeetsPlugin, get_distance +from beets.plugins import BeetsPlugin from natsort import natsorted from .api import get_book_info, make_request, search_audible @@ -23,6 +26,66 @@ def sort_items(items: List[Item]): return naturally_sorted_items +def find_regular_affixes(example_strings: List[str]) -> Tuple[str, str]: + if len(example_strings) <= 1: + return "", "" + # find prefixes + prefix = _find_prefix(example_strings) + suffix = _find_prefix([s[::-1] for s in example_strings]) + suffix = suffix[::-1] + + return prefix, suffix + + +def _find_prefix(example_strings: List[str]) -> str: + i = 0 + for i in range(0, len(example_strings[0]) + 1): + if not all([e[:i] == example_strings[0][:i] for e in example_strings]): + i += -1 + break + if i <= 0: + return "" + prefix = example_strings[0][:i] + return prefix + + +def strip_affixes(token: str, affixes: Tuple[str, str]) -> str: + affixes = (re.escape(affixes[0]), re.escape(affixes[1])) + token = re.sub(rf"^{affixes[0]}", "", token) + token = re.sub(rf"{affixes[1]}$", "", token) + return token + + +def specialised_levenshtein(token1: str, token2: str, ignored_affixes: Optional[Tuple[str, str]] = None) -> int: + if ignored_affixes: + token1 = strip_affixes(token1, ignored_affixes) + token2 = strip_affixes(token2, ignored_affixes) + operations = Levenshtein.editops(token1, token2) + total_cost = 0 + for operation in operations: + op, s1, s2 = operation + if s1 >= len(token1): + test1 = "" + else: + test1 = token1[s1] + if s2 >= len(token2): + test2 = "" + else: + test2 = token2[s2] + if any([re.match(r"\d", s) for s in (test1, test2)]): + total_cost += 10 + else: + total_cost += 1 + return total_cost + + +def normalised_track_indices(tracks: List[Item]) -> List[Item]: + tracks = sorted(tracks, key=lambda t: t.index) + for i, track in enumerate(tracks): + track.index = i + return tracks + + class Audible(BeetsPlugin): 
data_source = "Audible" @@ -111,11 +174,13 @@ def __init__(self): self.add_media_field("subtitle", subtitle) def album_distance(self, items, album_info, mapping): - dist = get_distance(data_source=self.data_source, info=album_info, config=self.config) + dist = beets.autotag.hooks.Distance() return dist def track_distance(self, item, track_info): - return get_distance(data_source=self.data_source, info=track_info, config=self.config) + dist = beets.autotag.hooks.Distance() + dist.add_string("track_title", item.title, track_info.title) + return dist def candidates(self, items, artist, album, va_likely, extra_tags=None): """Returns a list of AlbumInfo objects for Audible search results @@ -188,17 +253,21 @@ def candidates(self, items, artist, album, va_likely, extra_tags=None): del common_track_attributes["length"] del common_track_attributes["title"] - # Ignore existing track numbers, and instead sort based on file path - # Use natural sorting instead of lexigraphical to avoid this order: - # chapter 1, 10, 12, ..., 19, 2, etc - # This does work correctly when the album has multiple disks - # using the bytestring_path function from Beets is needed for correctness - # I was noticing inaccurate sorting if using str to convert paths to strings - naturally_sorted_items = sort_items(items) - a.tracks = [ - TrackInfo(**common_track_attributes, title=item.title, length=item.length, index=i + 1) - for i, item in enumerate(naturally_sorted_items) - ] + all_remote_chapters: List = deepcopy(a.tracks) + matches = [] + affixes = find_regular_affixes([c.title for c in items]) + for chapter in items: + # need a string distance algorithm that penalises number replacements more + best_match = list( + sorted( + all_remote_chapters, + key=lambda c: specialised_levenshtein(chapter.title, c.title, affixes) + ) + ) + best_match = best_match[0] + matches.append(best_match) + all_remote_chapters.remove(best_match) + a.tracks = normalised_track_indices(matches) return albums def 
get_album_from_yaml_metadata(self, data, items): diff --git a/requirements.txt b/requirements.txt index 714f200..50d1002 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ beets-copyartifacts3 markdownify -natsort \ No newline at end of file +natsort +levenshtein diff --git a/tests/test_audible.py b/tests/test_audible.py index 668bc29..ac2e5c9 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -168,3 +168,62 @@ def test_sort_items_reversed(items: List[Item]): def test_sort_items_randomised(correct: List[Item], items: List[Item]): result = audible.sort_items(items) assert all([str(result[i]) == str(e) for i, e in enumerate(correct)]) + + +@pytest.mark.parametrize( + ("test_token1", "test_token2", "expected"), + ( + ("example", "example", 0), + ("exampl", "example", 1), + ("example1", "example", 10), + ("example1", "example2", 10), + ("example1", "example12", 10), + ("example21", "example12", 20), + ("example1", "example1 test", 5), + ), +) +def test_specialised_levenshtein(test_token1: str, test_token2: str, expected: int): + result = audible.specialised_levenshtein(test_token1, test_token2) + assert isinstance(result, int) + assert result == expected + + +@pytest.mark.parametrize( + ("test_tokens", "expected_prefix", "expected_suffix"), + ( + ([], "", ""), + ( + [ + "test", + ], + "", + "", + ), + (["test", "test"], "test", "test"), + (["test1", "test2"], "test", ""), + (["testing", "test2"], "test", ""), + (["testing", "test2"], "test", ""), + (["prefix1suffix", "prefix2suffix"], "prefix", "suffix"), + ), +) +def test_find_regular_affixes(test_tokens: List[str], expected_prefix: str, expected_suffix: str): + results = audible.find_regular_affixes(test_tokens) + assert results[0] == expected_prefix + assert results[1] == expected_suffix + + +@pytest.mark.parametrize( + ("test_token", "test_affixes", "expected"), + ( + ("example", ("", ""), "example"), + ("test", ("test", ""), ""), + ("test", ("", "test"), ""), + 
('testexampletest',('test',''), 'exampletest'), + ('testexampletest', ('','test'), 'testexample'), + ('test.mp3', ('','.mp3'), 'test'), + ('testxmp3',('','.mp3'), 'testxmp3'), + ), +) +def test_strip_affixes(test_token: str, test_affixes: Tuple[str, str], expected: str): + result = audible.strip_affixes(test_token, test_affixes) + assert result == expected From 581987476fe170dd166bbc6533c6f292b061fc96 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 17 Dec 2022 18:26:05 +1000 Subject: [PATCH 11/59] Add more complicated track matching algorithm --- beetsplug/audible.py | 63 ++++++++++++++++++++++++++++++++----------- tests/test_audible.py | 8 +++--- 2 files changed, 51 insertions(+), 20 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index fdb3fbf..1e8882b 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -26,6 +26,51 @@ def sort_items(items: List[Item]): return naturally_sorted_items +def sort_tracks(album: AlbumInfo, items: List[Item]) -> List[Item]: + # if there's only one item, return as is + if len(items) == 1: + return items + + affixes = find_regular_affixes([c.title for c in items]) + stripped_titles = [strip_affixes(i.title, affixes) for i in items] + average_title_change = calculate_average_levenshtein_difference(stripped_titles) + + # if there are only a few track differences from each to the other, it's likely they're numbered and don't have + # otherwise unique titles, so just sort them as best as possible + + # magic number here, it's a judgement call + if max(average_title_change) < 4: + if len(items) == len(album.tracks): + # if the number of chapters are the same, then it's likely that they are mislabelled but correlate + return album.tracks + else: + matches = natsorted(items, key=lambda t: t.title) + else: + all_remote_chapters: List = deepcopy(album.tracks) + matches = [] + for chapter in items: + # need a string distance algorithm that penalises number replacements more + best_match = list( + 
sorted(all_remote_chapters, key=lambda c: specialised_levenshtein(chapter.title, c.title, affixes)) + ) + best_match = best_match[0] + matches.append(best_match) + all_remote_chapters.remove(best_match) + tracks = normalised_track_indices(matches) + return tracks + + +def calculate_average_levenshtein_difference(tokens: List[str]) -> List[float]: + out = [] + for token in tokens: + temp = [] + for other in tokens: + temp.append(Levenshtein.distance(token, other)) + num = len(tokens) - 1 + out.append(sum(temp) / num) + return out + + def find_regular_affixes(example_strings: List[str]) -> Tuple[str, str]: if len(example_strings) <= 1: return "", "" @@ -80,7 +125,7 @@ def specialised_levenshtein(token1: str, token2: str, ignored_affixes: Optional[ def normalised_track_indices(tracks: List[Item]) -> List[Item]: - tracks = sorted(tracks, key=lambda t: t.index) + tracks = sorted(tracks, key=lambda t: t.track) for i, track in enumerate(tracks): track.index = i return tracks @@ -253,21 +298,7 @@ def candidates(self, items, artist, album, va_likely, extra_tags=None): del common_track_attributes["length"] del common_track_attributes["title"] - all_remote_chapters: List = deepcopy(a.tracks) - matches = [] - affixes = find_regular_affixes([c.title for c in items]) - for chapter in items: - # need a string distance algorithm that penalises number replacements more - best_match = list( - sorted( - all_remote_chapters, - key=lambda c: specialised_levenshtein(chapter.title, c.title, affixes) - ) - ) - best_match = best_match[0] - matches.append(best_match) - all_remote_chapters.remove(best_match) - a.tracks = normalised_track_indices(matches) + a.tracks = sort_tracks(a, items) return albums def get_album_from_yaml_metadata(self, data, items): diff --git a/tests/test_audible.py b/tests/test_audible.py index ac2e5c9..6e639bc 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -218,10 +218,10 @@ def test_find_regular_affixes(test_tokens: List[str], expected_prefix: 
str, expe ("example", ("", ""), "example"), ("test", ("test", ""), ""), ("test", ("", "test"), ""), - ('testexampletest',('test',''), 'exampletest'), - ('testexampletest', ('','test'), 'testexample'), - ('test.mp3', ('','.mp3'), 'test'), - ('testxmp3',('','.mp3'), 'testxmp3'), + ("testexampletest", ("test", ""), "exampletest"), + ("testexampletest", ("", "test"), "testexample"), + ("test.mp3", ("", ".mp3"), "test"), + ("testxmp3", ("", ".mp3"), "testxmp3"), ), ) def test_strip_affixes(test_token: str, test_affixes: Tuple[str, str], expected: str): From 7112920d99719061bda5ab5a70799d51717ad2b5 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 17 Dec 2022 18:39:00 +1000 Subject: [PATCH 12/59] Fix return type --- beetsplug/audible.py | 87 ++++++++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 35 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 1e8882b..3b5504d 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -5,7 +5,7 @@ import urllib.error from copy import deepcopy from tempfile import NamedTemporaryFile -from typing import List, Optional, Tuple +from typing import Dict, List, Optional, Tuple import beets.autotag.hooks import Levenshtein @@ -26,37 +26,61 @@ def sort_items(items: List[Item]): return naturally_sorted_items -def sort_tracks(album: AlbumInfo, items: List[Item]) -> List[Item]: - # if there's only one item, return as is - if len(items) == 1: - return items +def get_common_data_attributes(item: Item) -> Dict: + common_track_attributes = dict(item) + del common_track_attributes["index"] + del common_track_attributes["length"] + del common_track_attributes["title"] + return common_track_attributes - affixes = find_regular_affixes([c.title for c in items]) - stripped_titles = [strip_affixes(i.title, affixes) for i in items] - average_title_change = calculate_average_levenshtein_difference(stripped_titles) - # if there are only a few track differences from each to the other, it's likely 
they're numbered and don't have - # otherwise unique titles, so just sort them as best as possible +def normalised_track_indices(tracks: List[TrackInfo]) -> List[TrackInfo]: + tracks = sorted(tracks, key=lambda t: t.index) + for i, track in enumerate(tracks): + track.index = i + return tracks - # magic number here, it's a judgement call - if max(average_title_change) < 4: - if len(items) == len(album.tracks): - # if the number of chapters are the same, then it's likely that they are mislabelled but correlate - return album.tracks - else: - matches = natsorted(items, key=lambda t: t.title) + +def convert_items_to_trackinfo(items: List[Item]) -> List[TrackInfo]: + out = [] + common_attrs = get_common_data_attributes(items[0]) + for i, item in enumerate(items, start=1): + track = TrackInfo(**common_attrs, title=item.title, length=item.length, index=i + 1) + out.append(track) + return out + + +def sort_tracks(album: AlbumInfo, items: List[Item]) -> List[TrackInfo]: + # if there's only one item, return as is + if len(items) == 1: + matches = items else: - all_remote_chapters: List = deepcopy(album.tracks) - matches = [] - for chapter in items: - # need a string distance algorithm that penalises number replacements more - best_match = list( - sorted(all_remote_chapters, key=lambda c: specialised_levenshtein(chapter.title, c.title, affixes)) - ) - best_match = best_match[0] - matches.append(best_match) - all_remote_chapters.remove(best_match) - tracks = normalised_track_indices(matches) + affixes = find_regular_affixes([c.title for c in items]) + stripped_titles = [strip_affixes(i.title, affixes) for i in items] + average_title_change = calculate_average_levenshtein_difference(stripped_titles) + + # if there are only a few track differences from each to the other, it's likely they're numbered and don't have + # otherwise unique titles, so just sort them as best as possible + + # magic number here, it's a judgement call + if max(average_title_change) < 4: + if len(items) 
== len(album.tracks): + # if the number of chapters are the same, then it's likely that they are mislabelled but correlate + return album.tracks + else: + matches = natsorted(items, key=lambda t: t.title) + else: + all_remote_chapters: List = deepcopy(album.tracks) + matches = [] + for chapter in items: + # need a string distance algorithm that penalises number replacements more + best_match = list( + sorted(all_remote_chapters, key=lambda c: specialised_levenshtein(chapter.title, c.title, affixes)) + ) + best_match = best_match[0] + matches.append(best_match) + all_remote_chapters.remove(best_match) + tracks = convert_items_to_trackinfo(matches) return tracks @@ -124,13 +148,6 @@ def specialised_levenshtein(token1: str, token2: str, ignored_affixes: Optional[ return total_cost -def normalised_track_indices(tracks: List[Item]) -> List[Item]: - tracks = sorted(tracks, key=lambda t: t.track) - for i, track in enumerate(tracks): - track.index = i - return tracks - - class Audible(BeetsPlugin): data_source = "Audible" From 8cc7917517f0d21ffd839fd7524dd17d8751c1e6 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 17 Dec 2022 18:47:50 +1000 Subject: [PATCH 13/59] Fix item reference --- beetsplug/audible.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 3b5504d..350ff43 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -26,8 +26,8 @@ def sort_items(items: List[Item]): return naturally_sorted_items -def get_common_data_attributes(item: Item) -> Dict: - common_track_attributes = dict(item) +def get_common_data_attributes(track: TrackInfo) -> Dict: + common_track_attributes = dict(track) del common_track_attributes["index"] del common_track_attributes["length"] del common_track_attributes["title"] @@ -41,9 +41,8 @@ def normalised_track_indices(tracks: List[TrackInfo]) -> List[TrackInfo]: return tracks -def convert_items_to_trackinfo(items: List[Item]) -> List[TrackInfo]: +def 
convert_items_to_trackinfo(items: List[Item], common_attrs: Dict) -> List[TrackInfo]: out = [] - common_attrs = get_common_data_attributes(items[0]) for i, item in enumerate(items, start=1): track = TrackInfo(**common_attrs, title=item.title, length=item.length, index=i + 1) out.append(track) @@ -51,6 +50,7 @@ def convert_items_to_trackinfo(items: List[Item]) -> List[TrackInfo]: def sort_tracks(album: AlbumInfo, items: List[Item]) -> List[TrackInfo]: + common_attrs = get_common_data_attributes(album.tracks[0]) # if there's only one item, return as is if len(items) == 1: matches = items @@ -80,7 +80,7 @@ def sort_tracks(album: AlbumInfo, items: List[Item]) -> List[TrackInfo]: best_match = best_match[0] matches.append(best_match) all_remote_chapters.remove(best_match) - tracks = convert_items_to_trackinfo(matches) + tracks = convert_items_to_trackinfo(matches, common_attrs) return tracks From ede5e54c320c42a90dccc1d8b170aaa1569f52c6 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 17 Dec 2022 18:55:33 +1000 Subject: [PATCH 14/59] Fix index start --- beetsplug/audible.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 350ff43..74bc4f9 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -34,17 +34,10 @@ def get_common_data_attributes(track: TrackInfo) -> Dict: return common_track_attributes -def normalised_track_indices(tracks: List[TrackInfo]) -> List[TrackInfo]: - tracks = sorted(tracks, key=lambda t: t.index) - for i, track in enumerate(tracks): - track.index = i - return tracks - - def convert_items_to_trackinfo(items: List[Item], common_attrs: Dict) -> List[TrackInfo]: out = [] for i, item in enumerate(items, start=1): - track = TrackInfo(**common_attrs, title=item.title, length=item.length, index=i + 1) + track = TrackInfo(**common_attrs, title=item.title, length=item.length, index=i) out.append(track) return out From 51fc5e2dba12cde352e96c357de6d1fa7845a036 Mon Sep 17 
00:00:00 2001 From: Serene-Arc Date: Sat, 17 Dec 2022 19:04:22 +1000 Subject: [PATCH 15/59] Add case to matching algorithm --- beetsplug/audible.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 74bc4f9..adcaf86 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -46,7 +46,11 @@ def sort_tracks(album: AlbumInfo, items: List[Item]) -> List[TrackInfo]: common_attrs = get_common_data_attributes(album.tracks[0]) # if there's only one item, return as is if len(items) == 1: - matches = items + # Prefer a sdingle named book from the remote source + if len(album.tracks) == 1: + matches = album.tracks + else: + matches = items else: affixes = find_regular_affixes([c.title for c in items]) stripped_titles = [strip_affixes(i.title, affixes) for i in items] @@ -61,6 +65,7 @@ def sort_tracks(album: AlbumInfo, items: List[Item]) -> List[TrackInfo]: # if the number of chapters are the same, then it's likely that they are mislabelled but correlate return album.tracks else: + # otherwise a natural sort to make sure it's all sorted correctly matches = natsorted(items, key=lambda t: t.title) else: all_remote_chapters: List = deepcopy(album.tracks) From b9c10097dee359092d467a0b3a2501700aa113f0 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 17 Dec 2022 19:07:28 +1000 Subject: [PATCH 16/59] Fix typo --- beetsplug/audible.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index adcaf86..847c6da 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -46,7 +46,7 @@ def sort_tracks(album: AlbumInfo, items: List[Item]) -> List[TrackInfo]: common_attrs = get_common_data_attributes(album.tracks[0]) # if there's only one item, return as is if len(items) == 1: - # Prefer a sdingle named book from the remote source + # Prefer a single named book from the remote source if len(album.tracks) == 1: matches = album.tracks else: From 
34ded236a2fc37fc14174d8ad8c5e08c74d7c5bb Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 18 Dec 2022 20:10:39 +1000 Subject: [PATCH 17/59] Add better affix detection --- beetsplug/audible.py | 60 +++++++++++++++++++++++++++++++------------ tests/test_audible.py | 1 + 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 847c6da..35b4b9a 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -42,7 +42,7 @@ def convert_items_to_trackinfo(items: List[Item], common_attrs: Dict) -> List[Tr return out -def sort_tracks(album: AlbumInfo, items: List[Item]) -> List[TrackInfo]: +def sort_tracks(album: AlbumInfo, items: List[Item]) -> Optional[List[TrackInfo]]: common_attrs = get_common_data_attributes(album.tracks[0]) # if there's only one item, return as is if len(items) == 1: @@ -68,14 +68,18 @@ def sort_tracks(album: AlbumInfo, items: List[Item]) -> List[TrackInfo]: # otherwise a natural sort to make sure it's all sorted correctly matches = natsorted(items, key=lambda t: t.title) else: + if len(items) > len(album.tracks): + # TODO: find a better way to handle this + # right now just reject this match + return None all_remote_chapters: List = deepcopy(album.tracks) matches = [] for chapter in items: # need a string distance algorithm that penalises number replacements more - best_match = list( + best_matches = list( sorted(all_remote_chapters, key=lambda c: specialised_levenshtein(chapter.title, c.title, affixes)) ) - best_match = best_match[0] + best_match = best_matches[0] matches.append(best_match) all_remote_chapters.remove(best_match) tracks = convert_items_to_trackinfo(matches, common_attrs) @@ -94,26 +98,46 @@ def calculate_average_levenshtein_difference(tokens: List[str]) -> List[float]: def find_regular_affixes(example_strings: List[str]) -> Tuple[str, str]: + """Find regular prefixes and suffices that occur in most of the titles""" if len(example_strings) <= 1: return "", "" - # find 
prefixes - prefix = _find_prefix(example_strings) - suffix = _find_prefix([s[::-1] for s in example_strings]) - suffix = suffix[::-1] + prefix_result = find_best_affix_sequence(example_strings) + prefix = _check_affix_commonness(prefix_result) + + reversed_strings = [e[::-1] for e in example_strings] + suffix_result = find_best_affix_sequence(reversed_strings) + suffix = _check_affix_commonness(suffix_result)[::-1] return prefix, suffix -def _find_prefix(example_strings: List[str]) -> str: - i = 0 - for i in range(0, len(example_strings[0]) + 1): - if not all([e[:i] == example_strings[0][:i] for e in example_strings]): - i += -1 - break - if i <= 0: - return "" - prefix = example_strings[0][:i] - return prefix +def _check_affix_commonness(affix_result: Tuple[str, float]) -> str: + # the 75% is a magic number, done through testing + if affix_result[1] >= 0.75: + out = affix_result[0] + else: + out = '' + return out + + +def find_best_affix_sequence(example_strings: List[str]) -> Tuple[str, float]: + affix_sequences = set() + for s in example_strings: + for i in range(0, len(s) + 1): + affix_sequences.add(s[0:i]) + # filter to minimum affix length + # 4 is a magic number + filtered_affixes = filter(lambda p: len(p) >= 4, affix_sequences) + affix_commonness = [(p, _check_affix_commonality(example_strings, rf"^{re.escape(p)}")) for p in filtered_affixes] + sorted_affixes = sorted(affix_commonness, key=lambda p: (p[1], len(p[0])), reverse=True) + affix = sorted_affixes[0] + return affix + + +def _check_affix_commonality(tokens: List[str], pattern: str) -> float: + matches = list(filter(None, [re.match(rf"{pattern}", t) for t in tokens])) + total = len(matches) + return total / len(tokens) def strip_affixes(token: str, affixes: Tuple[str, str]) -> str: @@ -124,6 +148,7 @@ def strip_affixes(token: str, affixes: Tuple[str, str]) -> str: def specialised_levenshtein(token1: str, token2: str, ignored_affixes: Optional[Tuple[str, str]] = None) -> int: + """Find the Levenshtein 
distance between two strings, penalising operations involving digits x10""" if ignored_affixes: token1 = strip_affixes(token1, ignored_affixes) token2 = strip_affixes(token2, ignored_affixes) @@ -314,6 +339,7 @@ def candidates(self, items, artist, album, va_likely, extra_tags=None): del common_track_attributes["title"] a.tracks = sort_tracks(a, items) + albums = list(filter(lambda a: a.tracks is not None, albums)) return albums def get_album_from_yaml_metadata(self, data, items): diff --git a/tests/test_audible.py b/tests/test_audible.py index 6e639bc..ff1faf8 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -204,6 +204,7 @@ def test_specialised_levenshtein(test_token1: str, test_token2: str, expected: i (["testing", "test2"], "test", ""), (["testing", "test2"], "test", ""), (["prefix1suffix", "prefix2suffix"], "prefix", "suffix"), + (["prologue", "chapter1", "chapter2", "chapter3"], "chapter", ""), ), ) def test_find_regular_affixes(test_tokens: List[str], expected_prefix: str, expected_suffix: str): From faeac09b36e52882f65e55517d44aa688bb817af Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 19 Dec 2022 12:20:57 +1000 Subject: [PATCH 18/59] Remove old logic --- beetsplug/audible.py | 37 +------------------------------------ 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 35b4b9a..1c352fd 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -303,42 +303,7 @@ def candidates(self, items, artist, album, va_likely, extra_tags=None): self._log.debug(f"Searching Audible for {query}") albums = self.get_albums(query) for a in albums: - is_chapter_data_accurate = a.is_chapter_data_accurate - punctuation = r"[^\w\s\d]" - # normalize by removing punctuation, converting to lowercase, - # as well as changing multiple consecutive spaces in the string to a single space - normalized_book_title = re.sub(punctuation, "", a.album.strip().lower()) - normalized_book_title = " 
".join(normalized_book_title.split()) - normalized_album_name = re.sub(abridged_indicator, "", album.strip().lower()) - normalized_album_name = re.sub(punctuation, "", normalized_album_name) - - normalized_album_name = " ".join(normalized_album_name.split()) - self._log.debug(f"Matching album name {normalized_album_name} with book title {normalized_book_title}") - # account for different length strings - is_likely_match = ( - normalized_album_name in normalized_book_title or normalized_book_title in normalized_album_name - ) - is_chapterized = len(a.tracks) == len(items) - # matching doesn't work well if the number of files in the album doesn't match the number of chapters - # As a workaround, return the same number of tracks as the number of files. - # This white lie is a workaround but works extraordinarily well - if self.config["match_chapters"] and is_likely_match and is_chapterized and not is_chapter_data_accurate: - # Logging this for now because this situation - # is technically possible (based on the API) but unsure how often it happens - self._log.warn(f"Chapter data for {a.album} could be inaccurate.") - - if is_likely_match and (not is_chapterized or not self.config["match_chapters"]): - self._log.debug( - f"Attempting to match book: album {album} with {len(items)} files" - f" to book {a.album} with {len(a.tracks)} chapters." 
- ) - - common_track_attributes = dict(a.tracks[0]) - del common_track_attributes["index"] - del common_track_attributes["length"] - del common_track_attributes["title"] - - a.tracks = sort_tracks(a, items) + a.tracks = sort_tracks(a, items) albums = list(filter(lambda a: a.tracks is not None, albums)) return albums From d56c46c57aeada30eed08c48b6fa14861270c8b3 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 19 Dec 2022 14:08:42 +1000 Subject: [PATCH 19/59] Fix case --- beetsplug/audible.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 1c352fd..e88ef18 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -61,12 +61,9 @@ def sort_tracks(album: AlbumInfo, items: List[Item]) -> Optional[List[TrackInfo] # magic number here, it's a judgement call if max(average_title_change) < 4: - if len(items) == len(album.tracks): - # if the number of chapters are the same, then it's likely that they are mislabelled but correlate - return album.tracks - else: - # otherwise a natural sort to make sure it's all sorted correctly - matches = natsorted(items, key=lambda t: t.title) + # can't assume that the tracks actually match even when there are the same number of items, since lengths + # can be different e.g. 
an even split into n parts that aren't necessarily chapter-based so just natsort + matches = natsorted(items, key=lambda t: t.title) else: if len(items) > len(album.tracks): # TODO: find a better way to handle this From 9c0913508627ac2b686df37ddfdf1d846f0abe61 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 19 Dec 2022 14:21:22 +1000 Subject: [PATCH 20/59] Fix case where no prefixes are found --- beetsplug/audible.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index e88ef18..41a5bbf 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -99,11 +99,17 @@ def find_regular_affixes(example_strings: List[str]) -> Tuple[str, str]: if len(example_strings) <= 1: return "", "" prefix_result = find_best_affix_sequence(example_strings) - prefix = _check_affix_commonness(prefix_result) + if prefix_result: + prefix = _check_affix_commonness(prefix_result) + else: + prefix = "" reversed_strings = [e[::-1] for e in example_strings] suffix_result = find_best_affix_sequence(reversed_strings) - suffix = _check_affix_commonness(suffix_result)[::-1] + if suffix_result: + suffix = _check_affix_commonness(suffix_result)[::-1] + else: + suffix = "" return prefix, suffix @@ -113,11 +119,11 @@ def _check_affix_commonness(affix_result: Tuple[str, float]) -> str: if affix_result[1] >= 0.75: out = affix_result[0] else: - out = '' + out = "" return out -def find_best_affix_sequence(example_strings: List[str]) -> Tuple[str, float]: +def find_best_affix_sequence(example_strings: List[str]) -> Optional[Tuple[str, float]]: affix_sequences = set() for s in example_strings: for i in range(0, len(s) + 1): @@ -126,9 +132,12 @@ def find_best_affix_sequence(example_strings: List[str]) -> Tuple[str, float]: # 4 is a magic number filtered_affixes = filter(lambda p: len(p) >= 4, affix_sequences) affix_commonness = [(p, _check_affix_commonality(example_strings, rf"^{re.escape(p)}")) for p in 
filtered_affixes] - sorted_affixes = sorted(affix_commonness, key=lambda p: (p[1], len(p[0])), reverse=True) - affix = sorted_affixes[0] - return affix + if affix_commonness: + sorted_affixes = sorted(affix_commonness, key=lambda p: (p[1], len(p[0])), reverse=True) + affix = sorted_affixes[0] + return affix + else: + return None def _check_affix_commonality(tokens: List[str], pattern: str) -> float: From 43d091ca5d799160c6ec79155e882188c2819538 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Thu, 16 Feb 2023 13:56:48 +1000 Subject: [PATCH 21/59] Bump version --- .pre-commit-config.yaml | 4 +- tests/conftest.py | 18 +++ tests/test_audible.py | 248 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 267 insertions(+), 3 deletions(-) create mode 100644 tests/conftest.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3ec6a23..138a5a0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,12 +3,12 @@ repos: - repo: https://github.com/psf/black - rev: 22.10.0 + rev: 23.1.0 hooks: - id: black - repo: https://github.com/pycqa/isort - rev: 5.10.1 + rev: 5.12.0 hooks: - id: isort name: isort (python) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..7c7efd6 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,18 @@ +from unittest.mock import MagicMock + +import pytest + + +@pytest.fixture(scope="session") +def mock_audible_plugin() -> MagicMock: + out = MagicMock() + out.config = { + "fetch_art": True, + "match_chapters": True, + "source_weight": 0.0, + "write_description_file": True, + "write_reader_file": True, + "include_narrator_in_artists": True, + "goodreads_apikey": None, + } + return out diff --git a/tests/test_audible.py b/tests/test_audible.py index ff1faf8..3a87d9e 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -11,7 +11,7 @@ import beetsplug.audible as audible -def create_mock_item(item_name: str, item_index: int, filename: Optional[str] = None) -> MagicMock: +def 
create_mock_item(item_name: str, item_index: int = 0, filename: Optional[str] = None) -> MagicMock: out = MagicMock() out.title = item_name out.track = item_index @@ -170,6 +170,252 @@ def test_sort_items_randomised(correct: List[Item], items: List[Item]): assert all([str(result[i]) == str(e) for i, e in enumerate(correct)]) +@pytest.mark.online +@pytest.mark.parametrize( + ("test_audiobook_id", "test_items", "expected_items"), + ( + ( + "0063007711", + (create_mock_item("Kleptopia: How Dirty Money Is Conquering the World"),), + ("Kleptopia: How Dirty Money Is Conquering the World",), + ), + ( + "B07XTN4FTJ", + ( + create_mock_item("Chapter 1 The DC Sniper The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 2 Terrorism The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item( + "Chapter 3 Brothers in the Arena The Untold Story of the DC Sniper Investigation - 1.m4b", 0 + ), + create_mock_item("Chapter 4 Call Me God The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 5 Close to Home The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 6 A Local Case The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 7 Demands The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 8 The Profile The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 9 Suspects The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 10 Prelude The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 11 The Arrest The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item("Chapter 12 Revenge The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + create_mock_item( + "Chapter 13 The Trials of a Teenager The Untold Story of the DC Sniper Investigation - 1.m4b", 0 + ), 
+ create_mock_item("Chapter 14 Last Words The Untold Story of the DC Sniper Investigation - 1.m4b", 0), + ), + ( + "Chapter 1: The DC Sniper", + "Chapter 2: Terrorism", + "Chapter 3: Brothers in the Arena", + "Chapter 4: Call Me God", + "Chapter 5: Close to Home", + "Chapter 6: A Local Case", + "Chapter 7: Demands", + "Chapter 8: The Profile", + "Chapter 9: Suspects", + "Chapter 10: Prelude", + "Chapter 11: The Arrest", + "Chapter 12: Revenge", + "Chapter 13: The Trials of a Teenager", + "Chapter 14: Last Words", + ), + ), + ( + "B005CJAB5S", + ( + create_mock_item("Cats-Eye-000.mp3", 0), + create_mock_item("Cats-Eye-001.mp3", 0), + create_mock_item("Cats-Eye-002.mp3", 0), + create_mock_item("Cats-Eye-003.mp3", 0), + create_mock_item("Cats-Eye-004.mp3", 0), + create_mock_item("Cats-Eye-005.mp3", 0), + create_mock_item("Cats-Eye-006.mp3", 0), + create_mock_item("Cats-Eye-007.mp3", 0), + create_mock_item("Cats-Eye-008.mp3", 0), + create_mock_item("Cats-Eye-009.mp3", 0), + create_mock_item("Cats-Eye-010.mp3", 0), + create_mock_item("Cats-Eye-011.mp3", 0), + ), + ( + "Cats-Eye-000.mp3", + "Cats-Eye-001.mp3", + "Cats-Eye-002.mp3", + "Cats-Eye-003.mp3", + "Cats-Eye-004.mp3", + "Cats-Eye-005.mp3", + "Cats-Eye-006.mp3", + "Cats-Eye-007.mp3", + "Cats-Eye-008.mp3", + "Cats-Eye-009.mp3", + "Cats-Eye-010.mp3", + "Cats-Eye-011.mp3", + ), + ), + ( + "1250767547", + ( + create_mock_item("01 - Paolini, C - To Sleep in a Sea of Stars — 01.mp3", 0), + create_mock_item("02 - Paolini, C - To Sleep in a Sea of Stars — 02.mp3", 0), + create_mock_item("03 - Paolini, C - To Sleep in a Sea of Stars — 03.mp3", 0), + create_mock_item("04 - Paolini, C - To Sleep in a Sea of Stars — 04.mp3", 0), + create_mock_item("05 - Paolini, C - To Sleep in a Sea of Stars — 05.mp3", 0), + create_mock_item("06 - Paolini, C - To Sleep in a Sea of Stars — 06.mp3", 0), + create_mock_item("07 - Paolini, C - To Sleep in a Sea of Stars — 07.mp3", 0), + create_mock_item("08 - Paolini, C - To Sleep in a Sea of 
Stars — 08.mp3", 0), + create_mock_item("09 - Paolini, C - To Sleep in a Sea of Stars — 09.mp3", 0), + create_mock_item("10 - Paolini, C - To Sleep in a Sea of Stars — 10.mp3", 0), + create_mock_item("11 - Paolini, C - To Sleep in a Sea of Stars — 11.mp3", 0), + create_mock_item("12 - Paolini, C - To Sleep in a Sea of Stars — 12.mp3", 0), + create_mock_item("13 - Paolini, C - To Sleep in a Sea of Stars — 13.mp3", 0), + create_mock_item("14 - Paolini, C - To Sleep in a Sea of Stars — 14.mp3", 0), + create_mock_item("15 - Paolini, C - To Sleep in a Sea of Stars — 15.mp3", 0), + create_mock_item("16 - Paolini, C - To Sleep in a Sea of Stars — 16.mp3", 0), + create_mock_item("17 - Paolini, C - To Sleep in a Sea of Stars — 17.mp3", 0), + create_mock_item("18 - Paolini, C - To Sleep in a Sea of Stars — 18.mp3", 0), + create_mock_item("19 - Paolini, C - To Sleep in a Sea of Stars — 19.mp3", 0), + create_mock_item("20 - Paolini, C - To Sleep in a Sea of Stars — 20.mp3", 0), + create_mock_item("21 - Paolini, C - To Sleep in a Sea of Stars — 21.mp3", 0), + create_mock_item("22 - Paolini, C - To Sleep in a Sea of Stars — 22.mp3", 0), + create_mock_item("23 - Paolini, C - To Sleep in a Sea of Stars — 23.mp3", 0), + create_mock_item("24 - Paolini, C - To Sleep in a Sea of Stars — 24.mp3", 0), + create_mock_item("25 - Paolini, C - To Sleep in a Sea of Stars — 25.mp3", 0), + create_mock_item("26 - Paolini, C - To Sleep in a Sea of Stars — 26.mp3", 0), + create_mock_item("27 - Paolini, C - To Sleep in a Sea of Stars — 27.mp3", 0), + create_mock_item("28 - Paolini, C - To Sleep in a Sea of Stars — 28.mp3", 0), + create_mock_item("29 - Paolini, C - To Sleep in a Sea of Stars — 29.mp3", 0), + create_mock_item("30 - Paolini, C - To Sleep in a Sea of Stars — 30.mp3", 0), + create_mock_item("31 - Paolini, C - To Sleep in a Sea of Stars — 31.mp3", 0), + create_mock_item("32 - Paolini, C - To Sleep in a Sea of Stars — 32.mp3", 0), + create_mock_item("33 - Paolini, C - To Sleep in a Sea of 
Stars — 33.mp3", 0), + create_mock_item("34 - Paolini, C - To Sleep in a Sea of Stars — 34.mp3", 0), + create_mock_item("35 - Paolini, C - To Sleep in a Sea of Stars — 35.mp3", 0), + create_mock_item("36 - Paolini, C - To Sleep in a Sea of Stars — 36.mp3", 0), + create_mock_item("37 - Paolini, C - To Sleep in a Sea of Stars — 37.mp3", 0), + create_mock_item("38 - Paolini, C - To Sleep in a Sea of Stars — 38.mp3", 0), + create_mock_item("39 - Paolini, C - To Sleep in a Sea of Stars — 39.mp3", 0), + create_mock_item("40 - Paolini, C - To Sleep in a Sea of Stars — 40.mp3", 0), + create_mock_item("41 - Paolini, C - To Sleep in a Sea of Stars — 41.mp3", 0), + create_mock_item("42 - Paolini, C - To Sleep in a Sea of Stars — 42.mp3", 0), + create_mock_item("43 - Paolini, C - To Sleep in a Sea of Stars — 43.mp3", 0), + create_mock_item("44 - Paolini, C - To Sleep in a Sea of Stars — 44.mp3", 0), + create_mock_item("45 - Paolini, C - To Sleep in a Sea of Stars — 45.mp3", 0), + create_mock_item("46 - Paolini, C - To Sleep in a Sea of Stars — 46.mp3", 0), + create_mock_item("47 - Paolini, C - To Sleep in a Sea of Stars — 47.mp3", 0), + create_mock_item("48 - Paolini, C - To Sleep in a Sea of Stars — 48.mp3", 0), + create_mock_item("49 - Paolini, C - To Sleep in a Sea of Stars — 49.mp3", 0), + create_mock_item("50 - Paolini, C - To Sleep in a Sea of Stars — 50.mp3", 0), + create_mock_item("51 - Paolini, C - To Sleep in a Sea of Stars — 51.mp3", 0), + create_mock_item("52 - Paolini, C - To Sleep in a Sea of Stars — 52.mp3", 0), + create_mock_item("53 - Paolini, C - To Sleep in a Sea of Stars — 53.mp3", 0), + create_mock_item("54 - Paolini, C - To Sleep in a Sea of Stars — 54.mp3", 0), + create_mock_item("55 - Paolini, C - To Sleep in a Sea of Stars — 55.mp3", 0), + create_mock_item("56 - Paolini, C - To Sleep in a Sea of Stars — 56.mp3", 0), + create_mock_item("57 - Paolini, C - To Sleep in a Sea of Stars — 57.mp3", 0), + create_mock_item("58 - Paolini, C - To Sleep in a Sea of 
Stars — 58.mp3", 0), + create_mock_item("59 - Paolini, C - To Sleep in a Sea of Stars — 59.mp3", 0), + create_mock_item("60 - Paolini, C - To Sleep in a Sea of Stars — 60.mp3", 0), + ), + ( + "01 - Paolini, C - To Sleep in a Sea of Stars — 01.mp3", + "02 - Paolini, C - To Sleep in a Sea of Stars — 02.mp3", + "03 - Paolini, C - To Sleep in a Sea of Stars — 03.mp3", + "04 - Paolini, C - To Sleep in a Sea of Stars — 04.mp3", + "05 - Paolini, C - To Sleep in a Sea of Stars — 05.mp3", + "06 - Paolini, C - To Sleep in a Sea of Stars — 06.mp3", + "07 - Paolini, C - To Sleep in a Sea of Stars — 07.mp3", + "08 - Paolini, C - To Sleep in a Sea of Stars — 08.mp3", + "09 - Paolini, C - To Sleep in a Sea of Stars — 09.mp3", + "10 - Paolini, C - To Sleep in a Sea of Stars — 10.mp3", + "11 - Paolini, C - To Sleep in a Sea of Stars — 11.mp3", + "12 - Paolini, C - To Sleep in a Sea of Stars — 12.mp3", + "13 - Paolini, C - To Sleep in a Sea of Stars — 13.mp3", + "14 - Paolini, C - To Sleep in a Sea of Stars — 14.mp3", + "15 - Paolini, C - To Sleep in a Sea of Stars — 15.mp3", + "16 - Paolini, C - To Sleep in a Sea of Stars — 16.mp3", + "17 - Paolini, C - To Sleep in a Sea of Stars — 17.mp3", + "18 - Paolini, C - To Sleep in a Sea of Stars — 18.mp3", + "19 - Paolini, C - To Sleep in a Sea of Stars — 19.mp3", + "20 - Paolini, C - To Sleep in a Sea of Stars — 20.mp3", + "21 - Paolini, C - To Sleep in a Sea of Stars — 21.mp3", + "22 - Paolini, C - To Sleep in a Sea of Stars — 22.mp3", + "23 - Paolini, C - To Sleep in a Sea of Stars — 23.mp3", + "24 - Paolini, C - To Sleep in a Sea of Stars — 24.mp3", + "25 - Paolini, C - To Sleep in a Sea of Stars — 25.mp3", + "26 - Paolini, C - To Sleep in a Sea of Stars — 26.mp3", + "27 - Paolini, C - To Sleep in a Sea of Stars — 27.mp3", + "28 - Paolini, C - To Sleep in a Sea of Stars — 28.mp3", + "29 - Paolini, C - To Sleep in a Sea of Stars — 29.mp3", + "30 - Paolini, C - To Sleep in a Sea of Stars — 30.mp3", + "31 - Paolini, C - To Sleep in a Sea 
of Stars — 31.mp3", + "32 - Paolini, C - To Sleep in a Sea of Stars — 32.mp3", + "33 - Paolini, C - To Sleep in a Sea of Stars — 33.mp3", + "34 - Paolini, C - To Sleep in a Sea of Stars — 34.mp3", + "35 - Paolini, C - To Sleep in a Sea of Stars — 35.mp3", + "36 - Paolini, C - To Sleep in a Sea of Stars — 36.mp3", + "37 - Paolini, C - To Sleep in a Sea of Stars — 37.mp3", + "38 - Paolini, C - To Sleep in a Sea of Stars — 38.mp3", + "39 - Paolini, C - To Sleep in a Sea of Stars — 39.mp3", + "40 - Paolini, C - To Sleep in a Sea of Stars — 40.mp3", + "41 - Paolini, C - To Sleep in a Sea of Stars — 41.mp3", + "42 - Paolini, C - To Sleep in a Sea of Stars — 42.mp3", + "43 - Paolini, C - To Sleep in a Sea of Stars — 43.mp3", + "44 - Paolini, C - To Sleep in a Sea of Stars — 44.mp3", + "45 - Paolini, C - To Sleep in a Sea of Stars — 45.mp3", + "46 - Paolini, C - To Sleep in a Sea of Stars — 46.mp3", + "47 - Paolini, C - To Sleep in a Sea of Stars — 47.mp3", + "48 - Paolini, C - To Sleep in a Sea of Stars — 48.mp3", + "49 - Paolini, C - To Sleep in a Sea of Stars — 49.mp3", + "50 - Paolini, C - To Sleep in a Sea of Stars — 50.mp3", + "51 - Paolini, C - To Sleep in a Sea of Stars — 51.mp3", + "52 - Paolini, C - To Sleep in a Sea of Stars — 52.mp3", + "53 - Paolini, C - To Sleep in a Sea of Stars — 53.mp3", + "54 - Paolini, C - To Sleep in a Sea of Stars — 54.mp3", + "55 - Paolini, C - To Sleep in a Sea of Stars — 55.mp3", + "56 - Paolini, C - To Sleep in a Sea of Stars — 56.mp3", + "57 - Paolini, C - To Sleep in a Sea of Stars — 57.mp3", + "58 - Paolini, C - To Sleep in a Sea of Stars — 58.mp3", + "59 - Paolini, C - To Sleep in a Sea of Stars — 59.mp3", + "60 - Paolini, C - To Sleep in a Sea of Stars — 60.mp3", + ), + ), + ( + "B09VMXJP5W", + ( + create_mock_item("1.mp3", 0), + create_mock_item("2.mp3", 0), + create_mock_item("3.mp3", 0), + create_mock_item("4.mp3", 0), + create_mock_item("5.mp3", 0), + create_mock_item("6.mp3", 0), + create_mock_item("7.mp3", 0), + 
create_mock_item("8.mp3", 0), + create_mock_item("9.mp3", 0), + ), + ( + "1.mp3", + "2.mp3", + "3.mp3", + "4.mp3", + "5.mp3", + "6.mp3", + "7.mp3", + "8.mp3", + "9.mp3", + ), + ), + ), +) +def test_audiobook_chapter_matching( + test_audiobook_id: str, + test_items: List[Item], + expected_items: List[Item], + mock_audible_plugin: MagicMock, +): + test_album = audible.Audible.get_album_info(mock_audible_plugin, test_audiobook_id) + results = audible.sort_tracks(test_album, test_items) + assert results is not None + assert all([results[i].title == e for i, e in enumerate(expected_items)]) + assert len(results) == len(expected_items) + + @pytest.mark.parametrize( ("test_token1", "test_token2", "expected"), ( From 7ea60cab4b0546cb7c303249e9f6f00230437f4b Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 20 May 2023 13:12:42 +1000 Subject: [PATCH 22/59] Initialise seed for randomisation --- tests/test_audible.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_audible.py b/tests/test_audible.py index 3a87d9e..39d0b58 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -1,6 +1,6 @@ +import random from copy import deepcopy from pathlib import Path -from random import shuffle from typing import List, Optional, Sequence, Tuple from unittest.mock import MagicMock @@ -10,6 +10,8 @@ import beetsplug.audible as audible +random.seed(42) + def create_mock_item(item_name: str, item_index: int = 0, filename: Optional[str] = None) -> MagicMock: out = MagicMock() @@ -25,7 +27,7 @@ def randomise_lists(lists: Tuple[List, ...], n: int = 5) -> Sequence[Tuple[List, for l in lists: for i in range(1, n): copy = deepcopy(l) - shuffle(copy) + random.shuffle(copy) out.append((l, copy)) return out From b0d4c228d18c7942270ed1dee361aea8edb6211c Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 20 May 2023 13:35:48 +1000 Subject: [PATCH 23/59] Add two more test cases --- tests/test_audible.py | 44 +++++++++++++++++++++++++++++++++++++++++++ 1 
file changed, 44 insertions(+) diff --git a/tests/test_audible.py b/tests/test_audible.py index 39d0b58..6427c32 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -149,6 +149,50 @@ def randomise_lists(lists: Tuple[List, ...], n: int = 5) -> Sequence[Tuple[List, create_mock_item("End", 0), create_mock_item("Author's Note", 0), ], + [ + create_mock_item("01 - Zg503IhGHXyRdZeVn838pZGhvO7uwM5C", 0), + create_mock_item("02 - Hf7KcfUB8rsP2AzR8DlZgx3CRXN5ABKM", 0), + create_mock_item("03 - nzCOIa8IGCZtInmoxa1M3zS5FmM9F8E7", 0), + create_mock_item("04 - atijzxWA7 PZOI5aN mzNxyE1FU4hFGQ", 0), + create_mock_item("05 - ucwzgmf4aA5YlHCvcYMdn2TCGz1Uxb6E", 0), + create_mock_item("06 - k4yXBbaPPgtPOpI6nVnXM1ex Dg1Hgaw", 0), + create_mock_item("07 - VtQIEznFhhWPvGLzljcL8vpHGITPv ey", 0), + create_mock_item("08 - xe4fePnrboPoYKiiGTFC4LEOB8nP5NSt", 0), + create_mock_item("09 - B8V 7YSwH02fjRFnkBpfvD8x iHNdUF2", 0), + create_mock_item("10 - vPusDiJioUeF8j67IXBzcCoYuENxtjrU", 0), + create_mock_item("11 - PHxgYdqZPcI6DwPIJXKy3AQvLzrdgzQD", 0), + create_mock_item("12 - 7KCqFir8teX GEG Pa oi8S4K3t3leSI", 0), + create_mock_item("13 - CvSesAAwUHbCIJbnI43aVdPlApEiUcIZ", 0), + create_mock_item("14 - lvDZY0kn60eEmb3mkTC9vkCyDZSf1ifk", 0), + create_mock_item("15 - dzlLiHkRm3FMd8Yad8Iigrm0qK8gIRib", 0), + create_mock_item("16 - SKW2vzcx3mgjro3GjBu51qBSaMwiVFRB", 0), + create_mock_item("17 - xGnYqQILdJVM3VcaqbqorbdiPMcieOzw", 0), + create_mock_item("18 - XIvowcJjMMBjgTht8azQb3XIGQSb1s9e", 0), + create_mock_item("19 - 97D0VmI9qgnRDPhwwkRwuL7atFCSKmNJ", 0), + create_mock_item("20 - AEBmW2Dspf6czIMsF3l9u2IwmqUw826u", 0), + ], + [ + create_mock_item("Zg503IhGHXyRdZeVn838pZGhvO7uwM5C", 1), + create_mock_item("Hf7KcfUB8rsP2AzR8DlZgx3CRXN5ABKM", 2), + create_mock_item("nzCOIa8IGCZtInmoxa1M3zS5FmM9F8E7", 3), + create_mock_item("atijzxWA7 PZOI5aN mzNxyE1FU4hFGQ", 4), + create_mock_item("ucwzgmf4aA5YlHCvcYMdn2TCGz1Uxb6E", 5), + create_mock_item("k4yXBbaPPgtPOpI6nVnXM1ex Dg1Hgaw", 6), + 
create_mock_item("VtQIEznFhhWPvGLzljcL8vpHGITPv ey", 7), + create_mock_item("xe4fePnrboPoYKiiGTFC4LEOB8nP5NSt", 8), + create_mock_item("B8V 7YSwH02fjRFnkBpfvD8x iHNdUF2", 9), + create_mock_item("vPusDiJioUeF8j67IXBzcCoYuENxtjrU", 10), + create_mock_item("PHxgYdqZPcI6DwPIJXKy3AQvLzrdgzQD", 11), + create_mock_item("7KCqFir8teX GEG Pa oi8S4K3t3leSI", 12), + create_mock_item("CvSesAAwUHbCIJbnI43aVdPlApEiUcIZ", 13), + create_mock_item("lvDZY0kn60eEmb3mkTC9vkCyDZSf1ifk", 14), + create_mock_item("dzlLiHkRm3FMd8Yad8Iigrm0qK8gIRib", 15), + create_mock_item("SKW2vzcx3mgjro3GjBu51qBSaMwiVFRB", 16), + create_mock_item("xGnYqQILdJVM3VcaqbqorbdiPMcieOzw", 17), + create_mock_item("XIvowcJjMMBjgTht8azQb3XIGQSb1s9e", 18), + create_mock_item("97D0VmI9qgnRDPhwwkRwuL7atFCSKmNJ", 19), + create_mock_item("AEBmW2Dspf6czIMsF3l9u2IwmqUw826u", 20), + ], ) From 68dfd6aa3a3c96722318e93be1338a6b7dac918c Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 20 May 2023 13:36:53 +1000 Subject: [PATCH 24/59] Add additional config option --- beetsplug/audible.py | 1 + 1 file changed, 1 insertion(+) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 41a5bbf..10b8c05 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -194,6 +194,7 @@ def __init__(self): "keep_series_reference_in_title": True, "keep_series_reference_in_subtitle": True, "goodreads_apikey": None, + "trust_source_numbering": True, } ) self.config["goodreads_apikey"].redact = True From 8ffbd64caf9284d72fd1a5ee21300236abdae86c Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 May 2023 14:25:45 +1000 Subject: [PATCH 25/59] Add method --- beetsplug/audible.py | 6 +++++- tests/test_audible.py | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 10b8c05..2970572 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -5,7 +5,7 @@ import urllib.error from copy import deepcopy from tempfile import NamedTemporaryFile -from 
typing import Dict, List, Optional, Tuple +from typing import Dict, Iterable, List, Optional, Tuple import beets.autotag.hooks import Levenshtein @@ -42,6 +42,10 @@ def convert_items_to_trackinfo(items: List[Item], common_attrs: Dict) -> List[Tr return out +def is_continuous_number_series(numbers: Iterable[int]): + return all(b - a == 1 for a, b in zip(numbers, numbers[1:])) + + def sort_tracks(album: AlbumInfo, items: List[Item]) -> Optional[List[TrackInfo]]: common_attrs = get_common_data_attributes(album.tracks[0]) # if there's only one item, return as is diff --git a/tests/test_audible.py b/tests/test_audible.py index 6427c32..7235a1c 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -1,7 +1,7 @@ import random from copy import deepcopy from pathlib import Path -from typing import List, Optional, Sequence, Tuple +from typing import Iterable, List, Optional, Sequence, Tuple from unittest.mock import MagicMock import pytest @@ -520,3 +520,18 @@ def test_find_regular_affixes(test_tokens: List[str], expected_prefix: str, expe def test_strip_affixes(test_token: str, test_affixes: Tuple[str, str], expected: str): result = audible.strip_affixes(test_token, test_affixes) assert result == expected + + +@pytest.mark.parametrize( + ("test_numbers", "expected"), + ( + ((1,), True), + ((1, 2), True), + ((0, 1, 2), True), + ((10, 11, 12), True), + ((10, 11, 13), False), + ), +) +def test_is_continuous_number_series(test_numbers: Iterable[int], expected: bool): + result = audible.is_continuous_number_series(test_numbers) + assert result == expected From 23f4100106f554bd915c2e8a97be7ba46aa29ea9 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 May 2023 14:26:50 +1000 Subject: [PATCH 26/59] Move method --- beetsplug/audible.py | 85 ++++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 2970572..610c92d 100644 --- a/beetsplug/audible.py +++ 
b/beetsplug/audible.py @@ -46,47 +46,6 @@ def is_continuous_number_series(numbers: Iterable[int]): return all(b - a == 1 for a, b in zip(numbers, numbers[1:])) -def sort_tracks(album: AlbumInfo, items: List[Item]) -> Optional[List[TrackInfo]]: - common_attrs = get_common_data_attributes(album.tracks[0]) - # if there's only one item, return as is - if len(items) == 1: - # Prefer a single named book from the remote source - if len(album.tracks) == 1: - matches = album.tracks - else: - matches = items - else: - affixes = find_regular_affixes([c.title for c in items]) - stripped_titles = [strip_affixes(i.title, affixes) for i in items] - average_title_change = calculate_average_levenshtein_difference(stripped_titles) - - # if there are only a few track differences from each to the other, it's likely they're numbered and don't have - # otherwise unique titles, so just sort them as best as possible - - # magic number here, it's a judgement call - if max(average_title_change) < 4: - # can't assume that the tracks actually match even when there are the same number of items, since lengths - # can be different e.g. 
an even split into n parts that aren't necessarily chapter-based so just natsort - matches = natsorted(items, key=lambda t: t.title) - else: - if len(items) > len(album.tracks): - # TODO: find a better way to handle this - # right now just reject this match - return None - all_remote_chapters: List = deepcopy(album.tracks) - matches = [] - for chapter in items: - # need a string distance algorithm that penalises number replacements more - best_matches = list( - sorted(all_remote_chapters, key=lambda c: specialised_levenshtein(chapter.title, c.title, affixes)) - ) - best_match = best_matches[0] - matches.append(best_match) - all_remote_chapters.remove(best_match) - tracks = convert_items_to_trackinfo(matches, common_attrs) - return tracks - - def calculate_average_levenshtein_difference(tokens: List[str]) -> List[float]: out = [] for token in tokens: @@ -278,6 +237,48 @@ def track_distance(self, item, track_info): dist.add_string("track_title", item.title, track_info.title) return dist + def sort_tracks(self, album: AlbumInfo, items: List[Item]) -> Optional[List[TrackInfo]]: + common_attrs = get_common_data_attributes(album.tracks[0]) + # if there's only one item, return as is + if len(items) == 1: + # Prefer a single named book from the remote source + if len(album.tracks) == 1: + matches = album.tracks + else: + matches = items + else: + affixes = find_regular_affixes([c.title for c in items]) + stripped_titles = [strip_affixes(i.title, affixes) for i in items] + average_title_change = calculate_average_levenshtein_difference(stripped_titles) + + # if there are only a few track differences from each to the other, it's likely they're numbered and don't have + # otherwise unique titles, so just sort them as best as possible + + # magic number here, it's a judgement call + if max(average_title_change) < 4: + # can't assume that the tracks actually match even when there are the same number of items, since lengths + # can be different e.g. 
an even split into n parts that aren't necessarily chapter-based so just natsort + matches = natsorted(items, key=lambda t: t.title) + else: + if len(items) > len(album.tracks): + # TODO: find a better way to handle this + # right now just reject this match + return None + all_remote_chapters: List = deepcopy(album.tracks) + matches = [] + for chapter in items: + # need a string distance algorithm that penalises number replacements more + best_matches = list( + sorted( + all_remote_chapters, key=lambda c: specialised_levenshtein(chapter.title, c.title, affixes) + ) + ) + best_match = best_matches[0] + matches.append(best_match) + all_remote_chapters.remove(best_match) + tracks = convert_items_to_trackinfo(matches, common_attrs) + return tracks + def candidates(self, items, artist, album, va_likely, extra_tags=None): """Returns a list of AlbumInfo objects for Audible search results matching an album and artist (if not various). @@ -314,7 +315,7 @@ def candidates(self, items, artist, album, va_likely, extra_tags=None): self._log.debug(f"Searching Audible for {query}") albums = self.get_albums(query) for a in albums: - a.tracks = sort_tracks(a, items) + a.tracks = self.sort_tracks(a, items) albums = list(filter(lambda a: a.tracks is not None, albums)) return albums From ccf22f735cc640ad494c019d2c248bac00f04518 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 May 2023 15:01:20 +1000 Subject: [PATCH 27/59] Fix comments --- beetsplug/audible.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 610c92d..2023bd8 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -252,7 +252,7 @@ def sort_tracks(self, album: AlbumInfo, items: List[Item]) -> Optional[List[Trac average_title_change = calculate_average_levenshtein_difference(stripped_titles) # if there are only a few track differences from each to the other, it's likely they're numbered and don't have - # otherwise unique titles, so just 
sort them as best as possible + # otherwise unique titles, so just sort them best as possible # magic number here, it's a judgement call if max(average_title_change) < 4: @@ -267,7 +267,7 @@ def sort_tracks(self, album: AlbumInfo, items: List[Item]) -> Optional[List[Trac all_remote_chapters: List = deepcopy(album.tracks) matches = [] for chapter in items: - # need a string distance algorithm that penalises number replacements more + # TODO: need a string distance algorithm that penalises number replacements more best_matches = list( sorted( all_remote_chapters, key=lambda c: specialised_levenshtein(chapter.title, c.title, affixes) From 498aa354c48e36f7e193d75fad6b0782cb3097f5 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 May 2023 19:24:00 +1000 Subject: [PATCH 28/59] Add more complex parameterisation --- tests/conftest.py | 4 ++ tests/test_audible.py | 86 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 75 insertions(+), 15 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 7c7efd6..f994638 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,15 +2,19 @@ import pytest +from beetsplug.audible import Audible + @pytest.fixture(scope="session") def mock_audible_plugin() -> MagicMock: out = MagicMock() + out.sort_tracks = Audible.sort_tracks out.config = { "fetch_art": True, "match_chapters": True, "source_weight": 0.0, "write_description_file": True, + "trust_source_numbering": True, "write_reader_file": True, "include_narrator_in_artists": True, "goodreads_apikey": None, diff --git a/tests/test_audible.py b/tests/test_audible.py index 7235a1c..d1df39a 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -1,8 +1,9 @@ import random +import string from copy import deepcopy from pathlib import Path from typing import Iterable, List, Optional, Sequence, Tuple -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import pytest from beets.library import Item @@ -32,7 +33,54 @@ def 
randomise_lists(lists: Tuple[List, ...], n: int = 5) -> Sequence[Tuple[List, return out -chapter_lists = ( +def generate_random_string(n: int) -> str: + # Define the characters we want to use + chars = string.ascii_letters + " " + return "".join(random.choice(chars) for _ in range(n)) + + +def generate_fixture_albums(chapter_list: List[MagicMock]) -> List[MagicMock]: + out = [] + + # this mock just the chapters + mock_1 = MagicMock() + mock_1.tracks = chapter_list + out.append(mock_1) + + # this mock is a random set of strings of the same number of chapters + mock_2 = MagicMock() + mock_2.tracks = [generate_random_string(20) for _ in chapter_list] + out.append(mock_2) + + # this mock is a random set of strings that is more than the given chapters + mock_3 = MagicMock() + mock_3.tracks = [generate_random_string(20) for _ in range(1, len(chapter_list) + 10)] + out.append(mock_3) + + if len(chapter_list) > 1: + # this mock is a random set of strings that is less than the given chapters + mock_4 = MagicMock() + mock_4.tracks = [generate_random_string(20) for _ in range(1, (len(chapter_list) // 2) + 1)] + out.append(mock_4) + + return out + + +def pytest_generate_tests(metafunc: pytest.Metafunc): + if "chapter_lists" in metafunc.fixturenames and "test_album" in metafunc.fixturenames: + test_albums = [] + all_ids = [] + for i, c in enumerate(all_chapter_lists): + for j, a in enumerate(generate_fixture_albums(c)): + test_albums.append((c, a)) + all_ids.append(f"chapter_list{i}-album_fixture{j}") + + metafunc.parametrize("chapter_lists,test_album", test_albums, ids=all_ids) + elif "chapter_lists" in metafunc.fixturenames: + metafunc.parametrize("chapter_lists", all_chapter_lists) + + +all_chapter_lists = ( [ create_mock_item("01", 0), create_mock_item("02", 0), @@ -196,24 +244,31 @@ def randomise_lists(lists: Tuple[List, ...], n: int = 5) -> Sequence[Tuple[List, ) -@pytest.mark.parametrize("items", chapter_lists) -def test_sort_items(items: List[Item]): - expected = 
deepcopy(items) - result = audible.sort_items(items) +def test_sort_items(chapter_lists: List[Item], mock_audible_plugin, test_album: List[MagicMock]): + expected = deepcopy(chapter_lists) + result = sort_tracks_for_test(chapter_lists, mock_audible_plugin, test_album) assert all([str(result[i]) == str(e) for i, e in enumerate(expected)]) -@pytest.mark.parametrize("items", chapter_lists) -def test_sort_items_reversed(items: List[Item]): - expected = deepcopy(items) - result = audible.sort_items(reversed(items)) +def sort_tracks_for_test(chapter_lists, mock_audible_plugin, test_album): + with patch("beetsplug.audible.get_common_data_attributes", return_value=dict()): + with patch("beetsplug.audible.convert_items_to_trackinfo", lambda x, _: x): + result = mock_audible_plugin.sort_tracks(mock_audible_plugin, test_album, chapter_lists) + return result + + +def test_sort_items_reversed(chapter_lists: List[Item], mock_audible_plugin, test_album: List[MagicMock]): + expected = deepcopy(chapter_lists) + result = mock_audible_plugin.sort_tracks(mock_audible_plugin, test_album, reversed(chapter_lists)) assert all([str(result[i]) == str(e) for i, e in enumerate(expected)]) -@pytest.mark.parametrize("correct, items", randomise_lists(chapter_lists, 10)) -def test_sort_items_randomised(correct: List[Item], items: List[Item]): - result = audible.sort_items(items) - assert all([str(result[i]) == str(e) for i, e in enumerate(correct)]) +# @pytest.mark.parametrize("correct, items", randomise_lists(chapter_lists, 10)) +# def test_sort_items_randomised( +# correct: List[Item], items: List[Item], mock_audible_plugin, test_album: List[MagicMock] +# ): +# result = mock_audible_plugin.sort_tracks(mock_audible_plugin, test_album, items) +# assert all([str(result[i]) == str(e) for i, e in enumerate(correct)]) @pytest.mark.online @@ -456,7 +511,7 @@ def test_audiobook_chapter_matching( mock_audible_plugin: MagicMock, ): test_album = audible.Audible.get_album_info(mock_audible_plugin, 
test_audiobook_id) - results = audible.sort_tracks(test_album, test_items) + results = mock_audible_plugin.sort_tracks(mock_audible_plugin, test_album, test_items) assert results is not None assert all([results[i].title == e for i, e in enumerate(expected_items)]) assert len(results) == len(expected_items) @@ -530,6 +585,7 @@ def test_strip_affixes(test_token: str, test_affixes: Tuple[str, str], expected: ((0, 1, 2), True), ((10, 11, 12), True), ((10, 11, 13), False), + ((3, 2, 1), False), ), ) def test_is_continuous_number_series(test_numbers: Iterable[int], expected: bool): From e9e78d8a1d5269b36215b1d02072e1efb436378c Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 May 2023 19:36:52 +1000 Subject: [PATCH 29/59] Add function --- beetsplug/audible.py | 12 ++++++++++++ tests/test_audible.py | 17 +++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 2023bd8..f090c0f 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -116,6 +116,18 @@ def strip_affixes(token: str, affixes: Tuple[str, str]) -> str: return token +def check_starts_with_number(string: str) -> Optional[int]: + pattern = re.compile(r"^(\d+)[ -_]") + result = pattern.match(string) + if result: + try: + number = result.group(1) + number = int(number) + return number + except ValueError: + pass + + def specialised_levenshtein(token1: str, token2: str, ignored_affixes: Optional[Tuple[str, str]] = None) -> int: """Find the Levenshtein distance between two strings, penalising operations involving digits x10""" if ignored_affixes: diff --git a/tests/test_audible.py b/tests/test_audible.py index d1df39a..5140041 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -591,3 +591,20 @@ def test_strip_affixes(test_token: str, test_affixes: Tuple[str, str], expected: def test_is_continuous_number_series(test_numbers: Iterable[int], expected: bool): result = audible.is_continuous_number_series(test_numbers) assert result == 
expected + + +@pytest.mark.parametrize( + ("test_string", "expected"), + ( + ("01 test", 1), + ("01-test", 1), + ("01_test", 1), + ("100_test", 100), + ("01 - test", 1), + ("test", None), + ("0t1 test", None), + ), +) +def test_check_starts_with_number(test_string: str, expected: Optional[int]): + result = audible.check_starts_with_number(test_string) + assert result == expected From 622ee1a8e6e14cf19c23d37d378f8939a7e35481 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 May 2023 19:42:06 +1000 Subject: [PATCH 30/59] Add tests and fix function --- beetsplug/audible.py | 4 ++-- tests/test_audible.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index f090c0f..e8b41d4 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -42,8 +42,8 @@ def convert_items_to_trackinfo(items: List[Item], common_attrs: Dict) -> List[Tr return out -def is_continuous_number_series(numbers: Iterable[int]): - return all(b - a == 1 for a, b in zip(numbers, numbers[1:])) +def is_continuous_number_series(numbers: Iterable[Optional[int]]): + return all([n is not None for n in numbers]) and all(b - a == 1 for a, b in zip(numbers, numbers[1:])) def calculate_average_levenshtein_difference(tokens: List[str]) -> List[float]: diff --git a/tests/test_audible.py b/tests/test_audible.py index 5140041..7b45ea2 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -583,12 +583,13 @@ def test_strip_affixes(test_token: str, test_affixes: Tuple[str, str], expected: ((1,), True), ((1, 2), True), ((0, 1, 2), True), + ((0, None, 2), False), + ((0, 1, None), False), ((10, 11, 12), True), ((10, 11, 13), False), - ((3, 2, 1), False), ), ) -def test_is_continuous_number_series(test_numbers: Iterable[int], expected: bool): +def test_is_continuous_number_series(test_numbers: Iterable[Optional[int]], expected: bool): result = audible.is_continuous_number_series(test_numbers) assert result == expected From 
31e92bdae2b77273b243b9dde4ff4439a9682a06 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 May 2023 20:06:55 +1000 Subject: [PATCH 31/59] Improve chapter matching logic and tests --- beetsplug/audible.py | 68 +++++++++++++++++++++++++++-------------- tests/test_audible.py | 71 +++++++++++++++++++++++++++++++------------ 2 files changed, 96 insertions(+), 43 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index e8b41d4..7e92ee2 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -259,35 +259,57 @@ def sort_tracks(self, album: AlbumInfo, items: List[Item]) -> Optional[List[Trac else: matches = items else: + # if the source files are numbered continuously and the option is set, trust that + if self.config["trust_source_numbering"]: + if is_continuous_number_series([t.track for t in sorted(items, key=lambda t: t.track)]): + # if the track is zero indexed, re-number them + if items[0].track != 1: + matches = [] + for i, item in enumerate(items, start=1): + match = item + match.track = i + matches.append(match) + else: + matches = items + tracks = convert_items_to_trackinfo(matches, common_attrs) + return tracks + affixes = find_regular_affixes([c.title for c in items]) stripped_titles = [strip_affixes(i.title, affixes) for i in items] average_title_change = calculate_average_levenshtein_difference(stripped_titles) - # if there are only a few track differences from each to the other, it's likely they're numbered and don't have - # otherwise unique titles, so just sort them best as possible - - # magic number here, it's a judgement call - if max(average_title_change) < 4: - # can't assume that the tracks actually match even when there are the same number of items, since lengths - # can be different e.g. 
an even split into n parts that aren't necessarily chapter-based so just natsort - matches = natsorted(items, key=lambda t: t.title) + starting_numbers = [check_starts_with_number(s) for s in stripped_titles] + if all(starting_numbers) and is_continuous_number_series(sorted(starting_numbers)): + items_with_numbers = list(zip(starting_numbers, items)) + matches = sorted(items_with_numbers, key=lambda i: i[0]) + matches = [i[1] for i in matches] else: - if len(items) > len(album.tracks): - # TODO: find a better way to handle this - # right now just reject this match - return None - all_remote_chapters: List = deepcopy(album.tracks) - matches = [] - for chapter in items: - # TODO: need a string distance algorithm that penalises number replacements more - best_matches = list( - sorted( - all_remote_chapters, key=lambda c: specialised_levenshtein(chapter.title, c.title, affixes) + # if there are only a few track differences from each to the other, it's likely they're numbered and don't have + # otherwise unique titles, so just sort them best as possible + + # magic number here, it's a judgement call + if max(average_title_change) < 4: + # can't assume that the tracks actually match even when there are the same number of items, since lengths + # can be different e.g. 
an even split into n parts that aren't necessarily chapter-based so just natsort + matches = natsorted(items, key=lambda t: t.title) + else: + if len(items) > len(album.tracks): + # TODO: find a better way to handle this + # right now just reject this match + return None + all_remote_chapters: List = deepcopy(album.tracks) + matches = [] + for chapter in items: + # TODO: need a string distance algorithm that penalises number replacements more + best_matches = list( + sorted( + all_remote_chapters, + key=lambda c: specialised_levenshtein(chapter.title, c.title, affixes), + ) ) - ) - best_match = best_matches[0] - matches.append(best_match) - all_remote_chapters.remove(best_match) + best_match = best_matches[0] + matches.append(best_match) + all_remote_chapters.remove(best_match) tracks = convert_items_to_trackinfo(matches, common_attrs) return tracks diff --git a/tests/test_audible.py b/tests/test_audible.py index 7b45ea2..49513ae 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -76,6 +76,18 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): all_ids.append(f"chapter_list{i}-album_fixture{j}") metafunc.parametrize("chapter_lists,test_album", test_albums, ids=all_ids) + elif all(s in metafunc.fixturenames for s in ("randomised_items", "correct", "test_album")): + test_albums = [] + all_ids = [] + for i, c in enumerate(all_chapter_lists): + for j in range(1, 11): + intermediate = random.sample(c, k=len(c)) + for k, a in enumerate(generate_fixture_albums(intermediate)): + test_albums.append((c, intermediate, a)) + all_ids.append(f"chapter_list{i}-randomisation{j}-album_fixture{k}") + + metafunc.parametrize("correct,randomised_items,test_album", test_albums, ids=all_ids) + elif "chapter_lists" in metafunc.fixturenames: metafunc.parametrize("chapter_lists", all_chapter_lists) @@ -183,19 +195,19 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): create_mock_item("Chapter 14 Last Words The Untold Story of the DC Sniper Investigation - 1.m4b", 
0), ], [ - create_mock_item("Prologue", 0), - create_mock_item("Chapter 1", 0), - create_mock_item("Chapter 2", 0), - create_mock_item("Chapter 3", 0), - create_mock_item("Chapter 4", 0), - create_mock_item("Chapter 5", 0), - create_mock_item("Chapter 6", 0), - create_mock_item("Chapter 7", 0), - create_mock_item("Chapter 8", 0), - create_mock_item("Chapter 9", 0), - create_mock_item("Chapter 10", 0), - create_mock_item("End", 0), - create_mock_item("Author's Note", 0), + create_mock_item("Prologue", 1), + create_mock_item("Chapter 1", 2), + create_mock_item("Chapter 2", 3), + create_mock_item("Chapter 3", 4), + create_mock_item("Chapter 4", 5), + create_mock_item("Chapter 5", 6), + create_mock_item("Chapter 6", 7), + create_mock_item("Chapter 7", 8), + create_mock_item("Chapter 8", 9), + create_mock_item("Chapter 9", 10), + create_mock_item("Chapter 10", 11), + create_mock_item("End", 12), + create_mock_item("Author's Note", 13), ], [ create_mock_item("01 - Zg503IhGHXyRdZeVn838pZGhvO7uwM5C", 0), @@ -241,6 +253,23 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): create_mock_item("97D0VmI9qgnRDPhwwkRwuL7atFCSKmNJ", 19), create_mock_item("AEBmW2Dspf6czIMsF3l9u2IwmqUw826u", 20), ], + # haven't thought of logic that would allow for this type of thing. 
honestly it might be impossible if there's no + # online reference that exactly matches + # [ + # create_mock_item("Prologue", 0), + # create_mock_item("Chapter 1", 0), + # create_mock_item("Chapter 2", 0), + # create_mock_item("Chapter 3", 0), + # create_mock_item("Chapter 4", 0), + # create_mock_item("Chapter 5", 0), + # create_mock_item("Chapter 6", 0), + # create_mock_item("Chapter 7", 0), + # create_mock_item("Chapter 8", 0), + # create_mock_item("Chapter 9", 0), + # create_mock_item("Chapter 10", 0), + # create_mock_item("End", 0), + # create_mock_item("Author's Note", 0), + # ], ) @@ -259,16 +288,18 @@ def sort_tracks_for_test(chapter_lists, mock_audible_plugin, test_album): def test_sort_items_reversed(chapter_lists: List[Item], mock_audible_plugin, test_album: List[MagicMock]): expected = deepcopy(chapter_lists) - result = mock_audible_plugin.sort_tracks(mock_audible_plugin, test_album, reversed(chapter_lists)) + result = sort_tracks_for_test(chapter_lists, mock_audible_plugin, test_album) assert all([str(result[i]) == str(e) for i, e in enumerate(expected)]) -# @pytest.mark.parametrize("correct, items", randomise_lists(chapter_lists, 10)) -# def test_sort_items_randomised( -# correct: List[Item], items: List[Item], mock_audible_plugin, test_album: List[MagicMock] -# ): -# result = mock_audible_plugin.sort_tracks(mock_audible_plugin, test_album, items) -# assert all([str(result[i]) == str(e) for i, e in enumerate(correct)]) +def test_sort_items_randomised( + correct: List[Item], + randomised_items: List[Item], + mock_audible_plugin, + test_album: List[MagicMock], +): + result = sort_tracks_for_test(randomised_items, mock_audible_plugin, test_album) + assert all([str(result[i]) == str(e) for i, e in enumerate(correct)]) @pytest.mark.online From 8580b1b6bb16f445b75403379585a09b403bca58 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 May 2023 20:09:49 +1000 Subject: [PATCH 32/59] Add comment test markers for convienience --- tests/test_audible.py | 20 
++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_audible.py b/tests/test_audible.py index 49513ae..9f4aa95 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -93,7 +93,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): all_chapter_lists = ( - [ + [ # 0 create_mock_item("01", 0), create_mock_item("02", 0), create_mock_item("03", 0), @@ -108,7 +108,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): create_mock_item("12", 0), create_mock_item("13", 0), ], - [ + [ # 1 create_mock_item("Chapter 1", 0), create_mock_item("Chapter 2", 0), create_mock_item("Chapter 3", 0), @@ -120,7 +120,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): create_mock_item("Chapter 9", 0), create_mock_item("Chapter 10", 0), ], - [ + [ # 2 create_mock_item("Chapter 01", 0), create_mock_item("Chapter 02", 0), create_mock_item("Chapter 03", 0), @@ -132,7 +132,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): create_mock_item("Chapter 09", 0), create_mock_item("Chapter 10", 0), ], - [ + [ # 3 create_mock_item("Chapter - 01", 0), create_mock_item("Chapter - 02", 0), create_mock_item("Chapter - 03", 0), @@ -147,7 +147,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): create_mock_item("Chapter - 12", 0), create_mock_item("Chapter - 13", 0), ], - [ + [ # 4 create_mock_item("Chapter-01", 0), create_mock_item("Chapter-02", 0), create_mock_item("Chapter-03", 0), @@ -162,7 +162,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): create_mock_item("Chapter-12", 0), create_mock_item("Chapter-13", 0), ], - [ + [ # 5 create_mock_item("Mediocre-Part01", 0), create_mock_item("Mediocre-Part02", 0), create_mock_item("Mediocre-Part03", 0), @@ -176,7 +176,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): create_mock_item("Mediocre-Part11", 0), create_mock_item("Mediocre-Part12", 0), ], - [ + [ # 6 create_mock_item("Chapter 1 The DC Sniper The Untold Story of the DC Sniper Investigation - 1.m4b", 
0), create_mock_item("Chapter 2 Terrorism The Untold Story of the DC Sniper Investigation - 1.m4b", 0), create_mock_item("Chapter 3 Brothers in the Arena The Untold Story of the DC Sniper Investigation - 1.m4b", 0), @@ -194,7 +194,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): ), create_mock_item("Chapter 14 Last Words The Untold Story of the DC Sniper Investigation - 1.m4b", 0), ], - [ + [ # 7 create_mock_item("Prologue", 1), create_mock_item("Chapter 1", 2), create_mock_item("Chapter 2", 3), @@ -209,7 +209,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): create_mock_item("End", 12), create_mock_item("Author's Note", 13), ], - [ + [ # 8 create_mock_item("01 - Zg503IhGHXyRdZeVn838pZGhvO7uwM5C", 0), create_mock_item("02 - Hf7KcfUB8rsP2AzR8DlZgx3CRXN5ABKM", 0), create_mock_item("03 - nzCOIa8IGCZtInmoxa1M3zS5FmM9F8E7", 0), @@ -231,7 +231,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): create_mock_item("19 - 97D0VmI9qgnRDPhwwkRwuL7atFCSKmNJ", 0), create_mock_item("20 - AEBmW2Dspf6czIMsF3l9u2IwmqUw826u", 0), ], - [ + [ # 9 create_mock_item("Zg503IhGHXyRdZeVn838pZGhvO7uwM5C", 1), create_mock_item("Hf7KcfUB8rsP2AzR8DlZgx3CRXN5ABKM", 2), create_mock_item("nzCOIa8IGCZtInmoxa1M3zS5FmM9F8E7", 3), From 9fc0a5474d88470e993c9d4c0221f1895cb11e08 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 21 May 2023 20:15:37 +1000 Subject: [PATCH 33/59] Fix bug with ordering of trusted source orderings --- beetsplug/audible.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 7e92ee2..b43eb99 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -261,16 +261,17 @@ def sort_tracks(self, album: AlbumInfo, items: List[Item]) -> Optional[List[Trac else: # if the source files are numbered continuously and the option is set, trust that if self.config["trust_source_numbering"]: - if is_continuous_number_series([t.track for t in sorted(items, key=lambda t: t.track)]): + 
sorted_tracks = sorted(items, key=lambda t: t.track) + if is_continuous_number_series([t.track for t in sorted_tracks]): # if the track is zero indexed, re-number them - if items[0].track != 1: + if sorted_tracks[0].track != 1: matches = [] - for i, item in enumerate(items, start=1): + for i, item in enumerate(sorted_tracks, start=1): match = item match.track = i matches.append(match) else: - matches = items + matches = sorted_tracks tracks = convert_items_to_trackinfo(matches, common_attrs) return tracks From 60d9722192dbd81bf71cbe052fbf1bf88ba73751 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 22 May 2023 15:08:03 +1000 Subject: [PATCH 34/59] Refactor methods out --- beetsplug/audible.py | 149 ++++++++++++++++++++++++++----------------- tests/conftest.py | 5 ++ 2 files changed, 96 insertions(+), 58 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index b43eb99..9f34502 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -249,70 +249,103 @@ def track_distance(self, item, track_info): dist.add_string("track_title", item.title, track_info.title) return dist - def sort_tracks(self, album: AlbumInfo, items: List[Item]) -> Optional[List[TrackInfo]]: - common_attrs = get_common_data_attributes(album.tracks[0]) - # if there's only one item, return as is + def attempt_match_trust_source_numbering(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: + if self.config["trust_source_numbering"]: + sorted_tracks = sorted(items, key=lambda t: t.track) + if is_continuous_number_series([t.track for t in sorted_tracks]): + # if the track is zero indexed, re-number them + if sorted_tracks[0].track != 1: + matches = [] + for i, item in enumerate(sorted_tracks, start=1): + match = item + match.track = i + matches.append(match) + else: + matches = sorted_tracks + return matches + + def attempt_match_starting_numbers(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: + affixes = find_regular_affixes([c.title for c 
in items]) + stripped_titles = [strip_affixes(i.title, affixes) for i in items] + + starting_numbers = [check_starts_with_number(s) for s in stripped_titles] + if all(starting_numbers) and is_continuous_number_series(sorted(starting_numbers)): + items_with_numbers = list(zip(starting_numbers, items)) + matches = sorted(items_with_numbers, key=lambda i: i[0]) + matches = [i[1] for i in matches] + return matches + + def attempt_match_natsort(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: + affixes = find_regular_affixes([c.title for c in items]) + stripped_titles = [strip_affixes(i.title, affixes) for i in items] + average_title_change = calculate_average_levenshtein_difference(stripped_titles) + # magic number here, it's a judgement call + if max(average_title_change) < 4: + # can't assume that the tracks actually match even when there are the same number of items, since lengths + # can be different e.g. an even split into n parts that aren't necessarily chapter-based so just natsort + matches = natsorted(items, key=lambda t: t.title) + return matches + + def attempt_match_chapter_strings(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: + affixes = find_regular_affixes([c.title for c in items]) + + all_remote_chapters: List = deepcopy(album.tracks) + matches = [] + for chapter in items: + # TODO: need a string distance algorithm that penalises number replacements more + best_matches = list( + sorted( + all_remote_chapters, + key=lambda c: specialised_levenshtein(chapter.title, c.title, affixes), + ) + ) + best_match = best_matches[0] + matches.append(best_match) + all_remote_chapters.remove(best_match) + return matches + + def attempt_match_single_item(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: if len(items) == 1: # Prefer a single named book from the remote source if len(album.tracks) == 1: matches = album.tracks else: matches = items - else: - # if the source files are numbered continuously and the 
option is set, trust that - if self.config["trust_source_numbering"]: - sorted_tracks = sorted(items, key=lambda t: t.track) - if is_continuous_number_series([t.track for t in sorted_tracks]): - # if the track is zero indexed, re-number them - if sorted_tracks[0].track != 1: - matches = [] - for i, item in enumerate(sorted_tracks, start=1): - match = item - match.track = i - matches.append(match) - else: - matches = sorted_tracks - tracks = convert_items_to_trackinfo(matches, common_attrs) - return tracks - - affixes = find_regular_affixes([c.title for c in items]) - stripped_titles = [strip_affixes(i.title, affixes) for i in items] - average_title_change = calculate_average_levenshtein_difference(stripped_titles) - - starting_numbers = [check_starts_with_number(s) for s in stripped_titles] - if all(starting_numbers) and is_continuous_number_series(sorted(starting_numbers)): - items_with_numbers = list(zip(starting_numbers, items)) - matches = sorted(items_with_numbers, key=lambda i: i[0]) - matches = [i[1] for i in matches] - else: - # if there are only a few track differences from each to the other, it's likely they're numbered and don't have - # otherwise unique titles, so just sort them best as possible - - # magic number here, it's a judgement call - if max(average_title_change) < 4: - # can't assume that the tracks actually match even when there are the same number of items, since lengths - # can be different e.g. 
an even split into n parts that aren't necessarily chapter-based so just natsort - matches = natsorted(items, key=lambda t: t.title) - else: - if len(items) > len(album.tracks): - # TODO: find a better way to handle this - # right now just reject this match - return None - all_remote_chapters: List = deepcopy(album.tracks) - matches = [] - for chapter in items: - # TODO: need a string distance algorithm that penalises number replacements more - best_matches = list( - sorted( - all_remote_chapters, - key=lambda c: specialised_levenshtein(chapter.title, c.title, affixes), - ) - ) - best_match = best_matches[0] - matches.append(best_match) - all_remote_chapters.remove(best_match) - tracks = convert_items_to_trackinfo(matches, common_attrs) - return tracks + return matches + + def sort_tracks(self, album: AlbumInfo, items: List[Item]) -> Optional[List[TrackInfo]]: + common_attrs = get_common_data_attributes(album.tracks[0]) + # if there's only one item, return as is + matches = self.attempt_match_single_item(items, album) + if matches is not None: + tracks = convert_items_to_trackinfo(matches, common_attrs) + return tracks + # if the source files are numbered continuously and the option is set, trust that + matches = self.attempt_match_trust_source_numbering(items, album) + if matches is not None: + tracks = convert_items_to_trackinfo(matches, common_attrs) + return tracks + + matches = self.attempt_match_starting_numbers(items, album) + if matches is not None: + tracks = convert_items_to_trackinfo(matches, common_attrs) + return tracks + + # if there are only a few track differences from each to the other, it's likely they're numbered and don't have + # otherwise unique titles, so just sort them best as possible + matches = self.attempt_match_natsort(items, album) + if matches is not None: + tracks = convert_items_to_trackinfo(matches, common_attrs) + return tracks + + if len(items) > len(album.tracks): + # TODO: find a better way to handle this + # right now just 
reject this match + return None + matches = self.attempt_match_chapter_strings(album, items, matches) + if matches is not None: + tracks = convert_items_to_trackinfo(matches, common_attrs) + return tracks def candidates(self, items, artist, album, va_likely, extra_tags=None): """Returns a list of AlbumInfo objects for Audible search results diff --git a/tests/conftest.py b/tests/conftest.py index f994638..739ffb6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,6 +9,11 @@ def mock_audible_plugin() -> MagicMock: out = MagicMock() out.sort_tracks = Audible.sort_tracks + out.attempt_match_chapter_strings = lambda x, y: Audible.attempt_match_chapter_strings(out, x, y) + out.attempt_match_natsort = lambda x, y: Audible.attempt_match_natsort(out, x, y) + out.attempt_match_starting_numbers = lambda x, y: Audible.attempt_match_starting_numbers(out, x, y) + out.attempt_match_trust_source_numbering = lambda x, y: Audible.attempt_match_trust_source_numbering(out, x, y) + out.attempt_match_single_item = lambda x, y: Audible.attempt_match_single_item(out, x, y) out.config = { "fetch_art": True, "match_chapters": True, From a25e599af3ba2aa1acbc98cc348b0b51f7aa0c33 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 22 May 2023 16:12:40 +1000 Subject: [PATCH 35/59] Rename function --- beetsplug/audible.py | 2 +- tests/conftest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 9f34502..ad16697 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -286,7 +286,7 @@ def attempt_match_natsort(self, items: List[Item], album: AlbumInfo) -> Optional matches = natsorted(items, key=lambda t: t.title) return matches - def attempt_match_chapter_strings(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: + def attempt_match_chapter_levenshtein(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: affixes = find_regular_affixes([c.title for c in items]) 
all_remote_chapters: List = deepcopy(album.tracks) diff --git a/tests/conftest.py b/tests/conftest.py index 739ffb6..0ecaa08 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,7 +9,7 @@ def mock_audible_plugin() -> MagicMock: out = MagicMock() out.sort_tracks = Audible.sort_tracks - out.attempt_match_chapter_strings = lambda x, y: Audible.attempt_match_chapter_strings(out, x, y) + out.attempt_match_chapter_levenshtein = lambda x, y: Audible.attempt_match_chapter_levenshtein(out, x, y) out.attempt_match_natsort = lambda x, y: Audible.attempt_match_natsort(out, x, y) out.attempt_match_starting_numbers = lambda x, y: Audible.attempt_match_starting_numbers(out, x, y) out.attempt_match_trust_source_numbering = lambda x, y: Audible.attempt_match_trust_source_numbering(out, x, y) From 85c173299e1b3adbab7a5f6320637b93a34b76e8 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 22 May 2023 16:21:27 +1000 Subject: [PATCH 36/59] Add comments for methods --- beetsplug/audible.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index ad16697..32852d9 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -250,6 +250,8 @@ def track_distance(self, item, track_info): return dist def attempt_match_trust_source_numbering(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: + """If the input album is numbered and the number range is contiguous (doesn't skip any numbers), then trust + that and start the index from 1 if it's not already.""" if self.config["trust_source_numbering"]: sorted_tracks = sorted(items, key=lambda t: t.track) if is_continuous_number_series([t.track for t in sorted_tracks]): @@ -265,6 +267,7 @@ def attempt_match_trust_source_numbering(self, items: List[Item], album: AlbumIn return matches def attempt_match_starting_numbers(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: + """Order tracks based on a starting number in the track name.""" affixes = 
find_regular_affixes([c.title for c in items]) stripped_titles = [strip_affixes(i.title, affixes) for i in items] @@ -276,6 +279,7 @@ def attempt_match_starting_numbers(self, items: List[Item], album: AlbumInfo) -> return matches def attempt_match_natsort(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: + """Use a natural sort on the input tracks to order them like a person would i.e. 10 is after 9, not 2.""" affixes = find_regular_affixes([c.title for c in items]) stripped_titles = [strip_affixes(i.title, affixes) for i in items] average_title_change = calculate_average_levenshtein_difference(stripped_titles) @@ -287,6 +291,10 @@ def attempt_match_natsort(self, items: List[Item], album: AlbumInfo) -> Optional return matches def attempt_match_chapter_levenshtein(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: + """For every chapter in the input album, calculate the Levenshtein difference between every item in the online + album and match each input track to the closest online track.""" + # Warning, this method is rather messy, and it's easy for this to go wrong. 
+ # This should be used as a last resort affixes = find_regular_affixes([c.title for c in items]) all_remote_chapters: List = deepcopy(album.tracks) @@ -305,6 +313,7 @@ def attempt_match_chapter_levenshtein(self, items: List[Item], album: AlbumInfo) return matches def attempt_match_single_item(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: + """If the input album has a single item, use that; if the album also has a single item, prefer that.""" if len(items) == 1: # Prefer a single named book from the remote source if len(album.tracks) == 1: From 70afac8bbf7f47c4768e03e4b36719f2ccb4709f Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 22 May 2023 16:31:33 +1000 Subject: [PATCH 37/59] Rename readme to follow convention --- readme.md => README.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename readme.md => README.md (100%) diff --git a/readme.md b/README.md similarity index 100% rename from readme.md rename to README.md From 11317e5d7fc75de0a37546b35cb2751aff3c9d0d Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 22 May 2023 16:39:37 +1000 Subject: [PATCH 38/59] Re-indent YAML --- README.md | 84 +++++++++++++++++++++++++++---------------------------- 1 file changed, 41 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 445bca6..96a1150 100644 --- a/README.md +++ b/README.md @@ -17,49 +17,47 @@ This Beets plugin solves both problems. 1. Install via pip: `pip install beets-audible beets-copyartifacts3` (copyartifacts is optional but recommended). See the next section instead if you're running Beets in Docker (highly recommended as it makes it easier to maintain a separate Beets installation dedicated to audiobooks). 2. Use a separate beets config and database for managing audiobooks. 
This is the recommended Beets config for this plugin: - ```yaml - # add audible to the list of plugins - # copyartifacts is optional but recommended if you're manually specifying metadata via metadata.yml, see the "Importing non-audible content" section - # also add the "web" plugin if using the docker image - plugins: audible copyartifacts edit fromfilename scrub - - directory: /audiobooks - - # Place books in their own folders to be compatible with Booksonic and Audiobookshelf servers - paths: - # For books that belong to a series - "albumtype:audiobook series_name::.+ series_position::.+": $albumartist/%ifdef{series_name}/%ifdef{series_position} - $album%aunique{}/$track - $title - "albumtype:audiobook series_name::.+": $albumartist/%ifdef{series_name}/$album%aunique{}/$track - $title - # Stand-alone books - "albumtype:audiobook": $albumartist/$album%aunique{}/$track - $title - default: $albumartist/$album%aunique{}/$track - $title - singleton: Non-Album/$artist - $title - comp: Compilations/$album%aunique{}/$track - $title - albumtype_soundtrack: Soundtracks/$album/$track $title - - # disables musicbrainz lookup, as it doesn't help for audiobooks - musicbrainz: - host: localhost:5123 - - audible: - # if the number of files in the book is the same as the number of chapters from Audible, - # attempt to match each file to an audible chapter - match_chapters: true - source_weight: 0.0 # disable the source_weight penalty - fetch_art: true # whether to retrieve cover art - include_narrator_in_artists: true # include author and narrator in artist tag. Or just author - keep_series_reference_in_title: true # set to false to remove ", Book X" from end of titles - keep_series_reference_in_subtitle: true # set to false to remove subtitle if it contains the series name and the word book ex. 
"Book 1 in Great Series", "Great Series, Book 1" - - write_description_file: true # output desc.txt - write_reader_file: true # output reader.txt - - copyartifacts: - extensions: .yml # so that metadata.yml is copied, see below - - scrub: - auto: yes # optional, enabling this is personal preference - ``` +```yaml +# add audible to the list of plugins +# copyartifacts is optional but recommended if you're manually specifying metadata via metadata.yml, see the "Importing non-audible content" section +plugins: audible copyartifacts edit fromfilename scrub + +directory: /audiobooks + +# Place books in their own folders to be compatible with Booksonic and Audiobookshelf servers +paths: + # For books that belong to a series + "albumtype:audiobook series_name::.+ series_position::.+": $albumartist/%ifdef{series_name}/%ifdef{series_position} - $album%aunique{}/$track - $title + "albumtype:audiobook series_name::.+": $albumartist/%ifdef{series_name}/$album%aunique{}/$track - $title + # Stand-alone books + "albumtype:audiobook": $albumartist/$album%aunique{}/$track - $title + default: $albumartist/$album%aunique{}/$track - $title + singleton: Non-Album/$artist - $title + comp: Compilations/$album%aunique{}/$track - $title + albumtype_soundtrack: Soundtracks/$album/$track $title + +# disables musicbrainz lookup, as it doesn't help for audiobooks +musicbrainz: +enabled: no + +audible: + # if the number of files in the book is the same as the number of chapters from Audible, + # attempt to match each file to an audible chapter + match_chapters: true + source_weight: 0.0 # disable the source_weight penalty + fetch_art: true # whether to retrieve cover art + include_narrator_in_artists: true # include author and narrator in artist tag. Or just author + keep_series_reference_in_title: true # set to false to remove ", Book X" from end of titles + keep_series_reference_in_subtitle: true # set to false to remove subtitle if it contains the series name and the word book ex. 
"Book 1 in Great Series", "Great Series, Book 1" + write_description_file: true # output desc.txt + write_reader_file: true # output reader.txt + +copyartifacts: + extensions: .yml # so that metadata.yml is copied, see below + +scrub: + auto: yes # optional, enabling this is personal preference +``` 3. Run the `beet --version` command and verify that the audible plugin appears in the list of plugins. From ac55b38bd617a4cb2c481a0d59c8be62d387cdb5 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 22 May 2023 16:56:47 +1000 Subject: [PATCH 39/59] Switch to using configuration to determine chapter algorithm --- README.md | 6 ++++ beetsplug/audible.py | 86 ++++++++++++++++++++++---------------------- tests/conftest.py | 8 ++++- 3 files changed, 55 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 96a1150..0688a2a 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,12 @@ audible: keep_series_reference_in_subtitle: true # set to false to remove subtitle if it contains the series name and the word book ex. 
"Book 1 in Great Series", "Great Series, Book 1" write_description_file: true # output desc.txt write_reader_file: true # output reader.txt + chapter_matching_algorithms: + single_file + source_numbering + starting_numbers + natural_sort + chapter_levenshtein copyartifacts: extensions: .yml # so that metadata.yml is copied, see below diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 32852d9..e644c96 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -160,6 +160,13 @@ def __init__(self): self.config.add( { + "chapter_matching_algorithms": [ + "single_file", + "source_numbering", + "starting_numbers", + "natural_sort", + "chapter_levenshtein", + ], "fetch_art": True, "match_chapters": True, "source_weight": 0.0, @@ -252,19 +259,18 @@ def track_distance(self, item, track_info): def attempt_match_trust_source_numbering(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: """If the input album is numbered and the number range is contiguous (doesn't skip any numbers), then trust that and start the index from 1 if it's not already.""" - if self.config["trust_source_numbering"]: - sorted_tracks = sorted(items, key=lambda t: t.track) - if is_continuous_number_series([t.track for t in sorted_tracks]): - # if the track is zero indexed, re-number them - if sorted_tracks[0].track != 1: - matches = [] - for i, item in enumerate(sorted_tracks, start=1): - match = item - match.track = i - matches.append(match) - else: - matches = sorted_tracks - return matches + sorted_tracks = sorted(items, key=lambda t: t.track) + if is_continuous_number_series([t.track for t in sorted_tracks]): + # if the track is zero indexed, re-number them + if sorted_tracks[0].track != 1: + matches = [] + for i, item in enumerate(sorted_tracks, start=1): + match = item + match.track = i + matches.append(match) + else: + matches = sorted_tracks + return matches def attempt_match_starting_numbers(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: 
"""Order tracks based on a starting number in the track name.""" @@ -324,37 +330,29 @@ def attempt_match_single_item(self, items: List[Item], album: AlbumInfo) -> Opti def sort_tracks(self, album: AlbumInfo, items: List[Item]) -> Optional[List[TrackInfo]]: common_attrs = get_common_data_attributes(album.tracks[0]) - # if there's only one item, return as is - matches = self.attempt_match_single_item(items, album) - if matches is not None: - tracks = convert_items_to_trackinfo(matches, common_attrs) - return tracks - # if the source files are numbered continuously and the option is set, trust that - matches = self.attempt_match_trust_source_numbering(items, album) - if matches is not None: - tracks = convert_items_to_trackinfo(matches, common_attrs) - return tracks - - matches = self.attempt_match_starting_numbers(items, album) - if matches is not None: - tracks = convert_items_to_trackinfo(matches, common_attrs) - return tracks - - # if there are only a few track differences from each to the other, it's likely they're numbered and don't have - # otherwise unique titles, so just sort them best as possible - matches = self.attempt_match_natsort(items, album) - if matches is not None: - tracks = convert_items_to_trackinfo(matches, common_attrs) - return tracks - - if len(items) > len(album.tracks): - # TODO: find a better way to handle this - # right now just reject this match - return None - matches = self.attempt_match_chapter_strings(album, items, matches) - if matches is not None: - tracks = convert_items_to_trackinfo(matches, common_attrs) - return tracks + + # this is the master list of different approaches + # must be updated for any additional options added in the future + possible_matching_algorithms = { + "single_file": self.attempt_match_single_item, + "source_numbering": self.attempt_match_trust_source_numbering, + "starting_numbers": self.attempt_match_starting_numbers, + "natural_sort": self.attempt_match_natsort, + "chapter_levenshtein": 
self.attempt_match_chapter_levenshtein, + } + for algorithm_choice in self.config["chapter_matching_algorithms"]: + if algorithm_choice not in possible_matching_algorithms.keys(): + self._log.error(f"'{algorithm_choice}' is not a valid algorithm choice for chapter matching") + return + function = possible_matching_algorithms[algorithm_choice] + matches = function(items, album) + if matches is not None: + tracks = convert_items_to_trackinfo(matches, common_attrs) + return tracks + # if len(items) > len(album.tracks): + # # TODO: find a better way to handle this + # # right now just reject this match + # return None def candidates(self, items, artist, album, va_likely, extra_tags=None): """Returns a list of AlbumInfo objects for Audible search results diff --git a/tests/conftest.py b/tests/conftest.py index 0ecaa08..9306bba 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,11 +15,17 @@ def mock_audible_plugin() -> MagicMock: out.attempt_match_trust_source_numbering = lambda x, y: Audible.attempt_match_trust_source_numbering(out, x, y) out.attempt_match_single_item = lambda x, y: Audible.attempt_match_single_item(out, x, y) out.config = { + "chapter_matching_algorithms": [ + "single_file", + "source_numbering", + "starting_numbers", + "natural_sort", + "chapter_levenshtein", + ], "fetch_art": True, "match_chapters": True, "source_weight": 0.0, "write_description_file": True, - "trust_source_numbering": True, "write_reader_file": True, "include_narrator_in_artists": True, "goodreads_apikey": None, From 41a3c02471a5e65957921aa2426cc7c4af2f06e9 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 22 May 2023 17:39:27 +1000 Subject: [PATCH 40/59] Update pyproject with new README name --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a1970c5..2327db5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "Beets plugin for audiobook management" authors = ["Dickson 
Tan "] keywords = ["beets", "audible"] license = "MIT" -readme = "readme.md" +readme = "README.md" repository = "https://github.com/Neurrone/beets-audible" packages = [{ include = "beetsplug" }] From 901cb29aa3c1f67f857fbdc6c308c10a9ad4c726 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Mon, 22 May 2023 19:43:25 +1000 Subject: [PATCH 41/59] Continue on wrong algorithm specification --- beetsplug/audible.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index e644c96..e643c97 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -343,7 +343,7 @@ def sort_tracks(self, album: AlbumInfo, items: List[Item]) -> Optional[List[Trac for algorithm_choice in self.config["chapter_matching_algorithms"]: if algorithm_choice not in possible_matching_algorithms.keys(): self._log.error(f"'{algorithm_choice}' is not a valid algorithm choice for chapter matching") - return + continue function = possible_matching_algorithms[algorithm_choice] matches = function(items, album) if matches is not None: From 191aba85128dcd2a71b20812bcce6b653dbe1a39 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Tue, 23 May 2023 22:39:30 +1000 Subject: [PATCH 42/59] Fix bug with options --- beetsplug/audible.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index e643c97..b9c2add 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -341,8 +341,11 @@ def sort_tracks(self, album: AlbumInfo, items: List[Item]) -> Optional[List[Trac "chapter_levenshtein": self.attempt_match_chapter_levenshtein, } for algorithm_choice in self.config["chapter_matching_algorithms"]: - if algorithm_choice not in possible_matching_algorithms.keys(): - self._log.error(f"'{algorithm_choice}' is not a valid algorithm choice for chapter matching") + algorithm_choice = str(algorithm_choice) + if algorithm_choice not in possible_matching_algorithms: + self._log.error( + 
f"'{algorithm_choice}' is not a valid algorithm choice for chapter matching; there are {len(possible_matching_algorithms.keys())}" + ) continue function = possible_matching_algorithms[algorithm_choice] matches = function(items, album) From d0588107ed40894b4f92f5c2e399a85cfaaf2295 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Wed, 31 May 2023 17:55:34 +1000 Subject: [PATCH 43/59] Refactor out methods --- tests/test_audible.py | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/tests/test_audible.py b/tests/test_audible.py index 9f4aa95..5998394 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -39,28 +39,48 @@ def generate_random_string(n: int) -> str: return "".join(random.choice(chars) for _ in range(n)) +def generate_test_album_random_string_smaller_len(chapter_list): + mock_4 = MagicMock() + mock_4.tracks = [generate_random_string(20) for _ in range(1, (len(chapter_list) // 2) + 1)] + return mock_4 + + +def generate_test_album_random_string_greater_len(chapter_list): + mock_3 = MagicMock() + mock_3.tracks = [generate_random_string(20) for _ in range(1, len(chapter_list) + 10)] + return mock_3 + + +def generate_test_album_random_string_same_len(chapter_list): + mock_2 = MagicMock() + mock_2.tracks = [generate_random_string(20) for _ in chapter_list] + return mock_2 + + +def generate_test_album_same(chapter_list): + mock_1 = MagicMock() + mock_1.tracks = chapter_list + return mock_1 + + def generate_fixture_albums(chapter_list: List[MagicMock]) -> List[MagicMock]: out = [] # this mock just the chapters - mock_1 = MagicMock() - mock_1.tracks = chapter_list + mock_1 = generate_test_album_same(chapter_list) out.append(mock_1) # this mock is a random set of strings of the same number of chapters - mock_2 = MagicMock() - mock_2.tracks = [generate_random_string(20) for _ in chapter_list] + mock_2 = generate_test_album_random_string_same_len(chapter_list) out.append(mock_2) # this mock is a random set 
of strings that is more than the given chapters - mock_3 = MagicMock() - mock_3.tracks = [generate_random_string(20) for _ in range(1, len(chapter_list) + 10)] + mock_3 = generate_test_album_random_string_greater_len(chapter_list) out.append(mock_3) if len(chapter_list) > 1: # this mock is a random set of strings that is less than the given chapters - mock_4 = MagicMock() - mock_4.tracks = [generate_random_string(20) for _ in range(1, (len(chapter_list) // 2) + 1)] + mock_4 = generate_test_album_random_string_smaller_len(chapter_list) out.append(mock_4) return out From d60d5249686e8745a227d1a65dacd5151f8a330d Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Wed, 31 May 2023 18:38:32 +1000 Subject: [PATCH 44/59] Add more tests --- beetsplug/audible.py | 2 +- tests/conftest.py | 2 +- tests/test_audible.py | 32 ++++++++++++++++++++++++++++++-- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index b9c2add..c8baf56 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -117,7 +117,7 @@ def strip_affixes(token: str, affixes: Tuple[str, str]) -> str: def check_starts_with_number(string: str) -> Optional[int]: - pattern = re.compile(r"^(\d+)[ -_]") + pattern = re.compile(r"^(\d+)[ -_]?") result = pattern.match(string) if result: try: diff --git a/tests/conftest.py b/tests/conftest.py index 9306bba..80b2690 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,7 @@ from beetsplug.audible import Audible -@pytest.fixture(scope="session") +@pytest.fixture(scope="function") def mock_audible_plugin() -> MagicMock: out = MagicMock() out.sort_tracks = Audible.sort_tracks diff --git a/tests/test_audible.py b/tests/test_audible.py index 5998394..05c0cb5 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -2,7 +2,7 @@ import string from copy import deepcopy from pathlib import Path -from typing import Iterable, List, Optional, Sequence, Tuple +from typing import Callable, Iterable, List, 
Optional, Sequence, Tuple from unittest.mock import MagicMock, patch import pytest @@ -322,6 +322,31 @@ def test_sort_items_randomised( assert all([str(result[i]) == str(e) for i, e in enumerate(correct)]) +@pytest.mark.parametrize( + ("test_chapter_list", "test_algorithm_choice", "album_generation_function"), + ( + (all_chapter_lists[0], "starting_numbers", generate_test_album_same), + (all_chapter_lists[0], "natural_sort", generate_test_album_same), + (all_chapter_lists[0], "chapter_levenshtein", generate_test_album_same), + (all_chapter_lists[0], "chapter_levenshtein", generate_test_album_same), + (all_chapter_lists[7], "source_numbering", generate_test_album_same), + ), +) +def test_sort_specific_method( + mock_audible_plugin: MagicMock, + test_chapter_list: List, + test_algorithm_choice: str, + album_generation_function: Callable, +): + test_album = album_generation_function(deepcopy(test_chapter_list)) + correct = deepcopy(test_chapter_list) + mock_audible_plugin.config["chapter_matching_algorithms"] = [ + test_algorithm_choice, + ] + result = sort_tracks_for_test(test_chapter_list, mock_audible_plugin, test_album) + assert all([str(result[i]) == str(e) for i, e in enumerate(correct)]) + + @pytest.mark.online @pytest.mark.parametrize( ("test_audiobook_id", "test_items", "expected_items"), @@ -654,7 +679,10 @@ def test_is_continuous_number_series(test_numbers: Iterable[Optional[int]], expe ("100_test", 100), ("01 - test", 1), ("test", None), - ("0t1 test", None), + ("0t1 test", 0), + ("01", 1), + ("001", 1), + ("012", 12), ), ) def test_check_starts_with_number(test_string: str, expected: Optional[int]): From a338dcd46431f6cf9c92d52cdf1d84f9806085d0 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Wed, 31 May 2023 18:47:42 +1000 Subject: [PATCH 45/59] Fix bug with zero indexed tracks --- beetsplug/audible.py | 2 +- tests/test_audible.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/beetsplug/audible.py 
b/beetsplug/audible.py index c8baf56..b5a1cd1 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -278,7 +278,7 @@ def attempt_match_starting_numbers(self, items: List[Item], album: AlbumInfo) -> stripped_titles = [strip_affixes(i.title, affixes) for i in items] starting_numbers = [check_starts_with_number(s) for s in stripped_titles] - if all(starting_numbers) and is_continuous_number_series(sorted(starting_numbers)): + if all([s is not None for s in starting_numbers]) and is_continuous_number_series(sorted(starting_numbers)): items_with_numbers = list(zip(starting_numbers, items)) matches = sorted(items_with_numbers, key=lambda i: i[0]) matches = [i[1] for i in matches] diff --git a/tests/test_audible.py b/tests/test_audible.py index 05c0cb5..c31707d 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -273,6 +273,34 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): create_mock_item("97D0VmI9qgnRDPhwwkRwuL7atFCSKmNJ", 19), create_mock_item("AEBmW2Dspf6czIMsF3l9u2IwmqUw826u", 20), ], + [ # 10 + create_mock_item("00 Part One.mp3"), + create_mock_item("01.mp3"), + create_mock_item("02.mp3"), + create_mock_item("03.mp3"), + create_mock_item("04.mp3"), + create_mock_item("05.mp3"), + create_mock_item("06 Part Two.mp3"), + create_mock_item("07.mp3"), + create_mock_item("08.mp3"), + create_mock_item("09.mp3"), + create_mock_item("10.mp3"), + create_mock_item("11 Part Three.mp3"), + create_mock_item("12.mp3"), + create_mock_item("13.mp3"), + create_mock_item("14.mp3"), + create_mock_item("15.mp3"), + create_mock_item("16 Part Four.mp3"), + create_mock_item("17.mp3"), + create_mock_item("18.mp3"), + create_mock_item("19.mp3"), + create_mock_item("20.mp3"), + create_mock_item("21 Part Five.mp3"), + create_mock_item("22.mp3"), + create_mock_item("23.mp3"), + create_mock_item("24.mp3"), + create_mock_item("25.mp3"), + ], # haven't thought of logic that would allow for this type of thing. 
honestly it might be impossible if there's no # online reference that exactly matches # [ From 71680fc376bbadbce03e30b6a1288eb16eb46af7 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Wed, 31 May 2023 19:30:03 +1000 Subject: [PATCH 46/59] Catch error in levenshtein function --- beetsplug/audible.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index b5a1cd1..61957b9 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -313,7 +313,10 @@ def attempt_match_chapter_levenshtein(self, items: List[Item], album: AlbumInfo) key=lambda c: specialised_levenshtein(chapter.title, c.title, affixes), ) ) - best_match = best_matches[0] + try: + best_match = best_matches[0] + except IndexError: + return None matches.append(best_match) all_remote_chapters.remove(best_match) return matches From 37a9598e40445a38f8d23b3c3635f1b3baa86144 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Thu, 1 Jun 2023 14:23:15 +1000 Subject: [PATCH 47/59] Update README with information --- README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/README.md b/README.md index 0688a2a..d0be5d9 100644 --- a/README.md +++ b/README.md @@ -207,6 +207,28 @@ George Orwell/ Desc.txt and reader.txt contain the book description and narrator populated from Audible. +## Chapter Matching Algorithms + +There are a number of different ways to try and match chapters that appear in audiobooks. These fit the vast majority of cases that can occur with the files of any audiobooks but the process is entirely customisable. There are a number of algorithms, approaches to do this, that are included in this plugin. However if you encounter a situation that these algorithms don't cover, submit it as a bug report so it can be seen and added to the test cases. + +Below are descriptions of the different approaches. 
+ +- `single_file` + - If the audiobook consists of a single file, then the chapters will simply be the file itself, or the name of the chapters from online if there is only one match returned. +- `source_numbering` + - If the metadata for the chapters already contains an order that is continuous, then this will be trusted and used as the ordering. +- `starting_numbers` + - If the files start with a consistent series of numbers (with or without a consistent prefix such as 'Chapter') and those numbers are contiguous, then those will be used. +- `natural_sort` + - If the files have only a little difference between them, then they will be sorted as a person sorts them. +- `chapter_levenshtein` + - The distance between the names of the chapters and the online data is computed and each chapter is matched with the closest online version. + - This method should be last as it is the most variable and least likely to work, but is also the only one that rewrites the chapters' data. + +Each of these is included in the configuration file. Reordering the lines in the configuration file will change the order in which they are used. Removing a line will prevent that algorithm from being used entirely. This is the way it is possible to customise the plugin to fit your library, or even a specific audiobook. + +Note that only the Levenshtein approach changes the names of the chapters. At the most the indexes will be changed with the remainder of the algorithms, due to the approaches they take. This is assuming that it is more important for the chapters to be in the right order, and for the book itself to have the right metadata as a whole, rather than the chapters having the same title as the tracks on Audible.
+ ## Tags Written The plugin writes the following tags: From b6c83b668bf0f5b0f484c3711fa46f93f8e803e9 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Thu, 1 Jun 2023 14:25:32 +1000 Subject: [PATCH 48/59] Reformat table --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d0be5d9..19075ef 100644 --- a/README.md +++ b/README.md @@ -234,7 +234,7 @@ Note that only the Levenshtein approach changes the names of the chapters. At th The plugin writes the following tags: | ID3 Tag | Audible.com Value | -| ---------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- | +|------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------| | `TIT1` (CONTENTGROUP) | Series, Book # | | `TALB` (ALBUM) | Title | | `TIT3` (SUBTITLE) | Subtitle | From 678d1435a1e59bf6cb882fa3ef22b001f0430360 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Thu, 1 Jun 2023 14:26:57 +1000 Subject: [PATCH 49/59] Remove old test case --- tests/test_audible.py | 39 --------------------------------------- 1 file changed, 39 deletions(-) diff --git a/tests/test_audible.py b/tests/test_audible.py index c31707d..146062b 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -384,45 +384,6 @@ def test_sort_specific_method( (create_mock_item("Kleptopia: How Dirty Money Is Conquering the World"),), ("Kleptopia: How Dirty Money Is Conquering the World",), ), - ( - "B07XTN4FTJ", - ( - create_mock_item("Chapter 1 The DC Sniper The Untold Story of the DC Sniper Investigation - 1.m4b", 0), - create_mock_item("Chapter 2 Terrorism The Untold Story of the DC Sniper Investigation - 1.m4b", 0), - create_mock_item( - "Chapter 3 Brothers in the Arena The Untold Story of the DC Sniper Investigation - 1.m4b", 0 - ), - 
create_mock_item("Chapter 4 Call Me God The Untold Story of the DC Sniper Investigation - 1.m4b", 0), - create_mock_item("Chapter 5 Close to Home The Untold Story of the DC Sniper Investigation - 1.m4b", 0), - create_mock_item("Chapter 6 A Local Case The Untold Story of the DC Sniper Investigation - 1.m4b", 0), - create_mock_item("Chapter 7 Demands The Untold Story of the DC Sniper Investigation - 1.m4b", 0), - create_mock_item("Chapter 8 The Profile The Untold Story of the DC Sniper Investigation - 1.m4b", 0), - create_mock_item("Chapter 9 Suspects The Untold Story of the DC Sniper Investigation - 1.m4b", 0), - create_mock_item("Chapter 10 Prelude The Untold Story of the DC Sniper Investigation - 1.m4b", 0), - create_mock_item("Chapter 11 The Arrest The Untold Story of the DC Sniper Investigation - 1.m4b", 0), - create_mock_item("Chapter 12 Revenge The Untold Story of the DC Sniper Investigation - 1.m4b", 0), - create_mock_item( - "Chapter 13 The Trials of a Teenager The Untold Story of the DC Sniper Investigation - 1.m4b", 0 - ), - create_mock_item("Chapter 14 Last Words The Untold Story of the DC Sniper Investigation - 1.m4b", 0), - ), - ( - "Chapter 1: The DC Sniper", - "Chapter 2: Terrorism", - "Chapter 3: Brothers in the Arena", - "Chapter 4: Call Me God", - "Chapter 5: Close to Home", - "Chapter 6: A Local Case", - "Chapter 7: Demands", - "Chapter 8: The Profile", - "Chapter 9: Suspects", - "Chapter 10: Prelude", - "Chapter 11: The Arrest", - "Chapter 12: Revenge", - "Chapter 13: The Trials of a Teenager", - "Chapter 14: Last Words", - ), - ), ( "B005CJAB5S", ( From 6611bfc2fe2ec0c8a950589fd035bf7df43b2cf5 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Thu, 1 Jun 2023 14:33:19 +1000 Subject: [PATCH 50/59] Fix indent --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 19075ef..cccdebf 100644 --- a/README.md +++ b/README.md @@ -51,12 +51,12 @@ audible: keep_series_reference_in_subtitle: true 
# set to false to remove subtitle if it contains the series name and the word book ex. "Book 1 in Great Series", "Great Series, Book 1" write_description_file: true # output desc.txt write_reader_file: true # output reader.txt - chapter_matching_algorithms: - single_file - source_numbering - starting_numbers - natural_sort - chapter_levenshtein + chapter_matching_algorithms: + - single_file + - source_numbering + - starting_numbers + - natural_sort + - chapter_levenshtein copyartifacts: extensions: .yml # so that metadata.yml is copied, see below From 9938c810fe837bc8809516952c133423aa9ba184 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Thu, 1 Jun 2023 14:40:33 +1000 Subject: [PATCH 51/59] Reformat according to black --- beetsplug/audible.py | 17 +++++++++++------ tests/test_api.py | 10 +++++++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 61957b9..653af2f 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -540,7 +540,7 @@ def get_album_info(self, asin): series_position = series.position title_cruft = f", Book {series_position}" - if not self.config['keep_series_reference_in_title'] and title.endswith(title_cruft): + if not self.config["keep_series_reference_in_title"] and title.endswith(title_cruft): # check if ', Book X' is in title, remove it self._log.debug(f"Title contains '{title_cruft}'. 
Removing it.") title = title.removesuffix(title_cruft) @@ -552,11 +552,16 @@ def get_album_info(self, asin): album_sort = f"{series_name} - {title}" content_group_description = None - #clean up subtitle - if not self.config['keep_series_reference_in_subtitle'] and subtitle and series_name.lower() in subtitle.lower() and 'book' in subtitle.lower(): - #subtitle contains both the series name and the word "book" - #so it is likely just "Series, Book X" or "Book X in Series" - #don't include subtitle + # clean up subtitle + if ( + not self.config["keep_series_reference_in_subtitle"] + and subtitle + and series_name.lower() in subtitle.lower() + and "book" in subtitle.lower() + ): + # subtitle contains both the series name and the word "book" + # so it is likely just "Series, Book X" or "Book X in Series" + # don't include subtitle subtitle = None self._log.debug(f"Subtitle of '{subtitle}' is mostly just the series name. Removing it.") diff --git a/tests/test_api.py b/tests/test_api.py index 064e82a..0972320 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -32,7 +32,15 @@ def test_call_audnex_for_book_info(test_asin: str, expected_dicts: Tuple[Dict, D assert all([expected_dicts[1].get(k) == result[1][k] for k in expected_dicts[1].keys()]) -@pytest.mark.parametrize("test_asin", ("1529353823", "B00KDKSKFO", "1529063094", "B0B3PL1HQL")) +@pytest.mark.parametrize( + "test_asin", + ( + "1529353823", + "B00KDKSKFO", + "1529063094", + "B0B3PL1HQL", + ), +) def test_get_book_info(test_asin: str): # Just checking to make sure that there are no exceptions thrown _, _ = api.get_book_info(test_asin) From f6b6f99e2a989a886e2fde74b52b9a9c9bdbaf09 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Thu, 1 Jun 2023 14:41:21 +1000 Subject: [PATCH 52/59] Remove old tests --- tests/test_api.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 0972320..b02721f 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -36,9 +36,7 @@ def 
test_call_audnex_for_book_info(test_asin: str, expected_dicts: Tuple[Dict, D "test_asin", ( "1529353823", - "B00KDKSKFO", "1529063094", - "B0B3PL1HQL", ), ) def test_get_book_info(test_asin: str): From b4dbd78467de31fc3022546a2139f1bcea6e65ab Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sat, 3 Jun 2023 11:50:59 +1000 Subject: [PATCH 53/59] Fix bug where a single file is named 'chapter 1' This isn't really a bug with the audible plugin, but something that audnex returns. I don't really know why but since it could be misleading this ignores those cases. --- beetsplug/audible.py | 2 +- tests/test_audible.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 653af2f..e62a830 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -325,7 +325,7 @@ def attempt_match_single_item(self, items: List[Item], album: AlbumInfo) -> Opti """If the input album has a single item, use that; if the album also has a single item, prefer that.""" if len(items) == 1: # Prefer a single named book from the remote source - if len(album.tracks) == 1: + if len(album.tracks) == 1 and album.tracks[0].title != "Chapter 1": matches = album.tracks else: matches = items diff --git a/tests/test_audible.py b/tests/test_audible.py index 146062b..46aac4b 100644 --- a/tests/test_audible.py +++ b/tests/test_audible.py @@ -567,6 +567,11 @@ def test_sort_specific_method( "9.mp3", ), ), + ( + "B077KBHSNQ", + (create_mock_item("The Tranzia Rebellion.mp3", 0),), + ("The Tranzia Rebellion.mp3",), + ), ), ) def test_audiobook_chapter_matching( From 563e83f170a71fa151602cb6cf195ffdf7e5718b Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 2 Jul 2023 18:51:16 +1000 Subject: [PATCH 54/59] Add narrator field for albums --- beetsplug/audible.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index e62a830..d01b260 100644 --- a/beetsplug/audible.py +++ 
b/beetsplug/audible.py @@ -13,7 +13,8 @@ import yaml from beets import importer, util from beets.autotag.hooks import AlbumInfo, TrackInfo -from beets.library import Item +from beets.dbcore.types import STRING +from beets.library import Album, Item from beets.plugins import BeetsPlugin from natsort import natsorted @@ -152,8 +153,15 @@ def specialised_levenshtein(token1: str, token2: str, ignored_affixes: Optional[ return total_cost +def _get_album_narrator(album: Album): + return album.items()[0]["composer"] + + class Audible(BeetsPlugin): data_source = "Audible" + album_types = { + "narrator": STRING, + } def __init__(self): super().__init__() @@ -185,6 +193,8 @@ def __init__(self): # stores paths of downloaded cover art to be used during import self.cover_art = {} + self.album_template_fields["narrator"] = _get_album_narrator + self.register_listener("write", self.on_write) self.register_listener("import_task_files", self.on_import_task_files) From 2c0fd3b621ebff5197b1dc214435aa84f9e6803b Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Sun, 2 Jul 2023 18:51:57 +1000 Subject: [PATCH 55/59] Remove old option --- beetsplug/audible.py | 1 - 1 file changed, 1 deletion(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index d01b260..4598fa1 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -184,7 +184,6 @@ def __init__(self): "keep_series_reference_in_title": True, "keep_series_reference_in_subtitle": True, "goodreads_apikey": None, - "trust_source_numbering": True, } ) self.config["goodreads_apikey"].redact = True From e133b5e92fa5b02a24b73faa2e9b3b908100a313 Mon Sep 17 00:00:00 2001 From: Serene-Arc <33189705+Serene-Arc@users.noreply.github.com> Date: Tue, 12 Sep 2023 18:22:46 +1000 Subject: [PATCH 56/59] Add requirement for testing --- test-requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test-requirements.txt b/test-requirements.txt index 6757b60..7d28dc5 100644 --- a/test-requirements.txt +++ 
b/test-requirements.txt @@ -1,3 +1,4 @@ pytest +pytest-random tox -tox-pyenv \ No newline at end of file +tox-pyenv From 925960b9d4fa752b0bee06616a485bcdf7526848 Mon Sep 17 00:00:00 2001 From: Serene-Arc <33189705+Serene-Arc@users.noreply.github.com> Date: Tue, 12 Sep 2023 18:23:06 +1000 Subject: [PATCH 57/59] Fix issue with unused parameters --- beetsplug/audible.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 4598fa1..5377b3e 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -265,9 +265,8 @@ def track_distance(self, item, track_info): dist.add_string("track_title", item.title, track_info.title) return dist - def attempt_match_trust_source_numbering(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: - """If the input album is numbered and the number range is contiguous (doesn't skip any numbers), then trust - that and start the index from 1 if it's not already.""" + def attempt_match_trust_source_numbering(self, *args): + items, _ = args sorted_tracks = sorted(items, key=lambda t: t.track) if is_continuous_number_series([t.track for t in sorted_tracks]): # if the track is zero indexed, re-number them @@ -281,8 +280,8 @@ def attempt_match_trust_source_numbering(self, items: List[Item], album: AlbumIn matches = sorted_tracks return matches - def attempt_match_starting_numbers(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: - """Order tracks based on a starting number in the track name.""" + def attempt_match_starting_numbers(self, *args): + items, _ = args affixes = find_regular_affixes([c.title for c in items]) stripped_titles = [strip_affixes(i.title, affixes) for i in items] @@ -293,8 +292,8 @@ def attempt_match_starting_numbers(self, items: List[Item], album: AlbumInfo) -> matches = [i[1] for i in matches] return matches - def attempt_match_natsort(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: 
- """Use a natural sort on the input tracks to order them like a person would i.e. 10 is after 9, not 2.""" + def attempt_match_natsort(self, *args): + items, _ = args affixes = find_regular_affixes([c.title for c in items]) stripped_titles = [strip_affixes(i.title, affixes) for i in items] average_title_change = calculate_average_levenshtein_difference(stripped_titles) @@ -305,11 +304,8 @@ def attempt_match_natsort(self, items: List[Item], album: AlbumInfo) -> Optional matches = natsorted(items, key=lambda t: t.title) return matches - def attempt_match_chapter_levenshtein(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: - """For every chapter in the input album, calculate the Levenshtein difference between every item in the online - album and match each input track to the closest online track.""" - # Warning, this method is rather messy, and it's easy for this to go wrong. - # This should be used as a last resort + def attempt_match_chapter_levenshtein(self, *args): + items, album = args affixes = find_regular_affixes([c.title for c in items]) all_remote_chapters: List = deepcopy(album.tracks) @@ -330,8 +326,8 @@ def attempt_match_chapter_levenshtein(self, items: List[Item], album: AlbumInfo) all_remote_chapters.remove(best_match) return matches - def attempt_match_single_item(self, items: List[Item], album: AlbumInfo) -> Optional[List[Item]]: - """If the input album has a single item, use that; if the album also has a single item, prefer that.""" + def attempt_match_single_item(self, *args): + items, album = args if len(items) == 1: # Prefer a single named book from the remote source if len(album.tracks) == 1 and album.tracks[0].title != "Chapter 1": From 4b9fdaf6bf746ca8ff93dd05f3bf51d952148cf4 Mon Sep 17 00:00:00 2001 From: Serene-Arc <33189705+Serene-Arc@users.noreply.github.com> Date: Tue, 26 Sep 2023 10:21:01 +1000 Subject: [PATCH 58/59] Fix album property --- beetsplug/audible.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) 
diff --git a/beetsplug/audible.py b/beetsplug/audible.py index 5377b3e..b8a0cc7 100644 --- a/beetsplug/audible.py +++ b/beetsplug/audible.py @@ -8,12 +8,12 @@ from typing import Dict, Iterable, List, Optional, Tuple import beets.autotag.hooks +import beets.dbcore.types as types import Levenshtein import mediafile import yaml from beets import importer, util from beets.autotag.hooks import AlbumInfo, TrackInfo -from beets.dbcore.types import STRING from beets.library import Album, Item from beets.plugins import BeetsPlugin from natsort import natsorted @@ -154,13 +154,13 @@ def specialised_levenshtein(token1: str, token2: str, ignored_affixes: Optional[ def _get_album_narrator(album: Album): - return album.items()[0]["composer"] + return list(album.items())[0]["composer"] class Audible(BeetsPlugin): data_source = "Audible" album_types = { - "narrator": STRING, + "narrator": types.STRING, } def __init__(self): From 5aecb02295c3f59d7b69f07822a367921abdb75b Mon Sep 17 00:00:00 2001 From: Serene-Arc <33189705+Serene-Arc@users.noreply.github.com> Date: Tue, 26 Sep 2023 10:21:54 +1000 Subject: [PATCH 59/59] Remove source weight from example doc --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index cccdebf..93e1496 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,6 @@ audible: # if the number of files in the book is the same as the number of chapters from Audible, # attempt to match each file to an audible chapter match_chapters: true - source_weight: 0.0 # disable the source_weight penalty fetch_art: true # whether to retrieve cover art include_narrator_in_artists: true # include author and narrator in artist tag. Or just author keep_series_reference_in_title: true # set to false to remove ", Book X" from end of titles