Skip to content

Commit 15c8ac3

Browse files
author
Arav K.
committed
[beets/autotag/hooks] Rework article switching in string distance
The new version doesn't rely on regular expressions, provides more intuitive names, and will probably be easier to maintain. See: <beetbox#5337 (comment)>
1 parent ed52d0b commit 15c8ac3

File tree

1 file changed

+11
-6
lines changed

1 file changed

+11
-6
lines changed

beets/autotag/hooks.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -267,8 +267,6 @@ def copy(self) -> TrackInfo:
267267
# Candidate distance scoring.
268268

269269
# Parameters for string distance function.
270-
# Words that can be moved to the end of a string using a comma.
271-
SD_END_REPLACE = re.compile(r"^(.*), (the|a|an)$")
272270
# Reduced weights for certain portions of the string.
273271
SD_PATTERNS = [
274272
(r"^the ", 0.1),
@@ -311,17 +309,24 @@ def string_dist(str1: Optional[str], str2: Optional[str]) -> float:
311309
if str1 is None or str2 is None:
312310
return 1.0
313311

312+
# Make all following comparisons case-insensitive.
314313
str1 = str1.lower()
315314
str2 = str2.lower()
316315

317316
# Don't penalize strings that move certain words to the end. For
318317
# example, "the something" should be considered equal to
319318
# "something, the".
320-
def replacer(m: re.Match[str]) -> str:
321-
return f"{m.group(2)} {m.group(1)}"
319+
def switch_article(string: str) -> str:
320+
if ", " not in string:
321+
return string
322+
[title, article] = string.rsplit(", ", maxsplit=1)
323+
if article in ["the", "a", "an"]:
324+
return f"{article} {title}"
325+
else:
326+
return string
322327

323-
str1 = re.sub(SD_END_REPLACE, replacer, str1)
324-
str2 = re.sub(SD_END_REPLACE, replacer, str2)
328+
str1 = switch_article(str1)
329+
str2 = switch_article(str2)
325330

326331
# Perform a couple of basic normalizing substitutions.
327332
for pat, repl in SD_REPLACE:

0 commit comments

Comments
 (0)