From 3d6878d7f0439f35bce727a74bbc49bc4cdf1908 Mon Sep 17 00:00:00 2001
From: OlteanuRares
Date: Tue, 3 Sep 2024 16:02:54 +0300
Subject: [PATCH] revert on line length error instead of cursor placement
---
docs/changelog.rst | 9 ++-
pycaption/scc/__init__.py | 31 +++-------
pycaption/scc/specialized_collections.py | 38 +++++-------
pycaption/scc/state_machines.py | 14 ++---
setup.py | 2 +-
tests/fixtures/dfxp.py | 42 ++++++-------
tests/fixtures/scc.py | 2 +-
tests/test_scc.py | 79 +++---------------------
8 files changed, 68 insertions(+), 149 deletions(-)
diff --git a/docs/changelog.rst b/docs/changelog.rst
index e6e0823c..400e9cdd 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,14 +1,19 @@
Changelog
---------
-2.2.12
+2.2.13
^^^^^^
- Mid-row codes only add spaces only if there isn't one before
- Mid-row codes only add spaces only if affects the text in the same row (not adding if after previous text follows break or breaks)
- Remove spaces to the end of the lines
-- CaptionLineLengthError is now raised if the cursor goes after column 32 instead of string length over 32
- Change error message for the 32 character limit.
- Close italics on receiving another style setting command.
- Throw an CaptionReadNoCaptions error in case of empty input file are provided
+- Properly add breaks (it was only for jumps to next row). Now it adds as many breaks as the difference between row numbers.
+- Ignore repositioning commands which are not followed by any text before breaks.
+
+2.2.12
+^^^^^^
+- Pinned nltk to 3.8.0
2.2.11
^^^^^^
diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py
index 0d0a1de9..b28444a0 100644
--- a/pycaption/scc/__init__.py
+++ b/pycaption/scc/__init__.py
@@ -233,11 +233,14 @@ def read(self, content, lang="en-US", simulate_roll_up=False, offset=0):
# check captions for incorrect lengths
lines_too_long = defaultdict(list)
-
for caption in self.caption_stash._collection:
- line_length = self.get_scc_line_line_length(caption.to_real_caption())
- if line_length:
- lines_too_long.update(line_length)
+ caption_start = caption.to_real_caption().format_start()
+ caption_text = "".join(caption.to_real_caption().get_text_nodes())
+ text_too_long = [line for line in caption_text.split("\n") if len(line) > 32]
+ if caption_start in lines_too_long:
+ lines_too_long[caption_start] = text_too_long
+ else:
+ lines_too_long[caption_start].extend(text_too_long)
msg = ""
if bool(lines_too_long.keys()):
@@ -248,8 +251,8 @@ def read(self, content, lang="en-US", simulate_roll_up=False, offset=0):
msg += line + f" - Length { len(line)}" + "\n"
if len(msg):
raise CaptionLineLengthError(
- f"Cursor goes over column 32 for caption cue in scc file.\n"
- f"Affected lines:\n"
+ f"32 character limit for caption cue in scc file.\n"
+ f"Lines longer than 32:\n"
f"{msg}"
)
@@ -513,22 +516,6 @@ def _pop_on(self, end=0):
pop_on_cue = self.pop_ons_queue.pop()
self.caption_stash.create_and_store(pop_on_cue.buffer, pop_on_cue.start, end)
- @staticmethod
- def get_scc_line_line_length(caption):
- long_line = defaultdict(list)
- caption_start = caption.format_start()
- text_nodes = [node for node in caption.nodes if node.type_ == CaptionNode.TEXT]
- if not text_nodes:
- return None
- start_writing_at = text_nodes[0].position[1]
- caption_text = "".join(caption.get_text_nodes())
- if start_writing_at:
- caption_text = " " * start_writing_at + caption_text
- long_line[caption_start] = [
- line for line in caption_text.split("\n") if len(line) > 32
- ]
- return long_line if len(long_line[caption_start]) else None
-
class SCCWriter(BaseWriter):
def __init__(self, *args, **kw):
diff --git a/pycaption/scc/specialized_collections.py b/pycaption/scc/specialized_collections.py
index 5fe79eef..85bad6ba 100644
--- a/pycaption/scc/specialized_collections.py
+++ b/pycaption/scc/specialized_collections.py
@@ -310,8 +310,6 @@ def __init__(self, collection=None, position_tracker=None):
self._collection = collection
self.last_style = None # can be italic on or italic off as we only support italics
- self._cursor_position = 0
-
self._position_tracer = position_tracker
def is_empty(self):
@@ -342,14 +340,17 @@ def add_chars(self, *chars):
# handle a simple line break
if self._position_tracer.is_linebreak_required():
- # must insert a line break here
- self._collection.append(
- _InstructionNode.create_break(position=current_position)
- )
+ for _ in range(self._position_tracer._breaks_required):
+ self._collection.append(
+ _InstructionNode.create_break(position=current_position)
+ )
+ self._position_tracer.acknowledge_linebreak_consumed()
node = _InstructionNode.create_text(current_position)
self._collection.append(node)
- self._cursor_position = self._position_tracer._last_column
- self._position_tracer.acknowledge_linebreak_consumed()
+ if self._position_tracer.is_repositioning_required():
+ # it means we have a reposition command which was not followed by
+ # any text, so we just ignore it and break
+ self._position_tracer.acknowledge_position_changed()
# handle completely new positioning
elif self._position_tracer.is_repositioning_required():
@@ -361,7 +362,6 @@ def add_chars(self, *chars):
self._position_tracer.acknowledge_position_changed()
node.add_chars(*chars)
- self._cursor_position += len("".join(chars))
@staticmethod
def get_style_for_command(command):
@@ -385,8 +385,6 @@ def interpret_command(self, command):
"""
self._update_positioning(command)
- text = COMMANDS.get(command, "")
-
if command == "94a1":
self.handle_backspace("94a1")
@@ -412,9 +410,10 @@ def interpret_command(self, command):
# it should open italic tag
# if break is required, break then add style tag
if self._position_tracer.is_linebreak_required():
- self._collection.append(
- _InstructionNode.create_break(position=current_position)
- )
+ for _ in range(self._position_tracer._breaks_required):
+ self._collection.append(
+ _InstructionNode.create_break(position=current_position)
+ )
self._position_tracer.acknowledge_linebreak_consumed()
self._collection.append(
_InstructionNode.create_italics_style(current_position)
@@ -433,9 +432,10 @@ def interpret_command(self, command):
)
self.last_style = "italics off"
if self._position_tracer.is_linebreak_required():
- self._collection.append(
- _InstructionNode.create_break(position=current_position)
- )
+ for _ in range(self._position_tracer._breaks_required):
+ self._collection.append(
+ _InstructionNode.create_break(position=current_position)
+ )
self._position_tracer.acknowledge_linebreak_consumed()
# handle mid-row codes that follows a text node
@@ -459,7 +459,6 @@ def interpret_command(self, command):
# need to close italics tag, add a space
# to the end of the previous text node
prev_text_node.text = prev_text_node.text + " "
- self._cursor_position += 1
def _update_positioning(self, command):
"""Sets the positioning information to use for the next nodes
@@ -470,7 +469,6 @@ def _update_positioning(self, command):
if command in PAC_TAB_OFFSET_COMMANDS:
tab_offset = PAC_TAB_OFFSET_COMMANDS[command]
positioning = (prev_positioning[0], prev_positioning[1] + tab_offset)
- self._cursor_position += tab_offset
else:
first, second = command[:2], command[2:]
try:
@@ -479,7 +477,6 @@ def _update_positioning(self, command):
except KeyError:
# if not PAC or OFFSET we're not changing position
return
- self._cursor_position = positioning[1]
self._position_tracer.update_positioning(positioning)
def __iter__(self):
@@ -533,7 +530,6 @@ def handle_backspace(self, word):
# only if the previous character in not also extended
if delete_previous_condition:
node.text = node.text[:-1]
- self._cursor_position -= 1
def get_previous_text_node(self):
for node in self._collection[::-1]:
diff --git a/pycaption/scc/state_machines.py b/pycaption/scc/state_machines.py
index 04fc632b..fed5e508 100644
--- a/pycaption/scc/state_machines.py
+++ b/pycaption/scc/state_machines.py
@@ -11,7 +11,7 @@ def __init__(self, positioning=None):
:type positioning: tuple[int]
"""
self._positions = [positioning]
- self._break_required = False
+ self._breaks_required = 0
self._repositioning_required = False
# Since the actual column is not applied when encountering a line break
# this attribute is used to store it and determine by comparison if the
@@ -35,18 +35,18 @@ def update_positioning(self, positioning):
return
row, col = current
- if self._break_required:
+ if self._breaks_required:
col = self._last_column
new_row, new_col = positioning
is_tab_offset = new_row == row and col + 1 <= new_col <= col + 3
# One line below will be treated as line break, not repositioning
- if new_row == row + 1:
+ if new_row > row:
self._positions.append((new_row, col))
- self._break_required = True
+ self._breaks_required = new_row - row
self._last_column = new_col
# Tab offsets after line breaks will be ignored to avoid repositioning
- elif self._break_required and is_tab_offset:
+ elif self._breaks_required and is_tab_offset:
return
else:
# Reset the "current" position altogether.
@@ -86,11 +86,11 @@ def is_linebreak_required(self):
"""If the current position is simply one line below the previous.
:rtype: bool
"""
- return self._break_required
+ return self._breaks_required > 0
def acknowledge_linebreak_consumed(self):
"""Call to acknowledge that the line required was consumed"""
- self._break_required = False
+ self._breaks_required = 0
class DefaultProvidingPositionTracker(_PositioningTracker):
diff --git a/setup.py b/setup.py
index dc03856f..3d4bd260 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@
setup(
name='pycaption',
- version='2.2.12.dev5',
+ version='2.2.12.dev6',
description='Closed caption converter',
long_description=open(README_PATH).read(),
author='Joe Norton',
diff --git a/tests/fixtures/dfxp.py b/tests/fixtures/dfxp.py
index 33fb6ef5..d0a052d0 100644
--- a/tests/fixtures/dfxp.py
+++ b/tests/fixtures/dfxp.py
@@ -914,16 +914,9 @@ def sample_dfxp_from_scc_output():
-
-
-
-
-
-
-
-
-
-
+
+
+
@@ -932,40 +925,41 @@ def sample_dfxp_from_scc_output():
abab
- cdcd
-
-
+ cdcd
efef
-
+
+
ghgh
ijij
klkl
-
+
+
mnmn
-
+
opop
-
+
qrqr
-
+
+
stst
uvuv
wxwx
-
+
+
yzyz
-
- 0101
-
-
+
+ 0101
2323
-
+
+
4545
6767
8989
diff --git a/tests/fixtures/scc.py b/tests/fixtures/scc.py
index 96f8c2cc..e09a10d3 100644
--- a/tests/fixtures/scc.py
+++ b/tests/fixtures/scc.py
@@ -81,7 +81,7 @@ def sample_scc_pop_on():
def sample_scc_multiple_positioning():
return """Scenarist_SCC V1.0
-00:00:00:16 94ae 94ae 9420 9420 1370 1370 6162 6162 91d6 91d6 e364 e364 927c 927c e5e6 e5e6 942c 942c 942f 942f
+00:00:00:16 94ae 94ae 9420 9420 1370 1370 6162 6162 91d6 91d6 e364 e364 92fd 92fd e5e6 e5e6 942c 942c 942f 942f
00:00:02:16 94ae 94ae 9420 9420 16f2 16f2 6768 6768 9752 9752 e9ea e9ea 97f2 97f2 6bec 6bec 942c 942c 942f 942f
diff --git a/tests/test_scc.py b/tests/test_scc.py
index 21e52dfd..fd8c7a1e 100644
--- a/tests/test_scc.py
+++ b/tests/test_scc.py
@@ -70,17 +70,16 @@ def test_positioning(self, sample_scc_multiple_positioning):
expected_positioning = [
((10.0, UnitEnum.PERCENT), (77.0, UnitEnum.PERCENT)),
((40.0, UnitEnum.PERCENT), (5.0, UnitEnum.PERCENT)),
- ((70.0, UnitEnum.PERCENT), (23.0, UnitEnum.PERCENT)),
- ((20.0, UnitEnum.PERCENT), (47.0, UnitEnum.PERCENT)),
- ((20.0, UnitEnum.PERCENT), (89.0, UnitEnum.PERCENT)),
+ ((40.0, UnitEnum.PERCENT), (5.0, UnitEnum.PERCENT)),
+ ((40.0, UnitEnum.PERCENT), (5.0, UnitEnum.PERCENT)),
((40.0, UnitEnum.PERCENT), (53.0, UnitEnum.PERCENT)),
((70.0, UnitEnum.PERCENT), (17.0, UnitEnum.PERCENT)),
- ((20.0, UnitEnum.PERCENT), (35.0, UnitEnum.PERCENT)),
- ((20.0, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)),
+ ((70.0, UnitEnum.PERCENT), (17.0, UnitEnum.PERCENT)),
+ ((70.0, UnitEnum.PERCENT), (17.0, UnitEnum.PERCENT)),
((70.0, UnitEnum.PERCENT), (11.0, UnitEnum.PERCENT)),
- ((40.0, UnitEnum.PERCENT), (41.0, UnitEnum.PERCENT)),
- ((20.0, UnitEnum.PERCENT), (71.0, UnitEnum.PERCENT)),
+ ((70.0, UnitEnum.PERCENT), (11.0, UnitEnum.PERCENT))
]
+
actual_positioning = [
caption_.layout_info.origin.serialized()
for caption_ in captions.get_captions("en-US")
@@ -267,11 +266,11 @@ def test_line_too_long(self, sample_scc_with_line_too_long):
SCCReader().read(sample_scc_with_line_too_long)
assert exc_info.value.args[0].startswith(
- "Cursor goes over column 32 for caption cue in scc file."
+ "32 character limit for caption cue in scc file."
)
str_to_check = (
"was Cal l l l l l l l l l l l l l l l l l l l l l l l l l l l l "
- "Denison, a friend - Length 84"
+ "Denison, a friend - Length 81"
)
assert "around 00:00:05.900" in exc_info.value.args[0].split("\n")[2]
assert str_to_check in exc_info.value.args[0].split("\n")[2]
@@ -578,68 +577,6 @@ def test_italics_commands_are_formatted_properly(self):
assert result[13].is_explicit_break()
assert result[14].is_text_node()
- @staticmethod
- def check_cursor_placement(node_creator):
- node_creator.interpret_command("91df") # row 01, column 28,
- assert node_creator._cursor_position == 28
- # check that it keeps the column of the last pac
- # before text
- node_creator.interpret_command("9152") # row 01, column 04
- assert node_creator._cursor_position == 4
- node_creator.add_chars("foo")
- assert node_creator._cursor_position == 7 # 4 + len(foo)
- # check on break
- node_creator.interpret_command("91f2") # row 02, column 04
- # it should be 4
- assert node_creator._cursor_position == 4
- # check mid-row codes with open italics
- node_creator.interpret_command("91ae")
- node_creator.add_chars("bar")
- # writing "foo" then breaks to row 2, col 4 then writes "bar"
- # starts at (2, 4) and because it follows a break it should not add space
- assert node_creator._cursor_position == 7
- # check mid-row codes with closed italics
- node_creator.add_chars("baz")
- node_creator.interpret_command("9120")
- node_creator.add_chars("spam")
- # should be 7 + len("baz") + one space + len(spam) = 15
- assert node_creator._cursor_position == 15
- node_creator.add_chars("aaa ")
- node_creator.interpret_command("91ae")
- node_creator.add_chars("bbb")
- # 15 + 4 + 3 = 22
- assert node_creator._cursor_position == 22
- # break again
- node_creator.interpret_command("9254") # row 03, column 08
- assert node_creator._cursor_position == 8
- node_creator.interpret_command("91ae")
- node_creator.add_chars("ccc")
- # should start at 8, not adding space as it has a break in front
- # should be 8 + len(ccc) = 11
- assert node_creator._cursor_position == 11
-
- def test_cursor_placement(self):
- # pop-on caption
- node_creator = InstructionNodeCreator(
- position_tracker=(DefaultProvidingPositionTracker())
- )
- node_creator.interpret_command("9420")
- self.check_cursor_placement(node_creator=node_creator)
-
- # roll-on caption
- node_creator = InstructionNodeCreator(
- position_tracker=(DefaultProvidingPositionTracker())
- )
- node_creator.interpret_command("9425")
- self.check_cursor_placement(node_creator=node_creator)
-
- # paint caption
- node_creator = InstructionNodeCreator(
- position_tracker=(DefaultProvidingPositionTracker())
- )
- node_creator.interpret_command("9429")
- self.check_cursor_placement(node_creator=node_creator)
-
@staticmethod
def check_closing_italics_closing_on_style_change(node_creator):
node_creator.interpret_command("9140") # row 01, col 0 plaintext