Skip to content

Commit

Permalink
revert on line length error instead of cursor placement
Browse files Browse the repository at this point in the history
  • Loading branch information
OlteanuRares committed Sep 3, 2024
1 parent 019be6e commit 3d6878d
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 149 deletions.
9 changes: 7 additions & 2 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
Changelog
---------
2.2.12
2.2.13
^^^^^^
- Mid-row codes add a space only if there isn't one before
- Mid-row codes add a space only if it affects the text in the same row (no space is added when the previous text is followed by one or more breaks)
- Remove spaces at the end of lines
- CaptionLineLengthError is now raised if the cursor goes after column 32 instead of string length over 32
- Change error message for the 32 character limit.
- Close italics on receiving another style setting command.
- Throw a CaptionReadNoCaptions error when an empty input file is provided
- Properly add breaks (previously a break was added only for jumps to the next row). Now as many breaks are added as the difference between row numbers.
- Ignore repositioning commands which are not followed by any text before breaks.

2.2.12
^^^^^^
- Pinned nltk to 3.8.0

2.2.11
^^^^^^
Expand Down
31 changes: 9 additions & 22 deletions pycaption/scc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,11 +233,14 @@ def read(self, content, lang="en-US", simulate_roll_up=False, offset=0):

# check captions for incorrect lengths
lines_too_long = defaultdict(list)

for caption in self.caption_stash._collection:
line_length = self.get_scc_line_line_length(caption.to_real_caption())
if line_length:
lines_too_long.update(line_length)
caption_start = caption.to_real_caption().format_start()
caption_text = "".join(caption.to_real_caption().get_text_nodes())
text_too_long = [line for line in caption_text.split("\n") if len(line) > 32]
if caption_start in lines_too_long:
lines_too_long[caption_start] = text_too_long
else:
lines_too_long[caption_start].extend(text_too_long)

msg = ""
if bool(lines_too_long.keys()):
Expand All @@ -248,8 +251,8 @@ def read(self, content, lang="en-US", simulate_roll_up=False, offset=0):
msg += line + f" - Length { len(line)}" + "\n"
if len(msg):
raise CaptionLineLengthError(
f"Cursor goes over column 32 for caption cue in scc file.\n"
f"Affected lines:\n"
f"32 character limit for caption cue in scc file.\n"
f"Lines longer than 32:\n"
f"{msg}"
)

Expand Down Expand Up @@ -513,22 +516,6 @@ def _pop_on(self, end=0):
pop_on_cue = self.pop_ons_queue.pop()
self.caption_stash.create_and_store(pop_on_cue.buffer, pop_on_cue.start, end)

@staticmethod
def get_scc_line_line_length(caption):
    """Find the lines of a caption that are longer than 32 characters.

    The caption text is assembled from ``caption.get_text_nodes()`` and,
    when the first text node has a non-zero column in ``position[1]``,
    prefixed with that many spaces before being split on newlines. The
    padding is applied once to the joined text, so only the first line
    is shifted by the starting column.

    :param caption: object providing ``format_start()``, ``nodes`` and
        ``get_text_nodes()``
    :returns: a ``defaultdict(list)`` mapping the value of
        ``caption.format_start()`` to the list of over-long lines, or
        ``None`` when the caption has no text nodes or no line is
        longer than 32 characters
    """
    start_key = caption.format_start()

    only_text = [
        candidate
        for candidate in caption.nodes
        if candidate.type_ == CaptionNode.TEXT
    ]
    if not only_text:
        return None

    # Column at which the first text node begins writing.
    first_column = only_text[0].position[1]
    rendered = "".join(caption.get_text_nodes())
    if first_column:
        rendered = " " * first_column + rendered

    overlong = [row for row in rendered.split("\n") if len(row) > 32]
    if not overlong:
        return None

    result = defaultdict(list)
    result[start_key] = overlong
    return result


class SCCWriter(BaseWriter):
def __init__(self, *args, **kw):
Expand Down
38 changes: 17 additions & 21 deletions pycaption/scc/specialized_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,6 @@ def __init__(self, collection=None, position_tracker=None):
self._collection = collection

self.last_style = None # can be italic on or italic off as we only support italics
self._cursor_position = 0

self._position_tracer = position_tracker

def is_empty(self):
Expand Down Expand Up @@ -342,14 +340,17 @@ def add_chars(self, *chars):

# handle a simple line break
if self._position_tracer.is_linebreak_required():
# must insert a line break here
self._collection.append(
_InstructionNode.create_break(position=current_position)
)
for _ in range(self._position_tracer._breaks_required):
self._collection.append(
_InstructionNode.create_break(position=current_position)
)
self._position_tracer.acknowledge_linebreak_consumed()
node = _InstructionNode.create_text(current_position)
self._collection.append(node)
self._cursor_position = self._position_tracer._last_column
self._position_tracer.acknowledge_linebreak_consumed()
if self._position_tracer.is_repositioning_required():
# it means we have a reposition command which was not followed by
# any text, so we just ignore it and break
self._position_tracer.acknowledge_position_changed()

# handle completely new positioning
elif self._position_tracer.is_repositioning_required():
Expand All @@ -361,7 +362,6 @@ def add_chars(self, *chars):
self._position_tracer.acknowledge_position_changed()

node.add_chars(*chars)
self._cursor_position += len("".join(chars))

@staticmethod
def get_style_for_command(command):
Expand All @@ -385,8 +385,6 @@ def interpret_command(self, command):
"""
self._update_positioning(command)

text = COMMANDS.get(command, "")

if command == "94a1":
self.handle_backspace("94a1")

Expand All @@ -412,9 +410,10 @@ def interpret_command(self, command):
# it should open italic tag
# if break is required, break then add style tag
if self._position_tracer.is_linebreak_required():
self._collection.append(
_InstructionNode.create_break(position=current_position)
)
for _ in range(self._position_tracer._breaks_required):
self._collection.append(
_InstructionNode.create_break(position=current_position)
)
self._position_tracer.acknowledge_linebreak_consumed()
self._collection.append(
_InstructionNode.create_italics_style(current_position)
Expand All @@ -433,9 +432,10 @@ def interpret_command(self, command):
)
self.last_style = "italics off"
if self._position_tracer.is_linebreak_required():
self._collection.append(
_InstructionNode.create_break(position=current_position)
)
for _ in range(self._position_tracer._breaks_required):
self._collection.append(
_InstructionNode.create_break(position=current_position)
)
self._position_tracer.acknowledge_linebreak_consumed()

# handle mid-row codes that follows a text node
Expand All @@ -459,7 +459,6 @@ def interpret_command(self, command):
# need to close italics tag, add a space
# to the end of the previous text node
prev_text_node.text = prev_text_node.text + " "
self._cursor_position += 1

def _update_positioning(self, command):
"""Sets the positioning information to use for the next nodes
Expand All @@ -470,7 +469,6 @@ def _update_positioning(self, command):
if command in PAC_TAB_OFFSET_COMMANDS:
tab_offset = PAC_TAB_OFFSET_COMMANDS[command]
positioning = (prev_positioning[0], prev_positioning[1] + tab_offset)
self._cursor_position += tab_offset
else:
first, second = command[:2], command[2:]
try:
Expand All @@ -479,7 +477,6 @@ def _update_positioning(self, command):
except KeyError:
# if not PAC or OFFSET we're not changing position
return
self._cursor_position = positioning[1]
self._position_tracer.update_positioning(positioning)

def __iter__(self):
Expand Down Expand Up @@ -533,7 +530,6 @@ def handle_backspace(self, word):
# only if the previous character in not also extended
if delete_previous_condition:
node.text = node.text[:-1]
self._cursor_position -= 1

def get_previous_text_node(self):
for node in self._collection[::-1]:
Expand Down
14 changes: 7 additions & 7 deletions pycaption/scc/state_machines.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __init__(self, positioning=None):
:type positioning: tuple[int]
"""
self._positions = [positioning]
self._break_required = False
self._breaks_required = 0
self._repositioning_required = False
# Since the actual column is not applied when encountering a line break
# this attribute is used to store it and determine by comparison if the
Expand All @@ -35,18 +35,18 @@ def update_positioning(self, positioning):
return

row, col = current
if self._break_required:
if self._breaks_required:
col = self._last_column
new_row, new_col = positioning
is_tab_offset = new_row == row and col + 1 <= new_col <= col + 3

# One line below will be treated as line break, not repositioning
if new_row == row + 1:
if new_row > row:
self._positions.append((new_row, col))
self._break_required = True
self._breaks_required = new_row - row
self._last_column = new_col
# Tab offsets after line breaks will be ignored to avoid repositioning
elif self._break_required and is_tab_offset:
elif self._breaks_required and is_tab_offset:
return
else:
# Reset the "current" position altogether.
Expand Down Expand Up @@ -86,11 +86,11 @@ def is_linebreak_required(self):
"""If the current position is simply one line below the previous.
:rtype: bool
"""
return self._break_required
return self._breaks_required > 0

def acknowledge_linebreak_consumed(self):
"""Call to acknowledge that the line required was consumed"""
self._break_required = False
self._breaks_required = 0


class DefaultProvidingPositionTracker(_PositioningTracker):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

setup(
name='pycaption',
version='2.2.12.dev5',
version='2.2.12.dev6',
description='Closed caption converter',
long_description=open(README_PATH).read(),
author='Joe Norton',
Expand Down
42 changes: 18 additions & 24 deletions tests/fixtures/dfxp.py
Original file line number Diff line number Diff line change
Expand Up @@ -914,16 +914,9 @@ def sample_dfxp_from_scc_output():
<region tts:displayAlign="after" tts:textAlign="start" xml:id="bottom"/>
<region tts:displayAlign="before" tts:origin="10% 77%" tts:textAlign="left" xml:id="r0"/>
<region tts:displayAlign="before" tts:origin="40% 5%" tts:textAlign="left" xml:id="r1"/>
<region tts:displayAlign="before" tts:origin="70% 23%" tts:textAlign="left" xml:id="r2"/>
<region tts:displayAlign="before" tts:origin="20% 47%" tts:textAlign="left" xml:id="r3"/>
<region tts:displayAlign="before" tts:origin="20% 89%" tts:textAlign="left" xml:id="r4"/>
<region tts:displayAlign="before" tts:origin="40% 53%" tts:textAlign="left" xml:id="r5"/>
<region tts:displayAlign="before" tts:origin="70% 17%" tts:textAlign="left" xml:id="r6"/>
<region tts:displayAlign="before" tts:origin="20% 35%" tts:textAlign="left" xml:id="r7"/>
<region tts:displayAlign="before" tts:origin="20% 83%" tts:textAlign="left" xml:id="r8"/>
<region tts:displayAlign="before" tts:origin="70% 11%" tts:textAlign="left" xml:id="r9"/>
<region tts:displayAlign="before" tts:origin="40% 41%" tts:textAlign="left" xml:id="r10"/>
<region tts:displayAlign="before" tts:origin="20% 71%" tts:textAlign="left" xml:id="r11"/>
<region tts:displayAlign="before" tts:origin="40% 53%" tts:textAlign="left" xml:id="r2"/>
<region tts:displayAlign="before" tts:origin="70% 17%" tts:textAlign="left" xml:id="r3"/>
<region tts:displayAlign="before" tts:origin="70% 11%" tts:textAlign="left" xml:id="r4"/>
</layout>
</head>
<body>
Expand All @@ -932,40 +925,41 @@ def sample_dfxp_from_scc_output():
abab
</p>
<p begin="00:00:01.134" end="00:00:03.136" region="r1" style="default">
cdcd
</p>
<p begin="00:00:01.134" end="00:00:03.136" region="r2" style="default">
cdcd<br/><br/><br/>
efef
</p>
<p begin="00:00:03.136" end="00:00:09.709" region="r3" style="default">
<p begin="00:00:03.136" end="00:00:09.709" region="r1" style="default">
<br/><br/><br/><br/>
ghgh<br/>
ijij<br/>
klkl
</p>
<p begin="00:00:09.709" end="00:00:11.711" region="r4" style="default">
<p begin="00:00:09.709" end="00:00:11.711" region="r1" style="default">
<br/><br/><br/><br/><br/>
mnmn
</p>
<p begin="00:00:09.709" end="00:00:11.711" region="r5" style="default">
<p begin="00:00:09.709" end="00:00:11.711" region="r2" style="default">
opop
</p>
<p begin="00:00:09.709" end="00:00:11.711" region="r6" style="default">
<p begin="00:00:09.709" end="00:00:11.711" region="r3" style="default">
qrqr
</p>
<p begin="00:00:11.711" end="00:00:20.086" region="r7" style="default">
<p begin="00:00:11.711" end="00:00:20.086" region="r3" style="default">
<br/><br/><br/>
stst<br/>
uvuv<br/>
wxwx
</p>
<p begin="00:00:20.086" end="00:00:22.088" region="r8" style="default">
<p begin="00:00:20.086" end="00:00:22.088" region="r3" style="default">
<br/><br/><br/><br/><br/><br/>
yzyz
</p>
<p begin="00:00:20.086" end="00:00:22.088" region="r9" style="default">
0101
</p>
<p begin="00:00:20.086" end="00:00:22.088" region="r10" style="default">
<p begin="00:00:20.086" end="00:00:22.088" region="r4" style="default">
0101<br/><br/><br/><br/><br/>
2323
</p>
<p begin="00:00:22.088" end="00:00:26.088" region="r11" style="default">
<p begin="00:00:22.088" end="00:00:26.088" region="r4" style="default">
<br/><br/><br/><br/><br/>
4545<br/>
6767<br/>
8989
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def sample_scc_pop_on():
def sample_scc_multiple_positioning():
return """Scenarist_SCC V1.0
00:00:00:16 94ae 94ae 9420 9420 1370 1370 6162 6162 91d6 91d6 e364 e364 927c 927c e5e6 e5e6 942c 942c 942f 942f
00:00:00:16 94ae 94ae 9420 9420 1370 1370 6162 6162 91d6 91d6 e364 e364 92fd 92fd e5e6 e5e6 942c 942c 942f 942f
00:00:02:16 94ae 94ae 9420 9420 16f2 16f2 6768 6768 9752 9752 e9ea e9ea 97f2 97f2 6bec 6bec 942c 942c 942f 942f
Expand Down
Loading

0 comments on commit 3d6878d

Please sign in to comment.