Skip to content

Commit

Permalink
revert handling jumping over multiple rows with repositioning instead…
Browse files Browse the repository at this point in the history
… of breaks
  • Loading branch information
OlteanuRares committed Sep 10, 2024
1 parent 3d6878d commit 03571dd
Show file tree
Hide file tree
Showing 8 changed files with 225 additions and 201 deletions.
10 changes: 5 additions & 5 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ Changelog
---------
2.2.13
^^^^^^
- Mid-row codes only add spaces only if there isn't one before
- Mid-row codes only add spaces only if affects the text in the same row (not adding if after previous text follows break or breaks)
- Remove spaces to the end of the lines
- Change error message for the 32 character limit.
- Mid-row codes add a space only if there isn't one before them.
- Mid-row codes add spaces only if they affect text in the same row (no space is added if the code follows a break or a PAC).
- Remove spaces at the end of the lines.
- Close italics on receiving another style setting command.
- Throw a CaptionReadNoCaptions error when an empty input file is provided.
- Properly add breaks (it was only for jumps to next row). Now it adds as many breaks as the difference between row numbers.
- Ignore repositioning commands which are not followed by any text before breaks.
- Mid-row codes will not add a space if it would be in front of punctuation.
- Fix a bug with background codes when InstructionNodeCreator collection is empty.

2.2.12
^^^^^^
Expand Down
56 changes: 37 additions & 19 deletions pycaption/scc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,20 +85,35 @@
from copy import deepcopy

from pycaption.base import BaseReader, BaseWriter, CaptionNode, CaptionSet
from pycaption.exceptions import (CaptionLineLengthError,
CaptionReadNoCaptions,
CaptionReadTimingError, InvalidInputError)

from .constants import (CHARACTER_TO_CODE, CHARACTERS, COMMANDS,
CUE_STARTING_COMMAND, EXTENDED_CHARS, HEADER,
MICROSECONDS_PER_CODEWORD,
PAC_BYTES_TO_POSITIONING_MAP, PAC_HIGH_BYTE_BY_ROW,
PAC_LOW_BYTE_BY_ROW_RESTRICTED,
PAC_TAB_OFFSET_COMMANDS, SPECIAL_CHARS,
SPECIAL_OR_EXTENDED_CHAR_TO_CODE)
from pycaption.exceptions import (
CaptionLineLengthError,
CaptionReadNoCaptions,
CaptionReadTimingError,
InvalidInputError,
)

from .constants import (
CHARACTER_TO_CODE,
CHARACTERS,
COMMANDS,
CUE_STARTING_COMMAND,
EXTENDED_CHARS,
HEADER,
MICROSECONDS_PER_CODEWORD,
PAC_BYTES_TO_POSITIONING_MAP,
PAC_HIGH_BYTE_BY_ROW,
PAC_LOW_BYTE_BY_ROW_RESTRICTED,
PAC_TAB_OFFSET_COMMANDS,
SPECIAL_CHARS,
SPECIAL_OR_EXTENDED_CHAR_TO_CODE,
)
from .specialized_collections import CaptionCreator # noqa: F401
from .specialized_collections import (InstructionNodeCreator, NotifyingDict,
PopOnCue, TimingCorrectingCaptionList)
from .specialized_collections import (
InstructionNodeCreator,
NotifyingDict,
PopOnCue,
TimingCorrectingCaptionList,
)
from .state_machines import DefaultProvidingPositionTracker


Expand Down Expand Up @@ -236,7 +251,9 @@ def read(self, content, lang="en-US", simulate_roll_up=False, offset=0):
for caption in self.caption_stash._collection:
caption_start = caption.to_real_caption().format_start()
caption_text = "".join(caption.to_real_caption().get_text_nodes())
text_too_long = [line for line in caption_text.split("\n") if len(line) > 32]
text_too_long = [
line for line in caption_text.split("\n") if len(line) > 32
]
if caption_start in lines_too_long:
lines_too_long[caption_start] = text_too_long
else:
Expand Down Expand Up @@ -313,9 +330,10 @@ def _translate_line(self, line):
for idx, word in enumerate(word_list):
word = word.strip()
if len(word) == 4:
self._translate_word(word=word)
next_command = word_list[idx + 1] if idx + 1 < len(word_list) else None
self._translate_word(word=word, next_command=next_command)

def _translate_word(self, word):
def _translate_word(self, word, next_command=None):
if self._handle_double_command(word):
# count frames for timing
self.time_translator.increment_frames()
Expand All @@ -324,7 +342,7 @@ def _translate_word(self, word):
# TODO - check that all the positioning commands are here, or use
# some other strategy to determine if the word is a command.
if word in COMMANDS or _is_pac_command(word):
self._translate_command(word=word)
self._translate_command(word=word, next_command=next_command)

# second, check if word is a special character
elif word in SPECIAL_CHARS:
Expand Down Expand Up @@ -396,7 +414,7 @@ def _translate_extended_char(self, word):
# add to buffer
self.buffer.add_chars(EXTENDED_CHARS[word])

def _translate_command(self, word):
def _translate_command(self, word, next_command=None):
# if command is pop_up
if word == "9420":
self.buffer_dict.set_active("pop")
Expand Down Expand Up @@ -462,7 +480,7 @@ def _translate_command(self, word):

# If command is not one of the aforementioned, add it to buffer
else:
self.buffer.interpret_command(command=word)
self.buffer.interpret_command(command=word, next_command=next_command)

def _translate_characters(self, word):
# split word into the 2 bytes
Expand Down
64 changes: 38 additions & 26 deletions pycaption/scc/specialized_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
BACKGROUND_COLOR_CODES,
COMMANDS,
EXTENDED_CHARS,
ITALICS_COMMANDS,
MICROSECONDS_PER_CODEWORD,
MID_ROW_CODES,
PAC_BYTES_TO_POSITIONING_MAP,
PAC_TAB_OFFSET_COMMANDS,
ITALICS_COMMANDS,
UNDERLINE_COMMANDS,
PLAIN_TEXT_COMMANDS,
STYLE_SETTING_COMMANDS
STYLE_SETTING_COMMANDS,
UNDERLINE_COMMANDS,
)

PopOnCue = collections.namedtuple("PopOnCue", "buffer, start, end")
Expand Down Expand Up @@ -309,7 +309,9 @@ def __init__(self, collection=None, position_tracker=None):
else:
self._collection = collection

self.last_style = None # can be italic on or italic off as we only support italics
self.last_style = (
None # can be italic on or italic off as we only support italics
)
self._position_tracer = position_tracker

def is_empty(self):
Expand Down Expand Up @@ -340,10 +342,9 @@ def add_chars(self, *chars):

# handle a simple line break
if self._position_tracer.is_linebreak_required():
for _ in range(self._position_tracer._breaks_required):
self._collection.append(
_InstructionNode.create_break(position=current_position)
)
self._collection.append(
_InstructionNode.create_break(position=current_position)
)
self._position_tracer.acknowledge_linebreak_consumed()
node = _InstructionNode.create_text(current_position)
self._collection.append(node)
Expand Down Expand Up @@ -374,14 +375,15 @@ def get_style_for_command(command):
# only remaining possibility is plain text
return "plaintext"

def interpret_command(self, command):
def interpret_command(self, command, next_command=None):
"""Given a command determines whether to turn italics on or off,
or to set the positioning
This is mostly used to convert from the legacy-style commands
:type command: str
or a PAC_TAB_OFFSET_COMMANDS
:type next_command: the command that follows next
"""
self._update_positioning(command)

Expand All @@ -394,7 +396,7 @@ def interpret_command(self, command):
# which will be deleted when the code is applied.
# ex: 2080 97ad 94a1
if (
len(self._collection) > 1
len(self._collection) > 0
and self._collection[-1].is_text_node()
and self._collection[-1].text[-1].isspace()
):
Expand All @@ -410,10 +412,9 @@ def interpret_command(self, command):
# it should open italic tag
# if break is required, break then add style tag
if self._position_tracer.is_linebreak_required():
for _ in range(self._position_tracer._breaks_required):
self._collection.append(
_InstructionNode.create_break(position=current_position)
)
self._collection.append(
_InstructionNode.create_break(position=current_position)
)
self._position_tracer.acknowledge_linebreak_consumed()
self._collection.append(
_InstructionNode.create_italics_style(current_position)
Expand All @@ -432,23 +433,28 @@ def interpret_command(self, command):
)
self.last_style = "italics off"
if self._position_tracer.is_linebreak_required():
for _ in range(self._position_tracer._breaks_required):
self._collection.append(
_InstructionNode.create_break(position=current_position)
)
self._collection.append(
_InstructionNode.create_break(position=current_position)
)
self._position_tracer.acknowledge_linebreak_consumed()

# handle mid-row codes that follows a text node
# don't add space if the next command adds one of
# ['.', '!', '?', ',']
punctuation = ["ae", "a1", "bf", "2c"]
next_is_punctuation = next_command and next_command[:2] in punctuation
prev_text_node = self.get_previous_text_node()
prev_node_is_break = prev_text_node is not None and any(
x.is_explicit_break() for x in self._collection[self._collection.index(prev_text_node):]
x.is_explicit_break()
for x in self._collection[self._collection.index(prev_text_node) :]
)
if (
command in MID_ROW_CODES and
prev_text_node and not
prev_node_is_break and not
prev_text_node.text[-1].isspace() and
command not in PAC_TAB_OFFSET_COMMANDS
command in MID_ROW_CODES
and prev_text_node
and not prev_node_is_break
and not prev_text_node.text[-1].isspace()
and command not in PAC_TAB_OFFSET_COMMANDS
and not next_is_punctuation
):
if self.last_style == "italics off":
# need to open italics tag, add a space
Expand All @@ -465,8 +471,8 @@ def _update_positioning(self, command):
:type command: str
"""
prev_positioning = self._position_tracer.default
if command in PAC_TAB_OFFSET_COMMANDS:
prev_positioning = self._position_tracer.default
tab_offset = PAC_TAB_OFFSET_COMMANDS[command]
positioning = (prev_positioning[0], prev_positioning[1] + tab_offset)
else:
Expand Down Expand Up @@ -751,6 +757,12 @@ def _format_italics(collection):
new_collection = _remove_noop_italics(new_collection)

# remove spaces to the end of the lines
new_collection = _remove_spaces_at_end_of_the_line(new_collection)

return new_collection


def _remove_spaces_at_end_of_the_line(collection: list[_InstructionNode]):
for idx, node in enumerate(collection):
if (
idx > 0
Expand All @@ -762,7 +774,7 @@ def _format_italics(collection):
# handle last node
if collection[-1].is_text_node():
collection[-1].text = collection[-1].text.rstrip()
return new_collection
return collection


def _remove_noop_on_off_italics(collection):
Expand Down
20 changes: 10 additions & 10 deletions pycaption/scc/state_machines.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ class _PositioningTracker:
"""Helps determine the positioning of a node, having kept track of
positioning-related commands.
"""

def __init__(self, positioning=None):
"""
:param positioning: positioning information (row, column)
:type positioning: tuple[int]
"""
self._positions = [positioning]
self._breaks_required = 0
self._break_required = False
self._repositioning_required = False
# Since the actual column is not applied when encountering a line break
# this attribute is used to store it and determine by comparison if the
Expand All @@ -35,18 +36,18 @@ def update_positioning(self, positioning):
return

row, col = current
if self._breaks_required:
if self._break_required:
col = self._last_column
new_row, new_col = positioning
is_tab_offset = new_row == row and col + 1 <= new_col <= col + 3

# One line below will be treated as line break, not repositioning
if new_row > row:
if new_row == row + 1:
self._positions.append((new_row, col))
self._breaks_required = new_row - row
self._break_required = 1
self._last_column = new_col
# Tab offsets after line breaks will be ignored to avoid repositioning
elif self._breaks_required and is_tab_offset:
elif self._break_required and is_tab_offset:
return
else:
# Reset the "current" position altogether.
Expand All @@ -64,9 +65,7 @@ def get_current_position(self):
:raise: CaptionReadSyntaxError
"""
if not any(self._positions):
raise CaptionReadSyntaxError(
'No Preamble Address Code [PAC] was provided'
)
raise CaptionReadSyntaxError("No Preamble Address Code [PAC] was provided")
else:
return self._positions[0]

Expand All @@ -86,17 +85,18 @@ def is_linebreak_required(self):
"""If the current position is simply one line below the previous.
:rtype: bool
"""
return self._breaks_required > 0
return self._break_required

def acknowledge_linebreak_consumed(self):
"""Call to acknowledge that the line required was consumed"""
self._breaks_required = 0
self._break_required = False


class DefaultProvidingPositionTracker(_PositioningTracker):
"""A _PositioningTracker that provides if needed a default value (14, 0), or
uses the last positioning value set anywhere in the document
"""

default = (14, 0)

def __init__(self, positioning=None, default=None):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

setup(
name='pycaption',
version='2.2.12.dev6',
version='2.2.12.dev7',
description='Closed caption converter',
long_description=open(README_PATH).read(),
author='Joe Norton',
Expand Down
Loading

0 comments on commit 03571dd

Please sign in to comment.