diff --git a/docs/changelog.rst b/docs/changelog.rst index 400e9cdd..a058b2d3 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -2,14 +2,14 @@ Changelog --------- 2.2.13 ^^^^^^ -- Mid-row codes only add spaces only if there isn't one before -- Mid-row codes only add spaces only if affects the text in the same row (not adding if after previous text follows break or breaks) -- Remove spaces to the end of the lines -- Change error message for the 32 character limit. +- Mid-row codes add a space only if there isn't one before. +- Mid-row codes add a space only if it affects the text in the same row (no space is added if the code follows a break or a PAC). +- Remove spaces at the end of the lines. - Close italics on receiving another style setting command. - Throw an CaptionReadNoCaptions error in case of empty input file are provided -- Properly add breaks (it was only for jumps to next row). Now it adds as many breaks as the difference between row numbers. - Ignore repositioning commands which are not followed by any text before breaks. +- Mid-row codes will not add a space if it would be in front of punctuation. +- Fix a bug with background codes when the InstructionNodeCreator collection is empty. 
2.2.12 ^^^^^^ diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index b28444a0..398745ed 100644 --- a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -85,20 +85,35 @@ from copy import deepcopy from pycaption.base import BaseReader, BaseWriter, CaptionNode, CaptionSet -from pycaption.exceptions import (CaptionLineLengthError, - CaptionReadNoCaptions, - CaptionReadTimingError, InvalidInputError) - -from .constants import (CHARACTER_TO_CODE, CHARACTERS, COMMANDS, - CUE_STARTING_COMMAND, EXTENDED_CHARS, HEADER, - MICROSECONDS_PER_CODEWORD, - PAC_BYTES_TO_POSITIONING_MAP, PAC_HIGH_BYTE_BY_ROW, - PAC_LOW_BYTE_BY_ROW_RESTRICTED, - PAC_TAB_OFFSET_COMMANDS, SPECIAL_CHARS, - SPECIAL_OR_EXTENDED_CHAR_TO_CODE) +from pycaption.exceptions import ( + CaptionLineLengthError, + CaptionReadNoCaptions, + CaptionReadTimingError, + InvalidInputError, +) + +from .constants import ( + CHARACTER_TO_CODE, + CHARACTERS, + COMMANDS, + CUE_STARTING_COMMAND, + EXTENDED_CHARS, + HEADER, + MICROSECONDS_PER_CODEWORD, + PAC_BYTES_TO_POSITIONING_MAP, + PAC_HIGH_BYTE_BY_ROW, + PAC_LOW_BYTE_BY_ROW_RESTRICTED, + PAC_TAB_OFFSET_COMMANDS, + SPECIAL_CHARS, + SPECIAL_OR_EXTENDED_CHAR_TO_CODE, +) from .specialized_collections import CaptionCreator # noqa: F401 -from .specialized_collections import (InstructionNodeCreator, NotifyingDict, - PopOnCue, TimingCorrectingCaptionList) +from .specialized_collections import ( + InstructionNodeCreator, + NotifyingDict, + PopOnCue, + TimingCorrectingCaptionList, +) from .state_machines import DefaultProvidingPositionTracker @@ -236,7 +251,9 @@ def read(self, content, lang="en-US", simulate_roll_up=False, offset=0): for caption in self.caption_stash._collection: caption_start = caption.to_real_caption().format_start() caption_text = "".join(caption.to_real_caption().get_text_nodes()) - text_too_long = [line for line in caption_text.split("\n") if len(line) > 32] + text_too_long = [ + line for line in caption_text.split("\n") if len(line) > 
32 + ] if caption_start in lines_too_long: lines_too_long[caption_start] = text_too_long else: @@ -313,9 +330,10 @@ def _translate_line(self, line): for idx, word in enumerate(word_list): word = word.strip() if len(word) == 4: - self._translate_word(word=word) + next_command = word_list[idx + 1] if idx + 1 < len(word_list) else None + self._translate_word(word=word, next_command=next_command) - def _translate_word(self, word): + def _translate_word(self, word, next_command=None): if self._handle_double_command(word): # count frames for timing self.time_translator.increment_frames() @@ -324,7 +342,7 @@ def _translate_word(self, word): # TODO - check that all the positioning commands are here, or use # some other strategy to determine if the word is a command. if word in COMMANDS or _is_pac_command(word): - self._translate_command(word=word) + self._translate_command(word=word, next_command=next_command) # second, check if word is a special character elif word in SPECIAL_CHARS: @@ -396,7 +414,7 @@ def _translate_extended_char(self, word): # add to buffer self.buffer.add_chars(EXTENDED_CHARS[word]) - def _translate_command(self, word): + def _translate_command(self, word, next_command=None): # if command is pop_up if word == "9420": self.buffer_dict.set_active("pop") @@ -462,7 +480,7 @@ def _translate_command(self, word): # If command is not one of the aforementioned, add it to buffer else: - self.buffer.interpret_command(command=word) + self.buffer.interpret_command(command=word, next_command=next_command) def _translate_characters(self, word): # split word into the 2 bytes diff --git a/pycaption/scc/specialized_collections.py b/pycaption/scc/specialized_collections.py index 85bad6ba..a5e132a8 100644 --- a/pycaption/scc/specialized_collections.py +++ b/pycaption/scc/specialized_collections.py @@ -14,14 +14,14 @@ BACKGROUND_COLOR_CODES, COMMANDS, EXTENDED_CHARS, + ITALICS_COMMANDS, MICROSECONDS_PER_CODEWORD, MID_ROW_CODES, PAC_BYTES_TO_POSITIONING_MAP, 
PAC_TAB_OFFSET_COMMANDS, - ITALICS_COMMANDS, - UNDERLINE_COMMANDS, PLAIN_TEXT_COMMANDS, - STYLE_SETTING_COMMANDS + STYLE_SETTING_COMMANDS, + UNDERLINE_COMMANDS, ) PopOnCue = collections.namedtuple("PopOnCue", "buffer, start, end") @@ -309,7 +309,9 @@ def __init__(self, collection=None, position_tracker=None): else: self._collection = collection - self.last_style = None # can be italic on or italic off as we only support italics + self.last_style = ( + None # can be italic on or italic off as we only support italics + ) self._position_tracer = position_tracker def is_empty(self): @@ -340,10 +342,9 @@ def add_chars(self, *chars): # handle a simple line break if self._position_tracer.is_linebreak_required(): - for _ in range(self._position_tracer._breaks_required): - self._collection.append( - _InstructionNode.create_break(position=current_position) - ) + self._collection.append( + _InstructionNode.create_break(position=current_position) + ) self._position_tracer.acknowledge_linebreak_consumed() node = _InstructionNode.create_text(current_position) self._collection.append(node) @@ -374,7 +375,7 @@ def get_style_for_command(command): # only remaining possibility is plain text return "plaintext" - def interpret_command(self, command): + def interpret_command(self, command, next_command=None): """Given a command determines whether to turn italics on or off, or to set the positioning @@ -382,6 +383,7 @@ def interpret_command(self, command): :type command: str or a PAC_TAB_OFFSET_COMMANDS + :type next_command: the command that follows next """ self._update_positioning(command) @@ -394,7 +396,7 @@ def interpret_command(self, command): # which will be deleted when the code is applied. 
# ex: 2080 97ad 94a1 if ( - len(self._collection) > 1 + len(self._collection) > 0 and self._collection[-1].is_text_node() and self._collection[-1].text[-1].isspace() ): @@ -410,10 +412,9 @@ def interpret_command(self, command): # it should open italic tag # if break is required, break then add style tag if self._position_tracer.is_linebreak_required(): - for _ in range(self._position_tracer._breaks_required): - self._collection.append( - _InstructionNode.create_break(position=current_position) - ) + self._collection.append( + _InstructionNode.create_break(position=current_position) + ) self._position_tracer.acknowledge_linebreak_consumed() self._collection.append( _InstructionNode.create_italics_style(current_position) @@ -432,23 +433,28 @@ def interpret_command(self, command): ) self.last_style = "italics off" if self._position_tracer.is_linebreak_required(): - for _ in range(self._position_tracer._breaks_required): - self._collection.append( - _InstructionNode.create_break(position=current_position) - ) + self._collection.append( + _InstructionNode.create_break(position=current_position) + ) self._position_tracer.acknowledge_linebreak_consumed() # handle mid-row codes that follows a text node + # don't add space if the next command adds one of + # ['.', '!', '?', ','] + punctuation = ["ae", "a1", "bf", "2c"] + next_is_punctuation = next_command and next_command[:2] in punctuation prev_text_node = self.get_previous_text_node() prev_node_is_break = prev_text_node is not None and any( - x.is_explicit_break() for x in self._collection[self._collection.index(prev_text_node):] + x.is_explicit_break() + for x in self._collection[self._collection.index(prev_text_node) :] ) if ( - command in MID_ROW_CODES and - prev_text_node and not - prev_node_is_break and not - prev_text_node.text[-1].isspace() and - command not in PAC_TAB_OFFSET_COMMANDS + command in MID_ROW_CODES + and prev_text_node + and not prev_node_is_break + and not prev_text_node.text[-1].isspace() + and 
command not in PAC_TAB_OFFSET_COMMANDS + and not next_is_punctuation ): if self.last_style == "italics off": # need to open italics tag, add a space @@ -465,8 +471,8 @@ def _update_positioning(self, command): :type command: str """ - prev_positioning = self._position_tracer.default if command in PAC_TAB_OFFSET_COMMANDS: + prev_positioning = self._position_tracer.default tab_offset = PAC_TAB_OFFSET_COMMANDS[command] positioning = (prev_positioning[0], prev_positioning[1] + tab_offset) else: @@ -751,6 +757,12 @@ def _format_italics(collection): new_collection = _remove_noop_italics(new_collection) # remove spaces to the end of the lines + new_collection = _remove_spaces_at_end_of_the_line(new_collection) + + return new_collection + + +def _remove_spaces_at_end_of_the_line(collection: list[_InstructionNode]): for idx, node in enumerate(collection): if ( idx > 0 @@ -762,7 +774,7 @@ def _format_italics(collection): # handle last node if collection[-1].is_text_node(): collection[-1].text = collection[-1].text.rstrip() - return new_collection + return collection def _remove_noop_on_off_italics(collection): diff --git a/pycaption/scc/state_machines.py b/pycaption/scc/state_machines.py index fed5e508..7353eff8 100644 --- a/pycaption/scc/state_machines.py +++ b/pycaption/scc/state_machines.py @@ -5,13 +5,14 @@ class _PositioningTracker: """Helps determine the positioning of a node, having kept track of positioning-related commands. 
""" + def __init__(self, positioning=None): """ :param positioning: positioning information (row, column) :type positioning: tuple[int] """ self._positions = [positioning] - self._breaks_required = 0 + self._break_required = False self._repositioning_required = False # Since the actual column is not applied when encountering a line break # this attribute is used to store it and determine by comparison if the @@ -35,18 +36,18 @@ def update_positioning(self, positioning): return row, col = current - if self._breaks_required: + if self._break_required: col = self._last_column new_row, new_col = positioning is_tab_offset = new_row == row and col + 1 <= new_col <= col + 3 # One line below will be treated as line break, not repositioning - if new_row > row: + if new_row == row + 1: self._positions.append((new_row, col)) - self._breaks_required = new_row - row + self._break_required = 1 self._last_column = new_col # Tab offsets after line breaks will be ignored to avoid repositioning - elif self._breaks_required and is_tab_offset: + elif self._break_required and is_tab_offset: return else: # Reset the "current" position altogether. @@ -64,9 +65,7 @@ def get_current_position(self): :raise: CaptionReadSyntaxError """ if not any(self._positions): - raise CaptionReadSyntaxError( - 'No Preamble Address Code [PAC] was provided' - ) + raise CaptionReadSyntaxError("No Preamble Address Code [PAC] was provided") else: return self._positions[0] @@ -86,17 +85,18 @@ def is_linebreak_required(self): """If the current position is simply one line below the previous. 
:rtype: bool """ - return self._breaks_required > 0 + return self._break_required def acknowledge_linebreak_consumed(self): """Call to acknowledge that the line required was consumed""" - self._breaks_required = 0 + self._break_required = False class DefaultProvidingPositionTracker(_PositioningTracker): """A _PositioningTracker that provides if needed a default value (14, 0), or uses the last positioning value set anywhere in the document """ + default = (14, 0) def __init__(self, positioning=None, default=None): diff --git a/setup.py b/setup.py index 3d4bd260..a139f657 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ setup( name='pycaption', - version='2.2.12.dev6', + version='2.2.12.dev7', description='Closed caption converter', long_description=open(README_PATH).read(), author='Joe Norton', diff --git a/tests/fixtures/dfxp.py b/tests/fixtures/dfxp.py index d0a052d0..864f3e7a 100644 --- a/tests/fixtures/dfxp.py +++ b/tests/fixtures/dfxp.py @@ -729,6 +729,7 @@ def sample_dfxp_to_render_with_only_default_positioning_input():
@@ -925,41 +933,40 @@ def sample_dfxp_from_scc_output(): abab
- cdcd
+ cdcd
+
efef
-
-
+
ghgh
ijij
klkl
-
+
mnmn
-+
opop
-+
qrqr
-
-
+
stst
uvuv
wxwx
-
+
yzyz
-
- 0101
+
+ 0101 +
+2323
-
-
+
4545
6767
8989
@@ -1519,4 +1526,4 @@ def sample_dfxp_default_styling_p_tags():