From 88e487b35318bd2e8e7a9a055e5221d397c16a90 Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Fri, 24 May 2024 14:08:29 +0300 Subject: [PATCH 01/10] change logic to consider the entire cue instead of lines in checking if first commands are doubled --- pycaption/scc/__init__.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index 50e98f6f..28f27761 100644 --- a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -164,6 +164,7 @@ def __init__(self, *args, **kw): ) self.last_command = '' + self.cue = [] self.buffer_dict = NotifyingDict() @@ -223,6 +224,7 @@ def read(self, content, lang='en-US', simulate_roll_up=False, offset=0): # split lines lines = content.splitlines() + # loop through each line except the first for line in lines[1:]: self._translate_line(line) @@ -307,15 +309,25 @@ def _translate_line(self, line): parts = r.findall(line.lower()) self.time_translator.start_at(parts[0][0]) - + cue_starters = ['9425', '9426', '94a7', '9429', '9420'] word_list = parts[0][2].split(' ') - pacs_are_doubled = len(word_list) > 1 and word_list[0] == word_list[1] + line_starts_a_cue = any(word in cue_starters for word in word_list) for idx, word in enumerate(word_list): # ignore empty results or invalid commands word = word.strip() - previous_is_pac_or_tab = idx > 0 and ( - _is_pac_command(word_list[idx-1]) or word_list[idx-1] in PAC_TAB_OFFSET_COMMANDS + # if line_starts_a_cue: + if word in cue_starters: + if len(word_list) > 0 and word_list[idx] == word_list[idx-1]: + self.cue.append(word) + else: + self.cue = [word] + else: + self.cue.append(word) + + previous_is_pac_or_tab = len(self.cue) > 1 and ( + _is_pac_command(word_list[idx - 1]) or word_list[idx - 1] in PAC_TAB_OFFSET_COMMANDS ) + pacs_are_doubled = len(self.cue) > 1 and self.cue[0] == self.cue[1] if len(word) == 4: self._translate_word( word=word, @@ -356,9 +368,10 @@ def _handle_double_command(self, word, pacs_are_doubled): # If we have doubled commands we're skipping also # doubled special characters and doubled extended characters # with only one member of each pair being displayed. - doubled_types = word in COMMANDS or _is_pac_command(word) + + doubled_types = word != "94a1" and word in COMMANDS or _is_pac_command(word) if pacs_are_doubled: - doubled_types = doubled_types or word in SPECIAL_CHARS or word in EXTENDED_CHARS + doubled_types = doubled_types or word in SPECIAL_CHARS or word in EXTENDED_CHARS or word == "94a1" if doubled_types and word == self.last_command: self.last_command = '' From 61d8e84582fd2b632cc370aa60e0e4f79e791fac Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Fri, 24 May 2024 14:12:03 +0300 Subject: [PATCH 02/10] remove leftover --- pycaption/scc/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index 28f27761..5e4f5d02 100644 --- a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -311,7 +311,6 @@ def _translate_line(self, line): self.time_translator.start_at(parts[0][0]) cue_starters = ['9425', '9426', '94a7', '9429', '9420'] word_list = parts[0][2].split(' ') - line_starts_a_cue = any(word in cue_starters for word in word_list) for idx, word in enumerate(word_list): # ignore empty results or invalid commands word = word.strip() From a51632f0d88d9e5a08bc46dd137d74ddc796c0bc Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Fri, 24 May 2024 17:59:33 +0300 Subject: [PATCH 03/10] solve the doubles at the start of cue issue --- pycaption/scc/__init__.py | 28 ++++++++++-------------- pycaption/scc/specialized_collections.py | 3 +-- tests/test_scc.py | 5 ++--- 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index 5e4f5d02..dbfc51d4 100644 --- a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -164,7 +164,7 @@ def __init__(self, *args, **kw): ) self.last_command = '' - self.cue = [] + self.double_starter = None self.buffer_dict = NotifyingDict() @@ -309,29 +309,18 @@ def _translate_line(self, line): parts = r.findall(line.lower()) self.time_translator.start_at(parts[0][0]) - cue_starters = ['9425', '9426', '94a7', '9429', '9420'] word_list = parts[0][2].split(' ') + for idx, word in enumerate(word_list): - # ignore empty results or invalid commands word = word.strip() - # if line_starts_a_cue: - if word in cue_starters: - if len(word_list) > 0 and word_list[idx] == word_list[idx-1]: - self.cue.append(word) - else: - self.cue = [word] - else: - self.cue.append(word) - - previous_is_pac_or_tab = len(self.cue) > 1 and ( + previous_is_pac_or_tab = len(word_list) > 1 and ( _is_pac_command(word_list[idx - 1]) or word_list[idx - 1] in PAC_TAB_OFFSET_COMMANDS ) - pacs_are_doubled = len(self.cue) > 1 and self.cue[0] == self.cue[1] if len(word) == 4: self._translate_word( word=word, previous_is_pac_or_tab=previous_is_pac_or_tab, - pacs_are_doubled=pacs_are_doubled + pacs_are_doubled=self.double_starter ) def _translate_word(self, word, previous_is_pac_or_tab, pacs_are_doubled): @@ -367,12 +356,17 @@ def _handle_double_command(self, word, pacs_are_doubled): # If we have doubled commands we're skipping also # doubled special characters and doubled extended characters # with only one member of each pair being displayed. - + cue_starter_commands = ['9425', '9426', '94a7', '9429', '9420'] doubled_types = word != "94a1" and word in COMMANDS or _is_pac_command(word) if pacs_are_doubled: - doubled_types = doubled_types or word in SPECIAL_CHARS or word in EXTENDED_CHARS or word == "94a1" + doubled_types = doubled_types or word in EXTENDED_CHARS or word == "94a1" + + if word in cue_starter_commands and word != self.last_command: + self.double_starter = False if doubled_types and word == self.last_command: + if word in cue_starter_commands: + self.double_starter = True self.last_command = '' return True # Fix for the diff --git a/pycaption/scc/specialized_collections.py b/pycaption/scc/specialized_collections.py index 920d0d7c..deaf56b9 100644 --- a/pycaption/scc/specialized_collections.py +++ b/pycaption/scc/specialized_collections.py @@ -1,5 +1,4 @@ import collections -import unicodedata from ..base import CaptionList, Caption, CaptionNode from ..geometry import ( @@ -9,7 +8,7 @@ from .constants import ( PAC_BYTES_TO_POSITIONING_MAP, COMMANDS, PAC_TAB_OFFSET_COMMANDS, MICROSECONDS_PER_CODEWORD, BACKGROUND_COLOR_CODES, - MID_ROW_CODES, EXTENDED_CHARS, SPECIAL_CHARS + MID_ROW_CODES, EXTENDED_CHARS ) PopOnCue = collections.namedtuple("PopOnCue", "buffer, start, end") diff --git a/tests/test_scc.py b/tests/test_scc.py index ece0e1ce..13a1da8c 100644 --- a/tests/test_scc.py +++ b/tests/test_scc.py @@ -223,11 +223,10 @@ def test_skip_duplicate_tab_offset(self, sample_scc_duplicate_tab_offset): def test_skip_duplicate_special_characters( self, sample_scc_duplicate_special_characters): expected_lines = [ - '®°½¿™¢£♪à èâêîôû', + '®®°°½½¿¿™™¢¢££♪♪àà èèââêêîîôôûû', '®°½¿™¢£♪à èâêîôû', '®°AA½¿™¢£♪à èâêAAîôû' ] - caption_set = SCCReader().read(sample_scc_duplicate_special_characters) actual_lines = [ node.content @@ -278,7 +277,7 @@ def test_freeze_rollup_captions_contents(self, sample_scc_roll_up_ru2): 'HELPING THE LOCAL NEIGHBORHOODS', 'AND IMPROVING THE LIVES OF ALL', 'WE SERVE.', - '®°½', + '®°½½', 'ABû', 'ÁÉÓ¡', "WHERE YOU'RE STANDING NOW,", From 0cf9273b00139a91b39750bd9afff2dd7524d766 Mon Sep 17 00:00:00 2001 From: Lorand Varga Date: Mon, 27 May 2024 13:39:16 +0300 Subject: [PATCH 04/10] Updates changelog and version Updates changelog and version --- docs/changelog.rst | 8 ++++++-- docs/conf.py | 4 ++-- setup.py | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index a80d63ff..b6067d18 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,10 +1,14 @@ Changelog --------- -2.2.10 -^^^^^ +2.2.11 +^^^^^^ - A space should not be placed before a mid row code if it follows a PAC command or a Tab Offset - The backspace command should be treated like other commands and duplicates should be skipped if PAC commands are duplicated +2.2.10 +^^^^^ +- Yanked. + 2.2.9 ^^^^^ - Yanked. diff --git a/docs/conf.py b/docs/conf.py index 39447828..9b455abf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -53,9 +53,9 @@ # built documents. # # The short X.Y version. -version = '2.2.10' +version = '2.2.11' # The full version, including alpha/beta/rc tags. -release = '2.2.10' +release = '2.2.11' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/setup.py b/setup.py index 45c0fc12..23ce3a7a 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ setup( name='pycaption', - version='2.2.10', + version='2.2.11.dev1', description='Closed caption converter', long_description=open(README_PATH).read(), author='Joe Norton', From 1dd94d218e597d52d518439a7339b64eef3e5794 Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Tue, 28 May 2024 15:37:46 +0300 Subject: [PATCH 05/10] use class variable instead of sending double flag as parameter --- pycaption/scc/__init__.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index dbfc51d4..ff5d12a1 100644 --- a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -164,7 +164,7 @@ def __init__(self, *args, **kw): ) self.last_command = '' - self.double_starter = None + self.double_starter = False self.buffer_dict = NotifyingDict() @@ -320,11 +320,10 @@ def _translate_line(self, line): self._translate_word( word=word, previous_is_pac_or_tab=previous_is_pac_or_tab, - pacs_are_doubled=self.double_starter ) - def _translate_word(self, word, previous_is_pac_or_tab, pacs_are_doubled): - if self._handle_double_command(word, pacs_are_doubled): + def _translate_word(self, word, previous_is_pac_or_tab): + if self._handle_double_command(word): # count frames for timing self.time_translator.increment_frames() return @@ -348,7 +347,7 @@ def _translate_word(self, word, previous_is_pac_or_tab, pacs_are_doubled): # count frames for timing only after processing a command self.time_translator.increment_frames() - def _handle_double_command(self, word, pacs_are_doubled): + def _handle_double_command(self, word): # If the caption is to be broadcast, each of the commands are doubled # up for redundancy in case the signal is garbled in transmission. # The decoder is programmed to ignore a second command when it is the @@ -358,7 +357,7 @@ def _handle_double_command(self, word, pacs_are_doubled): # with only one member of each pair being displayed. cue_starter_commands = ['9425', '9426', '94a7', '9429', '9420'] doubled_types = word != "94a1" and word in COMMANDS or _is_pac_command(word) - if pacs_are_doubled: + if self.double_starter: doubled_types = doubled_types or word in EXTENDED_CHARS or word == "94a1" if word in cue_starter_commands and word != self.last_command: From 5c12b60c142cb2b14accfd27ab6c1e574404d1ad Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Tue, 28 May 2024 16:38:34 +0300 Subject: [PATCH 06/10] move to constants cue starting codes --- pycaption/scc/__init__.py | 7 +++---- pycaption/scc/constants.py | 2 ++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index ff5d12a1..d7defbda 100644 --- a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -94,7 +94,7 @@ MICROSECONDS_PER_CODEWORD, CHARACTER_TO_CODE, SPECIAL_OR_EXTENDED_CHAR_TO_CODE, PAC_BYTES_TO_POSITIONING_MAP, PAC_HIGH_BYTE_BY_ROW, PAC_LOW_BYTE_BY_ROW_RESTRICTED, - PAC_TAB_OFFSET_COMMANDS, + PAC_TAB_OFFSET_COMMANDS, CUE_STARTING_COMMAND ) from .specialized_collections import ( # noqa: F401 TimingCorrectingCaptionList, NotifyingDict, CaptionCreator, @@ -355,16 +355,15 @@ def _handle_double_command(self, word): # If we have doubled commands we're skipping also # doubled special characters and doubled extended characters # with only one member of each pair being displayed. - cue_starter_commands = ['9425', '9426', '94a7', '9429', '9420'] doubled_types = word != "94a1" and word in COMMANDS or _is_pac_command(word) if self.double_starter: doubled_types = doubled_types or word in EXTENDED_CHARS or word == "94a1" - if word in cue_starter_commands and word != self.last_command: + if word in CUE_STARTING_COMMAND and word != self.last_command: self.double_starter = False if doubled_types and word == self.last_command: - if word in cue_starter_commands: + if word in CUE_STARTING_COMMAND: self.double_starter = True self.last_command = '' return True diff --git a/pycaption/scc/constants.py b/pycaption/scc/constants.py index feb089b4..bc2fcd50 100644 --- a/pycaption/scc/constants.py +++ b/pycaption/scc/constants.py @@ -1058,3 +1058,5 @@ def _restructure_bytes_to_position_map(byte_to_pos_map): "10a7", "10a8", "1029", "102a", "10ab", "102c", "10ad", "10ae", "102f", "97ad" ] + +CUE_STARTING_COMMAND = ['9425', '9426', '94a7', '9429', '9420'] From 6e5d9c793c82ed53a659722e44f03cf33c22c921 Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Wed, 29 May 2024 17:39:17 +0300 Subject: [PATCH 07/10] fix itlics breaking the lines too early, preventing webvtt writer from create a new cue in case of line break, add back special characters on code skipping condition --- pycaption/base.py | 20 ++++++++---- pycaption/scc/__init__.py | 3 +- pycaption/scc/specialized_collections.py | 9 +++++- pycaption/webvtt.py | 12 +++++-- tests/conftest.py | 4 +-- tests/fixtures/scc.py | 40 ++++++++++++++++++++++++ tests/test_scc.py | 8 ++--- tests/test_scc_conversion.py | 5 +++ 8 files changed, 85 insertions(+), 16 deletions(-) diff --git a/pycaption/base.py b/pycaption/base.py index 563d7f89..8e3da975 100644 --- a/pycaption/base.py +++ b/pycaption/base.py @@ -114,13 +114,16 @@ class CaptionNode: STYLE = 2 BREAK = 3 - def __init__(self, type_, layout_info=None, content=None, start=None): + def __init__( + self, type_, layout_info=None, content=None, start=None, position=None + ): """ :type type_: int :type layout_info: Layout """ self.type_ = type_ self.content = content + self.position = position # Boolean. Marks the beginning/ end of a Style node. self.start = start @@ -139,19 +142,24 @@ def __repr__(self): raise RuntimeError(f'Unknown node type: {t}') @staticmethod - def create_text(text, layout_info=None): + def create_text(text, layout_info=None, position=None): return CaptionNode( - CaptionNode.TEXT, layout_info=layout_info, content=text) + type_=CaptionNode.TEXT, layout_info=layout_info, + position=position, content=text + ) @staticmethod def create_style(start, content, layout_info=None): return CaptionNode( - CaptionNode.STYLE, layout_info=layout_info, content=content, + type_=CaptionNode.STYLE, layout_info=layout_info, content=content, start=start) @staticmethod - def create_break(layout_info=None): - return CaptionNode(CaptionNode.BREAK, layout_info=layout_info) + def create_break(layout_info=None, content=None): + return CaptionNode( + type_=CaptionNode.BREAK, layout_info=layout_info, + content=content + ) class Caption: diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index d7defbda..ef74b406 100644 --- a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -355,9 +355,10 @@ def _handle_double_command(self, word): # If we have doubled commands we're skipping also # doubled special characters and doubled extended characters # with only one member of each pair being displayed. + doubled_types = word != "94a1" and word in COMMANDS or _is_pac_command(word) if self.double_starter: - doubled_types = doubled_types or word in EXTENDED_CHARS or word == "94a1" + doubled_types = doubled_types or word in EXTENDED_CHARS or word == "94a1" or word in SPECIAL_CHARS if word in CUE_STARTING_COMMAND and word != self.last_command: self.double_starter = False diff --git a/pycaption/scc/specialized_collections.py b/pycaption/scc/specialized_collections.py index deaf56b9..4b8800ed 100644 --- a/pycaption/scc/specialized_collections.py +++ b/pycaption/scc/specialized_collections.py @@ -254,7 +254,10 @@ def create_and_store(self, node_buffer, start, end=0): layout_info = _get_layout_from_tuple(instruction.position) caption.nodes.append( CaptionNode.create_text( - instruction.text, layout_info=layout_info), + text=instruction.text, + layout_info=layout_info, + position=instruction.position + ) ) caption.layout_info = layout_info @@ -365,6 +368,10 @@ def interpret_command(self, command, previous_is_pac_or_tab=False): self._collection[-1].text = self._collection[-1].text[:-1] if 'italic' in text: + if self._position_tracer.is_linebreak_required(): + self._collection.append(_InstructionNode.create_break( + position=self._position_tracer.get_current_position())) + self._position_tracer.acknowledge_linebreak_consumed() if 'end' not in text: self._collection.append( _InstructionNode.create_italics_style( diff --git a/pycaption/webvtt.py b/pycaption/webvtt.py index 925ac8d3..d40f02c3 100644 --- a/pycaption/webvtt.py +++ b/pycaption/webvtt.py @@ -394,6 +394,7 @@ def _group_cues_by_layout(self, nodes, caption_set): return [] current_layout = None + current_node = None # A list with layout groups. Since WebVTT only support positioning # for different cues, each layout group has to be represented in a @@ -402,17 +403,24 @@ def _group_cues_by_layout(self, nodes, caption_set): # A properly encoded WebVTT string (plain unicode must be properly # escaped before being appended to this string) s = '' + row, column, prev_row, prev_column = 0, 0, 0, 0 for i, node in enumerate(nodes): if node.type_ == CaptionNode.TEXT: if s and current_layout and node.layout_info != current_layout: # If the positioning changes from one text node to # another, a new WebVTT cue has to be created. - layout_groups.append((s, current_layout)) - s = '' + row, column = node.position if node.position else (0, 0) + prev_row, prev_column = current_node.position if current_node.position else (0, 0) + if row == prev_row + 1: + s += '\n' + else: + layout_groups.append((s, current_layout)) + s = '' # ATTENTION: This is where the plain unicode node content is # finally encoded as WebVTT. s += self._encode_illegal_characters(node.content) or ' ' current_layout = node.layout_info + current_node = node elif node.type_ == CaptionNode.STYLE: resulting_style = self._calculate_resulting_style( node.content, caption_set diff --git a/tests/conftest.py b/tests/conftest.py index 2e361fb8..74530ae8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -54,14 +54,14 @@ scc_that_generates_webvtt_with_proper_newlines, sample_scc_produces_captions_with_start_and_end_time_the_same, sample_scc_pop_on, sample_scc_multiple_positioning, sample_scc_with_italics, - sample_scc_empty, sample_scc_roll_up_ru2, sample_no_positioning_at_all_scc, + sample_scc_empty, sample_scc_roll_up_ru2, sample_scc_roll_up_ru3, + sample_no_positioning_at_all_scc, sample_scc_with_line_too_long, sample_scc_no_explicit_end_to_last_caption, sample_scc_flashing_cue, sample_scc_eoc_first_command, sample_scc_with_extended_characters, sample_scc_with_ampersand_character, sample_scc_multiple_formats, sample_scc_duplicate_tab_offset, sample_scc_duplicate_special_characters, sample_scc_tab_offset, sample_scc_with_unknown_commands, sample_scc_special_and_extended_characters, - sample_scc_with_line_too_long ) from tests.fixtures.srt import ( # noqa: F401 sample_srt, sample_srt_ascii, sample_srt_numeric, sample_srt_empty, diff --git a/tests/fixtures/scc.py b/tests/fixtures/scc.py index 425e1d24..a1b66892 100644 --- a/tests/fixtures/scc.py +++ b/tests/fixtures/scc.py @@ -140,6 +140,46 @@ def sample_scc_roll_up_ru2(): 00:00:12;07 9425 9425 94ad 94ad 9470 9470 91b0 9131 9132 9132 +00:00:12;30 9425 94ad 94ad 9470 9470 91b0 9131 9132 9132 + +00:00:13;07 9425 9425 94ad 94ad 9470 9470 c1c2 c3c4 c580 91bf + +00:00:14;07 9425 9425 94ad 94ad 9470 9470 9220 9220 92a1 92a2 92a7 + +00:00:17;01 9426 9426 94ad 94ad 9470 9470 57c8 4552 4520 d94f d5a7 5245 20d3 54c1 cec4 49ce c720 ce4f 572c + +00:00:18;19 9426 9426 94ad 94ad 9470 9470 4c4f 4fcb 49ce c720 4fd5 5420 54c8 4552 452c 2054 c8c1 54a7 d320 c14c 4c + +00:00:20;06 9426 9426 94ad 94ad 9470 9470 54c8 4520 4352 4f57 c4ae + +00:00:21;24 9426 9426 94ad 94ad 9470 9470 3e3e 2049 5420 57c1 d320 c74f 4fc4 2054 4f20 c245 2049 ce20 54c8 45 + +00:00:34;27 94a7 94ad 9470 c16e 6420 f2e5 73f4 eff2 e520 49ef f761 a773 20ec 616e 642c 20f7 61f4 e5f2 + +00:00:36;12 94a7 94ad 9470 c16e 6420 f7e9 ec64 ece9 e6e5 ae80 + +00:00:44;08 94a7 94ad 9470 3e3e 20c2 e96b e520 49ef f761 2c20 79ef 75f2 2073 ef75 f2e3 e520 e6ef f280 +""" + + +@pytest.fixture(scope="session") +def sample_scc_roll_up_ru3(): + return """\ +Scenarist_SCC V1.0 +00:00:00;22 9425 9425 94ad 94ad 9470 9470 3e3e 3e20 c849 ae80 + +00:00:02;23 9425 9425 94ad 94ad 9470 9470 49a7 cd20 cb45 d649 ce20 43d5 cece 49ce c720 c1ce c420 c154 + +00:00:04;17 9425 9425 94ad 94ad 9470 9470 49ce d645 d354 4f52 a7d3 20c2 c1ce cb20 5745 20c2 454c 4945 d645 2049 ce80 + +00:00:06;04 9425 9425 94ad 94ad 9470 9470 c845 4cd0 49ce c720 54c8 4520 4c4f 43c1 4c20 ce45 49c7 c8c2 4f52 c84f 4fc4 d380 + +00:00:09;21 9425 9425 94ad 94ad 9470 9470 c1ce c420 49cd d052 4fd6 49ce c720 54c8 4520 4c49 d645 d320 4f46 20c1 4c4c + +00:00:11;07 9425 9425 94ad 94ad 9470 9470 5745 20d3 4552 d645 ae80 + +00:00:12;07 9425 9425 94ad 94ad 9470 9470 91b0 9131 9132 9132 + 00:00:13;07 9425 9425 94ad 94ad 9470 9470 c1c2 c3c4 c580 91bf 00:00:14;07 9425 9425 94ad 94ad 9470 9470 9220 9220 92a1 92a2 92a7 diff --git a/tests/test_scc.py b/tests/test_scc.py index 13a1da8c..3b78e138 100644 --- a/tests/test_scc.py +++ b/tests/test_scc.py @@ -223,7 +223,7 @@ def test_skip_duplicate_tab_offset(self, sample_scc_duplicate_tab_offset): def test_skip_duplicate_special_characters( self, sample_scc_duplicate_special_characters): expected_lines = [ - '®®°°½½¿¿™™¢¢££♪♪àà èèââêêîîôôûû', + '®°½¿™¢£♪à èâêîôû', '®°½¿™¢£♪à èâêîôû', '®°AA½¿™¢£♪à èâêAAîôû' ] @@ -277,6 +277,7 @@ def test_freeze_rollup_captions_contents(self, sample_scc_roll_up_ru2): 'HELPING THE LOCAL NEIGHBORHOODS', 'AND IMPROVING THE LIVES OF ALL', 'WE SERVE.', + '®°½', '®°½½', 'ABû', 'ÁÉÓ¡', @@ -322,8 +323,8 @@ def test_multiple_formats(self, sample_scc_multiple_formats): assert expected_text_lines == text_lines - def test_freeze_semicolon_spec_time(self, sample_scc_roll_up_ru2): - scc1 = SCCReader().read(sample_scc_roll_up_ru2) + def test_freeze_semicolon_spec_time(self, sample_scc_roll_up_ru3): + scc1 = SCCReader().read(sample_scc_roll_up_ru3) captions = scc1.get_captions('en-US') expected_timings = [ (733333.3333333333, 2766666.6666666665), @@ -345,7 +346,6 @@ def test_freeze_semicolon_spec_time(self, sample_scc_roll_up_ru2): ] actual_timings = [(c_.start, c_.end) for c_ in captions] - assert expected_timings == actual_timings def test_freeze_colon_spec_time(self, sample_scc_pop_on): diff --git a/tests/test_scc_conversion.py b/tests/test_scc_conversion.py index 67dc1fb9..a329dea4 100644 --- a/tests/test_scc_conversion.py +++ b/tests/test_scc_conversion.py @@ -48,6 +48,11 @@ def test_dfxp_is_valid_xml_when_scc_source_has_weird_italic_commands( dfxp = DFXPWriter().write(caption_set) + print("================") + print("================") + print("================") + print(dfxp) + assert dfxp == sample_dfxp_with_properly_closing_spans_output def test_dfxp_is_valid_xml_when_scc_source_has_ampersand_character( From 3b02169b5f565ba2853b79e16754fb74b5b30153 Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Wed, 29 May 2024 17:42:55 +0300 Subject: [PATCH 08/10] bump version to 2.2.11.dev2 --- docs/changelog.rst | 2 ++ setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index b6067d18..1860e6ad 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,8 @@ Changelog ^^^^^^ - A space should not be placed before a mid row code if it follows a PAC command or a Tab Offset - The backspace command should be treated like other commands and duplicates should be skipped if PAC commands are duplicated +- Prevent webvtt writer from creating a new cue in case of line break +- In case of style setting PAC which also breaks the line, we add the break first, then the style tag 2.2.10 ^^^^^ diff --git a/setup.py b/setup.py index 23ce3a7a..24b3f328 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ setup( name='pycaption', - version='2.2.11.dev1', + version='2.2.11.dev2', description='Closed caption converter', long_description=open(README_PATH).read(), author='Joe Norton', From 29e30fd3da22c621232661a66f8ba41e33359bb2 Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Thu, 30 May 2024 10:50:02 +0300 Subject: [PATCH 09/10] bump version to 2.2.11 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 24b3f328..217443fa 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ setup( name='pycaption', - version='2.2.11.dev2', + version='2.2.11', description='Closed caption converter', long_description=open(README_PATH).read(), author='Joe Norton', From dbffa9026501c5a91a83d5dc3a89138fcda4aa18 Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Thu, 30 May 2024 10:53:35 +0300 Subject: [PATCH 10/10] remove leftover prints --- tests/test_scc_conversion.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test_scc_conversion.py b/tests/test_scc_conversion.py index a329dea4..67dc1fb9 100644 --- a/tests/test_scc_conversion.py +++ b/tests/test_scc_conversion.py @@ -48,11 +48,6 @@ def test_dfxp_is_valid_xml_when_scc_source_has_weird_italic_commands( dfxp = DFXPWriter().write(caption_set) - print("================") - print("================") - print("================") - print(dfxp) - assert dfxp == sample_dfxp_with_properly_closing_spans_output def test_dfxp_is_valid_xml_when_scc_source_has_ampersand_character(