-
Notifications
You must be signed in to change notification settings - Fork 9
/
SilentIntervalSRT.py
207 lines (171 loc) · 7.09 KB
/
SilentIntervalSRT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# Given an SRT, find any unmarked silent intervals and add them to the SRT
# It also does some other preprocessing steps:
# * cleaning up quotation symbols because those cause trouble for Praat
# * remove consecutive blank lines
# * make the interval numbers increase incrementally from 1
# * find timing errors
# Made by Hossep Dolatian (github.com/jhdeov/)
import codecs
import sys
import re
# Input and output files as arguments
# Fail with a usage hint instead of an IndexError when an argument is missing
if len(sys.argv) < 3:
    sys.exit(f"Usage: {sys.argv[0]} <input.srt> <output.srt>")
inFile = sys.argv[1]  # "srtInput.srt"
outFile = sys.argv[2]  # "srtOutput.srt"
print("Useful debugging info is printed into the message.log")
# The printing code was taken from https://stackoverflow.com/a/2513511
# From here on every print() goes to the log file until sys.stdout is restored
old_stdout = sys.stdout
# The input is read as UTF-8 and its text is echoed into the log, so the log
# must be UTF-8 as well; the locale default encoding may not be able to
# represent every subtitle character
log_file = open(inFile + ".message.log", "w", encoding="utf-8")
sys.stdout = log_file
# define a class for intervals based on the basic template for an SRT interval
class srtInterval:
    """One SRT entry: its index line, its timing line and its text.

    Attributes:
        number: the index of the entry, kept as a string.
        range: the timing line exactly as given ("start --> end").
        startTime, endTime: the two halves of ``range`` split on " --> ".
        content: the text of the entry, with every " doubled to "".
    """

    def __init__(self, number, range, content):
        """Store the three parts of an entry.

        Raises ValueError when ``range`` does not split into exactly two
        parts on " --> ".
        """
        self.number = number[:]
        self.range = range[:]
        start, end = self.range.split(" --> ")
        self.startTime = start
        self.endTime = end
        # Praat TextGrids are sensitive to the quotation symbol ", so each
        # one in the text is written as ""
        self.content = content.replace('"', '""')

    def __str__(self):
        """Return a three-line, human-readable dump used in the debug log."""
        return f"index: {self.number}\ntimes: {self.range}\ncontent: " + self.content
# creates an interval between two pre-existing SRT intervals
def createMissingInterval(currentInterval, nextInterval):
    """Return a "[Silence]" interval that fills the gap between two intervals.

    The new interval starts at the end time of ``currentInterval`` and ends
    at the start time of ``nextInterval``. Its number is the number of
    ``currentInterval`` with ".5" appended.
    """
    gapRange = " --> ".join((currentInterval.endTime, nextInterval.startTime))
    return srtInterval(currentInterval.number + ".5", gapRange, "[Silence]")
# renumbers the SRT intervals in the list. This is needed if we had to insert an interval
# note that the numbering is forced to start at 1. I don't know if this is bad
def updateIntervals(srtintervals):
    """Renumber the intervals in place as "1", "2", "3", ... in list order.

    Returns the same list object that was passed in.
    """
    for position, interval in enumerate(srtintervals, start=1):
        interval.number = str(position)
    return srtintervals
# create a silent interval at the beginning of the file, if needed
def createInitialSilence(endTime):
    """Return a "[Silence]" interval numbered "0" from 00:00:00,000 to ``endTime``."""
    return srtInterval("0", "00:00:00,000 --> " + endTime, "[Silence]")
# with codecs.open(inFile, 'r', 'utf-8') as i:
# with codecs.open(outFile, 'w', 'utf-8') as o:
##################################
# Will now read the SRT input file and start to cleanup
# this boolean will be used to check if we had to insert silence intervals
insertedSilences = False
srtintervals = []
# First we read the file and turn it into a list of lines
with codecs.open(inFile, 'r', 'utf-8') as iFile:
    lines = iFile.read().splitlines()
# Drop a byte order mark from the first line before any emptiness checks
if lines:
    lines[0] = lines[0].replace('\ufeff', "")
# An empty (or all-blank) file has no entries to parse; stop with a clear
# message instead of failing later with an IndexError
if not any(line.strip() for line in lines):
    print("The input file has no subtitle text, so there is nothing to convert")
    sys.stdout = old_stdout
    log_file.close()
    sys.exit("Error: " + inFile + " contains no subtitle entries")
# the input file must end in an empty new line. we add it in case it's absent
# (compared with != because `is not` tests object identity, not string equality)
if lines[-1] != "":
    lines.append("")
# Will remove any consecutive blank lines, if present
linesTemp = []
for i, (line, following) in enumerate(zip(lines, lines[1:])):
    if line == '' and following == '':
        print(f'There was a blank line at index {i} before another blank line. It was removed ')
    else:
        linesTemp.append(line)
# the last line has no successor to compare with, so it is always kept
linesTemp.append(lines[-1])
lines = linesTemp
# Walk through the lines and build one srtInterval per entry. An entry is an
# index line, a timing line, zero or more content lines and a closing blank line
lineCounter = 0
while lineCounter < len(lines):
    # A blank line where an entry should start (for example at the top of the
    # file) carries no information, so step over it
    if lines[lineCounter] == "":
        lineCounter += 1
        continue
    print(f"Currently working on line number {lineCounter} with content {lines[lineCounter]}")
    tempIndex = lines[lineCounter]
    lineCounter += 1
    # The timing line must exist and hold exactly one " --> " separator,
    # because srtInterval splits it into a start time and an end time
    tempTime = lines[lineCounter] if lineCounter < len(lines) else ""
    if tempTime.count(" --> ") != 1:
        print("Error, the entry with index", tempIndex, "has no valid timing line:", tempTime)
        print("Cannot create cleaned up SRT until this error is manually solved in the original SRT")
        sys.stdout = old_stdout
        log_file.close()
        sys.exit("Error: malformed entry " + tempIndex + " in the input SRT")
    lineCounter += 1
    # Everything up to the next blank line (or the end of the file) is content.
    # Collecting it this way also copes with an entry that has no text at all
    contentLines = []
    while lineCounter < len(lines) and lines[lineCounter] != "":
        contentLines.append(lines[lineCounter])
        print("\tContent of the current line:", "\n".join(contentLines))
        lineCounter += 1
        print("\tLineCounter:", lineCounter)
    tempContent = "\n".join(contentLines)
    # step over the blank line that closes the entry
    lineCounter += 1
    currentInterval = srtInterval(tempIndex, tempTime, tempContent)
    print("Created the following interval", currentInterval)
    srtintervals.append(currentInterval)
print("Done with creating the list")
print("The list currently has the following intervals:")
for i in srtintervals:
    print(i)
print("")
# Now we check if there any conflicting times, like if interval A precedes interval B,
# but A's endtime is after B's starttime
# NOTE: the times are compared as strings; this orders them correctly only
# while every timestamp uses the same fixed-width HH:MM:SS,mmm layout
foundTimingError = False
for currentInterval, nextInterval in zip(srtintervals, srtintervals[1:]):
    if currentInterval.endTime > nextInterval.startTime:
        print("Error, the following two consecutive intervals have contradictory times")
        print("Interval A:")
        print(currentInterval)
        print("Interval B")
        print(nextInterval)
        print("Cannot create cleaned up SRT until this error is manually solved in the original SRT")
        foundTimingError = True
        print("")
if foundTimingError:
    # quit() is a helper meant for the interactive shell and would exit with
    # status 0; restore stdout, close the log and exit with a failure status
    sys.stdout = old_stdout
    log_file.close()
    sys.exit("Timing errors were found in " + inFile + "; see " + inFile + ".message.log")
print("")
# Now we clean up the file by adding silences
# The list of intervals grows whenever a silence is inserted, so its length
# is re-read on every pass instead of being fixed up front
gapIndex = 0
while gapIndex < len(srtintervals) - 1:
    currentInterval = srtintervals[gapIndex]
    nextInterval = srtintervals[gapIndex + 1]
    if currentInterval.endTime != nextInterval.startTime:
        insertedSilences = True
        print("There is a missing interval between the following two intervals")
        print(currentInterval)
        print(nextInterval)
        newInterval = createMissingInterval(currentInterval, nextInterval)
        print("We created a silence new interval")
        print(newInterval)
        # the new silence becomes the "current" interval of the next pass
        srtintervals.insert(gapIndex + 1, newInterval)
    else:
        print("There is no missing interval between the following two intervals")
        print(currentInterval)
        print(nextInterval)
    gapIndex += 1
    print("")
print("The list currently:")
for interval in srtintervals:
    print(interval)
print("")
# check if need to add an initial silence
print("Check if need to add initial silence")
# Compare by value: `is not` tests object identity, which is true for a string
# parsed from the file even when it reads "00:00:00,000", so a zero-length
# silence used to be inserted in front of entries that already start at 0
needInitialSilence = srtintervals[0].startTime != "00:00:00,000"
if needInitialSilence:
    print("There is a missing initial silence:", srtintervals[0].startTime)
    newInterval = createInitialSilence(srtintervals[0].startTime)
    print("here's new interval")
    print(newInterval)
    srtintervals.insert(0, newInterval)
else:
    print("There is no missing initial silence:", srtintervals[0].startTime)
print("")
print("The list currently:")
for i in srtintervals:
    print(i)
# updates the interval indexes in the list, if needed
print("We will update the list with new indexes")
srtintervals = updateIntervals(srtintervals)
print("")
print("The list currently:")
for i in srtintervals:
    print(i)
# Write every interval as an SRT entry: number, timing line, content, blank line.
# write() is used because each entry is a single string; writelines() expects
# an iterable of strings and would walk the string character by character
with codecs.open(outFile, 'w', 'utf-8') as o:
    for line in srtintervals:
        o.write(line.number + '\n' + line.range + '\n' + line.content + '\n\n')
# Undo the redirection of print() into the log file and close the log
sys.stdout = old_stdout
log_file.close()