Skip to content

Commit

Permalink
Merge pull request #50 from bscholer/auto-timestamp-format
Browse files Browse the repository at this point in the history
Enhanced Timestamp Formatting and Custom Interval Grouping
  • Loading branch information
djmango authored Feb 13, 2024
2 parents cc36ae5 + 9e86e49 commit e29dc89
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 34 deletions.
27 changes: 25 additions & 2 deletions src/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { Transcription } from "./main";
interface TranscriptionSettings {
timestamps: boolean;
timestampFormat: string;
timestampInterval: string; // easier to store as a string and convert to number when needed
translate: boolean;
language: string;
verbosity: number;
Expand Down Expand Up @@ -33,7 +34,8 @@ const IS_SWIFTINK = "swiftink";

const DEFAULT_SETTINGS: TranscriptionSettings = {
timestamps: false,
timestampFormat: "HH:mm:ss",
timestampFormat: "auto",
timestampInterval: "0",
translate: false,
language: "auto",
verbosity: 1,
Expand Down Expand Up @@ -253,11 +255,12 @@ class TranscriptionSettingTab extends PluginSettingTab {
new Setting(containerEl)
.setName("Timestamp format")
.setDesc(
"Your choice of hours, minutes, and/or seconds in the timestamp",
"Your choice of hours, minutes, and/or seconds in the timestamp. Auto uses the shortest possible format.",
)
.setClass("depends-on-timestamps")
.addDropdown((dropdown) =>
dropdown
.addOption("auto", "Auto")
.addOption("HH:mm:ss", "HH:mm:ss")
.addOption("mm:ss", "mm:ss")
.addOption("ss", "ss")
Expand All @@ -268,6 +271,26 @@ class TranscriptionSettingTab extends PluginSettingTab {
}),
);

new Setting(containerEl)
.setName("Timestamp interval")
.setDesc("The interval at which to add timestamps, in seconds.")
.setClass("depends-on-timestamps")
.addDropdown((dropdown) =>
dropdown
.addOption("0", "Off")
.addOption("5", "5")
.addOption("10", "10")
.addOption("15", "15")
.addOption("20", "20")
.addOption("30", "30")
.addOption("60", "60")
.setValue(this.plugin.settings.timestampInterval)
.onChange(async (value) => {
this.plugin.settings.timestampInterval = value;
await this.plugin.saveSettings();
}),
);

new Setting(containerEl)
.setName("Swiftink Settings")
.setClass("swiftink-settings")
Expand Down
104 changes: 72 additions & 32 deletions src/transcribe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,24 +43,60 @@ export class TranscriptionEngine {
segmentsToTimestampedString(
segments: components["schemas"]["TimestampedTextSegment"][],
timestampFormat: string,
interval: number = 0 // in seconds, default is 0 which means no interval adjustment
): string {
let transcription = "";
for (const segment of segments) {
let start = new Date(segment.start * 1000);
let end = new Date(segment.end * 1000);
let maxDuration = 0;

start = new Date(
start.getTime() + start.getTimezoneOffset() * 60000,
);
end = new Date(end.getTime() + end.getTimezoneOffset() * 60000);
// Find the largest timestamp in the segments
segments.forEach(segment => {
maxDuration = Math.max(maxDuration, segment.end);
});

const start_formatted = format(start, timestampFormat);
const end_formatted = format(end, timestampFormat);
// Decide format based on maxDuration
const autoFormat = maxDuration < 3600 ? "mm:ss" : "HH:mm:ss";

const renderSegments = (segments: components["schemas"]["TimestampedTextSegment"][]) => (
segments.reduce((transcription: string, segment ) => {
let start = new Date(segment.start * 1000);
let end = new Date(segment.end * 1000);
start = new Date(start.getTime() + start.getTimezoneOffset() * 60000);
end = new Date(end.getTime() + end.getTimezoneOffset() * 60000);
const formatToUse = timestampFormat === 'auto' ? autoFormat : timestampFormat;
const start_formatted = format(start, formatToUse);
const end_formatted = format(end, formatToUse);
const segment_string = `${start_formatted} - ${end_formatted}: ${segment.text.trim()}\n`;
transcription += segment_string;
return transcription;
}, ""));

if (interval > 0) {
// Group segments based on interval
const groupedSegments: Record<string, { start: number, end: number, texts: string[] }> = {};
segments.forEach(segment => {
// Determine which interval the segment's start time falls into
const intervalStart = Math.floor(segment.start / interval) * interval;
if (!groupedSegments[intervalStart]) {
groupedSegments[intervalStart] = {
start: segment.start,
end: segment.end,
texts: [segment.text]
};
} else {
groupedSegments[intervalStart].end = Math.max(groupedSegments[intervalStart].end, segment.end);
groupedSegments[intervalStart].texts.push(segment.text);
}
});

const segment_string = `${start_formatted} - ${end_formatted}: ${segment.text}\n`;
transcription += segment_string;
const bucketedSegments = Object.values(groupedSegments).map(group => ({
start: group.start,
end: group.end,
text: group.texts.join("").trim()
}));
return renderSegments(bucketedSegments);
} else {
// Default behavior: timestamp each segment individually
return renderSegments(segments);
}
return transcription;
}

async getTranscription(file: TFile): Promise<string> {
Expand Down Expand Up @@ -92,11 +128,11 @@ export class TranscriptionEngine {
await payloadGenerator(payload_data);

let args = "output=json"; // always output json, so we can have the timestamps if we need them
const { translate, encode, vadFilter, timestamps, wordTimestamps, language, initialPrompt } = this.settings;
args += `&word_timestamps=true`; // always output word timestamps, so we can have the timestamps if we need them
const { translate, encode, vadFilter, language, initialPrompt } = this.settings;
if (translate) args += `&task=translate`;
if (encode !== DEFAULT_SETTINGS.encode) args += `&encode=${encode}`;
if (vadFilter !== DEFAULT_SETTINGS.vadFilter) args += `&vad_filter=${vadFilter}`;
if (timestamps && wordTimestamps !== DEFAULT_SETTINGS.wordTimestamps) args += `&word_timestamps=${wordTimestamps}`;
if (language !== DEFAULT_SETTINGS.language) args += `&language=${language}`;
if (initialPrompt) args += `&initial_prompt=${initialPrompt}`;

Expand Down Expand Up @@ -124,23 +160,24 @@ export class TranscriptionEngine {
const preprocessed = preprocessWhisperASRResponse(response.json);
if (this.settings.debug) console.log("Preprocessed response:", preprocessed);

if (
this.settings.wordTimestamps
&& preprocessed.segments.some((segment: WhisperASRSegment) => segment.wordTimestamps)
) {
// Create segments for each word timestamp if word timestamps are available and enabled
const wordSegments = preprocessed.segments
.reduce((acc: components["schemas"]["TimestampedTextSegment"][], segment: WhisperASRSegment) => {
if (segment.wordTimestamps) {
acc.push(...segment.wordTimestamps.map(wordTimestamp => ({
start: wordTimestamp.start,
end: wordTimestamp.end,
text: wordTimestamp.word
} as components["schemas"]["TimestampedTextSegment"])));
}
return acc;
}, []);
// Create segments for each word timestamp if word timestamps are available
const wordSegments = preprocessed.segments
.reduce((acc: components["schemas"]["TimestampedTextSegment"][], segment: WhisperASRSegment) => {
if (segment.wordTimestamps) {
acc.push(...segment.wordTimestamps.map(wordTimestamp => ({
start: wordTimestamp.start,
end: wordTimestamp.end,
text: wordTimestamp.word
} as components["schemas"]["TimestampedTextSegment"])));
}
return acc;
}, []);

if (this.settings.wordTimestamps) {
return this.segmentsToTimestampedString(wordSegments, this.settings.timestampFormat);
} else if (parseInt(this.settings.timestampInterval)) {
// Feed the function word segments with the interval
return this.segmentsToTimestampedString(wordSegments, this.settings.timestampFormat, parseInt(this.settings.timestampInterval));
} else if (this.settings.timestamps) {
// Use existing segment-to-string functionality if only segment timestamps are needed
const segments = preprocessed.segments.map((segment: WhisperASRSegment) => ({
Expand All @@ -151,7 +188,10 @@ export class TranscriptionEngine {
return this.segmentsToTimestampedString(segments, this.settings.timestampFormat);
} else if (preprocessed.segments) {
// Concatenate all segments into a single string if no timestamps are required
return preprocessed.segments.map((segment: WhisperASRSegment) => segment.text).join("\n");
return preprocessed.segments
.map((segment: WhisperASRSegment) => segment.text)
.map(s => s.trim())
.join("\n");
} else {
// Fallback to full text if no segments are there
return preprocessed.text;
Expand Down

0 comments on commit e29dc89

Please sign in to comment.