because i'm a fkin idiot

2026-07-01 08:15:46 +00:00 · 2025-12-11 15:10:29 -08:00
parent 7b3514bc02
commit f26fbab0f3
1 changed files with 26 additions and 41 deletions
--- a/web/frontend/src/features/transcription/hooks/useKaraokeHighlight.ts
+++ b/web/frontend/src/features/transcription/hooks/useKaraokeHighlight.ts
@@ -28,24 +28,11 @@ export function computeWordOffsets(words: { word: string; start: number; end: nu

    if (!words) return { fullText: '', offsets: [] };

-    words.forEach((w, index) => {
+    words.forEach((w) => {
        const startChar = textBuilder.length;
        textBuilder += w.word;
        const endChar = textBuilder.length;

-        // Gap Filling Logic:
-        // Extend the previous word's endTime to meet this word's startTime
-        // if the gap is small (e.g. natural pauses).
-        // This prevents flickering/skipping when the playback update rate is lower than the gap size.
-        if (index > 0) {
-            const prev = computedOffsets[index - 1];
-            const gap = w.start - prev.endTime;
-            // Fill gaps smaller than 0.7 seconds
-            if (gap > 0 && gap < 0.7) {
-                prev.endTime = w.start;
-            }
-        }
-
        computedOffsets.push({
            startChar,
            endChar,
@@ -60,6 +47,29 @@ export function computeWordOffsets(words: { word: string; start: number; end: nu
    return { fullText: textBuilder, offsets: computedOffsets };
 }

+// Binary search (O(log N)) for the last entry whose startTime <= currentTime; assumes offsets is sorted ascending by startTime.
+// Returns that entry's index, or -1 if no entry has started yet. endTime is never consulted, so passing an entry's endTime does not change the result.
+export function findActiveWordIndex(
+    offsets: { startTime: number; endTime: number; }[],
+    currentTime: number
+): number {
+    let low = 0;
+    let high = offsets.length - 1;
+    let result = -1;
+
+    while (low <= high) {
+        const mid = Math.floor((low + high) / 2);
+        if (offsets[mid].startTime <= currentTime) {
+            result = mid; // Candidate: this entry has already started.
+            // Keep searching to the right for a later entry that has also started (largest startTime <= currentTime wins).
+            low = mid + 1;
+        } else {
+            high = mid - 1;
+        }
+    }
+    return result;
+}
+
 export function useKaraokeHighlight(
    containerRef: React.RefObject<HTMLDivElement | null>,
    words: { word: string; start: number; end: number }[],
@@ -75,33 +85,8 @@ export function useKaraokeHighlight(
    useEffect(() => {
        if (!containerRef.current || typeof CSS === 'undefined' || !CSS.highlights) return;

-        // Binary search for performance (O(logN) vs O(N))
-        // particularly important for long transcripts
-        let activeWord = null;
-        let low = 0;
-        let high = offsets.length - 1;
-
-        while (low <= high) {
-            const mid = Math.floor((low + high) / 2);
-            if (offsets[mid].startTime <= currentTime) {
-                // This word started before or at currentTime.
-                // It's a candidate, but there might be a later one that also started before currentTime.
-                // (Though with non-overlapping words, this is usually unique, but let's be safe).
-                // Actually, if we find one, we check if it contains currentTime.
-                // If offsets are sorted and non-overlapping (mostly), we can optimize.
-                const w = offsets[mid];
-                if (currentTime <= w.endTime) {
-                    activeWord = w;
-                    break;
-                }
-                // If we are here, w started before currentTime but ended before currentTime.
-                // So we need to look later.
-                low = mid + 1;
-            } else {
-                // w started after currentTime. Look earlier.
-                high = mid - 1;
-            }
-        }
+        const activeIndex = findActiveWordIndex(offsets, currentTime);
+        const activeWord = activeIndex !== -1 ? offsets[activeIndex] : null;

        if (!activeWord) {
            if (CSS.highlights.has('karaoke-word')) {