From c2d29fc9c6f757ea62193803715c914df196cf60 Mon Sep 17 00:00:00 2001
From: Geoff Tognetti <grt@Mac.localdomain>
Date: Sat, 15 Nov 2025 13:24:48 -0600
Subject: [PATCH] Fix YouTube downloads - Add Deno runtime for video cipher
 decryption

YouTube downloads were failing with an "exit status 1" error. Root cause:
YouTube now requires yt-dlp to use a JavaScript runtime for video cipher
decryption.

Changes:
- Install Deno runtime in both Dockerfiles (standard and CUDA)
- Switch the dependency from yt-dlp to yt-dlp[default] so that yt-dlp's
  default set of optional dependencies is installed
- Add stderr capture to YouTube download handler for better error diagnostics
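- Add performance logging for YouTube downloads (timing and file size)
- Change NewWhisperXAdapter to take the environment path as a parameter
  and remove its init() self-registration with the adapter registry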

Fixes #224

See: https://github.com/yt-dlp/yt-dlp/issues/14404
---
 Dockerfile                                    |  8 ++++
 Dockerfile.cuda                               |  8 ++++
 internal/api/handlers.go                      | 37 ++++++++++++++++++-
 .../adapters/whisperx_adapter.go              | 14 ++-----
 4 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index d362269f..238f899b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -68,6 +68,14 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
   && chmod 755 /usr/local/bin/uv \
   && uv --version
 
+# Install Deno (JavaScript runtime required for yt-dlp YouTube downloads)
+# YouTube now requires JS execution for video cipher decryption
+# See: https://github.com/yt-dlp/yt-dlp/issues/14404
+RUN curl -fsSL https://deno.land/install.sh | sh \
+  && cp /root/.deno/bin/deno /usr/local/bin/deno \
+  && chmod 755 /usr/local/bin/deno \
+  && deno --version
+
 # Create default user (will be modified at runtime if needed)
 RUN groupadd -g 1000 appuser \
   && useradd -m -u 1000 -g 1000 appuser \
diff --git a/Dockerfile.cuda b/Dockerfile.cuda
index 24a4d6ab..c91ebacc 100644
--- a/Dockerfile.cuda
+++ b/Dockerfile.cuda
@@ -71,6 +71,14 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
   && chmod 755 /usr/local/bin/uv \
   && uv --version
 
+# Install Deno (JavaScript runtime required for yt-dlp YouTube downloads)
+# YouTube now requires JS execution for video cipher decryption
+# See: https://github.com/yt-dlp/yt-dlp/issues/14404
+RUN curl -fsSL https://deno.land/install.sh | sh \
+  && cp /root/.deno/bin/deno /usr/local/bin/deno \
+  && chmod 755 /usr/local/bin/deno \
+  && deno --version
+
 # Create default user (will be modified at runtime if needed)
 # Use 10001 to avoid conflicts with existing users in CUDA base image
 RUN groupadd -g 10001 appuser \
diff --git a/internal/api/handlers.go b/internal/api/handlers.go
index 3a3956c7..06b834c8 100644
--- a/internal/api/handlers.go
+++ b/internal/api/handlers.go
@@ -1,6 +1,7 @@
 package api
 
 import (
+	"bytes"
 	"context"
 	"crypto/rand"
 	"crypto/sha256"
@@ -2684,16 +2685,22 @@ func (h *Handler) DownloadFromYouTube(c *gin.Context) {
 		title = *req.Title
 	} else {
 		// Get title from yt-dlp
+		titleStart := time.Now()
 		cmd := exec.Command(h.config.UVPath, "run", "--native-tls", "--project", h.config.WhisperXEnv, "python", "-m", "yt_dlp", "--get-title", req.URL)
 		titleBytes, err := cmd.Output()
 		if err != nil {
 			title = "YouTube Audio"
+			logger.Warn("Failed to get YouTube title", "url", req.URL, "error", err.Error(), "duration", time.Since(titleStart))
 		} else {
 			title = strings.TrimSpace(string(titleBytes))
+			logger.Info("YouTube title retrieved", "title", title, "duration", time.Since(titleStart))
 		}
 	}
 
 	// Download audio using yt-dlp in Python environment
+	logger.Info("Starting YouTube download", "url", req.URL, "job_id", jobID)
+	downloadStart := time.Now()
+
 	ytDlpCmd := exec.Command(h.config.UVPath, "run", "--native-tls", "--project", h.config.WhisperXEnv, "python", "-m", "yt_dlp",
 		"--extract-audio",
 		"--audio-format", "mp3",
@@ -2703,9 +2710,23 @@ func (h *Handler) DownloadFromYouTube(c *gin.Context) {
 		req.URL,
 	)
 
-	// Execute download
+	// Execute download and capture stderr for better error messages
+	var stderr bytes.Buffer
+	ytDlpCmd.Stderr = &stderr
+
 	if err := ytDlpCmd.Run(); err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Failed to download YouTube audio: %v", err)})
+		stderrOutput := stderr.String()
+		logger.Error("YouTube download failed",
+			"url", req.URL,
+			"job_id", jobID,
+			"error", err.Error(),
+			"stderr", stderrOutput,
+			"duration", time.Since(downloadStart))
+
+		c.JSON(http.StatusInternalServerError, gin.H{
+			"error":   fmt.Sprintf("Failed to download YouTube audio: %v", err),
+			"details": stderrOutput,
+		})
 		return
 	}
 
@@ -2719,6 +2740,18 @@ func (h *Handler) DownloadFromYouTube(c *gin.Context) {
 
 	actualFilePath := matches[0]
 
+	// Get file size for performance logging
+	fileInfo, err := os.Stat(actualFilePath)
+	if err == nil {
+		fileSizeMB := float64(fileInfo.Size()) / 1024 / 1024
+		logger.Info("YouTube download completed",
+			"url", req.URL,
+			"job_id", jobID,
+			"file_path", actualFilePath,
+			"file_size_mb", fmt.Sprintf("%.2f", fileSizeMB),
+			"duration", time.Since(downloadStart))
+	}
+
 	// Create transcription record
 	job := models.TranscriptionJob{
 		ID:        jobID,
diff --git a/internal/transcription/adapters/whisperx_adapter.go b/internal/transcription/adapters/whisperx_adapter.go
index fb6bc4de..7aaf3533 100644
--- a/internal/transcription/adapters/whisperx_adapter.go
+++ b/internal/transcription/adapters/whisperx_adapter.go
@@ -12,7 +12,6 @@ import (
 	"time"
 
 	"scriberr/internal/transcription/interfaces"
-	"scriberr/internal/transcription/registry"
 	"scriberr/pkg/logger"
 )
 
@@ -23,9 +22,7 @@ type WhisperXAdapter struct {
 }
 
 // NewWhisperXAdapter creates a new WhisperX adapter
-func NewWhisperXAdapter() *WhisperXAdapter {
-	envPath := "whisperx-env"
-	
+func NewWhisperXAdapter(envPath string) *WhisperXAdapter {
 	capabilities := interfaces.ModelCapabilities{
 		ModelID:     "whisperx",
 		ModelFamily: "whisper",
@@ -349,7 +346,7 @@ func (w *WhisperXAdapter) updateWhisperXDependencies(whisperxPath string) error
 		content = strings.ReplaceAll(content,
 			`"transformers>=4.48.0",`,
 			`"transformers>=4.48.0",
-    "yt-dlp",`)
+    "yt-dlp[default]",`)
 	}
 
 	if err := os.WriteFile(pyprojectPath, []byte(content), 0644); err != nil {
@@ -593,14 +590,9 @@ func (w *WhisperXAdapter) parseResult(outputDir string, input interfaces.AudioIn
 func (w *WhisperXAdapter) GetEstimatedProcessingTime(input interfaces.AudioInput) time.Duration {
 	// WhisperX processing time varies by model size
 	baseTime := w.BaseAdapter.GetEstimatedProcessingTime(input)
-	
+
 	// Adjust based on model size (if we can determine it)
 	// This would need model size information from parameters
 	// For now, use base estimation
 	return baseTime
-}
-
-// init registers the WhisperX adapter
-func init() {
-	registry.RegisterTranscriptionAdapter("whisperx", NewWhisperXAdapter())
 }
\ No newline at end of file