From c2d29fc9c6f757ea62193803715c914df196cf60 Mon Sep 17 00:00:00 2001 From: Geoff Tognetti Date: Sat, 15 Nov 2025 13:24:48 -0600 Subject: [PATCH] Fix YouTube downloads - Add Deno runtime for video cipher decryption YouTube downloads were failing with "exit status 1" error. Root cause: YouTube now requires yt-dlp to use a JavaScript runtime for video cipher decryption. Changes: - Install Deno runtime in both Dockerfiles (standard and CUDA) - Upgrade from yt-dlp to yt-dlp[default] to include yt-dlp's default set of optional dependencies - Add stderr capture to YouTube download handler for better error diagnostics - Add performance logging for YouTube downloads (timing and file size) - Make NewWhisperXAdapter take the environment path as a parameter and remove its init()-time self-registration Fixes #224 See: https://github.com/yt-dlp/yt-dlp/issues/14404 --- Dockerfile | 8 ++++ Dockerfile.cuda | 8 ++++ internal/api/handlers.go | 37 ++++++++++++++++++- .../adapters/whisperx_adapter.go | 14 ++----- 4 files changed, 54 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index d362269f..238f899b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -68,6 +68,14 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh \ && chmod 755 /usr/local/bin/uv \ && uv --version +# Install Deno (JavaScript runtime required for yt-dlp YouTube downloads) +# YouTube now requires JS execution for video cipher decryption +# See: https://github.com/yt-dlp/yt-dlp/issues/14404 +RUN curl -fsSL https://deno.land/install.sh | sh \ + && cp /root/.deno/bin/deno /usr/local/bin/deno \ + && chmod 755 /usr/local/bin/deno \ + && deno --version + # Create default user (will be modified at runtime if needed) RUN groupadd -g 1000 appuser \ && useradd -m -u 1000 -g 1000 appuser \ diff --git a/Dockerfile.cuda b/Dockerfile.cuda index 24a4d6ab..c91ebacc 100644 --- a/Dockerfile.cuda +++ b/Dockerfile.cuda @@ -71,6 +71,14 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh \ && chmod 755 /usr/local/bin/uv \ && uv --version +# Install Deno (JavaScript runtime required for yt-dlp YouTube downloads) +# YouTube now requires JS execution
for video cipher decryption +# See: https://github.com/yt-dlp/yt-dlp/issues/14404 +RUN curl -fsSL https://deno.land/install.sh | sh \ + && cp /root/.deno/bin/deno /usr/local/bin/deno \ + && chmod 755 /usr/local/bin/deno \ + && deno --version + # Create default user (will be modified at runtime if needed) # Use 10001 to avoid conflicts with existing users in CUDA base image RUN groupadd -g 10001 appuser \ diff --git a/internal/api/handlers.go b/internal/api/handlers.go index 3a3956c7..06b834c8 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -1,6 +1,7 @@ package api import ( + "bytes" "context" "crypto/rand" "crypto/sha256" @@ -2684,16 +2685,22 @@ func (h *Handler) DownloadFromYouTube(c *gin.Context) { title = *req.Title } else { // Get title from yt-dlp + titleStart := time.Now() cmd := exec.Command(h.config.UVPath, "run", "--native-tls", "--project", h.config.WhisperXEnv, "python", "-m", "yt_dlp", "--get-title", req.URL) titleBytes, err := cmd.Output() if err != nil { title = "YouTube Audio" + logger.Warn("Failed to get YouTube title", "url", req.URL, "error", err.Error(), "duration", time.Since(titleStart)) } else { title = strings.TrimSpace(string(titleBytes)) + logger.Info("YouTube title retrieved", "title", title, "duration", time.Since(titleStart)) } } // Download audio using yt-dlp in Python environment + logger.Info("Starting YouTube download", "url", req.URL, "job_id", jobID) + downloadStart := time.Now() + ytDlpCmd := exec.Command(h.config.UVPath, "run", "--native-tls", "--project", h.config.WhisperXEnv, "python", "-m", "yt_dlp", "--extract-audio", "--audio-format", "mp3", @@ -2703,9 +2710,23 @@ func (h *Handler) DownloadFromYouTube(c *gin.Context) { req.URL, ) - // Execute download + // Execute download and capture stderr for better error messages + var stderr bytes.Buffer + ytDlpCmd.Stderr = &stderr + if err := ytDlpCmd.Run(); err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Failed to download 
YouTube audio: %v", err)}) + stderrOutput := stderr.String() + logger.Error("YouTube download failed", + "url", req.URL, + "job_id", jobID, + "error", err.Error(), + "stderr", stderrOutput, + "duration", time.Since(downloadStart)) + + c.JSON(http.StatusInternalServerError, gin.H{ + "error": fmt.Sprintf("Failed to download YouTube audio: %v", err), + "details": stderrOutput, + }) return } @@ -2719,6 +2740,18 @@ func (h *Handler) DownloadFromYouTube(c *gin.Context) { actualFilePath := matches[0] + // Get file size for performance logging + fileInfo, err := os.Stat(actualFilePath) + if err == nil { + fileSizeMB := float64(fileInfo.Size()) / 1024 / 1024 + logger.Info("YouTube download completed", + "url", req.URL, + "job_id", jobID, + "file_path", actualFilePath, + "file_size_mb", fmt.Sprintf("%.2f", fileSizeMB), + "duration", time.Since(downloadStart)) + } + // Create transcription record job := models.TranscriptionJob{ ID: jobID, diff --git a/internal/transcription/adapters/whisperx_adapter.go b/internal/transcription/adapters/whisperx_adapter.go index fb6bc4de..7aaf3533 100644 --- a/internal/transcription/adapters/whisperx_adapter.go +++ b/internal/transcription/adapters/whisperx_adapter.go @@ -12,7 +12,6 @@ import ( "time" "scriberr/internal/transcription/interfaces" - "scriberr/internal/transcription/registry" "scriberr/pkg/logger" ) @@ -23,9 +22,7 @@ type WhisperXAdapter struct { } // NewWhisperXAdapter creates a new WhisperX adapter -func NewWhisperXAdapter() *WhisperXAdapter { - envPath := "whisperx-env" - +func NewWhisperXAdapter(envPath string) *WhisperXAdapter { capabilities := interfaces.ModelCapabilities{ ModelID: "whisperx", ModelFamily: "whisper", @@ -349,7 +346,7 @@ func (w *WhisperXAdapter) updateWhisperXDependencies(whisperxPath string) error content = strings.ReplaceAll(content, `"transformers>=4.48.0",`, `"transformers>=4.48.0", - "yt-dlp",`) + "yt-dlp[default]",`) } if err := os.WriteFile(pyprojectPath, []byte(content), 0644); err != nil { @@ 
-593,14 +590,9 @@ func (w *WhisperXAdapter) parseResult(outputDir string, input interfaces.AudioIn func (w *WhisperXAdapter) GetEstimatedProcessingTime(input interfaces.AudioInput) time.Duration { // WhisperX processing time varies by model size baseTime := w.BaseAdapter.GetEstimatedProcessingTime(input) - + // Adjust based on model size (if we can determine it) // This would need model size information from parameters // For now, use base estimation return baseTime -} - -// init registers the WhisperX adapter -func init() { - registry.RegisterTranscriptionAdapter("whisperx", NewWhisperXAdapter()) } \ No newline at end of file