mirror of
https://github.com/rishikanthc/Scriberr.git
synced 2026-03-03 02:27:01 +00:00
refactor: Switch yt-dlp to standalone binary & clean up UV config
- Dockerfiles: Install yt-dlp binary from GitHub releases to /usr/local/bin
- Go: Execute yt-dlp binary directly, removing uv python wrapper
- Config: Remove unused UVPath configuration and findUVPath function
- Entrypoint: Remove yt-dlp init logic (still initializes whisperx env if needed)
This commit is contained in:
@@ -77,6 +77,11 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
|
||||
&& chmod 755 /usr/local/bin/uv \
|
||||
&& uv --version
|
||||
|
||||
# Install yt-dlp standalone binary
|
||||
RUN curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux -o /usr/local/bin/yt-dlp \
|
||||
&& chmod a+rx /usr/local/bin/yt-dlp \
|
||||
&& yt-dlp --version
|
||||
|
||||
# Install Deno (JavaScript runtime required for yt-dlp YouTube downloads)
|
||||
# YouTube now requires JS execution for video cipher decryption
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/14404
|
||||
|
||||
@@ -78,6 +78,11 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
|
||||
&& chmod 755 /usr/local/bin/uv \
|
||||
&& uv --version
|
||||
|
||||
# Install yt-dlp standalone binary
|
||||
RUN curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux -o /usr/local/bin/yt-dlp \
|
||||
&& chmod a+rx /usr/local/bin/yt-dlp \
|
||||
&& yt-dlp --version
|
||||
|
||||
# Install Deno (JavaScript runtime required for yt-dlp YouTube downloads)
|
||||
# YouTube now requires JS execution for video cipher decryption
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/14404
|
||||
|
||||
@@ -43,6 +43,25 @@ setup_user() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to initialize python environment and dependencies
|
||||
initialize_python_env() {
|
||||
local env_dir="${WHISPERX_ENV:-/app/whisperx-env}"
|
||||
echo "Checking Python environment in $env_dir..."
|
||||
|
||||
# Ensure venv exists
|
||||
if [ ! -f "$env_dir/pyvenv.cfg" ]; then
|
||||
echo "Creating virtual environment..."
|
||||
uv venv "$env_dir"
|
||||
fi
|
||||
|
||||
# Check for whisperx
|
||||
# Note: Using git install as it's often more up to date for this library
|
||||
if [ ! -f "$env_dir/bin/whisperx" ]; then
|
||||
echo "Installing whisperx..."
|
||||
uv pip install -p "$env_dir" git+https://github.com/m-bain/whisperx.git
|
||||
fi
|
||||
}
|
||||
|
||||
# Setup the user (only if running as root)
|
||||
if [ "$(id -u)" = "0" ]; then
|
||||
setup_user "$PUID" "$PGID"
|
||||
@@ -52,6 +71,10 @@ if [ "$(id -u)" = "0" ]; then
|
||||
mkdir -p /app/data/uploads /app/data/transcripts /app/whisperx-env
|
||||
chown -R "$PUID:$PGID" /app/data /app/whisperx-env
|
||||
|
||||
echo "Initializing dependencies as appuser..."
|
||||
# Run initialization as the app user to ensure permissions are correct
|
||||
gosu appuser bash -c "$(declare -f initialize_python_env); initialize_python_env"
|
||||
|
||||
echo "=== Setup Complete ==="
|
||||
echo "Switching to user appuser (UID=$PUID, GID=$PGID) and starting application..."
|
||||
|
||||
@@ -63,6 +86,9 @@ else
|
||||
# Just ensure directories exist
|
||||
mkdir -p /app/data/uploads /app/data/transcripts /app/whisperx-env 2>/dev/null || true
|
||||
|
||||
echo "Initializing dependencies..."
|
||||
initialize_python_env
|
||||
|
||||
echo "=== Setup Complete ==="
|
||||
echo "Starting Scriberr application..."
|
||||
|
||||
|
||||
@@ -2584,15 +2584,17 @@ func (h *Handler) DownloadFromYouTube(c *gin.Context) {
|
||||
if req.Title != nil && *req.Title != "" {
|
||||
title = *req.Title
|
||||
} else {
|
||||
// Get title from yt-dlp
|
||||
// Get title first using standalone yt-dlp
|
||||
titleStart := time.Now()
|
||||
cmd := exec.Command(h.config.UVPath, "run", "--native-tls", "--project", h.config.WhisperXEnv, "python", "-m", "yt_dlp", "--get-title", req.URL)
|
||||
titleBytes, err := cmd.Output()
|
||||
cmd := exec.Command("yt-dlp", "--get-title", req.URL)
|
||||
var out bytes.Buffer
|
||||
cmd.Stdout = &out
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
title = "YouTube Audio"
|
||||
logger.Warn("Failed to get YouTube title", "url", req.URL, "error", err.Error(), "duration", time.Since(titleStart))
|
||||
} else {
|
||||
title = strings.TrimSpace(string(titleBytes))
|
||||
title = strings.TrimSpace(out.String())
|
||||
logger.Info("YouTube title retrieved", "title", title, "duration", time.Since(titleStart))
|
||||
}
|
||||
}
|
||||
@@ -2601,7 +2603,8 @@ func (h *Handler) DownloadFromYouTube(c *gin.Context) {
|
||||
logger.Info("Starting YouTube download", "url", req.URL, "job_id", jobID)
|
||||
downloadStart := time.Now()
|
||||
|
||||
ytDlpCmd := exec.Command(h.config.UVPath, "run", "--native-tls", "--project", h.config.WhisperXEnv, "python", "-m", "yt_dlp",
|
||||
// Executing yt-dlp directly (standalone binary)
|
||||
ytDlpCmd := exec.Command("yt-dlp",
|
||||
"--extract-audio",
|
||||
"--audio-format", "mp3",
|
||||
"--audio-quality", "0", // best quality
|
||||
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"crypto/rand"
|
||||
"encoding/hex"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
@@ -30,7 +29,6 @@ type Config struct {
|
||||
TranscriptsDir string
|
||||
|
||||
// Python/WhisperX configuration
|
||||
UVPath string
|
||||
WhisperXEnv string
|
||||
|
||||
// Environment configuration
|
||||
@@ -63,7 +61,6 @@ func Load() *Config {
|
||||
JWTSecret: getJWTSecret(),
|
||||
UploadDir: getEnv("UPLOAD_DIR", "data/uploads"),
|
||||
TranscriptsDir: getEnv("TRANSCRIPTS_DIR", "data/transcripts"),
|
||||
UVPath: findUVPath(),
|
||||
WhisperXEnv: getEnv("WHISPERX_ENV", "data/whisperx-env"),
|
||||
SecureCookies: getEnv("SECURE_COOKIES", defaultSecure) == "true",
|
||||
OpenAIAPIKey: getEnv("OPENAI_API_KEY", ""),
|
||||
@@ -106,18 +103,3 @@ func getJWTSecret() string {
|
||||
logger.Debug("Generated persistent JWT secret", "path", secretFile)
|
||||
return secret
|
||||
}
|
||||
|
||||
// findUVPath finds UV package manager in common locations
|
||||
func findUVPath() string {
|
||||
if uvPath := os.Getenv("UV_PATH"); uvPath != "" {
|
||||
return uvPath
|
||||
}
|
||||
|
||||
if path, err := exec.LookPath("uv"); err == nil {
|
||||
logger.Debug("Found UV package manager", "path", path)
|
||||
return path
|
||||
}
|
||||
|
||||
logger.Warn("UV package manager not found in PATH, using fallback", "fallback", "uv")
|
||||
return "uv"
|
||||
}
|
||||
|
||||
57
startup_log.txt
Normal file
57
startup_log.txt
Normal file
@@ -0,0 +1,57 @@
|
||||
scriberr | === Scriberr Container Setup ===
|
||||
scriberr | Requested UID: 10001, GID: 10001
|
||||
scriberr | Setting up custom user with UID=10001, GID=10001...
|
||||
scriberr | Group with GID 10001 already exists, using it
|
||||
scriberr | usermod: no changes
|
||||
scriberr | Setting up data directories...
|
||||
scriberr | === Setup Complete ===
|
||||
scriberr | Switching to user appuser (UID=10001, GID=10001) and starting application...
|
||||
scriberr | time=02:50:36 level="INFO " msg="Starting Scriberr" version=dev
|
||||
scriberr | [+] Loading configuration
|
||||
scriberr | time=02:50:36 level="INFO " msg="Registering adapters with environment path" whisperx_env=/app/whisperx-env
|
||||
scriberr | time=02:50:36 level="INFO " msg="Adapter registration complete"
|
||||
scriberr | [+] Connecting to database
|
||||
scriberr | [+] Setting up authentication
|
||||
scriberr | [+] Initializing SSE broadcaster
|
||||
scriberr | [+] Initializing repositories
|
||||
scriberr | [+] Initializing services
|
||||
scriberr | [+] Initializing transcription service
|
||||
scriberr | [+] Initializing transcription service
|
||||
scriberr | [+] Preparing Python environment
|
||||
scriberr | time=02:50:36 level="INFO " msg="Initializing unified transcription service"
|
||||
scriberr | time=02:50:36 level="INFO " msg="Initializing registered models in parallel..."
|
||||
scriberr | time=02:50:36 level="INFO " msg="Preparing NVIDIA Sortformer environment" env_path=/app/whisperx-env/parakeet
|
||||
scriberr | time=02:50:36 level="INFO " msg="transcription model initialized" model_id=openai_whisper
|
||||
scriberr | time=02:50:36 level="INFO " msg="Preparing NVIDIA Canary environment" env_path=/app/whisperx-env/parakeet
|
||||
scriberr | time=02:50:36 level="INFO " msg="Preparing PyAnnote environment" env_path=/app/whisperx-env/pyannote
|
||||
scriberr | time=02:50:36 level="INFO " msg="Preparing NVIDIA Parakeet environment" env_path=/app/whisperx-env/parakeet
|
||||
scriberr | time=02:50:36 level="INFO " msg="Preparing WhisperX environment" env_path=/app/whisperx-env
|
||||
scriberr | time=02:50:36 level="INFO " msg="Installing PyAnnote dependencies"
|
||||
scriberr | time=02:50:36 level="INFO " msg="Parakeet environment not ready, setting up"
|
||||
scriberr | time=02:50:36 level="INFO " msg="Installing Canary dependencies"
|
||||
scriberr | time=02:50:36 level="INFO " msg="Installing Parakeet dependencies"
|
||||
scriberr | time=02:50:36 level="INFO " msg="Downloading Sortformer model" path=/app/whisperx-env/parakeet/diar_streaming_sortformer_4spk-v2.nemo
|
||||
Downloading diar_streaming_sortformer_4spk-v2.nemo: 100% (449.5 MB / 449.5 MB)
|
||||
scriberr | time=02:50:53 level="INFO " msg="Successfully downloaded Sortformer model" size=471367680
|
||||
scriberr | time=02:50:53 level="INFO " msg="Sortformer environment prepared successfully"
|
||||
scriberr | time=02:50:53 level="INFO " msg="diarization model initialized" model_id=sortformer
|
||||
scriberr | time=02:53:11 level="INFO " msg="WhisperX environment prepared successfully"
|
||||
scriberr | time=02:53:11 level="INFO " msg="transcription model initialized" model_id=whisperx
|
||||
scriberr | time=02:53:14 level="INFO " msg="PyAnnote environment prepared successfully"
|
||||
scriberr | time=02:53:14 level="INFO " msg="diarization model initialized" model_id=pyannote
|
||||
scriberr | time=02:53:28 level="INFO " msg="Downloading Canary model" path=/app/whisperx-env/parakeet/canary-1b-v2.nemo
|
||||
scriberr | time=02:53:28 level="INFO " msg="Downloading Parakeet model" path=/app/whisperx-env/parakeet/parakeet-tdt-0.6b-v3.nemo
|
||||
Downloading parakeet-tdt-0.6b-v3.nemo: 100% (2.3 GB / 2.3 GB)
|
||||
scriberr | time=02:54:37 level="INFO " msg="Successfully downloaded Parakeet model" size=2509332480
|
||||
scriberr | time=02:54:37 level="INFO " msg="Created buffered transcription script" path=/app/whisperx-env/parakeet/transcribe_buffered.py
|
||||
scriberr | time=02:54:37 level="INFO " msg="Parakeet environment prepared successfully"
|
||||
scriberr | time=02:54:37 level="INFO " msg="transcription model initialized" model_id=parakeet
|
||||
Downloading canary-1b-v2.nemo: 100% (5.9 GB / 5.9 GB)
|
||||
scriberr | time=02:55:54 level="INFO " msg="Successfully downloaded Canary model" size=6358958080
|
||||
scriberr | time=02:55:54 level="INFO " msg="Canary environment prepared successfully"
|
||||
scriberr | time=02:55:54 level="INFO " msg="transcription model initialized" model_id=canary
|
||||
scriberr | time=02:55:54 level="INFO " msg="Model initialization completed"
|
||||
scriberr | time=02:55:54 level="INFO " msg="Unified transcription service initialized successfully"
|
||||
scriberr | [+] Initializing quick transcription service
|
||||
scriberr | [+] Starting background processing
|
||||
scriberr | time=02:55:54 level="INFO " msg="Scriberr is ready" url=http://0.0.0.0:8080
|
||||
@@ -50,8 +50,8 @@ func (suite *SecurityTestSuite) SetupSuite() {
|
||||
DatabasePath: "security_test.db",
|
||||
JWTSecret: "test-secret",
|
||||
UploadDir: "security_test_uploads",
|
||||
UVPath: "uv",
|
||||
WhisperXEnv: "test_whisperx_env",
|
||||
|
||||
WhisperXEnv: "test_whisperx_env",
|
||||
}
|
||||
|
||||
// Initialize test database
|
||||
|
||||
@@ -46,8 +46,8 @@ func NewTestHelper(t *testing.T, dbName string) *TestHelper {
|
||||
DatabasePath: dbName,
|
||||
JWTSecret: "test-secret-key-for-unit-tests",
|
||||
UploadDir: "test_uploads_" + dbName,
|
||||
UVPath: "uv",
|
||||
WhisperXEnv: "test_whisperx_env",
|
||||
|
||||
WhisperXEnv: "test_whisperx_env",
|
||||
}
|
||||
|
||||
// Initialize test database
|
||||
|
||||
Reference in New Issue
Block a user