fix: redesign transcription config dialogue

This commit is contained in:
rishikanthc
2025-12-13 18:27:22 -08:00
parent 55bbfcb903
commit ee40ffcd46
6 changed files with 1240 additions and 2030 deletions

View File

@@ -8,6 +8,7 @@
"name": "frontend",
"version": "0.0.0",
"dependencies": {
"@radix-ui/react-accordion": "^1.2.12",
"@radix-ui/react-alert-dialog": "^1.1.15",
"@radix-ui/react-checkbox": "^1.3.3",
"@radix-ui/react-dialog": "^1.1.15",
@@ -2471,6 +2472,37 @@
"integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
"license": "MIT"
},
"node_modules/@radix-ui/react-accordion": {
"version": "1.2.12",
"resolved": "https://registry.npmjs.org/@radix-ui/react-accordion/-/react-accordion-1.2.12.tgz",
"integrity": "sha512-T4nygeh9YE9dLRPhAHSeOZi7HBXo+0kYIPJXayZfvWOWA0+n3dESrZbjfDPUABkUNym6Hd+f2IR113To8D2GPA==",
"license": "MIT",
"dependencies": {
"@radix-ui/primitive": "1.1.3",
"@radix-ui/react-collapsible": "1.1.12",
"@radix-ui/react-collection": "1.1.7",
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-context": "1.1.2",
"@radix-ui/react-direction": "1.1.1",
"@radix-ui/react-id": "1.1.1",
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-use-controllable-state": "1.2.2"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-alert-dialog": {
"version": "1.1.15",
"resolved": "https://registry.npmjs.org/@radix-ui/react-alert-dialog/-/react-alert-dialog-1.1.15.tgz",
@@ -2552,6 +2584,36 @@
}
}
},
"node_modules/@radix-ui/react-collapsible": {
"version": "1.1.12",
"resolved": "https://registry.npmjs.org/@radix-ui/react-collapsible/-/react-collapsible-1.1.12.tgz",
"integrity": "sha512-Uu+mSh4agx2ib1uIGPP4/CKNULyajb3p92LsVXmH2EHVMTfZWpll88XJ0j4W0z3f8NK1eYl1+Mf/szHPmcHzyA==",
"license": "MIT",
"dependencies": {
"@radix-ui/primitive": "1.1.3",
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-context": "1.1.2",
"@radix-ui/react-id": "1.1.1",
"@radix-ui/react-presence": "1.1.5",
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-use-controllable-state": "1.2.2",
"@radix-ui/react-use-layout-effect": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-collection": {
"version": "1.1.7",
"resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz",

View File

@@ -10,6 +10,7 @@
"preview": "vite preview"
},
"dependencies": {
"@radix-ui/react-accordion": "^1.2.12",
"@radix-ui/react-alert-dialog": "^1.1.15",
"@radix-ui/react-checkbox": "^1.3.3",
"@radix-ui/react-dialog": "^1.1.15",

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,119 @@
import type { ReactNode } from "react";
import { Label } from "@/components/ui/label";
import { HoverCard, HoverCardContent, HoverCardTrigger } from "@/components/ui/hover-card";
import { Info } from "lucide-react";
/** Props accepted by FormField. */
interface FormFieldProps {
/** Text rendered inside the field's label. */
label: string;
/** Forwarded unchanged to the label's `htmlFor`. */
htmlFor?: string;
/** When truthy, an info icon is shown beside the label and this text appears in its hover card. */
description?: string;
/** When truthy, a muted "(optional)" suffix is appended to the label text. */
optional?: boolean;
/** Rendered below the label row (normally the input control). */
children: ReactNode;
}
/**
 * FormField - stacks a label row above the supplied control.
 * The label row can carry an "(optional)" marker and an info icon whose
 * hover card shows the description text.
 * Follows Scriberr design system
 */
export function FormField({ label, htmlFor, description, optional, children }: FormFieldProps) {
  // Muted suffix appended inside the label when the field is optional.
  const optionalMarker = optional && (
    <span className="ml-1 text-[var(--text-tertiary)] font-normal">(optional)</span>
  );

  // Info icon + hover card; only produced when a description was supplied.
  const helpCard = description && (
    <HoverCard>
      <HoverCardTrigger asChild>
        <Info className="h-4 w-4 text-[var(--text-tertiary)] cursor-help hover:text-[var(--text-secondary)] transition-colors" />
      </HoverCardTrigger>
      <HoverCardContent
        className="w-80 bg-[var(--bg-card)] border border-[var(--border-subtle)] rounded-xl p-4"
        style={{ boxShadow: 'var(--shadow-float)' }}
      >
        <p className="text-sm text-[var(--text-secondary)] leading-relaxed">{description}</p>
      </HoverCardContent>
    </HoverCard>
  );

  return (
    <div className="space-y-2">
      <div className="flex items-center gap-2">
        <Label
          htmlFor={htmlFor}
          className="text-sm font-medium text-[var(--text-primary)]"
        >
          {label}
          {optionalMarker}
        </Label>
        {helpCard}
      </div>
      {children}
    </div>
  );
}
/** Props accepted by Section. */
interface SectionProps {
/** Heading text rendered in the section's `<h3>`. */
title: string;
/** When truthy, rendered as a secondary paragraph under the heading. */
description?: string;
/** Section body, rendered after the heading block. */
children: ReactNode;
/** Extra classes appended after the wrapper's built-in `space-y-4`. */
className?: string;
}
/**
 * Section - heading (plus optional sub-text) followed by the grouped content.
 */
export function Section({ title, description, children, className = "" }: SectionProps) {
  // Caller-supplied classes are appended after the default vertical spacing.
  const wrapperClasses = "space-y-4 " + className;

  const subtitle = description && (
    <p className="text-sm text-[var(--text-secondary)] mt-1">{description}</p>
  );

  return (
    <div className={wrapperClasses}>
      <div>
        <h3 className="text-base font-semibold text-[var(--text-primary)]">{title}</h3>
        {subtitle}
      </div>
      {children}
    </div>
  );
}
/** Props accepted by InfoBanner. */
interface InfoBannerProps {
/** Selects the colour set and the leading glyph used by the banner. */
variant: 'info' | 'warning' | 'success';
/** Heading text rendered in the banner's `<h4>`. */
title: string;
/** Banner body, rendered under the heading in secondary text colour. */
children: ReactNode;
}
/**
 * Class names for each banner variant. Declared at module scope so the table
 * is built once rather than on every render.
 */
const INFO_BANNER_STYLES: Record<
  InfoBannerProps['variant'],
  { bg: string; border: string; icon: string; title: string }
> = {
  info: {
    bg: 'bg-[var(--brand-light)]',
    border: 'border-[var(--brand-solid)]/20',
    icon: 'text-[var(--brand-solid)]',
    title: 'text-[var(--text-primary)]',
  },
  warning: {
    bg: 'bg-[var(--warning-translucent)]',
    border: 'border-[var(--warning-solid)]/20',
    icon: 'text-[var(--warning-solid)]',
    title: 'text-[var(--text-primary)]',
  },
  success: {
    bg: 'bg-[var(--success-translucent)]',
    border: 'border-[var(--success-solid)]/20',
    icon: 'text-[var(--success-solid)]',
    title: 'text-[var(--text-primary)]',
  },
};

/**
 * InfoBanner - Alert/notice banner with consistent styling
 *
 * @param variant  Chooses the colour set and leading icon.
 * @param title    Heading shown in the banner.
 * @param children Body content shown under the heading.
 */
export function InfoBanner({ variant, title, children }: InfoBannerProps) {
  const s = INFO_BANNER_STYLES[variant];
  // Text glyphs for warning/success; the info variant has no glyph and uses
  // the lucide Info icon instead, so its icon colour is actually applied and
  // the icon slot is never rendered empty.
  const glyph = variant === 'warning' ? '⚠️' : variant === 'success' ? '✓' : null;

  return (
    <div className={`p-4 rounded-xl border ${s.bg} ${s.border}`}>
      <div className="flex items-start gap-3">
        {/* The icon is decorative; the title carries the meaning. */}
        <span className={`mt-0.5 ${s.icon}`} aria-hidden="true">
          {glyph === null ? <Info className="h-4 w-4" /> : glyph}
        </span>
        <div>
          <h4 className={`text-sm font-medium ${s.title} mb-1`}>{title}</h4>
          <div className="text-sm text-[var(--text-secondary)]">{children}</div>
        </div>
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,995 @@
import { useState, useEffect, memo } from "react";
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Textarea } from "@/components/ui/textarea";
import { Switch } from "@/components/ui/switch";
import { Slider } from "@/components/ui/slider";
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from "@/components/ui/select";
import {
Accordion,
AccordionContent,
AccordionItem,
AccordionTrigger,
} from "@/components/ui/accordion";
import { Loader2, Check, XCircle } from "lucide-react";
import { useAuth } from "@/features/auth/hooks/useAuth";
import { FormField, Section, InfoBanner } from "@/components/transcription/FormHelpers";
// ============================================================================
// Types & Constants
// ============================================================================
/**
 * Flat bag of transcription settings edited by the config dialog and handed to
 * `onStartTranscription` on submit.
 *
 * NOTE(review): the snake_case names presumably mirror the backend / WhisperX
 * job schema — confirm against the API before renaming anything. The "---"
 * headings below are a reading aid inferred from field names only.
 */
export interface WhisperXParams {
// --- Model selection ---
/** Engine family; the dialog offers "whisper", "nvidia_parakeet", "nvidia_canary" and "openai". */
model_family: string;
/** Model identifier within the family (a Whisper size such as "small", or an OpenAI model id). */
model: string;
model_cache_only: boolean;
model_dir?: string;
// --- Runtime / hardware ---
/** The Whisper form offers "cpu" and "cuda". */
device: string;
device_index: number;
batch_size: number;
/** The Whisper form offers "float32", "float16" and "int8". */
compute_type: string;
threads: number;
// --- Output & task ---
output_format: string;
verbose: boolean;
/** The Whisper form offers "transcribe" and "translate". */
task: string;
/** Language code; the Whisper form stores `undefined` when "Auto-detect" is chosen. */
language?: string;
// --- Alignment ---
/** Custom alignment model; the form stores `undefined` when the field is left empty. */
align_model?: string;
interpolate_method: string;
/** When true the form hides the custom alignment model field. */
no_align: boolean;
return_char_alignments: boolean;
// --- Voice activity detection ---
vad_method: string;
vad_onset: number;
vad_offset: number;
chunk_size: number;
// --- Speaker diarization ---
/** Forced to false by the dialog when the audio is multi-track. */
diarize: boolean;
min_speakers?: number;
max_speakers?: number;
/** "pyannote" or "nvidia_sortformer"; the dialog resets it to "pyannote" when the family becomes "whisper". */
diarize_model: string;
speaker_embeddings: boolean;
// --- Decoding ---
temperature: number;
best_of: number;
beam_size: number;
patience: number;
length_penalty: number;
suppress_tokens?: string;
suppress_numerals: boolean;
initial_prompt?: string;
condition_on_previous_text: boolean;
fp16: boolean;
temperature_increment_on_fallback: number;
compression_ratio_threshold: number;
logprob_threshold: number;
no_speech_threshold: number;
// --- Subtitle formatting ---
max_line_width?: number;
max_line_count?: number;
highlight_words: boolean;
segment_resolution: string;
/** Hugging Face token; the UI describes it as required for Pyannote diarization models. */
hf_token?: string;
print_progress: boolean;
// --- Parakeet long-form context (edited via sliders, 64..512 in steps of 64) ---
attention_context_left: number;
attention_context_right: number;
/** Set by the dialog from its `isMultiTrack` prop each time it opens. */
is_multi_track_enabled: boolean;
/** OpenAI API key; posted to the validation endpoint when the user clicks "Validate". */
api_key?: string;
}
/** Props accepted by TranscriptionConfigDialog. */
interface TranscriptionConfigDialogProps {
/** Controls dialog visibility; the form is re-seeded whenever this is true and a seed value changes. */
open: boolean;
/** Forwarded to the underlying Dialog; also called with `false` by the Cancel button. */
onOpenChange: (open: boolean) => void;
/** Called on submit with the current params; in profile mode the profile name/description are included. */
onStartTranscription: (params: WhisperXParams & { profileName?: string; profileDescription?: string }) => void;
/** Disables the submit button and shows the "Starting..." spinner. Defaults to false. */
loading?: boolean;
/** Shows the profile name/description fields and switches the submit label to "Save Profile". Defaults to false. */
isProfileMode?: boolean;
/** Seed values for the form; DEFAULT_PARAMS is used when omitted. */
initialParams?: WhisperXParams;
/** Seed value for the profile name; also used to build the `Edit "<name>"` title. Defaults to "". */
initialName?: string;
/** Seed value for the profile description. Defaults to "". */
initialDescription?: string;
/** Forces diarization off, shows the multi-track banner and hides the diarization sections. Defaults to false. */
isMultiTrack?: boolean;
/** Overrides the automatically chosen dialog title when truthy. */
title?: string;
}
/**
 * Baseline settings used to seed the form when the dialog is opened without
 * `initialParams`. Optional fields that are absent here (e.g. `language`,
 * `min_speakers`, `hf_token`) start out undefined.
 */
const DEFAULT_PARAMS: WhisperXParams = {
model_family: "whisper",
model: "small",
model_cache_only: false,
device: "cpu",
device_index: 0,
batch_size: 8,
compute_type: "float32",
threads: 0,
output_format: "all",
verbose: true,
task: "transcribe",
interpolate_method: "nearest",
no_align: false,
return_char_alignments: false,
vad_method: "pyannote",
vad_onset: 0.5,
vad_offset: 0.363,
chunk_size: 30,
diarize: false,
diarize_model: "pyannote",
speaker_embeddings: false,
temperature: 0,
best_of: 5,
beam_size: 5,
patience: 1.0,
length_penalty: 1.0,
suppress_numerals: false,
condition_on_previous_text: false,
fp16: true,
temperature_increment_on_fallback: 0.2,
compression_ratio_threshold: 2.4,
logprob_threshold: -1.0,
no_speech_threshold: 0.6,
highlight_words: false,
segment_resolution: "sentence",
print_progress: false,
// Both context values sit inside the 64..512 range exposed by the Parakeet sliders.
attention_context_left: 256,
attention_context_right: 256,
// Overwritten from the `isMultiTrack` prop every time the dialog opens.
is_multi_track_enabled: false,
// Empty string rather than undefined; the OpenAI form's help text says an empty key
// means "use server default if configured".
api_key: "",
};
/**
 * Whisper model names offered in the "Model Size" select.
 * Every size below "large" is listed as a plain / ".en" pair, followed by the
 * "large" variants.
 */
const WHISPER_MODELS = ["tiny", "base", "small", "medium"]
  .flatMap((size) => [size, `${size}.en`])
  .concat(["large", "large-v1", "large-v2", "large-v3"]);
/**
 * Language options for the general language selects, as `{ value, label }`
 * pairs. "auto" is the sentinel the forms translate to an undefined language.
 * Built from a code -> label table; insertion order is the display order.
 */
const LANGUAGES = Object.entries({
  auto: "Auto-detect",
  en: "English",
  zh: "Chinese",
  de: "German",
  es: "Spanish",
  ru: "Russian",
  ko: "Korean",
  fr: "French",
  ja: "Japanese",
  pt: "Portuguese",
  tr: "Turkish",
  pl: "Polish",
  nl: "Dutch",
  ar: "Arabic",
  sv: "Swedish",
  it: "Italian",
  id: "Indonesian",
  hi: "Hindi",
  fi: "Finnish",
  vi: "Vietnamese",
  he: "Hebrew",
  uk: "Ukrainian",
  el: "Greek",
}).map(([value, label]) => ({ value, label }));
/**
 * Language options shown in the NVIDIA Canary "Source Language" select.
 * Built from a code -> label table; insertion order is the display order.
 */
const CANARY_LANGUAGES = Object.entries({
  en: "English",
  de: "German",
  es: "Spanish",
  fr: "French",
}).map(([value, label]) => ({ value, label }));
/**
 * Tooltip copy for form fields, keyed by the WhisperXParams field it explains.
 * Passed as the `description` of the matching FormField.
 */
const PARAM_DESCRIPTIONS = {
  model: "Size of the Whisper model. Larger = more accurate but slower.",
  language: "Source language. Auto-detect works for most cases.",
  task: "Transcribe in original language or translate to English.",
  // The Device select only offers CPU and GPU (CUDA), so the tooltip must not
  // advertise an "AUTO" choice the user cannot pick.
  device: "CPU (universal) or GPU (faster, CUDA required).",
  compute_type: "Float16 (faster), Float32 (accurate), Int8 (fastest).",
  batch_size: "Segments processed at once. Higher = faster but more memory.",
  diarize: "Identify and separate different speakers.",
  diarize_model: "Pyannote (accurate, needs HF token) or NVIDIA Sortformer (up to 4 speakers).",
  temperature: "0 = deterministic, higher = more creative.",
  beam_size: "Search beams. Higher = better quality but slower.",
  vad_method: "Voice detection: Pyannote (accurate) or Silero (fast).",
  initial_prompt: "Context text to guide transcription style.",
  hf_token: "Required for Pyannote diarization models.",
};
// ============================================================================
// Styled Input/Select Components
// ============================================================================
// Shared class strings. They are multi-line template literals, so the embedded
// line breaks are part of the resulting class attribute value.

/** Classes applied to text inputs and textareas (height, surface, border, focus ring). */
const inputClassName = `
h-11 bg-[var(--bg-main)] border border-[var(--border-subtle)] rounded-xl
text-[var(--text-primary)] placeholder:text-[var(--text-tertiary)]
focus:border-[var(--brand-solid)] focus:ring-2 focus:ring-[var(--brand-solid)]/20
transition-all duration-200
[color-scheme:light] dark:[color-scheme:dark]
`;
/** Classes applied to every SelectTrigger so selects match the text inputs. */
const selectTriggerClassName = `
h-11 bg-[var(--bg-main)] border border-[var(--border-subtle)] rounded-xl
text-[var(--text-primary)] shadow-none
focus:border-[var(--brand-solid)] focus:ring-2 focus:ring-[var(--brand-solid)]/20
`;
/** Classes applied to every SelectContent (the dropdown panel). */
const selectContentClassName = `
bg-[var(--bg-card)] border border-[var(--border-subtle)] rounded-xl
`;
/** Classes applied to every SelectItem (a single dropdown row). */
const selectItemClassName = `
text-[var(--text-primary)] rounded-lg mx-1 cursor-pointer
focus:bg-[var(--brand-light)] focus:text-[var(--brand-solid)]
`;
// ============================================================================
// Main Component
// ============================================================================
/**
 * TranscriptionConfigDialog - modal form for picking a model family and its
 * transcription parameters.
 *
 * - The form is re-seeded from `initialParams` (or DEFAULT_PARAMS) whenever the
 *   dialog is open and one of the seed props changes.
 * - In profile mode it also collects a profile name/description, requires a
 *   non-blank name, and labels the submit button "Save Profile".
 * - On submit the current params are passed to `onStartTranscription`.
 */
export const TranscriptionConfigDialog = memo(function TranscriptionConfigDialog({
  open,
  onOpenChange,
  onStartTranscription,
  loading = false,
  isProfileMode = false,
  initialParams,
  initialName = "",
  initialDescription = "",
  isMultiTrack = false,
  title,
}: TranscriptionConfigDialogProps) {
  const [params, setParams] = useState<WhisperXParams>(DEFAULT_PARAMS);
  const [profileName, setProfileName] = useState("");
  const [profileDescription, setProfileDescription] = useState("");

  // OpenAI validation state
  const [isValidating, setIsValidating] = useState(false);
  const [validationStatus, setValidationStatus] = useState<'idle' | 'valid' | 'invalid'>('idle');
  const [validationMessage, setValidationMessage] = useState("");
  const { getAuthHeaders } = useAuth();
  const [availableModels, setAvailableModels] = useState<string[]>(["whisper-1"]);

  // Reset when dialog opens.
  // NOTE(review): `initialParams` is a dependency, so a parent that passes a
  // fresh object on every render would reset the form while the user is
  // editing — confirm callers pass a stable reference.
  useEffect(() => {
    if (open) {
      const baseParams = initialParams || DEFAULT_PARAMS;
      setParams({
        ...baseParams,
        is_multi_track_enabled: isMultiTrack,
        // Multi-track audio is transcribed per track, so diarization is forced off.
        diarize: isMultiTrack ? false : baseParams.diarize
      });
      setProfileName(initialName);
      setProfileDescription(initialDescription);
      // A verdict from a previous session says nothing about the key now in the form.
      setValidationStatus('idle');
      setValidationMessage("");
    }
  }, [open, initialParams, initialName, initialDescription, isMultiTrack]);

  /**
   * Set a single parameter. Changing `model_family` also normalises fields whose
   * valid values differ between families, so the selects never hold (and the
   * form never submits) a value the newly selected family does not offer.
   */
  const updateParam = <K extends keyof WhisperXParams>(key: K, value: WhisperXParams[K]) => {
    if (key === 'api_key') {
      // The stored verdict belonged to the previous key text.
      setValidationStatus('idle');
      setValidationMessage("");
    }
    setParams(prev => {
      const newParams = { ...prev, [key]: value };
      if (key === 'model_family') {
        if (value === 'whisper') {
          newParams.diarize_model = 'pyannote';
          if (!WHISPER_MODELS.includes(newParams.model)) {
            newParams.model = DEFAULT_PARAMS.model;
          }
        } else if (value === 'openai') {
          if (!availableModels.includes(newParams.model)) {
            newParams.model = availableModels[0] ?? "whisper-1";
          }
        } else if (value === 'nvidia_canary') {
          // The Canary select displays "en" when no language is set; store it
          // so what is shown is also what is submitted.
          if (!CANARY_LANGUAGES.some((l) => l.value === newParams.language)) {
            newParams.language = "en";
          }
        }
      }
      return newParams;
    });
  };

  /** Ask the server to validate the entered OpenAI key and refresh the model list. */
  const validateAPIKey = async () => {
    setIsValidating(true);
    setValidationStatus('idle');
    try {
      const response = await fetch('/api/v1/config/openai/validate', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json', ...getAuthHeaders() },
        body: JSON.stringify({ api_key: params.api_key }),
      });
      const data = await response.json();
      if (response.ok && data.valid) {
        // Treat a missing or empty list like "no list" so the select is never empty.
        const models: string[] =
          Array.isArray(data.models) && data.models.length > 0 ? data.models : ["whisper-1"];
        setValidationStatus('valid');
        setAvailableModels(models);
        // Keep the selected model inside the refreshed list.
        setParams(prev =>
          prev.model_family === 'openai' && !models.includes(prev.model)
            ? { ...prev, model: models[0] ?? "whisper-1" }
            : prev
        );
        setValidationMessage("API key validated");
      } else {
        setValidationStatus('invalid');
        setValidationMessage(data.error || "Invalid API key");
      }
    } catch {
      // Network failure or a non-JSON response body.
      setValidationStatus('invalid');
      setValidationMessage("Validation failed");
    } finally {
      setIsValidating(false);
    }
  };

  const handleSubmit = () => {
    if (isProfileMode) {
      onStartTranscription({ ...params, profileName, profileDescription });
    } else {
      onStartTranscription(params);
    }
  };

  // An explicit `title` wins; otherwise the title depends on the mode and on
  // whether an existing profile is being edited.
  const dialogTitle = title || (isProfileMode
    ? (initialName ? `Edit "${initialName}"` : "New Transcription Profile")
    : "Transcription Settings"
  );

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent
        className="max-w-full sm:max-w-2xl w-[calc(100vw-1rem)] max-h-[90vh] overflow-hidden flex flex-col p-0 gap-0 bg-[var(--bg-card)] border border-[var(--border-subtle)] rounded-2xl"
        style={{ boxShadow: 'var(--shadow-float)' }}
      >
        {/* Header */}
        <DialogHeader className="px-6 pt-6 pb-4 border-b border-[var(--border-subtle)]">
          <DialogTitle className="text-xl font-semibold text-[var(--text-primary)]">
            {dialogTitle}
          </DialogTitle>
          <DialogDescription className="text-[var(--text-secondary)] text-sm mt-1">
            {isProfileMode
              ? "Configure and save your transcription settings."
              : "Choose a model and configure transcription parameters."
            }
          </DialogDescription>
        </DialogHeader>

        {/* Scrollable Content */}
        <div className="flex-1 overflow-y-auto px-6 py-6 space-y-6">
          {/* Profile Name/Description (if profile mode) */}
          {isProfileMode && (
            <div className="p-4 bg-[var(--bg-main)] rounded-xl border border-[var(--border-subtle)] space-y-4">
              <FormField label="Profile Name" htmlFor="profileName">
                <Input
                  id="profileName"
                  value={profileName}
                  onChange={(e) => setProfileName(e.target.value)}
                  placeholder="My transcription profile"
                  className={inputClassName}
                  required
                />
              </FormField>
              <FormField label="Description" htmlFor="profileDesc" optional>
                <Textarea
                  id="profileDesc"
                  value={profileDescription}
                  onChange={(e) => setProfileDescription(e.target.value)}
                  placeholder="Describe this profile..."
                  className={`${inputClassName} resize-none min-h-[80px]`}
                  rows={2}
                />
              </FormField>
            </div>
          )}

          {/* Model Family Selection */}
          <FormField
            label="Model Family"
            description="Choose the AI model for transcription. Each has different capabilities and requirements."
          >
            <Select
              value={params.model_family}
              onValueChange={(v) => updateParam('model_family', v)}
            >
              <SelectTrigger className={selectTriggerClassName}>
                <SelectValue />
              </SelectTrigger>
              <SelectContent className={selectContentClassName}>
                <SelectItem value="whisper" className={selectItemClassName}>
                  Whisper
                </SelectItem>
                <SelectItem value="nvidia_parakeet" className={selectItemClassName}>
                  NVIDIA Parakeet
                </SelectItem>
                <SelectItem value="nvidia_canary" className={selectItemClassName}>
                  NVIDIA Canary
                </SelectItem>
                <SelectItem value="openai" className={selectItemClassName}>
                  OpenAI
                </SelectItem>
              </SelectContent>
            </Select>
          </FormField>

          {/* Multi-track notice */}
          {isMultiTrack && (
            <InfoBanner variant="info" title="Multi-track Audio Detected">
              Each audio track will be transcribed separately. Speaker diarization is disabled.
            </InfoBanner>
          )}

          {/* Model-Specific Configuration */}
          {params.model_family === "whisper" && (
            <WhisperConfig
              params={params}
              updateParam={updateParam}
              isMultiTrack={isMultiTrack}
            />
          )}
          {params.model_family === "nvidia_parakeet" && (
            <ParakeetConfig
              params={params}
              updateParam={updateParam}
              isMultiTrack={isMultiTrack}
            />
          )}
          {params.model_family === "nvidia_canary" && (
            <CanaryConfig
              params={params}
              updateParam={updateParam}
              isMultiTrack={isMultiTrack}
            />
          )}
          {params.model_family === "openai" && (
            <OpenAIConfig
              params={params}
              updateParam={updateParam}
              isValidating={isValidating}
              validationStatus={validationStatus}
              validationMessage={validationMessage}
              availableModels={availableModels}
              onValidate={validateAPIKey}
            />
          )}
        </div>

        {/* Footer */}
        <DialogFooter className="px-6 py-4 border-t border-[var(--border-subtle)] gap-3 sm:gap-2">
          <Button
            variant="ghost"
            onClick={() => onOpenChange(false)}
            className="rounded-xl text-[var(--text-secondary)] hover:bg-[var(--bg-main)] cursor-pointer"
          >
            Cancel
          </Button>
          <Button
            onClick={handleSubmit}
            disabled={loading || (isProfileMode && !profileName.trim())}
            className="rounded-xl text-white cursor-pointer bg-gradient-to-r from-[#FFAB40] to-[#FF3D00] hover:opacity-90 active:scale-[0.98] transition-all shadow-lg shadow-orange-500/20"
          >
            {loading ? (
              <>
                <Loader2 className="mr-2 h-4 w-4 animate-spin" />
                Starting...
              </>
            ) : (
              isProfileMode ? "Save Profile" : "Start Transcription"
            )}
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
});
// ============================================================================
// Model-Specific Configuration Components
// ============================================================================
/** Props shared by the per-model-family configuration panels. */
interface ConfigProps {
/** Current form values; panels read from this and never mutate it. */
params: WhisperXParams;
/** Setter for a single field; the value type is tied to the chosen key. */
updateParam: <K extends keyof WhisperXParams>(key: K, value: WhisperXParams[K]) => void;
/** When truthy, panels that have a diarization section hide it. */
isMultiTrack?: boolean;
}
// ---------------------------------------------------------------------------
// Private helpers shared by the Whisper / Parakeet / Canary panels
// ---------------------------------------------------------------------------

/** Bounds of the speaker-count inputs. */
const SPEAKER_COUNT_MIN = 1;
const SPEAKER_COUNT_MAX = 20;

/** Bounds and step of the Parakeet attention-context sliders. */
const CONTEXT_MIN = 64;
const CONTEXT_MAX = 512;
const CONTEXT_STEP = 64;

/** Clamp `value` into the inclusive range [min, max]. */
function clampToRange(value: number, min: number, max: number): number {
  return Math.min(max, Math.max(min, value));
}

/**
 * Parse the text of a number input as an integer within [min, max].
 * Returns undefined for empty / non-numeric text. The `min`/`max` attributes of
 * an input do not stop a user from typing an out-of-range value, hence the clamp.
 */
function parseBoundedInt(raw: string, min: number, max: number): number | undefined {
  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) ? undefined : clampToRange(parsed, min, max);
}

/** Float counterpart of parseBoundedInt. */
function parseBoundedFloat(raw: string, min: number, max: number): number | undefined {
  const parsed = Number.parseFloat(raw);
  return Number.isNaN(parsed) ? undefined : clampToRange(parsed, min, max);
}

/** The two props every field-level helper needs. */
type ParamEditorProps = Pick<ConfigProps, 'params' | 'updateParam'>;

interface SpeakerCountFieldProps extends ParamEditorProps {
  label: string;
  field: 'min_speakers' | 'max_speakers';
}

/**
 * Optional speaker-count input. An empty box means "Auto" (undefined); anything
 * typed is clamped to 1..20 so the box always shows exactly what is stored.
 */
function SpeakerCountField({ label, field, params, updateParam }: SpeakerCountFieldProps) {
  return (
    <FormField label={label} optional>
      <Input
        type="number"
        min={SPEAKER_COUNT_MIN}
        max={SPEAKER_COUNT_MAX}
        placeholder="Auto"
        value={params[field] ?? ""}
        onChange={(e) =>
          updateParam(field, parseBoundedInt(e.target.value, SPEAKER_COUNT_MIN, SPEAKER_COUNT_MAX))
        }
        className={inputClassName}
      />
    </FormField>
  );
}

interface DiarizationSectionProps extends ParamEditorProps {
  /** DOM id tying the switch to its label; must be unique per panel. */
  switchId: string;
  /** Optional sub-text under the section title. */
  description?: string;
  /** Show the Pyannote / Sortformer picker (families that support both). */
  modelSelectable?: boolean;
}

/**
 * Speaker-diarization block shared by all local model families: an enable
 * switch and, once enabled, the optional model picker, the speaker-count range
 * and the Hugging Face token. The token is hidden only when the user can pick
 * a model and picked one other than Pyannote.
 */
function DiarizationSection({
  params,
  updateParam,
  switchId,
  description,
  modelSelectable = false,
}: DiarizationSectionProps) {
  const needsHfToken = !modelSelectable || params.diarize_model === "pyannote";

  return (
    <Section title="Speaker Diarization" description={description}>
      <div className="space-y-4">
        <div className="flex items-center gap-3">
          <Switch
            id={switchId}
            checked={params.diarize}
            onCheckedChange={(v) => updateParam('diarize', v)}
          />
          <label htmlFor={switchId} className="text-sm text-[var(--text-primary)] cursor-pointer">
            Enable speaker identification
          </label>
        </div>
        {params.diarize && (
          <div className="p-4 bg-[var(--bg-main)] rounded-xl border border-[var(--border-subtle)] space-y-4">
            {modelSelectable && (
              <FormField label="Diarization Model">
                <Select value={params.diarize_model} onValueChange={(v) => updateParam('diarize_model', v)}>
                  <SelectTrigger className={selectTriggerClassName}>
                    <SelectValue />
                  </SelectTrigger>
                  <SelectContent className={selectContentClassName}>
                    <SelectItem value="pyannote" className={selectItemClassName}>Pyannote</SelectItem>
                    <SelectItem value="nvidia_sortformer" className={selectItemClassName}>NVIDIA Sortformer</SelectItem>
                  </SelectContent>
                </Select>
              </FormField>
            )}
            <div className="grid grid-cols-2 gap-4">
              <SpeakerCountField label="Min Speakers" field="min_speakers" params={params} updateParam={updateParam} />
              <SpeakerCountField label="Max Speakers" field="max_speakers" params={params} updateParam={updateParam} />
            </div>
            {needsHfToken && (
              <FormField label="Hugging Face Token" description={PARAM_DESCRIPTIONS.hf_token}>
                <Input
                  type="password"
                  placeholder="hf_..."
                  value={params.hf_token || ""}
                  onChange={(e) => updateParam('hf_token', e.target.value || undefined)}
                  className={inputClassName}
                />
              </FormField>
            )}
          </div>
        )}
      </div>
    </Section>
  );
}

interface ContextSliderProps {
  label: string;
  value: number;
  onChange: (next: number) => void;
}

/** Slider for one side of the Parakeet attention context, with a min / current / max legend. */
function ContextSlider({ label, value, onChange }: ContextSliderProps) {
  return (
    <div className="space-y-3">
      <FormField label={label}>
        <Slider
          value={[value]}
          onValueChange={(v) => onChange(v[0])}
          max={CONTEXT_MAX}
          min={CONTEXT_MIN}
          step={CONTEXT_STEP}
          className="w-full"
        />
        <div className="flex justify-between text-xs text-[var(--text-tertiary)]">
          <span>{CONTEXT_MIN}</span>
          <span className="font-medium text-[var(--text-primary)]">{value}</span>
          <span>{CONTEXT_MAX}</span>
        </div>
      </FormField>
    </div>
  );
}

// ---------------------------------------------------------------------------
// Panels
// ---------------------------------------------------------------------------

/**
 * Whisper panel: model/language/task/device selects, speaker diarization
 * (hidden for multi-track audio) and a collapsible "Advanced Settings" group.
 * Typed numeric values are clamped to the range declared on each input.
 */
function WhisperConfig({ params, updateParam, isMultiTrack }: ConfigProps) {
  return (
    <div className="space-y-6">
      {/* Essential Settings */}
      <Section title="Model Settings">
        <div className="grid grid-cols-1 sm:grid-cols-2 gap-4">
          <FormField label="Model Size" description={PARAM_DESCRIPTIONS.model}>
            <Select value={params.model} onValueChange={(v) => updateParam('model', v)}>
              <SelectTrigger className={selectTriggerClassName}>
                <SelectValue />
              </SelectTrigger>
              <SelectContent className={selectContentClassName}>
                {WHISPER_MODELS.map((m) => (
                  <SelectItem key={m} value={m} className={selectItemClassName}>{m}</SelectItem>
                ))}
              </SelectContent>
            </Select>
          </FormField>
          <FormField label="Language" description={PARAM_DESCRIPTIONS.language}>
            {/* "auto" is a UI-only sentinel; it is stored as an undefined language. */}
            <Select value={params.language || "auto"} onValueChange={(v) => updateParam('language', v === "auto" ? undefined : v)}>
              <SelectTrigger className={selectTriggerClassName}>
                <SelectValue />
              </SelectTrigger>
              <SelectContent className={selectContentClassName}>
                {LANGUAGES.map((l) => (
                  <SelectItem key={l.value} value={l.value} className={selectItemClassName}>{l.label}</SelectItem>
                ))}
              </SelectContent>
            </Select>
          </FormField>
          <FormField label="Task" description={PARAM_DESCRIPTIONS.task}>
            <Select value={params.task} onValueChange={(v) => updateParam('task', v)}>
              <SelectTrigger className={selectTriggerClassName}>
                <SelectValue />
              </SelectTrigger>
              <SelectContent className={selectContentClassName}>
                <SelectItem value="transcribe" className={selectItemClassName}>Transcribe</SelectItem>
                <SelectItem value="translate" className={selectItemClassName}>Translate to English</SelectItem>
              </SelectContent>
            </Select>
          </FormField>
          <FormField label="Device" description={PARAM_DESCRIPTIONS.device}>
            <Select value={params.device} onValueChange={(v) => updateParam('device', v)}>
              <SelectTrigger className={selectTriggerClassName}>
                <SelectValue />
              </SelectTrigger>
              <SelectContent className={selectContentClassName}>
                <SelectItem value="cpu" className={selectItemClassName}>CPU</SelectItem>
                <SelectItem value="cuda" className={selectItemClassName}>GPU (CUDA)</SelectItem>
              </SelectContent>
            </Select>
          </FormField>
        </div>
      </Section>

      {/* Speaker Diarization */}
      {!isMultiTrack && (
        <DiarizationSection
          params={params}
          updateParam={updateParam}
          switchId="diarize"
          description="Identify and separate different speakers in the audio"
        />
      )}

      {/* Advanced Settings (Accordion) */}
      <Accordion type="single" collapsible className="w-full">
        <AccordionItem value="advanced" className="border border-[var(--border-subtle)] rounded-xl px-4">
          <AccordionTrigger className="text-sm font-medium text-[var(--text-primary)] hover:no-underline py-4">
            Advanced Settings
          </AccordionTrigger>
          <AccordionContent className="pb-4 space-y-4">
            <div className="grid grid-cols-1 sm:grid-cols-2 gap-4">
              <FormField label="Compute Type" description={PARAM_DESCRIPTIONS.compute_type}>
                <Select value={params.compute_type} onValueChange={(v) => updateParam('compute_type', v)}>
                  <SelectTrigger className={selectTriggerClassName}>
                    <SelectValue />
                  </SelectTrigger>
                  <SelectContent className={selectContentClassName}>
                    <SelectItem value="float32" className={selectItemClassName}>Float32 (Accurate)</SelectItem>
                    <SelectItem value="float16" className={selectItemClassName}>Float16 (Fast)</SelectItem>
                    <SelectItem value="int8" className={selectItemClassName}>Int8 (Fastest)</SelectItem>
                  </SelectContent>
                </Select>
              </FormField>
              <FormField label="Batch Size" description={PARAM_DESCRIPTIONS.batch_size}>
                {/* Empty / invalid text falls back to 8; typed values are clamped to 1..64. */}
                <Input
                  type="number"
                  min={1}
                  max={64}
                  value={params.batch_size}
                  onChange={(e) => updateParam('batch_size', parseBoundedInt(e.target.value, 1, 64) ?? 8)}
                  className={inputClassName}
                />
              </FormField>
              <FormField label="Beam Size" description={PARAM_DESCRIPTIONS.beam_size}>
                {/* Empty / invalid text falls back to 5; typed values are clamped to 1..10. */}
                <Input
                  type="number"
                  min={1}
                  max={10}
                  value={params.beam_size}
                  onChange={(e) => updateParam('beam_size', parseBoundedInt(e.target.value, 1, 10) ?? 5)}
                  className={inputClassName}
                />
              </FormField>
              <FormField label="Temperature" description={PARAM_DESCRIPTIONS.temperature}>
                {/* Empty / invalid text falls back to 0; typed values are clamped to 0..1. */}
                <Input
                  type="number"
                  min={0}
                  max={1}
                  step={0.1}
                  value={params.temperature}
                  onChange={(e) => updateParam('temperature', parseBoundedFloat(e.target.value, 0, 1) ?? 0)}
                  className={inputClassName}
                />
              </FormField>
            </div>
            <FormField label="Initial Prompt" description={PARAM_DESCRIPTIONS.initial_prompt} optional>
              <Textarea
                placeholder="Optional context to guide transcription..."
                value={params.initial_prompt || ""}
                onChange={(e) => updateParam('initial_prompt', e.target.value || undefined)}
                className={`${inputClassName} resize-none min-h-[80px]`}
                rows={2}
              />
            </FormField>
            <div className="flex items-center gap-3">
              <Switch
                id="suppress_numerals"
                checked={params.suppress_numerals}
                onCheckedChange={(v) => updateParam('suppress_numerals', v)}
              />
              <label htmlFor="suppress_numerals" className="text-sm text-[var(--text-primary)] cursor-pointer">
                Suppress numerals (write numbers as words)
              </label>
            </div>

            {/* Alignment Settings */}
            <div className="pt-2 border-t border-[var(--border-subtle)] space-y-4">
              <div className="flex items-center gap-3">
                <Switch
                  id="no_align"
                  checked={params.no_align}
                  onCheckedChange={(v) => updateParam('no_align', v)}
                />
                <label htmlFor="no_align" className="text-sm text-[var(--text-primary)] cursor-pointer">
                  Skip word alignment (faster, less precise timestamps)
                </label>
              </div>
              {/* The custom model only matters while alignment is enabled. */}
              {!params.no_align && (
                <FormField label="Custom Alignment Model" description="WhisperX-compatible alignment model (e.g., KBLab/wav2vec2-large-voxrex-swedish). Leave empty for default." optional>
                  <Input
                    placeholder="model/path or HuggingFace ID"
                    value={params.align_model || ""}
                    onChange={(e) => updateParam('align_model', e.target.value || undefined)}
                    className={inputClassName}
                  />
                </FormField>
              )}
            </div>
          </AccordionContent>
        </AccordionItem>
      </Accordion>
    </div>
  );
}

/**
 * NVIDIA Parakeet panel: left/right attention-context sliders plus speaker
 * diarization with a selectable diarization model (hidden for multi-track audio).
 */
function ParakeetConfig({ params, updateParam, isMultiTrack }: ConfigProps) {
  return (
    <div className="space-y-6">
      {/* Long-form Audio Settings */}
      <Section title="Audio Context" description="Configure how much context the model uses for long audio files">
        <div className="grid grid-cols-1 sm:grid-cols-2 gap-6">
          <ContextSlider
            label="Left Context"
            value={params.attention_context_left}
            onChange={(next) => updateParam('attention_context_left', next)}
          />
          <ContextSlider
            label="Right Context"
            value={params.attention_context_right}
            onChange={(next) => updateParam('attention_context_right', next)}
          />
        </div>
      </Section>

      {/* Diarization for Parakeet */}
      {!isMultiTrack && (
        <DiarizationSection
          params={params}
          updateParam={updateParam}
          switchId="parakeet_diarize"
          modelSelectable
        />
      )}
    </div>
  );
}

/**
 * NVIDIA Canary panel: source-language select (shows English when no language
 * is stored) plus speaker diarization with a selectable diarization model
 * (hidden for multi-track audio).
 */
function CanaryConfig({ params, updateParam, isMultiTrack }: ConfigProps) {
  return (
    <div className="space-y-6">
      <Section title="Language Settings">
        <FormField label="Source Language">
          <Select value={params.language || "en"} onValueChange={(v) => updateParam('language', v)}>
            <SelectTrigger className={selectTriggerClassName}>
              <SelectValue />
            </SelectTrigger>
            <SelectContent className={selectContentClassName}>
              {CANARY_LANGUAGES.map((l) => (
                <SelectItem key={l.value} value={l.value} className={selectItemClassName}>{l.label}</SelectItem>
              ))}
            </SelectContent>
          </Select>
        </FormField>
      </Section>

      {/* Diarization for Canary */}
      {!isMultiTrack && (
        <DiarizationSection
          params={params}
          updateParam={updateParam}
          switchId="canary_diarize"
          modelSelectable
        />
      )}
    </div>
  );
}
/**
 * Props accepted by OpenAIConfig: everything ConfigProps declares (OpenAIConfig
 * reads `params` and `updateParam` from it) plus the state and callback behind
 * the API-key "Validate" control.
 */
interface OpenAIConfigProps extends ConfigProps {
/** When true, OpenAIConfig disables the Validate button and renders a spinner instead of its label. */
isValidating: boolean;
/**
 * Controls the feedback row under the API key field: 'idle' hides it, 'valid'
 * renders it in the success colour with a check icon, and any other value
 * ('invalid') renders it in the error colour with an X icon.
 */
validationStatus: 'idle' | 'valid' | 'invalid';
/** Text displayed next to the status icon whenever validationStatus is not 'idle'. */
validationMessage: string;
/** Model identifiers rendered as the options of the Model select; each string is used as key, value and label. */
availableModels: string[];
/** Invoked when the Validate button is clicked. */
onValidate: () => void;
}
/**
 * Settings panel for the OpenAI transcription backend.
 *
 * Renders, in order: the API key field with its Validate button and the
 * validation feedback row, the Model select, the Language select, and a
 * warning banner when a model other than whisper-1 is selected.
 *
 * @param params - Current parameter values; this panel reads `api_key`,
 *   `model` and `language`.
 * @param updateParam - Writes one parameter value back to the owner of `params`.
 * @param isValidating - Disables the Validate button and swaps its label for a spinner.
 * @param validationStatus - 'idle' hides the feedback row; 'valid' / 'invalid' pick its colour and icon.
 * @param validationMessage - Text shown in the feedback row.
 * @param availableModels - Model identifiers offered in the Model select.
 * @param onValidate - Called when the Validate button is clicked.
 */
function OpenAIConfig({
  params,
  updateParam,
  isValidating,
  validationStatus,
  validationMessage,
  availableModels,
  onValidate
}: OpenAIConfigProps) {
  // whisper-1 is the value shown while no model has been chosen.
  const selectedModel = params.model || "whisper-1";

  // The Model select is driven by `selectedModel`; if no item carries that
  // value the trigger would have nothing to display (assumes the Radix-based
  // Select used across this file). Always offering the current model keeps the
  // active choice visible even when `availableModels` is empty or omits it.
  // The Set also removes duplicate names, which would otherwise collide as
  // React keys. When the list already contains the model, order is unchanged.
  const modelOptions = Array.from(new Set([...availableModels, selectedModel]));

  const isValid = validationStatus === 'valid';
  const statusColorClass = isValid ? 'text-[var(--success-solid)]' : 'text-[var(--error)]';

  return (
    <div className="space-y-6">
      <Section title="API Configuration">
        <div className="space-y-4">
          <FormField label="OpenAI API Key" description="Your API key. Leave empty to use server default if configured.">
            <div className="flex gap-2">
              <Input
                type="password"
                placeholder="sk-..."
                value={params.api_key || ""}
                onChange={(e) => {
                  updateParam('api_key', e.target.value);
                }}
                className={`${inputClassName} flex-1`}
              />
              {/* type="button": a bare <button> defaults to "submit" and would
                  submit any form this panel is ever placed inside. */}
              <Button
                type="button"
                variant="outline"
                onClick={onValidate}
                disabled={isValidating}
                className="shrink-0 rounded-xl border-[var(--border-subtle)] cursor-pointer"
              >
                {isValidating ? <Loader2 className="h-4 w-4 animate-spin" /> : "Validate"}
              </Button>
            </div>
            {validationStatus !== 'idle' && (
              <div className={`flex items-center gap-2 text-sm mt-2 ${statusColorClass}`}>
                {isValid ? <Check className="h-4 w-4" /> : <XCircle className="h-4 w-4" />}
                <span>{validationMessage}</span>
              </div>
            )}
          </FormField>
          <FormField label="Model">
            <Select value={selectedModel} onValueChange={(v) => updateParam('model', v)}>
              <SelectTrigger className={selectTriggerClassName}>
                <SelectValue />
              </SelectTrigger>
              <SelectContent className={selectContentClassName}>
                {modelOptions.map((m) => (
                  <SelectItem key={m} value={m} className={selectItemClassName}>{m}</SelectItem>
                ))}
              </SelectContent>
            </Select>
          </FormField>
          <FormField label="Language">
            {/* The "auto" entry is stored as `undefined` rather than as a language code. */}
            <Select value={params.language || "auto"} onValueChange={(v) => updateParam('language', v === "auto" ? undefined : v)}>
              <SelectTrigger className={selectTriggerClassName}>
                <SelectValue />
              </SelectTrigger>
              <SelectContent className={selectContentClassName}>
                {LANGUAGES.map((l) => (
                  <SelectItem key={l.value} value={l.value} className={selectItemClassName}>{l.label}</SelectItem>
                ))}
              </SelectContent>
            </Select>
          </FormField>
        </div>
      </Section>
      {/* Only an explicitly chosen non-whisper-1 model triggers the warning; an unset model does not. */}
      {params.model && params.model !== "whisper-1" && (
        <InfoBanner variant="warning" title="Limited Features">
          Word-level timestamps are only supported by whisper-1. Synchronized playback won't be available.
        </InfoBanner>
      )}
    </div>
  );
}

View File

@@ -0,0 +1,58 @@
"use client"
import * as React from "react"
import * as AccordionPrimitive from "@radix-ui/react-accordion"
import { ChevronDown } from "lucide-react"
import { cn } from "@/lib/utils"
/** Accordion root: a direct alias of the Radix `AccordionPrimitive.Root` component, with no styling added here. */
const Accordion = AccordionPrimitive.Root
/**
 * One section of an accordion.
 *
 * Renders the Radix `Item` primitive, forwarding the ref and every remaining
 * prop, with a `border-b` class combined (via `cn`) with any caller-supplied
 * `className`.
 */
const AccordionItem = React.forwardRef<
  React.ElementRef<typeof AccordionPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof AccordionPrimitive.Item>
>((itemProps, forwardedRef) => {
  const { className, ...rest } = itemProps
  const itemClassName = cn("border-b", className)

  return (
    <AccordionPrimitive.Item ref={forwardedRef} className={itemClassName} {...rest} />
  )
})
AccordionItem.displayName = "AccordionItem"
/**
 * Clickable header of an accordion item.
 *
 * Wraps the Radix `Trigger` inside a flex `Header`, renders the caller's
 * children followed by a chevron icon, and forwards the ref and remaining
 * props to the trigger. The `[&[data-state=open]>svg]:rotate-180` class flips
 * the chevron while the trigger's `data-state` is `open`.
 */
const AccordionTrigger = React.forwardRef<
  React.ElementRef<typeof AccordionPrimitive.Trigger>,
  React.ComponentPropsWithoutRef<typeof AccordionPrimitive.Trigger>
>((triggerProps, forwardedRef) => {
  const { className, children, ...rest } = triggerProps
  // Base layout/interaction classes first so caller classes are passed to `cn` last.
  const triggerClassName = cn(
    "flex flex-1 items-center justify-between py-4 font-medium transition-all hover:underline [&[data-state=open]>svg]:rotate-180 cursor-pointer",
    className
  )

  return (
    <AccordionPrimitive.Header className="flex">
      <AccordionPrimitive.Trigger ref={forwardedRef} className={triggerClassName} {...rest}>
        {children}
        <ChevronDown className="h-4 w-4 shrink-0 transition-transform duration-200" />
      </AccordionPrimitive.Trigger>
    </AccordionPrimitive.Header>
  )
})
AccordionTrigger.displayName = AccordionPrimitive.Trigger.displayName
/**
 * Collapsible body of an accordion item.
 *
 * The Radix `Content` element always receives the fixed overflow/animation
 * classes; the caller's `className` is applied to the inner padding wrapper
 * around `children`, not to the `Content` element itself. The ref and all
 * remaining props are forwarded to `Content`.
 */
const AccordionContent = React.forwardRef<
  React.ElementRef<typeof AccordionPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof AccordionPrimitive.Content>
>((contentProps, forwardedRef) => {
  const { className, children, ...rest } = contentProps
  const innerClassName = cn("pb-4 pt-0", className)

  return (
    <AccordionPrimitive.Content
      ref={forwardedRef}
      className="overflow-hidden text-sm transition-all data-[state=closed]:animate-accordion-up data-[state=open]:animate-accordion-down"
      {...rest}
    >
      <div className={innerClassName}>{children}</div>
    </AccordionPrimitive.Content>
  )
})
AccordionContent.displayName = AccordionPrimitive.Content.displayName
export { Accordion, AccordionItem, AccordionTrigger, AccordionContent }