From 4e13bb9d151a3e8a8f39a0307b6452ea2e601abe Mon Sep 17 00:00:00 2001 From: rishikanthc Date: Tue, 28 Apr 2026 09:53:25 -0700 Subject: [PATCH] Add transcript annotation schema --- devnotes/v2.0.0/README.md | 1 + .../highlights-notes-backend-sprint-plan.md | 238 ++++++++++++++++++ internal/database/database_test.go | 125 +++++++-- internal/database/legacy.go | 48 ---- internal/database/migrate.go | 2 - internal/database/schema.go | 124 ++++----- internal/database/steps.go | 28 --- internal/models/annotation.go | 80 ++++++ internal/models/note.go | 77 ------ internal/repository/implementations.go | 30 --- 10 files changed, 487 insertions(+), 266 deletions(-) create mode 100644 devnotes/v2.0.0/sprint-plans/highlights-notes-backend-sprint-plan.md create mode 100644 internal/models/annotation.go delete mode 100644 internal/models/note.go diff --git a/devnotes/v2.0.0/README.md b/devnotes/v2.0.0/README.md index 0ecc7787..3903863e 100644 --- a/devnotes/v2.0.0/README.md +++ b/devnotes/v2.0.0/README.md @@ -25,6 +25,7 @@ This folder contains the planning notes, architecture rules, sprint trackers, an | `devnotes/automatic-summarization-sprint.md` | `devnotes/v2.0.0/sprint-plans/automatic-summarization-sprint.md` | | `devnotes/summary-widgets-sprint.md` | `devnotes/v2.0.0/sprint-plans/summary-widgets-sprint.md` | | `devnotes/llm-provider-settings-sprint.md` | `devnotes/v2.0.0/sprint-plans/llm-provider-settings-sprint.md` | +| highlights and notes backend sprint plan | `devnotes/v2.0.0/sprint-plans/highlights-notes-backend-sprint-plan.md` | | `devnotes/sprint-tracker.md` | `devnotes/v2.0.0/sprint-trackers/api-revamp-sprint-tracker.md` | | `devnotes/engine-worker-sprint-tracker.md` | `devnotes/v2.0.0/sprint-trackers/engine-worker-sprint-tracker.md` | | `devnotes/api-sprint-0-inventory.md` | `devnotes/v2.0.0/status-updates/api-sprint-00-inventory-and-removal-plan.md` | diff --git a/devnotes/v2.0.0/sprint-plans/highlights-notes-backend-sprint-plan.md 
b/devnotes/v2.0.0/sprint-plans/highlights-notes-backend-sprint-plan.md new file mode 100644 index 00000000..ee721777 --- /dev/null +++ b/devnotes/v2.0.0/sprint-plans/highlights-notes-backend-sprint-plan.md @@ -0,0 +1,238 @@ +# Highlights and Notes Backend Sprint Plan + +## Current Assessment + +The backend does not currently have a clean notes/highlights implementation ready for use. + +What exists now: + +- Notes and highlights are listed as deferred modules in `devnotes/v2.0.0/specs/api-v1-master-spec.md`. +- The canonical route family is reserved under `/api/v1/transcriptions/{id}/notes`. +- The current transcript API returns transcript text, segments, and words from `GET /api/v1/transcriptions/{id}/transcript`. +- Transcription records already carry `user_id`, so future ownership checks have a clear parent boundary. + +What was removed as legacy backend code: + +- The old `models.Note` persistence record. +- The generic `NoteRepository`. +- Old note table schema registration and indexes. +- Legacy note migration/backfill code. +- Database tests that asserted the old note shape. + +Why the old note schema was not a good foundation: + +- It only modeled notes, not highlights as a first-class resource. +- It mixed compatibility fields into the persistence model. +- It had weak anchoring for transcript text changes and retranscription. +- It used internal transcription IDs directly without a clean public response contract. +- It did not give enough room for future multi-user behavior such as ownership, sharing, or audit-safe mutation. + +## Target Backend Model + +Use a new annotation-oriented schema rather than resurrecting the old notes table. 
+ +Recommended table: `transcript_annotations` + +Core columns: + +```txt +id string primary key +user_id uint not null indexed +transcription_id string not null indexed +kind string not null enum-like: highlight | note +content text null +color string null +quote text not null +anchor_start_ms integer not null +anchor_end_ms integer not null +anchor_start_word integer null +anchor_end_word integer null +anchor_start_char integer null +anchor_end_char integer null +anchor_text_hash string null +status string not null default active +metadata_json json not null default {} +created_at timestamp +updated_at timestamp +deleted_at soft delete +``` + +Indexes and constraints: + +- `idx_transcript_annotations_user_transcription_created_at` on `(user_id, transcription_id, created_at DESC)`. +- `idx_transcript_annotations_user_kind_updated_at` on `(user_id, kind, updated_at DESC)`. +- `idx_transcript_annotations_transcription_time` on `(transcription_id, anchor_start_ms, anchor_end_ms)`. +- Foreign key from `transcription_id` to `transcriptions(id)` with cascade delete. +- Check constraint or service validation for `kind IN ('highlight', 'note')`. +- Service validation that `anchor_end_ms >= anchor_start_ms`. + +Multi-user readiness: + +- Every query must include `user_id`; never fetch by annotation ID alone. +- Authorization should be inherited from transcription ownership for now. +- Keep `user_id` on the annotation even though the transcription also has it. This supports future shared transcripts where annotation ownership may differ from transcript ownership. +- Public IDs should be opaque and prefixed, for example `ann_...`. +- API responses should never expose raw database IDs. + +## Target REST API + +Use thin handlers and route through a notes/highlights service. 
+ +Recommended canonical routes: + +```http +GET /api/v1/transcriptions/{id}/annotations +POST /api/v1/transcriptions/{id}/annotations +GET /api/v1/transcriptions/{id}/annotations/{annotation_id} +PATCH /api/v1/transcriptions/{id}/annotations/{annotation_id} +DELETE /api/v1/transcriptions/{id}/annotations/{annotation_id} +``` + +Supported filters: + +```txt +kind=highlight|note +updated_after=RFC3339 timestamp +cursor=... +limit=... +``` + +Convenience aliases may be added only if the frontend needs resource-specific URLs: + +```http +GET /api/v1/transcriptions/{id}/notes +GET /api/v1/transcriptions/{id}/highlights +``` + +Those aliases should call the same annotation service with a fixed `kind` filter. Do not create separate persistence paths. + +Example create request: + +```json +{ + "kind": "note", + "content": "Follow up on this decision", + "color": "yellow", + "quote": "We should ship the smaller model first", + "anchor": { + "start_ms": 12400, + "end_ms": 18900, + "start_word": 42, + "end_word": 51, + "start_char": 280, + "end_char": 336, + "text_hash": "sha256:..." + } +} +``` + +Collection response: + +```json +{ + "items": [], + "next_cursor": null +} +``` + +## Sprint 1: Schema and Migration + +Goal: add a clean annotations persistence model. + +Tasks: + +- Add `models.TranscriptAnnotation` with persistence fields only. +- Add schema migration for `transcript_annotations`. +- Add indexes for per-transcription list, per-kind list, and time-range lookup. +- Add database tests for fresh schema, foreign key behavior, soft delete, and user-scoped indexes. +- Keep legacy `notes` migration removed; old note data is intentionally not imported into the new model. + +Acceptance criteria: + +- Fresh databases create `transcript_annotations`. +- Deleting a transcription deletes its annotations. +- Invalid annotation kinds and invalid time ranges are rejected by service/model validation. 
+ +## Sprint 2: Repository and Service + +Goal: keep HTTP thin and put annotation decisions behind a domain service. + +Tasks: + +- Add an annotation repository with domain methods: + - `CreateAnnotation` + - `FindAnnotationForUser` + - `ListAnnotationsForTranscription` + - `UpdateAnnotation` + - `SoftDeleteAnnotation` +- Add an annotation service that: + - verifies transcription ownership, + - parses public IDs, + - validates anchors, + - normalizes content for highlights vs notes, + - emits small annotation events. +- Add service tests for ownership, invalid IDs, kind validation, time-range validation, and soft-delete behavior. + +Acceptance criteria: + +- No new annotation handler reads `database.DB` directly. +- Annotation ID lookup is always scoped by `user_id` and `transcription_id`. +- Service methods return explicit not-found, validation, and conflict errors. + +## Sprint 3: REST API + +Goal: expose annotations through canonical v1 REST endpoints. + +Tasks: + +- Add request/response types separate from GORM models. +- Register routes under `/api/v1/transcriptions/{id}/annotations`. +- Implement list/create/get/update/delete handlers. +- Add pagination using the existing list-query helpers where appropriate. +- Add route contract tests and error-envelope tests. +- Add OpenAPI documentation for the new routes. + +Acceptance criteria: + +- Authenticated users can create, list, update, and delete their own transcript annotations. +- Other users cannot access annotations even if they know the public annotation ID. +- Responses use public IDs: `tr_...` and `ann_...`. +- API responses do not expose local paths or internal database-only fields. + +## Sprint 4: Events and Transcript Integrity + +Goal: make annotations live-update friendly and robust against transcript changes. + +Tasks: + +- Publish `annotation.created`, `annotation.updated`, and `annotation.deleted` SSE events. +- Include only public IDs and small cache-invalidation payloads. 
+- Add optional anchor verification against current transcript words/segments. +- Decide behavior on retranscription: + - keep annotations but mark anchors as stale when text hash no longer matches, or + - soft-delete annotations on destructive transcript replacement. +- Add tests for event payload shape and stale-anchor behavior. + +Acceptance criteria: + +- Annotation events are usable for cache invalidation. +- Full page refresh can reconstruct the correct state from persisted annotations. +- Retranscription behavior is explicit, tested, and documented. + +## Sprint 5: Future Multi-User Hardening + +Goal: avoid another schema rewrite when multi-user support arrives. + +Tasks: + +- Add tests that simulate two users with different transcriptions and annotations. +- Add service seams for future shared transcript permissions. +- Review whether annotation ownership should remain independent from transcription ownership. +- Add audit-friendly fields only if product requirements need them, such as `created_by_user_id` and `updated_by_user_id`. + +Acceptance criteria: + +- All annotation reads and writes are user-scoped. +- No repository method can accidentally return annotations across users. +- Future shared-transcript access can be added by changing authorization policy without changing the core annotation table. 
+ diff --git a/internal/database/database_test.go b/internal/database/database_test.go index 66402b82..63b1676d 100644 --- a/internal/database/database_test.go +++ b/internal/database/database_test.go @@ -53,10 +53,6 @@ type legacySummaryTable legacySummary func (legacySummaryTable) TableName() string { return "summaries" } -type legacyNoteTable legacyNote - -func (legacyNoteTable) TableName() string { return "notes" } - type legacyLLMConfigTable legacyLLMConfig func (legacyLLMConfigTable) TableName() string { return "llm_configs" } @@ -83,7 +79,7 @@ func TestFreshSchemaInitialization(t *testing.T) { "speaker_mappings", "summary_templates", "summaries", - "notes", + "transcript_annotations", "chat_sessions", "chat_messages", "llm_profiles", @@ -99,17 +95,14 @@ func TestFreshSchemaInitialization(t *testing.T) { assert.True(t, hasIndex(t, db, "transcription_profiles", "idx_transcription_profiles_user_default_unique")) assert.True(t, hasIndex(t, db, "summary_templates", "idx_summary_templates_user_default_unique")) assert.True(t, hasIndex(t, db, "llm_profiles", "idx_llm_profiles_user_default_unique")) + assert.True(t, hasIndex(t, db, "transcript_annotations", "idx_transcript_annotations_user_transcription_created_at")) + assert.True(t, hasIndex(t, db, "transcript_annotations", "idx_transcript_annotations_user_kind_updated_at")) + assert.True(t, hasIndex(t, db, "transcript_annotations", "idx_transcript_annotations_transcription_time")) title := "Fresh transcription" job := models.TranscriptionJob{UserID: 1, Title: &title, Status: models.StatusUploaded, AudioPath: "/tmp/audio.wav"} require.NoError(t, db.Create(&job).Error) - note := models.Note{ID: "note-1", UserID: job.UserID, TranscriptionID: job.ID, Content: "hello", StartTime: 1.2, EndTime: 2.4} - require.NoError(t, db.Create(&note).Error) - - invalidNote := models.Note{ID: "note-invalid", UserID: job.UserID, TranscriptionID: "missing", Content: "bad"} - require.Error(t, db.Create(&invalidNote).Error) - mapping1 := 
models.SpeakerMapping{UserID: job.UserID, TranscriptionJobID: job.ID, OriginalSpeaker: "SPEAKER_00", CustomName: "Alice"} require.NoError(t, db.Create(&mapping1).Error) mapping2 := models.SpeakerMapping{UserID: job.UserID, TranscriptionJobID: job.ID, OriginalSpeaker: "SPEAKER_00", CustomName: "Bob"} @@ -117,6 +110,105 @@ func TestFreshSchemaInitialization(t *testing.T) { } +func TestTranscriptAnnotationSchemaValidationAndSoftDelete(t *testing.T) { + db := openMigratedTestDB(t, "transcript-annotations.db") + + user := models.User{Username: "annotation-user", Password: "pw"} + require.NoError(t, db.Create(&user).Error) + + title := "Annotated transcript" + job := models.TranscriptionJob{UserID: user.ID, Title: &title, Status: models.StatusCompleted, AudioPath: "/tmp/audio.wav"} + require.NoError(t, db.Create(&job).Error) + + content := "Follow up" + color := "yellow" + startWord := 2 + endWord := 5 + annotation := models.TranscriptAnnotation{ + UserID: user.ID, + TranscriptionID: job.ID, + Kind: models.AnnotationKindNote, + Content: &content, + Color: &color, + Quote: "important quote", + AnchorStartMS: 1200, + AnchorEndMS: 3400, + AnchorStartWord: &startWord, + AnchorEndWord: &endWord, + } + require.NoError(t, db.Create(&annotation).Error) + assert.NotEmpty(t, annotation.ID) + assert.Equal(t, models.AnnotationStatusActive, annotation.Status) + assert.Equal(t, "{}", annotation.MetadataJSON) + + invalidKind := models.TranscriptAnnotation{ + UserID: user.ID, + TranscriptionID: job.ID, + Kind: models.AnnotationKind("bookmark"), + Quote: "bad kind", + AnchorStartMS: 100, + AnchorEndMS: 200, + } + require.Error(t, db.Create(&invalidKind).Error) + + invalidRange := models.TranscriptAnnotation{ + UserID: user.ID, + TranscriptionID: job.ID, + Kind: models.AnnotationKindHighlight, + Quote: "bad range", + AnchorStartMS: 500, + AnchorEndMS: 100, + } + require.Error(t, db.Create(&invalidRange).Error) + + missingTranscription := models.TranscriptAnnotation{ + UserID: user.ID, + 
TranscriptionID: "missing", + Kind: models.AnnotationKindHighlight, + Quote: "missing parent", + AnchorStartMS: 100, + AnchorEndMS: 200, + } + require.Error(t, db.Create(&missingTranscription).Error) + + require.NoError(t, db.Delete(&annotation).Error) + + var visibleCount int64 + require.NoError(t, db.Model(&models.TranscriptAnnotation{}).Where("id = ?", annotation.ID).Count(&visibleCount).Error) + assert.Zero(t, visibleCount) + + var storedCount int64 + require.NoError(t, db.Unscoped().Model(&models.TranscriptAnnotation{}).Where("id = ?", annotation.ID).Count(&storedCount).Error) + assert.Equal(t, int64(1), storedCount) +} + +func TestTranscriptAnnotationHardDeleteCascadesWithTranscription(t *testing.T) { + db := openMigratedTestDB(t, "transcript-annotation-cascade.db") + + user := models.User{Username: "cascade-annotation-user", Password: "pw"} + require.NoError(t, db.Create(&user).Error) + + title := "Cascade transcript" + job := models.TranscriptionJob{UserID: user.ID, Title: &title, Status: models.StatusCompleted, AudioPath: "/tmp/audio.wav"} + require.NoError(t, db.Create(&job).Error) + + annotation := models.TranscriptAnnotation{ + UserID: user.ID, + TranscriptionID: job.ID, + Kind: models.AnnotationKindHighlight, + Quote: "highlighted quote", + AnchorStartMS: 1000, + AnchorEndMS: 2000, + } + require.NoError(t, db.Create(&annotation).Error) + + require.NoError(t, db.Unscoped().Delete(&job).Error) + + var count int64 + require.NoError(t, db.Unscoped().Model(&models.TranscriptAnnotation{}).Where("id = ?", annotation.ID).Count(&count).Error) + assert.Zero(t, count) +} + func TestCreateExecutionAssignsSequentialNumbers(t *testing.T) { db := openMigratedTestDB(t, "execution-sequence.db") @@ -452,13 +544,6 @@ func TestLegacyMigrationPreservesData(t *testing.T) { assert.Equal(t, "gpt-4o", summary.Model) assert.Equal(t, "completed", summary.Status) - var note models.Note - require.NoError(t, db.First(&note, "id = ?", "note-1").Error) - assert.Equal(t, int64(1250), 
note.StartMS) - assert.Equal(t, int64(3250), note.EndMS) - assert.Equal(t, "quoted text", note.Quote) - assert.Equal(t, 2, note.StartWordIndex) - var chatSession models.ChatSession require.NoError(t, db.First(&chatSession, "id = ?", "chat-1").Error) assert.Equal(t, "job-1", chatSession.TranscriptionID) @@ -581,7 +666,6 @@ func createLegacyDatabase(t *testing.T, dbPath string, withData bool) { &legacySummaryTemplateTable{}, &legacySummarySettingTable{}, &legacySummaryTable{}, - &legacyNoteTable{}, &legacyLLMConfigTable{}, &legacyChatSessionTable{}, &legacyChatMessageTable{}, @@ -626,9 +710,6 @@ func createLegacyDatabase(t *testing.T, dbPath string, withData bool) { require.NoError(t, db.Table("summary_settings").Create(&legacySummarySetting{ID: 1, DefaultModel: "gpt-4o-mini", UpdatedAt: now}).Error) require.NoError(t, db.Table("summaries").Create(&legacySummary{ID: "summary-1", TranscriptionID: "job-1", TemplateID: &summaryTemplate.ID, Model: "gpt-4o", Content: "summary body", CreatedAt: completedAt, UpdatedAt: completedAt}).Error) - note := legacyNote{ID: "note-1", TranscriptionID: "job-1", StartWordIndex: 2, EndWordIndex: 6, StartTime: 1.25, EndTime: 3.25, Quote: "quoted text", Content: "note content", CreatedAt: now, UpdatedAt: now} - require.NoError(t, db.Table("notes").Create(&note).Error) - llmConfig := legacyLLMConfig{ID: 3, Provider: "openai", OpenAIBaseURL: &openAIBaseURL, APIKey: &openAIKey, IsActive: true, CreatedAt: now, UpdatedAt: now} require.NoError(t, db.Table("llm_configs").Create(&llmConfig).Error) diff --git a/internal/database/legacy.go b/internal/database/legacy.go index 4c477ac6..a0704a8f 100644 --- a/internal/database/legacy.go +++ b/internal/database/legacy.go @@ -120,19 +120,6 @@ type legacySummary struct { UpdatedAt time.Time } -type legacyNote struct { - ID string - TranscriptionID string - StartWordIndex int - EndWordIndex int - StartTime float64 - EndTime float64 - Quote string - Content string - CreatedAt time.Time - UpdatedAt time.Time -} 
- type legacyLLMConfig struct { ID uint Provider string @@ -213,9 +200,6 @@ func migrateLegacy(db *gorm.DB) error { if err := migrateSummaries(tx, userID); err != nil { return err } - if err := migrateNotes(tx, userID); err != nil { - return err - } if err := migrateLLMProfiles(tx, userID); err != nil { return err } @@ -253,7 +237,6 @@ func archiveConflictingLegacyTables(tx *gorm.DB) error { "speaker_mappings", "summary_templates", "summaries", - "notes", "chat_sessions", "chat_messages", } @@ -308,7 +291,6 @@ func legacyHasOwnedData(tx *gorm.DB) (bool, error) { "transcription_jobs", legacyPrefix + "transcription_profiles", legacyPrefix + "summary_templates", - legacyPrefix + "notes", legacyPrefix + "chat_sessions", "llm_configs", legacyPrefix + "api_keys", @@ -536,36 +518,6 @@ func migrateSummaries(tx *gorm.DB, userID uint) error { return nil } -func migrateNotes(tx *gorm.DB, userID uint) error { - source := legacyPrefix + "notes" - if !tx.Migrator().HasTable(source) { - return nil - } - var notes []legacyNote - if err := tx.Table(source).Order("created_at ASC").Find(&notes).Error; err != nil { - return fmt.Errorf("load legacy notes: %w", err) - } - for _, legacyNote := range notes { - note := models.Note{ - ID: legacyNote.ID, - UserID: userID, - TranscriptionID: legacyNote.TranscriptionID, - Content: legacyNote.Content, - StartTime: legacyNote.StartTime, - EndTime: legacyNote.EndTime, - StartWordIndex: legacyNote.StartWordIndex, - EndWordIndex: legacyNote.EndWordIndex, - Quote: legacyNote.Quote, - CreatedAt: legacyNote.CreatedAt, - UpdatedAt: legacyNote.UpdatedAt, - } - if err := tx.Create(&note).Error; err != nil { - return fmt.Errorf("create migrated note %s: %w", legacyNote.ID, err) - } - } - return nil -} - func migrateLLMProfiles(tx *gorm.DB, userID uint) error { if !tx.Migrator().HasTable("llm_configs") { return nil diff --git a/internal/database/migrate.go b/internal/database/migrate.go index c3d306b1..7d31ea44 100644 --- a/internal/database/migrate.go +++ 
b/internal/database/migrate.go @@ -77,7 +77,6 @@ func detectLegacySchema(db *gorm.DB) (bool, error) { "speaker_mappings", "summary_templates", "summaries", - "notes", "chat_sessions", "chat_messages", } @@ -115,7 +114,6 @@ func isLegacySameNameTable(db *gorm.DB, table string) (bool, error) { "speaker_mappings": {"display_name", "user_id", "transcription_id"}, "summary_templates": {"config_json", "user_id"}, "summaries": {"model_name", "user_id"}, - "notes": {"start_ms", "end_ms", "metadata_json", "user_id"}, "chat_sessions": {"system_prompt", "user_id"}, "chat_messages": {"chat_session_id", "user_id"}, } diff --git a/internal/database/schema.go b/internal/database/schema.go index f1ddc9b2..e5921ead 100644 --- a/internal/database/schema.go +++ b/internal/database/schema.go @@ -25,7 +25,7 @@ var schemaModels = []any{ &models.Summary{}, &models.SummaryWidget{}, &models.SummaryWidgetRun{}, - &models.Note{}, + &models.TranscriptAnnotation{}, &models.ChatSession{}, &models.ChatMessage{}, &models.LLMConfig{}, @@ -71,7 +71,9 @@ func createTargetSchema(tx *gorm.DB) error { `CREATE INDEX IF NOT EXISTS idx_summary_widgets_user_enabled ON summary_widgets(user_id, enabled)`, `CREATE INDEX IF NOT EXISTS idx_summary_widget_runs_status_created_at ON summary_widget_runs(status, created_at ASC)`, `CREATE INDEX IF NOT EXISTS idx_summary_widget_runs_summary_created_at ON summary_widget_runs(summary_id, created_at ASC)`, - `CREATE INDEX IF NOT EXISTS idx_notes_transcription_created_at ON notes(transcription_id, created_at DESC)`, + `CREATE INDEX IF NOT EXISTS idx_transcript_annotations_user_transcription_created_at ON transcript_annotations(user_id, transcription_id, created_at DESC)`, + `CREATE INDEX IF NOT EXISTS idx_transcript_annotations_user_kind_updated_at ON transcript_annotations(user_id, kind, updated_at DESC)`, + `CREATE INDEX IF NOT EXISTS idx_transcript_annotations_transcription_time ON transcript_annotations(transcription_id, anchor_start_ms, anchor_end_ms)`, } for _, stmt 
:= range statements { if err := tx.Exec(stmt).Error; err != nil { @@ -114,63 +116,67 @@ type expectedSQLiteIndex struct { } var expectedSQLiteIndexes = map[string]expectedSQLiteIndex{ - "idx_users_deleted_at": {Table: "users", Columns: []string{"deleted_at"}, Unique: false}, - "idx_users_username": {Table: "users", Columns: []string{"username"}, Unique: true}, - "idx_users_email": {Table: "users", Columns: []string{"email"}, Unique: true}, - "idx_refresh_tokens_user_id": {Table: "refresh_tokens", Columns: []string{"user_id"}, Unique: false}, - "idx_refresh_tokens_token_hash": {Table: "refresh_tokens", Columns: []string{"token_hash"}, Unique: true}, - "idx_refresh_tokens_expires_at": {Table: "refresh_tokens", Columns: []string{"expires_at"}, Unique: false}, - "idx_refresh_tokens_revoked_at": {Table: "refresh_tokens", Columns: []string{"revoked_at"}, Unique: false}, - "idx_api_keys_user_id": {Table: "api_keys", Columns: []string{"user_id"}, Unique: false}, - "idx_api_keys_key_prefix": {Table: "api_keys", Columns: []string{"key_prefix"}, Unique: false}, - "idx_api_keys_key_hash": {Table: "api_keys", Columns: []string{"key_hash"}, Unique: true}, - "idx_api_keys_expires_at": {Table: "api_keys", Columns: []string{"expires_at"}, Unique: false}, - "idx_api_keys_revoked_at": {Table: "api_keys", Columns: []string{"revoked_at"}, Unique: false}, - "idx_transcription_profiles_user_id": {Table: "transcription_profiles", Columns: []string{"user_id"}, Unique: false}, - "idx_transcription_profiles_is_default": {Table: "transcription_profiles", Columns: []string{"is_default"}, Unique: false}, - "idx_transcription_profiles_user_default_unique": {Table: "transcription_profiles", Columns: []string{"user_id"}, Unique: true, Partial: true, WherePredicate: "is_default=1"}, - "idx_transcriptions_user_id": {Table: "transcriptions", Columns: []string{"user_id"}, Unique: false}, - "idx_transcriptions_status": {Table: "transcriptions", Columns: []string{"status"}, Unique: false}, - 
"idx_transcriptions_source_file_hash": {Table: "transcriptions", Columns: []string{"source_file_hash"}, Unique: false}, - "idx_transcriptions_latest_execution_id": {Table: "transcriptions", Columns: []string{"latest_execution_id"}, Unique: false}, - "idx_transcriptions_deleted_at": {Table: "transcriptions", Columns: []string{"deleted_at"}, Unique: false}, - "idx_transcriptions_queue_claim": {Table: "transcriptions", Columns: []string{"status", "queued_at"}, Unique: false}, - "idx_transcriptions_claim_expires_at": {Table: "transcriptions", Columns: []string{"claim_expires_at"}, Unique: false}, - "idx_transcription_executions_transcription_job_id": {Table: "transcription_executions", Columns: []string{"transcription_id"}, Unique: false}, - "idx_transcription_executions_user_id": {Table: "transcription_executions", Columns: []string{"user_id"}, Unique: false}, - "idx_transcription_executions_status": {Table: "transcription_executions", Columns: []string{"status"}, Unique: false}, - "idx_transcription_executions_profile_id": {Table: "transcription_executions", Columns: []string{"profile_id"}, Unique: false}, - "idx_speaker_mappings_user_id": {Table: "speaker_mappings", Columns: []string{"user_id"}, Unique: false}, - "idx_speaker_mappings_transcription_job_id": {Table: "speaker_mappings", Columns: []string{"transcription_id"}, Unique: false}, - "idx_summary_templates_user_id": {Table: "summary_templates", Columns: []string{"user_id"}, Unique: false}, - "idx_summary_templates_is_default": {Table: "summary_templates", Columns: []string{"is_default"}, Unique: false}, - "idx_summary_templates_deleted_at": {Table: "summary_templates", Columns: []string{"deleted_at"}, Unique: false}, - "idx_summary_templates_user_default_unique": {Table: "summary_templates", Columns: []string{"user_id"}, Unique: true, Partial: true, WherePredicate: "is_default=1"}, - "idx_summaries_transcription_id": {Table: "summaries", Columns: []string{"transcription_id"}, Unique: false}, - 
"idx_summaries_user_id": {Table: "summaries", Columns: []string{"user_id"}, Unique: false}, - "idx_summaries_template_id": {Table: "summaries", Columns: []string{"template_id"}, Unique: false}, - "idx_summaries_status_created_at": {Table: "summaries", Columns: []string{"status", "created_at"}, Unique: false}, - "idx_summary_widgets_user_id": {Table: "summary_widgets", Columns: []string{"user_id"}, Unique: false}, - "idx_summary_widgets_enabled": {Table: "summary_widgets", Columns: []string{"enabled"}, Unique: false}, - "idx_summary_widgets_deleted_at": {Table: "summary_widgets", Columns: []string{"deleted_at"}, Unique: false}, - "idx_summary_widgets_user_name_active_unique": {Table: "summary_widgets", Columns: []string{"user_id", "name"}, Unique: true, Partial: true, WherePredicate: "deleted_at IS NULL"}, - "idx_summary_widgets_user_enabled": {Table: "summary_widgets", Columns: []string{"user_id", "enabled"}, Unique: false}, - "idx_summary_widget_runs_summary_id": {Table: "summary_widget_runs", Columns: []string{"summary_id"}, Unique: false}, - "idx_summary_widget_runs_transcription_id": {Table: "summary_widget_runs", Columns: []string{"transcription_id"}, Unique: false}, - "idx_summary_widget_runs_widget_id": {Table: "summary_widget_runs", Columns: []string{"widget_id"}, Unique: false}, - "idx_summary_widget_runs_user_id": {Table: "summary_widget_runs", Columns: []string{"user_id"}, Unique: false}, - "idx_summary_widget_runs_status_created_at": {Table: "summary_widget_runs", Columns: []string{"status", "created_at"}, Unique: false}, - "idx_summary_widget_runs_summary_created_at": {Table: "summary_widget_runs", Columns: []string{"summary_id", "created_at"}, Unique: false}, - "idx_notes_user_id": {Table: "notes", Columns: []string{"user_id"}, Unique: false}, - "idx_notes_transcription_id": {Table: "notes", Columns: []string{"transcription_id"}, Unique: false}, - "idx_notes_deleted_at": {Table: "notes", Columns: []string{"deleted_at"}, Unique: false}, - 
"idx_chat_sessions_user_id": {Table: "chat_sessions", Columns: []string{"user_id"}, Unique: false}, - "idx_chat_sessions_transcription_id": {Table: "chat_sessions", Columns: []string{"transcription_id"}, Unique: false}, - "idx_chat_messages_user_id": {Table: "chat_messages", Columns: []string{"user_id"}, Unique: false}, - "idx_chat_messages_chat_session_id": {Table: "chat_messages", Columns: []string{"chat_session_id"}, Unique: false}, - "idx_llm_profiles_user_id": {Table: "llm_profiles", Columns: []string{"user_id"}, Unique: false}, - "idx_llm_profiles_is_default": {Table: "llm_profiles", Columns: []string{"is_default"}, Unique: false}, - "idx_llm_profiles_user_default_unique": {Table: "llm_profiles", Columns: []string{"user_id"}, Unique: true, Partial: true, WherePredicate: "is_default=1"}, + "idx_users_deleted_at": {Table: "users", Columns: []string{"deleted_at"}, Unique: false}, + "idx_users_username": {Table: "users", Columns: []string{"username"}, Unique: true}, + "idx_users_email": {Table: "users", Columns: []string{"email"}, Unique: true}, + "idx_refresh_tokens_user_id": {Table: "refresh_tokens", Columns: []string{"user_id"}, Unique: false}, + "idx_refresh_tokens_token_hash": {Table: "refresh_tokens", Columns: []string{"token_hash"}, Unique: true}, + "idx_refresh_tokens_expires_at": {Table: "refresh_tokens", Columns: []string{"expires_at"}, Unique: false}, + "idx_refresh_tokens_revoked_at": {Table: "refresh_tokens", Columns: []string{"revoked_at"}, Unique: false}, + "idx_api_keys_user_id": {Table: "api_keys", Columns: []string{"user_id"}, Unique: false}, + "idx_api_keys_key_prefix": {Table: "api_keys", Columns: []string{"key_prefix"}, Unique: false}, + "idx_api_keys_key_hash": {Table: "api_keys", Columns: []string{"key_hash"}, Unique: true}, + "idx_api_keys_expires_at": {Table: "api_keys", Columns: []string{"expires_at"}, Unique: false}, + "idx_api_keys_revoked_at": {Table: "api_keys", Columns: []string{"revoked_at"}, Unique: false}, + 
"idx_transcription_profiles_user_id": {Table: "transcription_profiles", Columns: []string{"user_id"}, Unique: false}, + "idx_transcription_profiles_is_default": {Table: "transcription_profiles", Columns: []string{"is_default"}, Unique: false}, + "idx_transcription_profiles_user_default_unique": {Table: "transcription_profiles", Columns: []string{"user_id"}, Unique: true, Partial: true, WherePredicate: "is_default=1"}, + "idx_transcriptions_user_id": {Table: "transcriptions", Columns: []string{"user_id"}, Unique: false}, + "idx_transcriptions_status": {Table: "transcriptions", Columns: []string{"status"}, Unique: false}, + "idx_transcriptions_source_file_hash": {Table: "transcriptions", Columns: []string{"source_file_hash"}, Unique: false}, + "idx_transcriptions_latest_execution_id": {Table: "transcriptions", Columns: []string{"latest_execution_id"}, Unique: false}, + "idx_transcriptions_deleted_at": {Table: "transcriptions", Columns: []string{"deleted_at"}, Unique: false}, + "idx_transcriptions_queue_claim": {Table: "transcriptions", Columns: []string{"status", "queued_at"}, Unique: false}, + "idx_transcriptions_claim_expires_at": {Table: "transcriptions", Columns: []string{"claim_expires_at"}, Unique: false}, + "idx_transcription_executions_transcription_job_id": {Table: "transcription_executions", Columns: []string{"transcription_id"}, Unique: false}, + "idx_transcription_executions_user_id": {Table: "transcription_executions", Columns: []string{"user_id"}, Unique: false}, + "idx_transcription_executions_status": {Table: "transcription_executions", Columns: []string{"status"}, Unique: false}, + "idx_transcription_executions_profile_id": {Table: "transcription_executions", Columns: []string{"profile_id"}, Unique: false}, + "idx_speaker_mappings_user_id": {Table: "speaker_mappings", Columns: []string{"user_id"}, Unique: false}, + "idx_speaker_mappings_transcription_job_id": {Table: "speaker_mappings", Columns: []string{"transcription_id"}, Unique: false}, + 
"idx_summary_templates_user_id": {Table: "summary_templates", Columns: []string{"user_id"}, Unique: false}, + "idx_summary_templates_is_default": {Table: "summary_templates", Columns: []string{"is_default"}, Unique: false}, + "idx_summary_templates_deleted_at": {Table: "summary_templates", Columns: []string{"deleted_at"}, Unique: false}, + "idx_summary_templates_user_default_unique": {Table: "summary_templates", Columns: []string{"user_id"}, Unique: true, Partial: true, WherePredicate: "is_default=1"}, + "idx_summaries_transcription_id": {Table: "summaries", Columns: []string{"transcription_id"}, Unique: false}, + "idx_summaries_user_id": {Table: "summaries", Columns: []string{"user_id"}, Unique: false}, + "idx_summaries_template_id": {Table: "summaries", Columns: []string{"template_id"}, Unique: false}, + "idx_summaries_status_created_at": {Table: "summaries", Columns: []string{"status", "created_at"}, Unique: false}, + "idx_summary_widgets_user_id": {Table: "summary_widgets", Columns: []string{"user_id"}, Unique: false}, + "idx_summary_widgets_enabled": {Table: "summary_widgets", Columns: []string{"enabled"}, Unique: false}, + "idx_summary_widgets_deleted_at": {Table: "summary_widgets", Columns: []string{"deleted_at"}, Unique: false}, + "idx_summary_widgets_user_name_active_unique": {Table: "summary_widgets", Columns: []string{"user_id", "name"}, Unique: true, Partial: true, WherePredicate: "deleted_at IS NULL"}, + "idx_summary_widgets_user_enabled": {Table: "summary_widgets", Columns: []string{"user_id", "enabled"}, Unique: false}, + "idx_summary_widget_runs_summary_id": {Table: "summary_widget_runs", Columns: []string{"summary_id"}, Unique: false}, + "idx_summary_widget_runs_transcription_id": {Table: "summary_widget_runs", Columns: []string{"transcription_id"}, Unique: false}, + "idx_summary_widget_runs_widget_id": {Table: "summary_widget_runs", Columns: []string{"widget_id"}, Unique: false}, + "idx_summary_widget_runs_user_id": {Table: "summary_widget_runs", 
Columns: []string{"user_id"}, Unique: false}, + "idx_summary_widget_runs_status_created_at": {Table: "summary_widget_runs", Columns: []string{"status", "created_at"}, Unique: false}, + "idx_summary_widget_runs_summary_created_at": {Table: "summary_widget_runs", Columns: []string{"summary_id", "created_at"}, Unique: false}, + "idx_transcript_annotations_user_id": {Table: "transcript_annotations", Columns: []string{"user_id"}, Unique: false}, + "idx_transcript_annotations_transcription_id": {Table: "transcript_annotations", Columns: []string{"transcription_id"}, Unique: false}, + "idx_transcript_annotations_kind": {Table: "transcript_annotations", Columns: []string{"kind"}, Unique: false}, + "idx_transcript_annotations_deleted_at": {Table: "transcript_annotations", Columns: []string{"deleted_at"}, Unique: false}, + "idx_transcript_annotations_user_transcription_created_at": {Table: "transcript_annotations", Columns: []string{"user_id", "transcription_id", "created_at"}, Unique: false}, + "idx_transcript_annotations_user_kind_updated_at": {Table: "transcript_annotations", Columns: []string{"user_id", "kind", "updated_at"}, Unique: false}, + "idx_transcript_annotations_transcription_time": {Table: "transcript_annotations", Columns: []string{"transcription_id", "anchor_start_ms", "anchor_end_ms"}, Unique: false}, + "idx_chat_sessions_user_id": {Table: "chat_sessions", Columns: []string{"user_id"}, Unique: false}, + "idx_chat_sessions_transcription_id": {Table: "chat_sessions", Columns: []string{"transcription_id"}, Unique: false}, + "idx_chat_messages_user_id": {Table: "chat_messages", Columns: []string{"user_id"}, Unique: false}, + "idx_chat_messages_chat_session_id": {Table: "chat_messages", Columns: []string{"chat_session_id"}, Unique: false}, + "idx_llm_profiles_user_id": {Table: "llm_profiles", Columns: []string{"user_id"}, Unique: false}, + "idx_llm_profiles_is_default": {Table: "llm_profiles", Columns: []string{"is_default"}, Unique: false}, + 
"idx_llm_profiles_user_default_unique": {Table: "llm_profiles", Columns: []string{"user_id"}, Unique: true, Partial: true, WherePredicate: "is_default=1"}, } func duplicateIndexName(errMsg string) string { diff --git a/internal/database/steps.go b/internal/database/steps.go index a7f2689b..e967de1d 100644 --- a/internal/database/steps.go +++ b/internal/database/steps.go @@ -59,9 +59,6 @@ func backfillCompatibilityColumns(tx *gorm.DB) error { if err := backfillSummaryTemplates(tx); err != nil { return err } - if err := backfillNotes(tx); err != nil { - return err - } if err := backfillLLMConfigs(tx); err != nil { return err } @@ -235,31 +232,6 @@ func backfillSummaryTemplates(tx *gorm.DB) error { return nil } -func backfillNotes(tx *gorm.DB) error { - var rows []models.Note - if err := tx.Find(&rows).Error; err != nil { - return err - } - for _, row := range rows { - if err := row.BeforeSave(tx); err != nil { - return err - } - updates := map[string]any{ - "user_id": row.UserID, - "transcription_id": row.TranscriptionID, - "content": row.Content, - "start_ms": row.StartMS, - "end_ms": row.EndMS, - "metadata_json": row.MetadataJSON, - } - if err := withPreservedUpdatedAt(tx.Model(&models.Note{}).Where("id = ?", row.ID), updates, row.UpdatedAt). 
- Updates(updates).Error; err != nil { - return err - } - } - return nil -} - func backfillLLMConfigs(tx *gorm.DB) error { var rows []models.LLMConfig if err := tx.Find(&rows).Error; err != nil { diff --git a/internal/models/annotation.go b/internal/models/annotation.go new file mode 100644 index 00000000..24c249c5 --- /dev/null +++ b/internal/models/annotation.go @@ -0,0 +1,80 @@ +package models + +import ( + "fmt" + "time" + + "github.com/google/uuid" + "gorm.io/gorm" +) + +type AnnotationKind string + +const ( + AnnotationKindHighlight AnnotationKind = "highlight" + AnnotationKindNote AnnotationKind = "note" +) + +const AnnotationStatusActive = "active" + +// TranscriptAnnotation stores a user-owned highlight or note anchored to a transcript range. +type TranscriptAnnotation struct { + ID string `json:"id" gorm:"primaryKey;type:varchar(36)"` + UserID uint `json:"user_id" gorm:"not null;index;default:1"` + TranscriptionID string `json:"transcription_id" gorm:"type:varchar(36);not null;index"` + Kind AnnotationKind `json:"kind" gorm:"type:varchar(20);not null;index"` + Content *string `json:"content,omitempty" gorm:"type:text"` + Color *string `json:"color,omitempty" gorm:"type:varchar(32)"` + Quote string `json:"quote" gorm:"type:text;not null"` + AnchorStartMS int64 `json:"anchor_start_ms" gorm:"type:integer;not null"` + AnchorEndMS int64 `json:"anchor_end_ms" gorm:"type:integer;not null"` + AnchorStartWord *int `json:"anchor_start_word,omitempty" gorm:"type:integer"` + AnchorEndWord *int `json:"anchor_end_word,omitempty" gorm:"type:integer"` + AnchorStartChar *int `json:"anchor_start_char,omitempty" gorm:"type:integer"` + AnchorEndChar *int `json:"anchor_end_char,omitempty" gorm:"type:integer"` + AnchorTextHash *string `json:"anchor_text_hash,omitempty" gorm:"type:varchar(128)"` + Status string `json:"status" gorm:"type:varchar(20);not null;default:'active'"` + MetadataJSON string `json:"-" gorm:"column:metadata_json;type:json;not null;default:'{}'"` + 
CreatedAt time.Time `json:"created_at" gorm:"autoCreateTime"` + UpdatedAt time.Time `json:"updated_at" gorm:"autoUpdateTime"` + DeletedAt gorm.DeletedAt `json:"deleted_at,omitempty" gorm:"index" swaggertype:"string"` + + Transcription TranscriptionJob `json:"transcription,omitempty" gorm:"foreignKey:TranscriptionID;references:ID;constraint:OnDelete:CASCADE"` +} + +func (TranscriptAnnotation) TableName() string { return "transcript_annotations" } + +func (a *TranscriptAnnotation) BeforeCreate(tx *gorm.DB) error { + if a.ID == "" { + a.ID = uuid.New().String() + } + return a.BeforeSave(tx) +} + +func (a *TranscriptAnnotation) BeforeSave(tx *gorm.DB) error { + if a.UserID == 0 { + a.UserID = primaryUserID + } + if a.Status == "" { + a.Status = AnnotationStatusActive + } + if a.MetadataJSON == "" { + a.MetadataJSON = "{}" + } + if !validAnnotationKind(a.Kind) { + return fmt.Errorf("transcript annotation kind is invalid") + } + if a.AnchorEndMS < a.AnchorStartMS { + return fmt.Errorf("transcript annotation anchor end must be greater than or equal to start") + } + return nil +} + +func validAnnotationKind(kind AnnotationKind) bool { + switch kind { + case AnnotationKindHighlight, AnnotationKindNote: + return true + default: + return false + } +} diff --git a/internal/models/note.go b/internal/models/note.go deleted file mode 100644 index e4fb09a0..00000000 --- a/internal/models/note.go +++ /dev/null @@ -1,77 +0,0 @@ -package models - -import ( - "time" - - "gorm.io/gorm" -) - -// Note represents an annotation attached to a transcription. 
-type Note struct { - ID string `json:"id" gorm:"primaryKey;type:varchar(36)"` - UserID uint `json:"user_id" gorm:"not null;index;default:1"` - TranscriptionID string `json:"transcription_id" gorm:"type:varchar(36);not null;index"` - Content string `json:"content" gorm:"type:text;not null"` - StartMS int64 `json:"start_ms" gorm:"column:start_ms;type:integer;not null;default:0"` - EndMS int64 `json:"end_ms" gorm:"column:end_ms;type:integer;not null;default:0"` - MetadataJSON string `json:"-" gorm:"column:metadata_json;type:json"` - CreatedAt time.Time `json:"created_at" gorm:"autoCreateTime"` - UpdatedAt time.Time `json:"updated_at" gorm:"autoUpdateTime"` - DeletedAt gorm.DeletedAt `json:"deleted_at,omitempty" gorm:"index" swaggertype:"string"` - - StartWordIndex int `json:"start_word_index" gorm:"-"` - EndWordIndex int `json:"end_word_index" gorm:"-"` - StartTime float64 `json:"start_time" gorm:"-"` - EndTime float64 `json:"end_time" gorm:"-"` - Quote string `json:"quote" gorm:"-"` - - Transcription TranscriptionJob `json:"transcription,omitempty" gorm:"foreignKey:TranscriptionID;references:ID;constraint:OnDelete:CASCADE"` -} - -func (Note) TableName() string { return "notes" } - -func (n *Note) BeforeCreate(tx *gorm.DB) error { - if n.UserID == 0 { - n.UserID = primaryUserID - } - return n.syncColumnsFromCompat() -} - -func (n *Note) BeforeSave(tx *gorm.DB) error { - return n.syncColumnsFromCompat() -} - -func (n *Note) AfterFind(tx *gorm.DB) error { - n.StartTime = float64(n.StartMS) / 1000 - n.EndTime = float64(n.EndMS) / 1000 - if n.MetadataJSON == "" { - return nil - } - var metadata struct { - StartWordIndex int `json:"start_word_index,omitempty"` - EndWordIndex int `json:"end_word_index,omitempty"` - Quote string `json:"quote,omitempty"` - } - if err := unmarshalJSONColumn("notes.metadata_json", n.MetadataJSON, &metadata); err != nil { - return err - } - n.StartWordIndex = metadata.StartWordIndex - n.EndWordIndex = metadata.EndWordIndex - n.Quote = 
metadata.Quote - return nil -} - -func (n *Note) syncColumnsFromCompat() error { - n.StartMS = int64(n.StartTime * 1000) - n.EndMS = int64(n.EndTime * 1000) - metadataJSON, err := marshalJSONColumn("notes.metadata_json", map[string]any{ - "start_word_index": n.StartWordIndex, - "end_word_index": n.EndWordIndex, - "quote": n.Quote, - }) - if err != nil { - return err - } - n.MetadataJSON = metadataJSON - return nil -} diff --git a/internal/repository/implementations.go b/internal/repository/implementations.go index 2caf5548..735fff9e 100644 --- a/internal/repository/implementations.go +++ b/internal/repository/implementations.go @@ -1209,36 +1209,6 @@ func (r *chatRepository) GetLastMessagesBySessionIDs(ctx context.Context, sessio return result, nil } -// NoteRepository handles notes -type NoteRepository interface { - Repository[models.Note] - ListByJob(ctx context.Context, jobID string) ([]models.Note, error) - DeleteByTranscriptionID(ctx context.Context, transcriptionID string) error -} - -type noteRepository struct { - *BaseRepository[models.Note] -} - -func NewNoteRepository(db *gorm.DB) NoteRepository { - return &noteRepository{ - BaseRepository: NewBaseRepository[models.Note](db), - } -} - -func (r *noteRepository) ListByJob(ctx context.Context, jobID string) ([]models.Note, error) { - var notes []models.Note - err := r.db.WithContext(ctx).Where("transcription_id = ?", jobID).Order("created_at DESC").Find(&notes).Error - if err != nil { - return nil, err - } - return notes, nil -} - -func (r *noteRepository) DeleteByTranscriptionID(ctx context.Context, transcriptionID string) error { - return r.db.WithContext(ctx).Where("transcription_id = ?", transcriptionID).Delete(&models.Note{}).Error -} - // SpeakerMappingRepository handles speaker mappings type SpeakerMappingRepository interface { Repository[models.SpeakerMapping]