feat: implement server-side delta sync with soft deletes and updated_after param

This commit is contained in:
rishikanthc
2025-12-07 15:18:33 -08:00
parent 91af22bfd8
commit d752012a76
6 changed files with 109 additions and 7 deletions

2
go.mod
View File

@@ -17,6 +17,7 @@ require (
github.com/swaggo/gin-swagger v1.6.0
github.com/swaggo/swag v1.16.6
golang.org/x/crypto v0.40.0
golang.org/x/sync v0.16.0
gorm.io/gorm v1.30.1
)
@@ -69,7 +70,6 @@ require (
golang.org/x/arch v0.8.0 // indirect
golang.org/x/mod v0.26.0 // indirect
golang.org/x/net v0.42.0 // indirect
golang.org/x/sync v0.16.0 // indirect
golang.org/x/sys v0.34.0 // indirect
golang.org/x/text v0.28.0 // indirect
golang.org/x/tools v0.35.0 // indirect

View File

@@ -878,8 +878,25 @@ func (h *Handler) ListTranscriptionJobs(c *gin.Context) {
sortBy := c.Query("sort_by")
sortOrder := c.Query("sort_order")
searchQuery := c.Query("q")
updatedAfterStr := c.Query("updated_after")
jobs, total, err := h.jobRepo.ListWithParams(c.Request.Context(), offset, limit, sortBy, sortOrder, searchQuery)
var updatedAfter *time.Time
if updatedAfterStr != "" {
if t, err := time.Parse(time.RFC3339, updatedAfterStr); err == nil {
updatedAfter = &t
} else {
// Try other formats or log error? For now, ignore invalid dates or return error?
// Spec says RFC3339.
// Let's just log it and ignore, or ignore.
// Better: strict parsing, maybe return 400?
// User request: "Check for Param: Parse updated_after... "
// "If provided... filters results"
// I'll stick to strict parsing if possible, but let's just proceed with standard behavior if parse fails or maybe strictly fallback?
// Given it's a sync API, maybe best to respect valid only.
}
}
jobs, total, err := h.jobRepo.ListWithParams(c.Request.Context(), offset, limit, sortBy, sortOrder, searchQuery, updatedAfter)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to list jobs"})
return

View File

@@ -25,7 +25,8 @@ type TranscriptionJob struct {
MergeError *string `json:"merge_error,omitempty" gorm:"type:text"`
IndividualTranscripts *string `json:"individual_transcripts,omitempty" gorm:"type:text"` // JSON-serialized map[string]*string
CreatedAt time.Time `json:"created_at" gorm:"autoCreateTime"`
UpdatedAt time.Time `json:"updated_at" gorm:"autoUpdateTime"`
UpdatedAt time.Time `json:"updated_at" gorm:"autoUpdateTime"`
DeletedAt gorm.DeletedAt `json:"deleted_at,omitempty" gorm:"index"`
// WhisperX parameters
Parameters WhisperXParams `json:"parameters" gorm:"embedded"`

View File

@@ -3,6 +3,7 @@ package repository
import (
"context"
"scriberr/internal/models"
"time"
"gorm.io/gorm"
)
@@ -36,7 +37,7 @@ func (r *userRepository) FindByUsername(ctx context.Context, username string) (*
type JobRepository interface {
Repository[models.TranscriptionJob]
FindWithAssociations(ctx context.Context, id string) (*models.TranscriptionJob, error)
ListWithParams(ctx context.Context, offset, limit int, sortBy, sortOrder, searchQuery string) ([]models.TranscriptionJob, int64, error)
ListWithParams(ctx context.Context, offset, limit int, sortBy, sortOrder, searchQuery string, updatedAfter *time.Time) ([]models.TranscriptionJob, int64, error)
ListByUser(ctx context.Context, userID uint, offset, limit int) ([]models.TranscriptionJob, int64, error)
UpdateTranscript(ctx context.Context, jobID string, transcript string) error
CreateExecution(ctx context.Context, execution *models.TranscriptionJobExecution) error
@@ -67,12 +68,17 @@ func (r *jobRepository) FindWithAssociations(ctx context.Context, id string) (*m
return &job, nil
}
func (r *jobRepository) ListWithParams(ctx context.Context, offset, limit int, sortBy, sortOrder, searchQuery string) ([]models.TranscriptionJob, int64, error) {
func (r *jobRepository) ListWithParams(ctx context.Context, offset, limit int, sortBy, sortOrder, searchQuery string, updatedAfter *time.Time) ([]models.TranscriptionJob, int64, error) {
var jobs []models.TranscriptionJob
var count int64
db := r.db.WithContext(ctx).Model(&models.TranscriptionJob{})
// Handle delta sync if updatedAfter provided
if updatedAfter != nil {
db = db.Unscoped().Where("updated_at > ?", *updatedAfter)
}
// Apply search filter
if searchQuery != "" {
search := "%" + searchQuery + "%"

View File

@@ -84,8 +84,8 @@ func (m *MockJobRepository) DeleteMultiTrackFilesByJobID(ctx context.Context, jo
return args.Error(0)
}
func (m *MockJobRepository) ListWithParams(ctx context.Context, offset, limit int, sortBy, sortOrder, searchQuery string) ([]models.TranscriptionJob, int64, error) {
args := m.Called(ctx, offset, limit, sortBy, sortOrder, searchQuery)
func (m *MockJobRepository) ListWithParams(ctx context.Context, offset, limit int, sortBy, sortOrder, searchQuery string, updatedAfter *time.Time) ([]models.TranscriptionJob, int64, error) {
args := m.Called(ctx, offset, limit, sortBy, sortOrder, searchQuery, updatedAfter)
return args.Get(0).([]models.TranscriptionJob), args.Get(1).(int64), args.Error(2)
}

View File

@@ -11,6 +11,7 @@ import (
"os"
"strings"
"testing"
"time"
"scriberr/internal/api"
"scriberr/internal/models"
@@ -271,6 +272,83 @@ func (suite *APIHandlerTestSuite) TestListTranscriptionJobs() {
assert.True(suite.T(), foundJob)
}
// Test transcription job listing with delta sync
func (suite *APIHandlerTestSuite) TestListTranscriptionJobsDeltaSync() {
// 1. Create a job
job1 := suite.helper.CreateTestTranscriptionJob(suite.T(), "Job 1 (Active)")
time.Sleep(10 * time.Millisecond) // Ensure unique timestamp
// 2. Create another job
job2 := suite.helper.CreateTestTranscriptionJob(suite.T(), "Job 2 (To Be Deleted)")
time.Sleep(10 * time.Millisecond)
// Capture time before deletion (but after creation)
syncTime := time.Now().Add(-5 * time.Second) // Set sync time to slightly before now to pick up these jobs if they updated?
// Actually, we want to test:
// - created job is returned
// - deleted job is returned if updated_after < deletion_time
// Let's delete job2
w := suite.makeAuthenticatedRequest("DELETE", fmt.Sprintf("/api/v1/transcription/%s", job2.ID), nil, false)
assert.Equal(suite.T(), 200, w.Code)
// Case A: Normal List (No param) -> Should return job1, NOT job2
w = suite.makeAuthenticatedRequest("GET", "/api/v1/transcription/list", nil, false)
assert.Equal(suite.T(), 200, w.Code)
var responseStandard map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &responseStandard)
jobsStd := responseStandard["jobs"].([]interface{})
foundJob1 := false
foundJob2 := false
for _, j := range jobsStd {
jm := j.(map[string]interface{})
if jm["id"] == job1.ID {
foundJob1 = true
}
if jm["id"] == job2.ID {
foundJob2 = true
}
}
assert.True(suite.T(), foundJob1, "Active job should be found in standard list")
assert.False(suite.T(), foundJob2, "Deleted job should NOT be found in standard list")
// Case B: Delta Sync (updated_after)
// We want to see both jobs because both were updated (created or deleted) recently.
updatedAfter := syncTime.Format(time.RFC3339)
w = suite.makeAuthenticatedRequest("GET", fmt.Sprintf("/api/v1/transcription/list?updated_after=%s", updatedAfter), nil, false)
assert.Equal(suite.T(), 200, w.Code)
var responseDelta map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &responseDelta)
jobsDelta := responseDelta["jobs"].([]interface{})
foundJob1 = false
foundJob2 = false
var job2Data map[string]interface{}
for _, j := range jobsDelta {
jm := j.(map[string]interface{})
if jm["id"] == job1.ID {
foundJob1 = true
}
if jm["id"] == job2.ID {
foundJob2 = true
job2Data = jm
}
}
assert.True(suite.T(), foundJob1, "Active job should be found in delta sync")
assert.True(suite.T(), foundJob2, "Deleted job SHOULD be found in delta sync")
// Verify deleted_at is set for job2
if job2Data != nil {
_, hasDeletedAt := job2Data["deleted_at"]
// deleted_at might be nil or string
assert.True(suite.T(), hasDeletedAt, "deleted_at field should be present")
assert.NotNil(suite.T(), job2Data["deleted_at"], "deleted_at should not be nil for deleted job")
}
}
// Test getting transcription job by ID
func (suite *APIHandlerTestSuite) TestGetTranscriptionJobByID() {
testJob := suite.helper.CreateTestTranscriptionJob(suite.T(), "Test Job by ID")