mirror of
https://github.com/cloudreve/cloudreve.git
synced 2026-03-03 00:47:02 +00:00
85 lines
2.3 KiB
Go
85 lines
2.3 KiB
Go
package indexer
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
)
|
|
|
|
func TestChunkText_Empty(t *testing.T) {
|
|
assert.Nil(t, ChunkText("", 500))
|
|
assert.Nil(t, ChunkText(" ", 500))
|
|
}
|
|
|
|
func TestChunkText_SingleSmallParagraph(t *testing.T) {
|
|
chunks := ChunkText("Hello world", 500)
|
|
assert.Equal(t, []string{"Hello world"}, chunks)
|
|
}
|
|
|
|
func TestChunkText_MultipleParagraphsCombined(t *testing.T) {
|
|
text := "First paragraph.\n\nSecond paragraph.\n\nThird paragraph."
|
|
chunks := ChunkText(text, 500)
|
|
assert.Len(t, chunks, 1)
|
|
assert.Contains(t, chunks[0], "First paragraph.")
|
|
assert.Contains(t, chunks[0], "Third paragraph.")
|
|
}
|
|
|
|
func TestChunkText_SplitOnParagraphBoundary(t *testing.T) {
|
|
// Create two paragraphs each ~300 bytes
|
|
para := strings.Repeat("abcde ", 50) // 300 bytes
|
|
para = strings.TrimSpace(para)
|
|
text := para + "\n\n" + para
|
|
|
|
chunks := ChunkText(text, 500)
|
|
assert.Len(t, chunks, 2)
|
|
}
|
|
|
|
func TestChunkText_LargeParagraphSplit(t *testing.T) {
|
|
// Create a single paragraph of 1200 bytes (240 words * 5 bytes each)
|
|
words := make([]string, 240)
|
|
for i := range words {
|
|
words[i] = "word" // 4 bytes + 1 space = 5 per word
|
|
}
|
|
text := strings.Join(words, " ") // 240*4 + 239 = 1199 bytes
|
|
|
|
chunks := ChunkText(text, 500)
|
|
assert.Len(t, chunks, 3)
|
|
assert.LessOrEqual(t, len(chunks[0]), 500)
|
|
assert.LessOrEqual(t, len(chunks[1]), 500)
|
|
assert.Greater(t, len(chunks[2]), 0)
|
|
}
|
|
|
|
func TestChunkText_DefaultMaxBytes(t *testing.T) {
|
|
chunks := ChunkText("hello", 0)
|
|
assert.Equal(t, []string{"hello"}, chunks)
|
|
}
|
|
|
|
func TestChunkText_EmptyParagraphsIgnored(t *testing.T) {
|
|
text := "First.\n\n\n\n\n\nSecond."
|
|
chunks := ChunkText(text, 500)
|
|
assert.Len(t, chunks, 1)
|
|
assert.Equal(t, "First.\n\nSecond.", chunks[0])
|
|
}
|
|
|
|
func TestChunkText_ParagraphKeptWhole(t *testing.T) {
|
|
// A paragraph under the limit should not be split
|
|
para := strings.Repeat("x", 400)
|
|
chunks := ChunkText(para, 500)
|
|
assert.Len(t, chunks, 1)
|
|
assert.Equal(t, para, chunks[0])
|
|
}
|
|
|
|
func TestChunkText_JoinerAccountedInLimit(t *testing.T) {
|
|
// Two paragraphs that fit individually but exceed the limit when joined with "\n\n"
|
|
p1 := strings.Repeat("a", 250)
|
|
p2 := strings.Repeat("b", 250)
|
|
text := p1 + "\n\n" + p2
|
|
|
|
chunks := ChunkText(text, 500)
|
|
// 250 + 2 + 250 = 502 > 500, so they should be split
|
|
assert.Len(t, chunks, 2)
|
|
assert.Equal(t, p1, chunks[0])
|
|
assert.Equal(t, p2, chunks[1])
|
|
}
|