diff --git a/apps/server/evals/ai-chat-basic.eval.ts b/apps/server/evals/ai-chat-basic.eval.ts index 3f9f17f36..140cfa926 100644 --- a/apps/server/evals/ai-chat-basic.eval.ts +++ b/apps/server/evals/ai-chat-basic.eval.ts @@ -44,137 +44,6 @@ const safeStreamText = async (config: Parameters[0]) => { type TestCase = { input: string; expected: string }; -// Comprehensive static test cases for reliable, consistent testing -const STATIC_TEST_CASES: TestCase[] = [ - // Basic functionality tests - { - input: "Hello, what can you help me with?", - expected: "greeting" - }, - { - input: "Show me my unread emails", - expected: "getThread" - }, - { - input: "Find emails from john@example.com", - expected: "from:" - }, - { - input: "Create a label called 'Important'", - expected: "createLabel" - }, - { - input: "Archive all emails older than 30 days", - expected: "bulkArchive" - }, - { - input: "Write a thank you email to sarah@company.com", - expected: "composeEmail" - }, - { - input: "What's the weather like today?", - expected: "webSearch" - }, - { - input: "Summarize my inbox", - expected: "inboxRag" - }, - { - input: "Mark all emails from newsletters as read", - expected: "markThreadsRead" - }, - { - input: "Delete all spam emails", - expected: "bulkDelete" - }, - { - input: "Find emails with attachments from last week", - expected: "has:attachment" - }, - { - input: "Organize my emails by priority", - expected: "modifyLabels" - }, - { - input: "What emails do I have scheduled for tomorrow?", - expected: "getThread" - }, - { - input: "Send a follow-up email to the meeting request", - expected: "composeEmail" - }, - { - input: "Find all receipts from Amazon", - expected: "from:amazon" - }, - // Additional comprehensive test cases - { - input: "Show me emails with large attachments (>5MB)", - expected: "larger:5M" - }, - { - input: "Find emails sent between Monday and Friday last week", - expected: "after:2025/08/18" - }, - { - input: "Create a nested label structure: Work > Projects > Beta", - expected: "createLabel" - }, - { - input: "Rename the 'Old' label to 'Archived'", - expected: "modifyLabels" - }, - { - input: "Apply 'Urgent' label to all emails from the CEO", - expected: "modifyLabels" - }, - { - input: "Forward all emails from 'Support' to my manager", - expected: "composeEmail" - }, - { - input: "Set up automatic archiving for emails older than 90 days", - expected: "bulkArchive" - }, - { - input: "Find emails that are both important and starred", - expected: "is:important" - }, - { - input: "Create email templates for common responses", - expected: "composeEmail" - }, - { - input: "Analyze my email patterns and suggest improvements", - expected: "inboxRag" - }, - { - input: "Set up email encryption for sensitive communications", - expected: "composeEmail" - }, - { - input: "Create a backup of all my emails", - expected: "bulkArchive" - }, - { - input: "Find emails with multiple recipients (more than 10 people)", - expected: "to:" - }, - { - input: "Set up email forwarding rules for specific senders", - expected: "modifyLabels" - }, - { - input: "Create a knowledge base from FAQ emails", - expected: "inboxRag" - } -]; - -// Helper function to convert static test cases to the format expected by evalite -const makeStaticTestCaseProvider = (testCases: TestCase[]) => { - return async () => testCases; -}; - const makeAiChatTestCaseBuilder = (topic: string): (() => Promise) => { return async () => { const { object } = await generateObject({ diff --git a/apps/server/evals/ai-tool-usage.eval.ts b/apps/server/evals/ai-tool-usage.eval.ts index dfcd67146..e69719910 100644 --- a/apps/server/evals/ai-tool-usage.eval.ts +++ b/apps/server/evals/ai-tool-usage.eval.ts @@ -2,10 +2,8 @@ import { evalite } from "evalite"; import { openai } from "@ai-sdk/openai"; import { streamText } from "ai"; import { traceAISDKModel } from "evalite/ai-sdk"; -import { Factuality, EmbeddingSimilarity, ExactMatch } from "autoevals"; +import { Factuality, EmbeddingSimilarity } from "autoevals"; import { AiChatPrompt, GmailSearchAssistantSystemPrompt, StyledEmailAssistantSystemPrompt } from "../src/lib/prompts"; -import { generateObject } from "ai"; -import { z } from "zod"; // base model (untraced) for internal helpers to avoid trace errors const baseModel = openai("gpt-4o-mini");