import { Agent, openai } from "@radaros/core";
import {
EvalSuite, contains, regexMatch, llmJudge, custom,
semanticSimilarity, ConsoleReporter, JsonReporter,
} from "@radaros/eval";
// System prompt for the support persona under evaluation.
const SUPPORT_BOT_INSTRUCTIONS =
  "You are a helpful customer support agent for an e-commerce platform.";

// Agent under test: a lightweight e-commerce support bot on gpt-4o-mini.
const agent = new Agent({
  name: "support-bot",
  model: openai("gpt-4o-mini"),
  instructions: SUPPORT_BOT_INSTRUCTIONS,
});
// Evaluation suite: five representative support conversations, each scored
// by three complementary scorers, gated at `threshold` (presumably the
// minimum aggregate score for a case to pass — confirm against framework docs).
const suite = new EvalSuite({
  name: "Support Agent Quality",
  agent,
  cases: [
    {
      name: "Return policy",
      input: "What is your return policy?",
      expected: "30-day return policy",
    },
    {
      name: "Order tracking",
      input: "Where is my order #12345?",
      expected: "tracking information",
    },
    {
      name: "Refund timeline",
      input: "How long do refunds take?",
      expected: "5-10 business days",
    },
    {
      name: "Greeting",
      input: "Hi there",
      expected: "polite greeting",
    },
    {
      name: "Out of scope",
      input: "What's the weather like?",
      expected: "redirect to relevant topic",
    },
  ],
  scorers: [
    // Literal-match scorer (configuration left at defaults).
    contains(),
    // Model-graded scorer using a stronger judge model.
    llmJudge({
      model: openai("gpt-4o"),
      criteria: ["relevance", "helpfulness", "professionalism"],
    }),
    // Length guard: responses must stay within 150 words.
    custom("not-too-long", async (input, output) => {
      // filter(Boolean) drops empty tokens: "".split(/\s+/) yields [""]
      // (length 1), and leading/trailing whitespace produces empty strings
      // at the ends of the array — without the filter the count is inflated
      // and an empty reply is counted as one word.
      const words = output.text.split(/\s+/).filter(Boolean).length;
      const pass = words <= 150;
      return { score: pass ? 1 : 0, pass, reason: `${words} words (max 150)` };
    }),
  ],
  threshold: 0.7,
  concurrency: 3, // up to 3 cases evaluated in parallel
});
// Run the suite with two reporters: live console output plus a JSON artifact
// (e.g. for CI to pick up).
const result = await suite.run([
  new ConsoleReporter(),
  new JsonReporter({ outputPath: "./eval-results.json" }),
]);

console.log(`\nResults: ${result.passed}/${result.total} passed (${(result.passRate * 100).toFixed(0)}%)`);

if (!result.allPassed) {
  console.log("Failed cases:", result.failures.map(f => f.name).join(", "));
  // Set exitCode rather than calling process.exit(1): exit() terminates
  // immediately and can truncate asynchronous stdout writes, losing the
  // failure summary printed just above. The process still exits non-zero
  // once the event loop drains.
  process.exitCode = 1;
}