/**
 * Prints the approval-screen summary of a negotiated brief to stdout.
 *
 * Sections, in order: header (task, agent, assertion count, complexity,
 * negotiation iterations, evaluator score), an optional best-effort notice,
 * the success-criteria table, the quality rubric (each field clipped to 80
 * characters) and then — only when non-empty — ambiguities, evaluator
 * warnings and remaining critical issues.
 *
 * The table borders are derived from the rendered row width, so the box stays
 * closed for every category name and id length.
 *
 * @param brief      Brief whose draft is rendered.
 * @param verdict    Evaluator verdict; supplies the score and the issue lists.
 * @param bestEffort When true, a notice warns that validation did not fully succeed.
 */
export function displayBrief(brief: Brief, verdict: EvaluatorVerdict, bestEffort: boolean): void {
  const draft = brief.draft;
  const warnings = verdict.issues.filter(i => i.level === "warning");
  const criticals = verdict.issues.filter(i => i.level === "critical");
  const clip = (s: string, max: number) => `${s.slice(0, max)}${s.length > max ? "…" : ""}`;

  console.log("");
  console.log(bold(`◆ Brief ready for: "${brief.task}"`));
  console.log(dim(` Agent: ${brief.agent} | Assertions: ${draft.assertions.length} | Complexity: ${draft.estimated_complexity}`));
  console.log(dim(` Negotiation: ${brief.negotiation_iterations} iteration${brief.negotiation_iterations !== 1 ? "s" : ""} | Evaluator score: ${verdict.score}/100`));

  if (bestEffort) {
    console.log(yellow("\n ⚠ Brief could not be fully validated after 3 iterations. Review warnings before approving."));
  }

  // Success criteria table.
  // Column widths: assertion text is clamped to 30..70; id and category
  // columns grow to fit the widest value actually present.
  const longestAssertion = draft.assertions.reduce((m, a) => Math.max(m, a.assertion.length), 0);
  const colWidth = Math.min(Math.max(longestAssertion, 30), 70);
  const idWidth = draft.assertions.reduce((m, a) => Math.max(m, a.id.toString().length), 2);
  const catWidth = draft.assertions.reduce((m, a) => Math.max(m, a.category.length), 9);
  // Visible characters between the two vertical bars of a row:
  // " " + id + ". " + "[" + category + "] " + assertion + " "
  const innerWidth = 1 + idWidth + 2 + 1 + catWidth + 2 + colWidth + 1;
  const title = "─ Success Criteria ";

  console.log("");
  console.log(dim(" ┌" + title + "─".repeat(Math.max(0, innerWidth - title.length)) + "┐"));
  for (const a of draft.assertions) {
    const cat = colorCategory(a.category);
    // padEnd counts invisible ANSI bytes too, so widen the target by however
    // many characters colouring added to the plain category name.
    const paddedCat = cat.padEnd(catWidth + cat.length - a.category.length);
    const assertion = a.assertion.length > colWidth
      ? a.assertion.slice(0, colWidth - 1) + "…"
      : a.assertion.padEnd(colWidth);
    console.log(` │ ${a.id.toString().padStart(idWidth)}. [${paddedCat}] ${assertion} │`);
  }
  console.log(dim(" └" + "─".repeat(innerWidth) + "┘"));

  // Quality rubric
  console.log("");
  console.log(dim(" Quality Rubric:"));
  console.log(dim(` Craft: ${clip(draft.rubric.craft, 80)}`));
  console.log(dim(` Originality: ${clip(draft.rubric.originality, 80)}`));
  console.log(dim(` Tone: ${clip(draft.rubric.tone, 80)}`));
  console.log(dim(` Completeness: ${clip(draft.rubric.completeness, 80)}`));

  // Ambiguities
  if (draft.ambiguities.length > 0) {
    console.log("");
    console.log(yellow(" ⚠ Ambiguities (auto-resolved):"));
    for (const a of draft.ambiguities) {
      console.log(yellow(` → ${a}`));
    }
  }

  // Evaluator warnings
  if (warnings.length > 0) {
    console.log("");
    console.log(yellow(" ⚠ Evaluator warnings (non-blocking):"));
    for (const w of warnings) {
      console.log(yellow(` → ${w.issue}`));
    }
  }

  // Critical issues (only expected in best-effort mode; an approved brief has none)
  if (criticals.length > 0) {
    console.log("");
    console.log(red(" ✗ Critical issues remaining:"));
    for (const c of criticals) {
      console.log(red(` → ${c.issue}`));
    }
  }

  console.log("");
}
/**
 * Prints the full, untruncated contents of one brief to stdout.
 *
 * Output order: identity header (id, task, agent, status, created, and the
 * approval timestamp when present), every assertion with its verification
 * step, the four rubric fields, then the applied constraints and the
 * ambiguities — each of those two sections only when non-empty.
 *
 * @param brief Brief to render.
 */
export function displayBriefDetail(brief: Brief): void {
  const { draft } = brief;

  const out: string[] = [
    bold(`\nBrief: ${brief.id}`),
    dim(`Task: ${brief.task}`),
    dim(`Agent: ${brief.agent}`),
    dim(`Status: ${brief.status}`),
    dim(`Created: ${brief.created_at}`),
  ];
  if (brief.approved_at) {
    out.push(dim(`Approved: ${brief.approved_at}`));
  }

  out.push("", bold("Success Criteria:"));
  for (const item of draft.assertions) {
    out.push(
      ` ${item.id}. [${colorCategory(item.category)}] ${item.assertion}`,
      dim(` → Verify: ${item.test}`),
    );
  }

  const { craft, originality, tone, completeness } = draft.rubric;
  out.push(
    "",
    bold("Rubric:"),
    ` Craft: ${craft}`,
    ` Originality: ${originality}`,
    ` Tone: ${tone}`,
    ` Completeness: ${completeness}`,
  );

  if (draft.constraints_applied.length > 0) {
    out.push("", bold("Constraints:"));
    for (const constraint of draft.constraints_applied) {
      out.push(` - ${constraint}`);
    }
  }

  if (draft.ambiguities.length > 0) {
    out.push("", yellow("Ambiguities:"));
    for (const note of draft.ambiguities) {
      out.push(yellow(` ⚠ ${note}`));
    }
  }

  out.push("");
  // One console.log call per line, in the order collected above.
  for (const line of out) {
    console.log(line);
  }
}
/**
 * Asks the user what to do with a brief whose source files changed.
 *
 * In a non-interactive session no question is asked and the existing brief is
 * used. Otherwise the prompt repeats until a recognised answer is given:
 * `u`/`use`, `r`/`regenerate`, or `n`/`not now`/`skip` (case-insensitive).
 * If the input stream closes before an answer arrives (for example Ctrl+D),
 * the decision is "skip" so a stale brief is never applied without consent.
 *
 * @returns The chosen decision.
 */
export async function promptStaleDecision(): Promise<StaleDecision> {
  if (!isInteractive()) {
    console.log(dim("[brief] Non-interactive mode — using existing brief despite staleness."));
    return "use";
  }

  const rl = createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  return new Promise<StaleDecision>((resolve) => {
    let settled = false;

    // Marks the prompt as answered before closing, so the "close" handler
    // below can tell a deliberate close from an unexpected end of input.
    const finish = (decision: StaleDecision, message: string): void => {
      settled = true;
      rl.close();
      console.log(message);
      resolve(decision);
    };

    // Without this the promise would stay pending forever on EOF, because the
    // question callback is never invoked once the interface has closed.
    rl.once("close", () => {
      if (settled) return;
      settled = true;
      console.log(dim(" Skipping. Brief not applied."));
      resolve("skip");
    });

    const ask = (): void => {
      console.log(
        " " + bold("[U]") + "se anyway " +
        bold("[R]") + "egenerate brief " +
        bold("[N]") + "ot now",
      );
      rl.question(" → ", (answer) => {
        const choice = answer.trim().toLowerCase();
        if (choice === "u" || choice === "use") {
          finish("use", dim(" Using existing brief."));
        } else if (choice === "r" || choice === "regenerate") {
          finish("regenerate", dim(" Regenerating brief from scratch..."));
        } else if (choice === "n" || choice === "not now" || choice === "skip") {
          finish("skip", dim(" Skipping. Brief not applied."));
        } else {
          console.log(yellow(" Please enter U, R, or N."));
          ask();
        }
      });
    };

    ask();
  });
}
/**
 * Returns the category name wrapped in its colour, or unchanged when the
 * category has no colour registered in CATEGORY_COLORS.
 *
 * @param cat Category name; may be arbitrary text (e.g. from a hand-edited brief).
 * @returns The coloured (or plain) category name.
 */
export function colorCategory(cat: string): string {
  // Own-property check: a bare index lookup would also find inherited members
  // such as "toString", "constructor" or "__proto__" and call (or fail to
  // call) them as if they were colour functions.
  if (!Object.prototype.hasOwnProperty.call(CATEGORY_COLORS, cat)) {
    return cat;
  }
  const paint = CATEGORY_COLORS[cat];
  return typeof paint === "function" ? paint(cat) : cat;
}
/**
 * Converts the YAML text saved from the editor back into a BriefDraft.
 *
 * Parsing is lenient so that the validator, not a TypeError, reports mistakes:
 * missing scalars fall back to defaults, malformed assertion entries (null,
 * scalars, lists) become empty assertions, and ids or turn counts that are not
 * numbers fall back to the entry position and 10 respectively.
 *
 * @param content Full text of the edited file, header comments included.
 * @returns The draft described by the document.
 * @throws Error when the document is empty or its root is not a mapping.
 */
function parseEditYaml(content: string): BriefDraft {
  // The "#" header lines need no pre-processing: the YAML parser treats them as comments.
  const parsed: unknown = yamlParse(content);

  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new Error("YAML parse returned empty or non-object result.");
  }

  // Views any value as a key/value record; non-mappings read as empty.
  const asRecord = (value: unknown): Record<string, unknown> =>
    value !== null && typeof value === "object" && !Array.isArray(value)
      ? (value as Record<string, unknown>)
      : {};
  const text = (value: unknown, fallback = ""): string => String(value ?? fallback);
  const finiteOr = (value: unknown, fallback: number): number => {
    const n = Number(value ?? fallback);
    return Number.isFinite(n) ? n : fallback;
  };

  const doc = asRecord(parsed);
  const rubric = asRecord(doc.rubric);

  return {
    task_summary: text(doc.task_summary),
    // Not checked here; the validator rejects values outside the allowed set.
    estimated_complexity: (doc.estimated_complexity ?? "medium") as ComplexityLevel,
    recommended_max_turns: finiteOr(doc.recommended_max_turns, 10),
    assertions: Array.isArray(doc.assertions)
      ? doc.assertions.map((entry: unknown, idx: number) => {
          const a = asRecord(entry);
          return {
            id: finiteOr(a.id, idx + 1),
            category: text(a.category, "content") as AssertionCategory,
            assertion: text(a.assertion),
            why: text(a.why),
            test: text(a.test),
          };
        })
      : [],
    rubric: {
      craft: text(rubric.craft),
      originality: text(rubric.originality),
      tone: text(rubric.tone),
      completeness: text(rubric.completeness),
    },
    constraints_applied: Array.isArray(doc.constraints_applied)
      ? doc.constraints_applied.map(String)
      : [],
    ambiguities: Array.isArray(doc.ambiguities)
      ? doc.ambiguities.map(String)
      : [],
  };
}
/**
 * Lets the user edit a draft in their terminal editor and returns the result.
 *
 * The draft is written to a private temp file, the editor named by $VISUAL,
 * then $EDITOR, then "vi" is run on it, and the saved text is parsed and
 * validated. Validation warnings are printed but do not block.
 *
 * The temp file is removed on success. When the saved text fails to parse or
 * validate, the file is kept and its path printed so the edits are not lost.
 *
 * @param draft Draft to present for editing.
 * @returns The edited draft, or null when the file could not be written or
 *          read, the editor could not be launched, or the result is invalid.
 */
export async function openInEditor(draft: BriefDraft): Promise<BriefDraft | null> {
  const isTTY = Boolean(process.stdout.isTTY);
  const red = (s: string) => isTTY ? `\x1b[31m${s}\x1b[0m` : s;
  const yellow = (s: string) => isTTY ? `\x1b[33m${s}\x1b[0m` : s;
  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  // Random suffix avoids collisions between concurrent runs in the same millisecond.
  const unique = `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
  const tmpFile = join(tmpdir(), `gitagent-brief-edit-${unique}.yaml`);
  const discard = () => unlink(tmpFile).catch(() => {});
  const reportKept = () => console.error(yellow(`[brief] Your edits were kept at ${tmpFile}`));

  try {
    // "wx" fails instead of writing through a file (or symlink) that already
    // exists at this path in the shared temp directory; 0o600 keeps it private.
    await writeFile(tmpFile, serializeToEditYaml(draft), { encoding: "utf-8", flag: "wx", mode: 0o600 });
  } catch (err: unknown) {
    console.error(red(`[brief] Failed to write temp file: ${describe(err)}`));
    return null;
  }

  const editor = process.env.VISUAL || process.env.EDITOR || "vi";

  let spawnResult = spawnSync(editor, [tmpFile], { stdio: "inherit" });

  // $EDITOR is often a command line ("code --wait"), not a bare program name.
  // The literal value is tried first so paths containing spaces keep working;
  // only if that program does not exist is the value split into words.
  const [command, ...editorArgs] = editor.trim().split(/\s+/);
  const launchCode = (spawnResult.error as NodeJS.ErrnoException | undefined)?.code;
  if (launchCode === "ENOENT" && command && editorArgs.length > 0) {
    spawnResult = spawnSync(command, [...editorArgs, tmpFile], { stdio: "inherit" });
  }

  if (spawnResult.error) {
    console.error(red(`[brief] Failed to launch editor "${editor}" (${spawnResult.error.message}). Set $EDITOR or $VISUAL to a valid editor.`));
    await discard();
    return null;
  }
  if (spawnResult.status !== 0 && spawnResult.status !== null) {
    console.error(yellow(`[brief] Editor exited with status ${spawnResult.status} — if you didn't save, your changes were not applied.`));
    // Still try to read the file below — the user may have saved before a non-zero exit.
  }

  let content: string;
  try {
    content = await readFile(tmpFile, "utf-8");
  } catch {
    console.error(red("[brief] Could not read edited file."));
    await discard();
    return null;
  }

  let parsed: BriefDraft;
  try {
    parsed = parseEditYaml(content);
  } catch (err: unknown) {
    console.error(red(`[brief] YAML parse error: ${describe(err)}`));
    reportKept();
    return null;
  }

  const validation = validateEditedDraft(parsed);

  // Show warnings even if valid
  for (const w of validation.warnings) {
    console.log(yellow(` ⚠ ${w}`));
  }

  if (!validation.valid) {
    for (const e of validation.errors) {
      console.error(red(` ✗ ${e}`));
    }
    reportKept();
    return null;
  }

  await discard();
  return parsed;
}
+ +You will receive: +- BRIEF DRAFT: the JSON brief produced by the Planner +- TASK: the original user request +- SOUL: the agent's identity and voice +- RULES: the agent's hard constraints +- DUTIES: the agent's responsibilities +- ITERATION: which review round this is (1, 2, or 3) + +Your output MUST be a single valid JSON object. No markdown. No code blocks. No explanation. + +Required JSON schema: +{ + "approved": boolean, + "score": number, + "issues": [ + { + "level": "critical"|"warning"|"suggestion", + "assertion_id": number|null, + "field": "assertions"|"rubric"|"ambiguities"|"overall", + "issue": string, + "fix": string + } + ], + "summary": string +} + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +APPROVAL CRITERIA +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +approved: true ONLY if ALL of these are satisfied: +1. Zero critical issues exist +2. Assertion count meets minimum for stated complexity + (low: ≥5, medium: ≥8, high: ≥12) +3. At least one assertion in each required category: format, content, constraint +4. No assertion uses vague language (good, appropriate, clear, sufficient, etc.) +5. No assertion contradicts any rule in RULES.md +6. All rubric fields are task-specific (not generic boilerplate) +7. Every constraint in RULES.md that applies to this task has a corresponding assertion + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +WHAT TO CHECK — ASSERTIONS +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +For EACH assertion, verify: + +□ TESTABILITY: Can this be evaluated as binary pass/fail? + If not → CRITICAL: "Assertion {id} is not testable. '{assertion}' cannot be evaluated as pass/fail." + Fix: rewrite with a specific, measurable condition. + +□ VAGUE LANGUAGE: Does it contain: good, appropriate, relevant, clear, sufficient, proper, + adequate, reasonable, suitable, correct, well-written, engaging, compelling? + If yes → CRITICAL: "Assertion {id} uses vague language: '{word}'. Assertions must be binary." 
+ +□ RULES CONFLICT: Does this assertion require something RULES.md forbids? + If yes → CRITICAL: "Assertion {id} conflicts with RULES: '{rule}'. Remove or rewrite." + +□ SCOPE CREEP: Does this assertion require something the task did NOT ask for? + If yes → WARNING: "Assertion {id} is out of scope. Task did not request '{requirement}'." + +□ DUPLICATE: Does this assertion overlap significantly with another assertion? + If yes → WARNING: "Assertions {id1} and {id2} test the same thing. Merge or differentiate." + +□ TEST FIELD: Is the test description specific and actionable? + "Read the output" → WARNING: too vague. + "Evaluate quality" → CRITICAL: not a test. + +□ TONE ASSERTIONS: If category is "tone", does the assertion reference SOUL.md? + If not → WARNING: "Tone assertion {id} does not reference SOUL.md. It may not match the agent's actual voice." + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +WHAT TO CHECK — COVERAGE +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +□ MISSING CATEGORIES: Are there assertions for format, content, AND constraint? + If any missing → CRITICAL: "No {category} assertion present. Required." + +□ MISSING RULES COVERAGE: Read every constraint in RULES.md. + For each applicable constraint: is there a corresponding assertion? + If not → CRITICAL: "RULES constraint '{rule}' has no corresponding assertion." + +□ COUNT: Does assertion count match stated complexity? + If too few → CRITICAL: "Only {n} assertions for {complexity} complexity task. Minimum is {min}." + +□ AMBIGUITIES: Are there unspecified requirements that would change the assertions? + If yes and not flagged → WARNING: "Ambiguity not flagged: '{requirement}' is unspecified." + +□ BLOCKING AMBIGUITY: Is there a required input the agent cannot produce without? + If yes → CRITICAL: "Blocking ambiguity: '{resource}' is required but not provided. Agent cannot complete task." 
+ +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +WHAT TO CHECK — RUBRIC +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +□ GENERIC RUBRIC: Is any rubric field a generic statement that could apply to any task? + Examples of generic (bad): "Every paragraph earns its place", "Writing is clear and engaging" + If generic → WARNING: "Rubric field '{field}' is generic. Must be specific to this task." + +□ TONE RUBRIC: Does it quote or reference SOUL.md? + If not → WARNING: "Rubric 'tone' does not reference SOUL.md definition." + +□ COMPLETENESS RUBRIC: Does it list the required structural parts? + If it doesn't enumerate them → WARNING: "Rubric 'completeness' must list all required sections." + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +ISSUE LEVELS +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +critical: Blocks approval. The brief cannot be shown to a user in this state. + Examples: vague assertion, rules conflict, missing required category, + assertion count too low, blocking ambiguity. + +warning: Should be fixed but doesn't block approval. Brief is usable but suboptimal. + Examples: generic rubric, missing tone reference, weak test description. + +suggestion: Optional improvement. Does not affect approval or score. + Examples: could add an assertion about X, rubric could be more specific about Y. + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +SCORING +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +Start at 100. Deduct: +- 15 points per critical issue +- 5 points per warning +- 2 points per suggestion +Minimum score: 0. Score is informational — only issues determine approval. + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +ITERATION CONTEXT +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +If iteration = 3 (final attempt): +- If only warnings remain (no criticals), approve with warnings noted. +- The user will see the warnings alongside the brief. 
/**
 * Parses the evaluator's JSON reply and normalises it.
 *
 * `approved` and `issues` are mandatory. `score` is kept when it is a finite
 * number (clamped to 0..100); otherwise it is derived from the issue list
 * with the deductions the evaluator prompt specifies: 15 per critical,
 * 5 per warning, 2 per suggestion, starting from 100.
 *
 * @throws Error (or SyntaxError from JSON.parse) when the reply is unusable.
 */
function parseEvaluatorVerdict(json: string): EvaluatorVerdict {
  const parsed = JSON.parse(json) as EvaluatorVerdict;
  if (typeof parsed.approved !== "boolean") {
    throw new Error("Missing approved field");
  }
  if (!Array.isArray(parsed.issues)) {
    throw new Error("Missing issues array");
  }

  const count = (level: string): number => parsed.issues.filter(i => i?.level === level).length;
  const derived = 100 - 15 * count("critical") - 5 * count("warning") - 2 * count("suggestion");
  const reported = typeof parsed.score === "number" && Number.isFinite(parsed.score)
    ? parsed.score
    : derived;

  return { ...parsed, score: Math.min(100, Math.max(0, reported)) };
}

/**
 * Runs the brief evaluator and returns its verdict plus the cost of doing so.
 *
 * The model is queried at most twice; the second attempt repeats the input
 * with an explicit reminder to answer in raw JSON. Usage from every attempt
 * is accumulated, whether or not its reply could be parsed. If both replies
 * are unusable, a non-approved verdict with score 0 and a single critical
 * issue describing the last failure is returned instead of throwing.
 *
 * @param opts.input    Evaluator input text.
 * @param opts.model    Optional model override passed through to the query.
 * @param opts.agentDir Agent directory passed through to the query.
 * @returns The verdict and the accumulated session costs.
 */
export async function runBriefEvaluator(opts: {
  input: string;
  model?: string;
  agentDir: string;
}): Promise<{ verdict: EvaluatorVerdict; costs: SessionCosts }> {
  const { input, model, agentDir } = opts;
  const costTracker = new CostTracker();
  const maxAttempts = 2;
  let lastFailure = "no response";

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const prompt = attempt === 1
      ? input
      : `${input}\n\n---\n\nIMPORTANT: Your previous response was not valid JSON. Output ONLY a raw JSON object matching the schema. No markdown, no code fences, no explanation.`;

    const gen = query({
      prompt,
      dir: agentDir,
      model,
      systemPrompt: EVALUATOR_SYSTEM_PROMPT,
      replaceBuiltinTools: true,
      maxTurns: 1,
    });

    let verdict: EvaluatorVerdict | null = null;
    try {
      verdict = parseEvaluatorVerdict(extractJson(await collectAssistantText(gen)));
    } catch (err: unknown) {
      lastFailure = err instanceof Error ? err.message : String(err);
    }

    // Recorded once per attempt, and before the result object is built so the
    // returned cost snapshot includes this attempt.
    for (const [m, u] of Object.entries(gen.costs().modelUsage)) {
      costTracker.add(m, u);
    }

    if (verdict) {
      return { verdict, costs: costTracker.get() };
    }
  }

  return {
    verdict: {
      approved: false,
      score: 0,
      issues: [{
        level: "critical",
        assertion_id: null,
        field: "overall",
        issue: `Evaluator produced invalid JSON: ${lastFailure}`,
        fix: "Brief Evaluator response could not be parsed. Using draft as-is with warning.",
      }],
      summary: "Evaluator response invalid — using draft as best-effort.",
    },
    costs: costTracker.get(),
  };
}
`\n**Active constraints from your rules:**\n${draft.constraints_applied.map(c => `- ${c}`).join("\n")}` + : ""; + + const ambiguitiesSection = draft.ambiguities.length > 0 + ? `\n**Resolved ambiguities:**\n${draft.ambiguities.map(a => `- ${a}`).join("\n")}` + : ""; + + return `--- +## Active Brief + +You are executing against an approved brief. Your output MUST satisfy every assertion below. +Self-evaluate against each criterion before considering your response complete. + +**Task:** ${brief.task} + +**Success Criteria — all must pass:** + +${assertionLines} + +**Quality Standard:** +- Craft: ${draft.rubric.craft} +- Originality: ${draft.rubric.originality} +- Tone: ${draft.rubric.tone} +- Completeness: ${draft.rubric.completeness} +${constraintsSection} +${ambiguitiesSection} + +Before finishing, verify each numbered assertion above against your output. +If any assertion fails, revise before responding. +---`.trim(); +} diff --git a/src/brief/negotiator.ts b/src/brief/negotiator.ts new file mode 100644 index 0000000..e7f9e31 --- /dev/null +++ b/src/brief/negotiator.ts @@ -0,0 +1,82 @@ +import { runPlanner, buildPlannerInput } from "./planner.js"; +import { runBriefEvaluator, buildEvaluatorInput } from "./evaluator.js"; +import type { BriefDraft, EvaluatorVerdict, NegotiatorOptions, NegotiationResult } from "./types.js"; +import { CostTracker } from "../cost-tracker.js"; + +const isTTY = Boolean(process.stdout.isTTY); +const dim = (s: string) => isTTY ? `\x1b[2m${s}\x1b[0m` : s; +const green = (s: string) => isTTY ? `\x1b[32m${s}\x1b[0m` : s; +const yellow = (s: string) => isTTY ? 
/**
 * Runs the planner/evaluator loop until the evaluator approves a draft or
 * MAX_ITERATIONS rounds have been used.
 *
 * Each round the planner produces a draft (revising the previous one using the
 * previous verdict from round two onwards) and the evaluator reviews it. One
 * summary line per round is logged. Transient "working…" progress text is only
 * drawn when stdout is a TTY, so piped or captured output contains no
 * cursor-control sequences.
 *
 * When no round is approved, the highest-scoring draft and its verdict are
 * returned and `bestEffort` is true.
 *
 * @param opts Task text, agent identity documents, model and agent directory.
 * @returns The chosen draft and verdict, rounds used, best-effort flag and total costs.
 */
export async function negotiateBrief(opts: NegotiatorOptions): Promise<NegotiationResult> {
  const { task, soul, rules, duties, model, agentDir } = opts;
  const costTracker = new CostTracker();

  // Progress text is overwritten in place, which only makes sense on a terminal.
  const showProgress = (message: string): void => {
    if (isTTY) process.stdout.write(dim(message));
  };
  const clearProgress = (): void => {
    if (isTTY) process.stdout.write("\r\x1b[K");
  };

  let currentDraft: BriefDraft | null = null;
  let bestDraft: BriefDraft | null = null;
  let bestVerdict: EvaluatorVerdict | null = null;
  let bestScore = -1;
  let lastVerdict: EvaluatorVerdict | null = null;
  let iterations = 0;

  while (iterations < MAX_ITERATIONS) {
    iterations++;

    const iterLabel = ` Iteration ${iterations}/${MAX_ITERATIONS}`;
    const isRevision = iterations > 1;

    showProgress(`${iterLabel} — Planner ${isRevision ? "revising" : "generating"} assertions...`);
    const plannerResult = await runPlanner({
      input: buildPlannerInput(task, soul, rules, duties, lastVerdict ?? undefined, currentDraft ?? undefined),
      model,
      agentDir,
    });
    const draft: BriefDraft = plannerResult.draft;
    currentDraft = draft;
    for (const [m, u] of Object.entries(plannerResult.costs.modelUsage)) costTracker.add(m, u);
    clearProgress();

    showProgress(`${iterLabel} — Evaluator reviewing...`);
    const evaluatorResult = await runBriefEvaluator({
      input: buildEvaluatorInput(draft, task, soul, rules, duties, iterations),
      model,
      agentDir,
    });
    const verdict: EvaluatorVerdict = evaluatorResult.verdict;
    lastVerdict = verdict;
    for (const [m, u] of Object.entries(evaluatorResult.costs.modelUsage)) costTracker.add(m, u);
    clearProgress();

    const criticals = verdict.issues.filter(i => i.level === "critical").length;
    const warnings = verdict.issues.filter(i => i.level === "warning").length;

    if (verdict.approved) {
      console.log(dim(`${iterLabel} — score: ${verdict.score}/100 `) + green("✓ approved"));
    } else {
      const issuesSummary = [
        criticals > 0 ? `${criticals} critical` : "",
        warnings > 0 ? `${warnings} warning${warnings !== 1 ? "s" : ""}` : "",
      ].filter(Boolean).join(", ");
      // A rejection may list only suggestions (or nothing); avoid printing "()".
      console.log(dim(`${iterLabel} — score: ${verdict.score}/100`) + yellow(` (${issuesSummary || "not approved"})`));
    }

    if (verdict.score > bestScore) {
      bestScore = verdict.score;
      bestDraft = draft;
      bestVerdict = verdict;
    }

    if (verdict.approved) break;
  }

  if (!currentDraft || !lastVerdict) {
    throw new Error("negotiateBrief: no iteration was run (MAX_ITERATIONS must be at least 1).");
  }

  const approved = lastVerdict.approved;
  return {
    // An approved draft is always the last one; otherwise prefer the best-scoring round.
    draft: approved ? currentDraft : (bestDraft ?? currentDraft),
    verdict: approved ? lastVerdict : (bestVerdict ?? lastVerdict),
    iterations,
    bestEffort: !approved,
    costs: costTracker.get(),
  };
}
/** Reads `filename` inside `agentDir` as UTF-8; any read failure yields "". */
async function readOrEmpty(agentDir: string, filename: string): Promise<string> {
  try {
    return await readFile(join(agentDir, filename), "utf-8");
  } catch {
    return "";
  }
}

/**
 * Loads the parent agent's RULES.md, wrapped in an "Inherited Rules" banner.
 *
 * A value starting with "http" or "git@" is treated as a remote that was
 * cloned into `<agentDir>/.gitagent/deps/<name>`; anything else is resolved
 * as a path relative to `agentDir`.
 *
 * @returns The banner plus parent rules, or "" when there is no parent or the
 *   parent has no readable RULES.md.
 */
async function loadParentRules(agentDir: string, extendsPath: string | undefined): Promise<string> {
  if (!extendsPath) return "";

  const isRemote = extendsPath.startsWith("http") || extendsPath.startsWith("git@");
  // NOTE(review): the directory name must match whatever the cloning code
  // uses; that code is not visible here, so the derivation is kept as-is.
  const parentDir = isRemote
    ? join(agentDir, ".gitagent", "deps", extendsPath.split("/").pop()?.replace(/\.git$/, "") || "parent")
    : resolve(agentDir, extendsPath);

  const parentRules = await readOrEmpty(parentDir, "RULES.md");
  return parentRules
    ? `\n\n--- Inherited Rules (from ${extendsPath}) ---\n\n${parentRules}`
    : "";
}

export interface BriefOrchestrationOptions {
  task: string;
  agentDir: string;
  agentName?: string;
  agentExtends?: string;
  model?: string;
  options?: BriefOptions;
}

export interface BriefOrchestrationResult {
  brief: Brief;
  systemPromptSuffix: string;
  skipped: boolean;
  costs: SessionCosts;
}

export interface GenerateBriefResult {
  filePath: string;
  costs: SessionCosts;
}

/**
 * Resolves the brief to use for a task: loads an explicit brief file, reuses
 * an existing approved brief, or negotiates a new one and walks the user
 * through approval.
 *
 * @param opts Task, agent directory and brief options.
 * @returns The brief, the system-prompt suffix built from it ("" when
 *   `skipped` is true) and the accumulated cost of every negotiation run
 *   during this call.
 * @throws BriefError when the task is empty.
 * @throws BriefGenerationError in non-interactive mode when negotiation ends
 *   best-effort and `options.allowBestEffort` is not set.
 */
export async function runBriefOrchestration(opts: BriefOrchestrationOptions): Promise<BriefOrchestrationResult> {
  const { task, agentDir, agentName = "agent", agentExtends, model, options = {} } = opts;

  if (!task || task.trim() === "") {
    throw new BriefError("Task cannot be empty.");
  }

  const MAX_TASK_CHARS = 2000;
  const MAX_REGENERATIONS = 3;

  const truncated = task.length > MAX_TASK_CHARS;
  const effectiveTask = truncated ? task.slice(0, MAX_TASK_CHARS) : task;
  if (truncated) {
    console.log(yellow("[brief] Task truncated to 2000 chars for brief generation. Full task passed to main execution."));
  }

  // One tracker for the whole call, so an interactive "regenerate" adds to the
  // reported costs instead of replacing the earlier negotiation's costs.
  const spent = new CostTracker();
  const finish = (brief: Brief, skipped: boolean): BriefOrchestrationResult => ({
    brief,
    systemPromptSuffix: skipped ? "" : buildBriefSuffix(brief),
    skipped,
    costs: spent.get(),
  });

  // Saves the brief and records where it landed; generateBrief() reports
  // `brief.file_path`, which otherwise keeps its initial "".
  // NOTE(review): harmless if saveBrief already sets file_path itself — confirm.
  const persist = async (brief: Brief): Promise<string> => {
    const filePath = await saveBrief(agentDir, brief);
    brief.file_path = filePath;
    return filePath;
  };

  // If a specific brief path is given, load and use it directly.
  if (options.briefPath) {
    const brief = await loadBriefFromFile(resolveBriefPath(agentDir, options.briefPath));
    assertBriefApproved(brief);
    const staleReport = await analyzeStaleAssertions(agentDir, brief);
    if (staleReport.stale) {
      displayStalenessReport(staleReport);
    }
    return finish(brief, false);
  }

  // Reuse an existing brief unless regeneration was requested or chosen.
  let regenerating = Boolean(options.regenerate);
  if (!regenerating) {
    const existing = await findBrief(agentDir, effectiveTask);
    if (existing) {
      const staleReport = await analyzeStaleAssertions(agentDir, existing);
      if (!staleReport.stale) {
        console.log(dim(`[brief] Using existing approved brief: ${existing.id} (v${existing.version})`));
        return finish(existing, false);
      }

      displayStalenessReport(staleReport);
      const staleDecision = await promptStaleDecision();
      if (staleDecision === "skip") {
        return finish(existing, true);
      }
      if (staleDecision !== "regenerate") {
        // "use" — proceed with the existing brief despite staleness.
        console.log(dim(`[brief] Using existing brief: ${existing.id} (v${existing.version})`));
        return finish(existing, false);
      }
      // The user asked to replace a stale brief: treat it like an explicit
      // regenerate so the old version is archived below.
      regenerating = true;
    }
  }

  // Load agent identity files for Planner/Evaluator context. The reads are
  // independent and never reject, so they run concurrently.
  const [soul, rules, duties, parentRulesSuffix] = await Promise.all([
    readOrEmpty(agentDir, "SOUL.md"),
    readOrEmpty(agentDir, "RULES.md"),
    readOrEmpty(agentDir, "DUTIES.md"),
    loadParentRules(agentDir, agentExtends),
  ]);
  const effectiveRules = rules + parentRulesSuffix;

  if (!soul) console.log(dim("[brief] No SOUL.md found — Planner will generate generic assertions."));
  if (!rules) console.log(dim("[brief] No RULES.md found — no constraint assertions will be generated."));
  if (parentRulesSuffix) console.log(dim(`[brief] Parent rules loaded from: ${agentExtends}`));

  console.log(bold(`\n[brief] Negotiating brief for: "${effectiveTask.slice(0, 60)}${effectiveTask.length > 60 ? "…" : ""}"`));
  console.log(dim("[brief] Planner and Evaluator are negotiating internally...\n"));

  // Runs one Planner↔Evaluator negotiation and folds its usage into `spent`.
  const negotiate = async () => {
    const outcome = await negotiateBrief({
      task: effectiveTask,
      soul,
      rules: effectiveRules,
      duties,
      model,
      agentDir,
    });
    for (const [m, u] of Object.entries(outcome.costs.modelUsage)) spent.add(m, u);
    return outcome;
  };

  const negotiation = await negotiate();

  // Hashes for stale detection (only the child RULES hash; the parent can
  // change independently).
  const soulHash = hashContent(soul);
  const rulesHash = hashContent(rules);
  const id = briefId(effectiveTask);

  if (regenerating) {
    await archiveBrief(agentDir, id);
  }

  const version = await nextVersion(agentDir, id);
  const now = new Date().toISOString();

  let finalBrief: Brief = {
    id,
    task: effectiveTask,
    agent: agentName,
    created_at: now,
    status: "draft",
    version,
    planner_model: model ?? "default",
    evaluator_model: model ?? "default",
    negotiation_iterations: negotiation.iterations,
    soul_hash: soulHash,
    rules_hash: rulesHash,
    draft: negotiation.draft,
    file_path: "",
  };

  // Auto-approve path (programmatic use or no TTY).
  if (options.skipApproval || !process.stdin.isTTY) {
    if (negotiation.bestEffort && !options.allowBestEffort) {
      const criticals = negotiation.verdict.issues.filter(i => i.level === "critical");
      throw new BriefGenerationError(
        `Brief negotiation did not reach approval after ${negotiation.iterations} iteration(s) ` +
        `(score ${negotiation.verdict.score}/100). ${criticals.length} critical issue(s) remain:\n` +
        criticals.map(c => ` - ${c.issue}`).join("\n") +
        `\nPass { allowBestEffort: true } to accept this brief anyway, or fix the issues above.`,
      );
    }
    finalBrief.status = "approved";
    finalBrief.approved_at = now;
    if (negotiation.bestEffort) {
      console.log(yellow(`[brief] ⚠ Auto-approved a best-effort brief (score ${negotiation.verdict.score}/100, never reached full approval).`));
    }
    const filePath = await persist(finalBrief);
    console.log(dim(`[brief] Brief auto-approved and saved: ${filePath}`));
    return finish(finalBrief, false);
  }

  // Interactive approval loop.
  displayBrief(finalBrief, negotiation.verdict, negotiation.bestEffort);

  let currentNegotiation = negotiation;
  let attempts = 0;

  while (attempts < MAX_REGENERATIONS) {
    const decision: ApprovalDecision = await promptApproval(finalBrief);

    if (decision === "approve") {
      finalBrief.status = "approved";
      finalBrief.approved_at = new Date().toISOString();
      const filePath = await persist(finalBrief);
      console.log(dim(`[brief] Brief saved: ${filePath}`));
      return finish(finalBrief, false);
    }

    if (decision === "skip") {
      await persist(finalBrief);
      return finish(finalBrief, true);
    }

    if (decision === "edit") {
      // Open the draft in $EDITOR, then re-display and re-prompt.
      const edited = await openInEditor(finalBrief.draft);
      if (edited) {
        finalBrief = { ...finalBrief, draft: edited };
        console.log(dim("[brief] Brief updated from editor."));
      } else {
        console.log(dim("[brief] Editor changes discarded or invalid. Showing original brief."));
      }
      displayBrief(finalBrief, currentNegotiation.verdict, currentNegotiation.bestEffort);
      continue; // editing does not consume a regeneration attempt
    }

    // Regenerate.
    attempts++;
    console.log(dim(`[brief] Regenerating (attempt ${attempts + 1})...\n`));
    currentNegotiation = await negotiate();
    finalBrief = {
      ...finalBrief,
      version: await nextVersion(agentDir, id),
      draft: currentNegotiation.draft,
      negotiation_iterations: currentNegotiation.iterations,
      created_at: new Date().toISOString(),
    };
    displayBrief(finalBrief, currentNegotiation.verdict, currentNegotiation.bestEffort);
  }

  // Regeneration attempts exhausted: keep the last draft, unapproved.
  await persist(finalBrief);
  return finish(finalBrief, true);
}
process.cwd(); + const result = await runBriefOrchestration({ + task: opts.task, + agentDir, + model: opts.model, + options: { skipApproval: opts.skipApproval }, + }); + if (result.skipped) return null; + return { + filePath: result.brief.file_path, + costs: result.costs, + }; +} + +// ── List/view helpers re-exported for CLI ───────────────────────────────── + +export { listBriefs, loadBriefFromFile } from "./storage.js"; +export { displayBriefList, displayBriefDetail } from "./approval.js"; +export { buildBriefSuffix } from "./injector.js"; diff --git a/src/brief/output-evaluator.ts b/src/brief/output-evaluator.ts new file mode 100644 index 0000000..9530189 --- /dev/null +++ b/src/brief/output-evaluator.ts @@ -0,0 +1,244 @@ +import { query } from "../sdk.js"; +import type { Brief, OutputVerdict } from "./types.js"; +import type { GCAssistantMessage } from "../sdk-types.js"; +import { CostTracker } from "../cost-tracker.js"; +import type { SessionCosts } from "../cost-tracker.js"; + +const OUTPUT_EVALUATOR_SYSTEM_PROMPT = `You are an Output Quality Reviewer. You evaluate whether an AI agent's response +satisfied a set of pre-approved success criteria (assertions). + +You will receive: +- OUTPUT: the agent's full conversation output, split into labeled messages ([Message 1], [Message 2], etc.) +- ASSERTIONS: a list of success criteria, each with an id, category, assertion text, + and test instruction describing exactly how to verify it + +IMPORTANT — IDENTIFY THE DELIVERABLE FIRST: +The agent may have sent multiple messages including thinking steps, verification checks, +word count notes, grep results, self-evaluation tables, and meta-commentary. +Your first task is to identify which message(s) contain the actual deliverable +(the final content the agent was asked to produce). Ignore all messages that are +verification steps, meta-commentary, or process notes. Evaluate ONLY the deliverable content. 
+ +If multiple messages contain versions of the deliverable, use the LAST complete version. + +Your job is to evaluate EACH assertion independently against the deliverable. +Be objective and evidence-based. Quote directly from the deliverable when citing evidence. + +Your output MUST be a single valid JSON object. No markdown. No code blocks. No explanation. +Raw JSON only. + +Required JSON schema: +{ + "all_passed": boolean, + "passed_count": number, + "failed_count": number, + "results": [ + { + "assertion_id": number, + "category": string, + "assertion": string, + "passed": boolean, + "evidence": string, + "notes": string + } + ], + "summary": string +} + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +EVALUATION RULES +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +1. FOLLOW THE TEST INSTRUCTION. Each assertion has a "test" field describing exactly + how to verify it. Follow that instruction precisely. + If test says "count words — must be 800-1000": count the words in the output. + If test says "grep for competitor names — must return 0 matches": scan for those names. + +2. BINARY ONLY. Each assertion is either passed or failed. No partial credit. + If word count is 1050 and limit is 1000: FAIL. + If word count is 999: PASS. + +3. EVIDENCE IS REQUIRED. The evidence field must quote or cite something specific + from the deliverable — not just restate the assertion. + BAD: "The output does not have 3 arguments" + GOOD: "Found only 2 arguments: [X] and [Y]. No third." + Keep evidence under 100 characters. Be precise, not verbose. + +4. DO NOT INFER INTENT. If an assertion says "ends with an imperative verb" and + the deliverable ends with a declarative sentence: FAIL. Do not give credit for close. + +5. FORMAT ASSERTIONS: For assertions involving word counts, structure, or length: + measure precisely on the deliverable only. Do not count verification tables or meta-commentary. + +6. CONSTRAINT ASSERTIONS: For "no X mentioned" — scan the deliverable only. 
/**
 * Builds the evaluator's user prompt: the agent output followed by every
 * assertion in the brief with its verification instruction.
 */
export function buildOutputEvaluatorInput(outputText: string, brief: Brief): string {
  const rendered: string[] = [];
  for (const item of brief.draft.assertions) {
    rendered.push(
      `Assertion ${item.id} [${item.category}]:\n Text: "${item.assertion}"\n How to verify: ${item.test}`,
    );
  }

  const outputSection = `OUTPUT (evaluate this):\n${outputText || "(empty output)"}`;
  const assertionSection = `ASSERTIONS (evaluate each one):\n${rendered.join("\n\n")}`;
  return `${outputSection}\n\n---\n\n${assertionSection}`;
}

/**
 * Drains a message stream and reports the last assistant message's content,
 * whether that message stopped with reason "length", and the content of the
 * last system message whose subtype is "error" (if any).
 */
// NOTE(review): the element type was not visible in the source under review;
// `any` matches the untyped property access the body performs.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function collectAssistantText(gen: AsyncIterable<any>): Promise<{ text: string; truncated: boolean; error?: string }> {
  const outcome: { text: string; truncated: boolean; error?: string } = {
    text: "",
    truncated: false,
    error: undefined,
  };

  for await (const message of gen) {
    switch (message.type) {
      case "assistant": {
        const reply = message as GCAssistantMessage;
        outcome.text = reply.content;
        outcome.truncated = reply.stopReason === "length";
        break;
      }
      case "system":
        if (message.subtype === "error") outcome.error = message.content;
        break;
    }
  }

  return outcome;
}

/**
 * Pulls the JSON payload out of a model reply: the body of the first fenced
 * code block if there is one, otherwise the span from the first "{" to the
 * last "}", otherwise the trimmed reply itself.
 */
function extractJson(raw: string): string {
  const fence = /```(?:json)?\s*([\s\S]*?)```/.exec(raw);
  if (fence !== null) return fence[1].trim();

  const open = raw.indexOf("{");
  const close = raw.lastIndexOf("}");
  const hasObject = open !== -1 && close > open;
  return hasObject ? raw.slice(open, close + 1) : raw.trim();
}
/**
 * Builds an all-failed verdict for when the evaluator could not produce a
 * usable response, and prints a warning explaining why.
 *
 * @param brief Brief whose assertions are all marked failed.
 * @param warning Reason shown in the warning and in the verdict summary.
 * @param costs Costs accumulated so far; returned unchanged.
 */
function fallbackVerdict(brief: Brief, warning: string, costs: SessionCosts): { verdict: OutputVerdict; costs: SessionCosts } {
  const notice = `[brief] Output Evaluator warning: ${warning} — evaluation skipped, marking as failed.`;
  // console.warn writes to stderr, so colour is gated on stderr being a TTY.
  console.warn(process.stderr.isTTY ? `\x1b[33m${notice}\x1b[0m` : notice);

  const results = brief.draft.assertions.map(a => ({
    assertion_id: a.id,
    category: a.category,
    assertion: a.assertion,
    passed: false,
    evidence: "(evaluation unavailable — could not parse evaluator response)",
  }));

  return {
    verdict: {
      all_passed: false,
      passed_count: 0,
      failed_count: results.length,
      results,
      summary: `Output evaluation failed: ${warning}`,
    },
    costs,
  };
}

/**
 * Asks the evaluator model whether `outputText` satisfies every assertion in
 * `brief`, retrying once when the response cannot be used.
 *
 * @returns The verdict plus the usage of every evaluator call made. A brief
 *   with no assertions passes without calling the model. If both attempts
 *   fail, every assertion is reported as failed (see `fallbackVerdict`).
 */
export async function runOutputEvaluator(opts: {
  outputText: string;
  brief: Brief;
  dir: string;
  model?: string;
}): Promise<{ verdict: OutputVerdict; costs: SessionCosts }> {
  const { outputText, brief, dir, model } = opts;
  const costTracker = new CostTracker();
  const assertions = brief.draft.assertions;

  if (assertions.length === 0) {
    return {
      verdict: {
        all_passed: true,
        passed_count: 0,
        failed_count: 0,
        results: [],
        summary: "No assertions to evaluate.",
      },
      costs: costTracker.get(),
    };
  }

  const MAX_ATTEMPTS = 2;
  const input = buildOutputEvaluatorInput(outputText, brief);
  const retryInput = `${input}\n\n---\n\nIMPORTANT: Your previous response was not valid JSON. Output ONLY a raw JSON object matching the schema. No markdown, no code fences, no explanation.`;
  let lastFailure = "unexpected evaluator exit";

  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
    const gen = query({
      prompt: attempt === 1 ? input : retryInput,
      dir,
      model,
      systemPrompt: OUTPUT_EVALUATOR_SYSTEM_PROMPT,
      replaceBuiltinTools: true,
      maxTurns: 1,
      constraints: { temperature: 0, maxTokens: 8000 },
    });

    try {
      let reply: Awaited<ReturnType<typeof collectAssistantText>>;
      try {
        reply = await collectAssistantText(gen);
      } finally {
        // Record this attempt's usage exactly once, whether or not the call
        // or the validation below fails. Adding it in both the success path
        // and the catch handler would count a failed attempt twice.
        for (const [m, u] of Object.entries(gen.costs().modelUsage)) {
          costTracker.add(m, u);
        }
      }

      if (reply.error) {
        throw new Error(`evaluator LLM call failed: ${reply.error}`);
      }
      if (reply.truncated) {
        throw new Error("response was truncated (hit max_tokens before completing JSON)");
      }
      if (!reply.text.trim()) {
        throw new Error("evaluator returned an empty response");
      }

      const parsed = JSON.parse(extractJson(reply.text)) as OutputVerdict;
      if (!parsed || !Array.isArray(parsed.results)) {
        throw new Error("Missing results array");
      }

      // Only a literal `true` counts as a pass; a model that emits "false" or
      // 1 must not be read as success through truthiness.
      const evaluated = parsed.results.map(r => ({ ...r, passed: r.passed === true }));

      // Guard against the evaluator silently skipping assertions — treat any
      // assertion missing from the response as failed rather than ignoring it.
      const seenIds = new Set(evaluated.map(r => r.assertion_id));
      const missing = assertions.filter(a => !seenIds.has(a.id));
      const results = [
        ...evaluated,
        ...missing.map(a => ({
          assertion_id: a.id,
          category: a.category,
          assertion: a.assertion,
          passed: false,
          evidence: "(not evaluated — missing from evaluator response)",
        })),
      ];

      // Recompute counts from the results instead of trusting the model's own.
      const failedCount = results.filter(r => !r.passed).length;
      const passedCount = results.length - failedCount;
      const modelSummary = parsed.summary ?? "";

      return {
        verdict: {
          all_passed: failedCount === 0,
          passed_count: passedCount,
          failed_count: failedCount,
          results,
          summary: missing.length > 0
            ? `${modelSummary} (${missing.length} assertion(s) were not evaluated by the model and were marked failed.)`.trim()
            : modelSummary,
        },
        costs: costTracker.get(),
      };
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      lastFailure = `failed to parse evaluator response after ${MAX_ATTEMPTS} attempts: ${reason}`;
    }
  }

  return fallbackVerdict(brief, lastFailure, costTracker.get());
}
+ +Required JSON schema: +{ + "task_summary": string, + "ambiguities": string[], + "assertions": [ + { + "id": number, + "category": "format"|"content"|"quality"|"constraint"|"behavior"|"tone", + "assertion": string, + "why": string, + "test": string + } + ], + "rubric": { + "craft": string, + "originality": string, + "tone": string, + "completeness": string + }, + "constraints_applied": string[], + "estimated_complexity": "low"|"medium"|"high", + "recommended_max_turns": number +} + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +ASSERTION RULES +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +1. BINARY ONLY. Every assertion must evaluate to pass or fail — never "partially passes". + BAD: "The post is well-written and engaging" + BAD: "The tone is appropriate for the audience" + GOOD: "Uses active voice in at least 90% of sentences" + GOOD: "Word count is between 800 and 1000" + +2. NO VAGUE WORDS. Never use: good, appropriate, relevant, clear, sufficient, proper, adequate, + reasonable, suitable, correct. If you catch yourself using these, rewrite the assertion + with a concrete, measurable standard. + +3. NEVER contradict RULES. If RULES says "never mention competitor X", do not write an + assertion requiring comparisons to X. Read RULES before writing each assertion. + +4. MINIMUM COVERAGE. Every brief must have at least one assertion in each of: + - "format" (structure, length, shape of the output) + - "content" (what must be included or covered) + - "constraint" (a rule from RULES.md that directly applies to this task) + If RULES.md is empty, generate a constraint assertion based on SOUL.md limitations. + +5. TONE ASSERTIONS must quote SOUL.md exactly. Do not invent a tone standard. + BAD: "Tone is professional and friendly" + GOOD: "Tone matches SOUL.md definition: direct and slightly contrarian, no hedging language" + +6. COUNT: minimum 5 assertions (low complexity), 8-11 (medium), 12-15 (high). + Scale to task complexity. Never exceed 15. + +7. 
CONSTRAINT ASSERTIONS: Every applicable RULES.md constraint must appear as its own + assertion — not just in constraints_applied. If RULES has 3 relevant constraints, + you need 3 constraint assertions. + +8. SCOPE: Only assert things the task explicitly asked for. + Do not add assertions for things the user didn't request. + +9. TEST FIELD: Must describe exactly how to check the assertion — what to read, count, + measure, or search for. "Read the output" is not acceptable. Be specific. + GOOD: "Count words using word count tool — must be 800-1000" + GOOD: "Grep output for competitor names from RULES.md list — must return 0 matches" + GOOD: "Read final paragraph — must contain a verb in imperative form (do X, try Y, start Z)" + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +RUBRIC RULES +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +Each rubric field must be specific to THIS task. No generic boilerplate. + +craft: Describe structural/mechanical quality for this output type. + "Every paragraph earns its place" is too vague. + GOOD: "No paragraph repeats a point already made. Each paragraph adds + one new piece of evidence or one new angle." + +originality: Describe what non-obvious looks like for this specific topic. + List 2-3 common takes to avoid by name. + GOOD: "Avoids the three most common takes: isolation, time zone friction, + and too many meetings. Introduces a framing the reader hasn't seen." + +tone: Quote the SOUL.md voice definition. Be concrete about what violates it. + GOOD: "Direct, slightly contrarian per SOUL.md. Violation examples: + 'it could be argued', 'some might say', passive voice constructions." + +completeness: List all required structural parts as a checklist. + GOOD: "Contains all of: (1) non-generic hook, (2) 3 arguments each with data, + (3) one counterargument paragraph, (4) actionable ending paragraph." + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +AMBIGUITY RULES +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +1. 
/**
 * Builds the Planner's user prompt from the task and the agent's identity
 * documents, optionally followed by the previous draft and the Evaluator's
 * feedback when the Planner is revising.
 *
 * @param revisionFeedback Verdict on the previous draft; its critical and
 *   warning issues are listed with the assertion they refer to and, when
 *   provided, the suggested fix.
 * @param previousDraft Draft to revise; embedded as JSON.
 */
export function buildPlannerInput(
  task: string,
  soul: string,
  rules: string,
  duties: string,
  revisionFeedback?: EvaluatorVerdict,
  previousDraft?: BriefDraft,
): string {
  const sections: string[] = [
    `TASK:\n${task}`,
    soul ? `SOUL (agent identity and voice):\n${soul}` : "SOUL: (not defined)",
    rules ? `RULES (hard constraints):\n${rules}` : "RULES: (not defined)",
    duties ? `DUTIES (agent responsibilities):\n${duties}` : "DUTIES: (not defined)",
  ];

  if (previousDraft) {
    sections.push(`PREVIOUS DRAFT (revise this — keep every assertion not flagged below unchanged, with the same id):\n${JSON.stringify(previousDraft)}`);
  }

  if (revisionFeedback) {
    // Warnings use the same layout as criticals so the Planner can tell which
    // assertion each one refers to. The fix line is skipped when absent
    // rather than rendered as "Fix: undefined".
    const describe = (i: EvaluatorVerdict["issues"][number]): string =>
      `- [Assertion ${i.assertion_id ?? "overall"}] ${i.issue}` + (i.fix ? `\n Fix: ${i.fix}` : "");

    const criticals = revisionFeedback.issues.filter(i => i.level === "critical");
    const warnings = revisionFeedback.issues.filter(i => i.level === "warning");
    const feedbackParts: string[] = [
      `REVISION FEEDBACK (you must address all CRITICAL issues):`,
      `Summary: ${revisionFeedback.summary}`,
    ];
    if (criticals.length > 0) {
      feedbackParts.push(`\nCRITICAL (must fix):\n${criticals.map(describe).join("\n")}`);
    }
    if (warnings.length > 0) {
      feedbackParts.push(`\nWARNINGS (should fix):\n${warnings.map(describe).join("\n")}`);
    }
    sections.push(feedbackParts.join("\n"));
  }

  return sections.join("\n\n---\n\n");
}

/** Drains a message stream and returns the content of the last assistant message ("" if none). */
// NOTE(review): the element type was not visible in the source under review;
// `any` matches the untyped property access the body performs.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function collectAssistantText(gen: AsyncIterable<any>): Promise<string> {
  let text = "";
  for await (const msg of gen) {
    if (msg.type === "assistant") {
      text = (msg as GCAssistantMessage).content;
    }
  }
  return text;
}

/**
 * Pulls the JSON payload out of a model reply: the body of the first fenced
 * code block if the model wrapped its output despite instructions, otherwise
 * the span from the first "{" to the last "}", otherwise the trimmed reply.
 */
function extractJson(raw: string): string {
  const fenced = raw.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fenced) return fenced[1].trim();
  const start = raw.indexOf("{");
  const end = raw.lastIndexOf("}");
  if (start !== -1 && end > start) return raw.slice(start, end + 1);
  return raw.trim();
}

/**
 * Asks the Planner model for a brief draft, retrying once when the reply is
 * not usable JSON.
 *
 * @returns The parsed draft (assertions capped at 15) and the usage of every
 *   Planner call made.
 * @throws BriefGenerationError when both attempts fail.
 */
export async function runPlanner(opts: {
  input: string;
  model?: string;
  agentDir: string;
}): Promise<{ draft: BriefDraft; costs: SessionCosts }> {
  const { input, model, agentDir } = opts;
  const costTracker = new CostTracker();

  const MAX_ATTEMPTS = 2;
  const MAX_ASSERTIONS = 15;
  const retryInput = `${input}\n\n---\n\nIMPORTANT: Your previous response was not valid JSON. Output ONLY a raw JSON object matching the schema. No markdown, no code fences, no explanation.`;
  let lastFailure = "Planner failed";

  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
    const gen = query({
      prompt: attempt === 1 ? input : retryInput,
      dir: agentDir,
      model,
      systemPrompt: PLANNER_SYSTEM_PROMPT,
      replaceBuiltinTools: true,
      maxTurns: 1,
    });

    try {
      let raw: string;
      try {
        raw = await collectAssistantText(gen);
      } finally {
        // Usage is recorded once per attempt, on success and failure alike.
        for (const [m, u] of Object.entries(gen.costs().modelUsage)) {
          costTracker.add(m, u);
        }
      }

      const parsed = JSON.parse(extractJson(raw)) as BriefDraft;
      if (!parsed || !Array.isArray(parsed.assertions)) {
        throw new Error("Missing assertions array");
      }
      if (!parsed.rubric || typeof parsed.rubric !== "object") {
        throw new Error("Missing rubric object");
      }
      if (parsed.assertions.length > MAX_ASSERTIONS) {
        console.warn(`[brief] Planner returned ${parsed.assertions.length} assertions — truncating to ${MAX_ASSERTIONS}`);
        parsed.assertions = parsed.assertions.slice(0, MAX_ASSERTIONS);
      }
      return { draft: parsed, costs: costTracker.get() };
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      lastFailure = `Planner failed to produce valid JSON after ${MAX_ATTEMPTS} attempts: ${reason}`;
    }
  }

  throw new BriefGenerationError(lastFailure);
}
const isTTY = Boolean(process.stdout.isTTY);

/** Returns a styler that wraps text in the given SGR code when stdout is a TTY. */
const paint = (code: number) => (s: string): string => (isTTY ? `\x1b[${code}m${s}\x1b[0m` : s);
const dim = paint(2);
const bold = paint(1);
const green = paint(32);
const red = paint(31);

/**
 * Prints the output-evaluation report: one line per assertion result (with
 * the evidence under each failure), then a pass/fail tally, a 0–100 score
 * and the verdict summary.
 *
 * @param verdict Evaluator verdict to render.
 * @param attempts Number of attempts, shown in the heading.
 */
export function displayOutputReport(verdict: OutputVerdict, attempts: number): void {
  const MAX_ASSERTION_CHARS = 55;
  const divider = dim(` ${"─".repeat(50)}`);

  console.log("");
  console.log(bold(` Brief Evaluation — ${attempts} attempt${attempts !== 1 ? "s" : ""}`));
  console.log(divider);

  for (const result of verdict.results) {
    const mark = result.passed ? green("✓") : red("✗");
    const category = colorCategory(result.category);

    // Long assertions are cut so that, with the ellipsis, they fit the limit.
    let text = result.assertion;
    if (text.length > MAX_ASSERTION_CHARS) {
      text = text.slice(0, MAX_ASSERTION_CHARS - 1) + "…";
    }

    const idLabel = result.assertion_id.toString().padStart(2);
    console.log(` ${mark} ${idLabel}. [${category}] ${text}`);

    if (!result.passed && result.evidence) {
      console.log(dim(` Evidence: ${result.evidence}`));
    }
  }

  console.log(divider);

  const total = verdict.results.length;
  // An empty result list scores 100.
  const score = total === 0 ? 100 : Math.round((verdict.passed_count / total) * 100);

  let tally: string;
  if (verdict.all_passed) {
    tally = green(` ${verdict.passed_count}/${total} passed`);
  } else {
    tally = red(` ${verdict.passed_count}/${total} passed`) + dim(` · `) + red(`${verdict.failed_count} failed`);
  }

  console.log(`${tally} · Score: ${score}/100`);
  if (verdict.summary) {
    console.log(dim(` ${verdict.summary}`));
  }
  console.log("");
}
/**
 * Builds the prompt for a retry: the original prompt (cut to 1000 characters
 * with a truncation marker when longer), the previous response, and the list
 * of failed assertions with the evaluator's evidence.
 */
function buildRetryPrompt(originalPrompt: string, failures: AssertionResult[], previousOutput: string): string {
  let promptPart = originalPrompt;
  if (promptPart.length > 1000) {
    promptPart = promptPart.slice(0, 1000) + "\n[...prompt truncated for retry...]";
  }

  const failureLines: string[] = [];
  for (const failure of failures) {
    failureLines.push(
      `- [${failure.category}] Assertion ${failure.assertion_id}: "${failure.assertion}"\n Evidence of failure: ${failure.evidence}`,
    );
  }

  return [
    promptPart,
    "",
    "---",
    "PREVIOUS RESPONSE (revise this — keep everything that already passed unchanged):",
    previousOutput,
    "",
    "---",
    "Your previous response did not satisfy these success criteria. Revise the response above",
    "to address each failure below. Do not change parts that already passed.",
    "",
    "Failed assertions:",
    failureLines.join("\n\n"),
    "",
    "Produce a complete revised response that satisfies all criteria.",
  ].join("\n");
}

/**
 * Runs the agent with a brief injected into its system prompt, evaluates the
 * output against the brief's assertions, and re-runs with a revision prompt
 * while assertions fail (up to `maxRetries` retries when `autoRetry` is on).
 *
 * Every message of every run is forwarded to the caller. Between runs a
 * system message describing the failed assertions is yielded, and a final
 * system message carries the last verdict as JSON with the accumulated costs
 * in its metadata. A brief with no assertions runs the agent once, without
 * the brief suffix and without evaluation.
 */
export async function* runWithBrief(opts: RunWithBriefOptions): AsyncGenerator<GCMessage> {
  const {
    prompt,
    dir,
    model,
    briefModel,
    brief: briefOpt,
    maxRetries = 2,
    autoRetry = true,
    showReport = true,
    env,
    hooks,
    abortController,
    sessionId,
  } = opts;

  const agentDir = dir ?? process.cwd();

  // Brief model precedence: explicit option > manifest brief.model > agent model.
  let evaluatorModel = briefModel;
  if (!evaluatorModel) {
    try {
      const agent = await loadAgent(agentDir);
      evaluatorModel = agent.manifest.brief?.model ?? model;
    } catch {
      evaluatorModel = model;
    }
  }

  const brief = await resolveBrief(agentDir, briefOpt, prompt, evaluatorModel);

  if (brief.draft.assertions.length === 0) {
    console.log(dim("[brief] No assertions in brief — skipping output evaluation."));
    yield* query({ prompt, dir, model, env, hooks, abortController, sessionId });
    return;
  }

  const briefSuffix = buildBriefSuffix(brief);
  const costTracker = new CostTracker();

  let verdict: OutputVerdict | null = null;
  let attempt = 0;
  let nextPrompt = prompt;

  while (attempt <= maxRetries) {
    attempt += 1;
    const replies: string[] = [];

    const run = query({
      prompt: nextPrompt,
      dir,
      model,
      env,
      systemPromptSuffix: briefSuffix,
      hooks,
      abortController,
      sessionId,
    });

    for await (const msg of run) {
      yield msg;
      if (msg.type === "assistant") {
        replies.push(msg.content);
      }
    }

    // Keep only the last 5 assistant messages (labelled with their original
    // position) and the last 12000 characters, so the evaluator input stays
    // manageable and it has token budget left for its JSON verdict.
    const firstKept = Math.max(0, replies.length - 5);
    const outputText = replies
      .slice(firstKept)
      .map((reply, offset) => `[Message ${firstKept + offset + 1}]:\n${reply}`)
      .join("\n\n---\n\n")
      .slice(-12000);

    for (const [m, u] of Object.entries(run.costs().modelUsage)) {
      costTracker.add(m, u);
    }

    const evaluation = await runOutputEvaluator({
      outputText,
      brief,
      dir: agentDir,
      model: evaluatorModel,
    });
    verdict = evaluation.verdict;
    for (const [m, u] of Object.entries(evaluation.costs.modelUsage)) {
      costTracker.add(m, u);
    }

    const retriesLeft = attempt <= maxRetries;
    if (verdict.all_passed || !autoRetry || !retriesLeft) break;

    const failures = verdict.results.filter(r => !r.passed);
    nextPrompt = buildRetryPrompt(prompt, failures, outputText);

    const failureDetail = failures
      .map(f => ` ✗ [${f.category}] Assertion ${f.assertion_id}: ${f.assertion}\n Evidence: ${f.evidence}`)
      .join("\n");

    yield {
      type: "system",
      subtype: "session_start",
      content: `[brief] Retry ${attempt}/${maxRetries} — ${failures.length} assertion${failures.length !== 1 ? "s" : ""} failed\n${failureDetail}`,
      metadata: { briefRetry: true, attempt, failedCount: failures.length, failures },
    } satisfies GCMessage;
  }

  if (!verdict) return;

  if (showReport) {
    displayOutputReport(verdict, attempt);
  }

  yield {
    type: "system",
    subtype: "session_end",
    content: JSON.stringify(verdict),
    metadata: { briefReport: true, attempts: attempt, costs: costTracker.get() },
  } satisfies GCMessage;
}
StalenessReport { + stale: boolean; + soulChanged: boolean; + rulesChanged: boolean; + affectedAssertions: AffectedAssertion[]; + summary: string; +} + +async function readOrEmpty(agentDir: string, filename: string): Promise { + try { + return await readFile(join(agentDir, filename), "utf-8"); + } catch { + return ""; + } +} + +export async function analyzeStaleAssertions( + agentDir: string, + brief: Brief, +): Promise { + const basicStale = await isBriefStale(agentDir, brief); + + if (!basicStale.stale) { + return { + stale: false, + soulChanged: false, + rulesChanged: false, + affectedAssertions: [], + summary: "", + }; + } + + const soulChanged = basicStale.reason?.includes("SOUL.md") ?? false; + const rulesChanged = basicStale.reason?.includes("RULES.md") ?? false; + + // Read current files to pass to evaluator + const soul = await readOrEmpty(agentDir, "SOUL.md"); + const rules = await readOrEmpty(agentDir, "RULES.md"); + const duties = await readOrEmpty(agentDir, "DUTIES.md"); + + // Run the Evaluator on the existing draft with the NEW identity files. + // It will naturally flag assertions that now conflict with or are missing from SOUL/RULES. + let affected: AffectedAssertion[] = []; + let summary = basicStale.reason ?? "Agent identity has changed."; + + try { + const { verdict } = await runBriefEvaluator({ + input: buildEvaluatorInput(brief.draft, brief.task, soul, rules, duties, 1), + agentDir, + }); + + // Only surface critical and warning issues — suggestions are noise during stale review + const relevantIssues = verdict.issues.filter( + (i: BriefIssue) => i.level === "critical" || i.level === "warning", + ); + + if (relevantIssues.length === 0) { + summary = "Brief may be outdated but no specific assertions were flagged. 
You can use it as-is or regenerate."; + } else { + summary = verdict.summary; + } + + // Map issues to AffectedAssertion display objects + affected = relevantIssues.map((issue: BriefIssue) => { + const matchedAssertion = issue.assertion_id != null + ? brief.draft.assertions.find(a => a.id === issue.assertion_id) + : null; + + return { + assertion_id: issue.assertion_id ?? null, + category: matchedAssertion?.category ?? "overall", + assertion_text: matchedAssertion?.assertion ?? "(overall brief structure)", + level: issue.level, + issue: issue.issue, + fix: issue.fix, + }; + }); + } catch { + // Evaluator failure during stale analysis is non-fatal — fall back to vague warning + summary = basicStale.reason ?? "Agent identity has changed. Consider regenerating the brief."; + } + + return { + stale: true, + soulChanged, + rulesChanged, + affectedAssertions: affected, + summary, + }; +} diff --git a/src/brief/storage.ts b/src/brief/storage.ts new file mode 100644 index 0000000..5aa3925 --- /dev/null +++ b/src/brief/storage.ts @@ -0,0 +1,274 @@ +import { readFile, writeFile, mkdir, readdir, access } from "fs/promises"; +import { join } from "path"; +import { createHash } from "crypto"; +import type { Brief, BriefDraft, BriefStatus } from "./types.js"; +import { BriefError } from "./types.js"; + +const BRIEFS_DIR = ".gitagent/briefs"; + +export function assertBriefApproved(brief: Brief): void { + if (brief.status !== "approved") { + throw new BriefError( + `Brief "${brief.id}" (v${brief.version}) is not approved (status: "${brief.status}"). 
` + + "Refusing to run against an unvalidated or superseded brief.", + ); + } +} + +// Deterministic kebab-case slug from task string, max 60 chars +export function briefId(task: string): string { + const slug = task + .toLowerCase() + .replace(/[^a-z0-9\s-]/g, "") + .trim() + .replace(/\s+/g, "-") + .replace(/-+/g, "-") + .slice(0, 60) + .replace(/-$/, ""); + return slug || "brief"; +} + +export function hashContent(content: string): string { + return createHash("sha256").update(content).digest("hex").slice(0, 8); +} + +function briefsDir(agentDir: string): string { + return join(agentDir, BRIEFS_DIR); +} + +function briefFilePath(agentDir: string, id: string, version: number): string { + const suffix = version > 1 ? `-v${version}` : ""; + return join(briefsDir(agentDir), `${id}${suffix}.md`); +} + +function serializeBrief(brief: Brief): string { + const fm = [ + "---", + `id: ${brief.id}`, + `task: ${JSON.stringify(brief.task)}`, + `agent: ${brief.agent}`, + `created_at: ${brief.created_at}`, + brief.approved_at ? `approved_at: ${brief.approved_at}` : null, + `status: ${brief.status}`, + `version: ${brief.version}`, + `planner_model: ${brief.planner_model}`, + `evaluator_model: ${brief.evaluator_model}`, + `negotiation_iterations: ${brief.negotiation_iterations}`, + `soul_hash: ${brief.soul_hash}`, + `rules_hash: ${brief.rules_hash}`, + `assertion_count: ${brief.draft.assertions.length}`, + "---", + ].filter(Boolean).join("\n"); + + const draft = brief.draft; + const escapeCell = (s: string) => s.replace(/\|/g, "\\|"); + const assertionTable = [ + "| # | Category | Assertion | How to Verify |", + "|---|---|---|---|", + ...draft.assertions.map(a => + `| ${a.id} | ${a.category} | ${escapeCell(a.assertion)} | ${escapeCell(a.test)} |`, + ), + ].join("\n"); + + const ambigSection = draft.ambiguities.length > 0 + ? 
`## Ambiguities Flagged\n\n${draft.ambiguities.map(a => `> ⚠ "${a}"`).join("\n")}\n\n` + : ""; + + const constraintsSection = draft.constraints_applied.length > 0 + ? `## Agent Constraints Applied\n\n${draft.constraints_applied.map(c => `- ${c}`).join("\n")}\n` + : ""; + + const body = [ + `# Brief: ${draft.task_summary}`, + "", + "## Task", + "", + draft.task_summary, + "", + ambigSection, + "## Success Criteria", + "", + assertionTable, + "", + "## Quality Rubric", + "", + `- **Craft:** ${draft.rubric.craft}`, + `- **Originality:** ${draft.rubric.originality}`, + `- **Tone:** ${draft.rubric.tone}`, + `- **Completeness:** ${draft.rubric.completeness}`, + "", + constraintsSection, + ``, + ].join("\n"); + + return `${fm}\n\n${body}`; +} + +function parseBrief(content: string, filePath: string): Brief { + // Extract YAML frontmatter + const fmMatch = content.match(/^---\n([\s\S]*?)\n---/); + if (!fmMatch) throw new Error(`Invalid brief file: missing frontmatter in ${filePath}`); + + const fm = fmMatch[1]; + const get = (key: string): string => { + const m = fm.match(new RegExp(`^${key}:\\s*(.+)$`, "m")); + return m ? m[1].trim() : ""; + }; + + // Extract draft JSON from HTML comment + const draftMatch = content.match(//); + if (!draftMatch) throw new Error(`Invalid brief file: missing draft_json in ${filePath}`); + const draft = JSON.parse(draftMatch[1]) as BriefDraft; + + const taskRaw = get("task"); + const task = taskRaw.startsWith('"') ? 
JSON.parse(taskRaw) : taskRaw; + const approvedAt = get("approved_at"); + + return { + id: get("id"), + task, + agent: get("agent"), + created_at: get("created_at"), + approved_at: approvedAt || undefined, + status: get("status") as BriefStatus, + version: parseInt(get("version"), 10) || 1, + planner_model: get("planner_model"), + evaluator_model: get("evaluator_model"), + negotiation_iterations: parseInt(get("negotiation_iterations"), 10) || 1, + soul_hash: get("soul_hash"), + rules_hash: get("rules_hash"), + draft, + file_path: filePath, + }; +} + +export async function saveBrief(agentDir: string, brief: Brief): Promise { + await mkdir(briefsDir(agentDir), { recursive: true }); + const filePath = briefFilePath(agentDir, brief.id, brief.version); + brief.file_path = filePath; + await writeFile(filePath, serializeBrief(brief), "utf-8"); + return filePath; +} + +export async function loadBriefFromFile(filePath: string): Promise { + const content = await readFile(filePath, "utf-8"); + return parseBrief(content, filePath); +} + +// Accepts a full path OR just a brief name/id. +// "write-a-500-word-blog-post" → "/.gitagent/briefs/write-a-500-word-blog-post.md" +export function resolveBriefPath(agentDir: string, nameOrPath: string): string { + if (nameOrPath.includes("/") || nameOrPath.includes("\\") || nameOrPath.endsWith(".md")) { + return nameOrPath; // already a path + } + return join(briefsDir(agentDir), `${nameOrPath}.md`); +} + +export async function findBrief(agentDir: string, task: string): Promise { + const id = briefId(task); + const dir = briefsDir(agentDir); + try { + await access(dir); + } catch { + return null; + } + + const files = await readdir(dir); + const versionOf = (filename: string): number => { + if (filename === `${id}.md`) return 1; + const m = filename.match(/-v(\d+)\.md$/); + return m ? 
parseInt(m[1], 10) : 1; + }; + // Find all files matching the id, sorted by version number descending (numeric, not lexicographic) + const matching = files + .filter(f => f === `${id}.md` || f.match(new RegExp(`^${id}-v\\d+\\.md$`))) + .sort((a, b) => versionOf(b) - versionOf(a)); + + for (const filename of matching) { + try { + const brief = await loadBriefFromFile(join(dir, filename)); + if (brief.status === "approved") return brief; + } catch { + // skip malformed files + } + } + return null; +} + +export async function listBriefs(agentDir: string): Promise { + const dir = briefsDir(agentDir); + try { + await access(dir); + } catch { + return []; + } + + const files = await readdir(dir); + const briefs: Brief[] = []; + for (const filename of files.filter(f => f.endsWith(".md"))) { + try { + briefs.push(await loadBriefFromFile(join(dir, filename))); + } catch { + // skip malformed + } + } + return briefs.sort((a, b) => b.created_at.localeCompare(a.created_at)); +} + +export async function isBriefStale( + agentDir: string, + brief: Brief, +): Promise<{ stale: boolean; reason?: string }> { + const reasons: string[] = []; + + const readOrEmpty = async (p: string) => { + try { return await readFile(join(agentDir, p), "utf-8"); } catch { return ""; } + }; + + const soul = await readOrEmpty("SOUL.md"); + const rules = await readOrEmpty("RULES.md"); + + const currentSoulHash = hashContent(soul); + const currentRulesHash = hashContent(rules); + + if (brief.soul_hash && currentSoulHash !== brief.soul_hash) { + reasons.push("Agent identity (SOUL.md) has changed since this brief was created. Tone assertions may be outdated."); + } + if (brief.rules_hash && currentRulesHash !== brief.rules_hash) { + reasons.push("Agent rules (RULES.md) have changed since this brief was created. Constraint assertions may be outdated."); + } + + return reasons.length > 0 ? 
{ stale: true, reason: reasons.join(" ") } : { stale: false }; +} + +export async function archiveBrief(agentDir: string, briefId: string): Promise { + const dir = briefsDir(agentDir); + const files = await readdir(dir).catch(() => [] as string[]); + const matching = files.filter(f => f === `${briefId}.md` || f.match(new RegExp(`^${briefId}-v\\d+\\.md$`))); + + for (const filename of matching) { + const filePath = join(dir, filename); + try { + const brief = await loadBriefFromFile(filePath); + if (brief.status !== "archived") { + brief.status = "archived"; + await writeFile(filePath, serializeBrief(brief), "utf-8"); + } + } catch { + // skip malformed + } + } +} + +export async function nextVersion(agentDir: string, id: string): Promise { + const dir = briefsDir(agentDir); + const files = await readdir(dir).catch(() => [] as string[]); + const versions = files + .map(f => { + if (f === `${id}.md`) return 1; + const m = f.match(new RegExp(`^${id}-v(\\d+)\\.md$`)); + return m ? parseInt(m[1], 10) : 0; + }) + .filter(v => v > 0); + return versions.length > 0 ? 
Math.max(...versions) + 1 : 1; +} diff --git a/src/brief/types.ts b/src/brief/types.ts new file mode 100644 index 0000000..994f9ba --- /dev/null +++ b/src/brief/types.ts @@ -0,0 +1,134 @@ +export type AssertionCategory = "format" | "content" | "quality" | "constraint" | "behavior" | "tone"; +export type BriefStatus = "draft" | "approved" | "archived"; +export type ComplexityLevel = "low" | "medium" | "high"; +export type IssueLevel = "critical" | "warning" | "suggestion"; + +export interface BriefAssertion { + id: number; + category: AssertionCategory; + assertion: string; + why: string; + test: string; +} + +export interface BriefRubric { + craft: string; + originality: string; + tone: string; + completeness: string; +} + +export interface BriefDraft { + task_summary: string; + ambiguities: string[]; + assertions: BriefAssertion[]; + rubric: BriefRubric; + constraints_applied: string[]; + estimated_complexity: ComplexityLevel; + recommended_max_turns: number; +} + +export interface BriefIssue { + level: IssueLevel; + assertion_id?: number | null; + field?: "assertions" | "rubric" | "ambiguities" | "overall"; + issue: string; + fix: string; +} + +export interface EvaluatorVerdict { + approved: boolean; + score: number; + issues: BriefIssue[]; + summary: string; +} + +export interface Brief { + id: string; + task: string; + agent: string; + created_at: string; + approved_at?: string; + status: BriefStatus; + version: number; + planner_model: string; + evaluator_model: string; + negotiation_iterations: number; + soul_hash: string; + rules_hash: string; + draft: BriefDraft; + file_path: string; +} + +export interface BriefOptions { + briefPath?: string; + autoBrief?: boolean; + skipApproval?: boolean; + regenerate?: boolean; + plannerModel?: string; + evaluatorModel?: string; + allowBestEffort?: boolean; +} + +export interface NegotiatorOptions { + task: string; + soul: string; + rules: string; + duties: string; + model?: string; + agentDir: string; +} + +export 
interface NegotiationResult { + draft: BriefDraft; + verdict: EvaluatorVerdict; + iterations: number; + bestEffort: boolean; + costs: import("../cost-tracker.js").SessionCosts; +} + +export interface AssertionResult { + assertion_id: number; + category: AssertionCategory; + assertion: string; + passed: boolean; + evidence: string; + notes?: string; +} + +export interface OutputVerdict { + all_passed: boolean; + passed_count: number; + failed_count: number; + results: AssertionResult[]; + summary: string; +} + +export interface RunWithBriefOptions { + prompt: string; + dir?: string; + model?: string; + briefModel?: string; // model used for Output Evaluator; falls back to model if not set + brief: BriefOptions; + maxRetries?: number; + autoRetry?: boolean; + showReport?: boolean; + env?: string; + hooks?: any; + abortController?: AbortController; + sessionId?: string; +} + +export class BriefError extends Error { + constructor(message: string) { + super(message); + this.name = "BriefError"; + } +} + +export class BriefGenerationError extends BriefError { + constructor(message: string) { + super(message); + this.name = "BriefGenerationError"; + } +} diff --git a/src/exports.ts b/src/exports.ts index 787d60f..c9243ca 100644 --- a/src/exports.ts +++ b/src/exports.ts @@ -1,6 +1,28 @@ // SDK core export { query, tool } from "./sdk.js"; +// Agent Brief — pre-execution contract negotiation +export { runBriefOrchestration, generateBrief } from "./brief/orchestrator.js"; +export type { GenerateBriefResult } from "./brief/orchestrator.js"; +export { loadBriefFromFile, listBriefs, findBrief, resolveBriefPath } from "./brief/storage.js"; +export { buildBriefSuffix } from "./brief/injector.js"; +export { runWithBrief } from "./brief/runner.js"; +export type { + Brief, + BriefDraft, + BriefAssertion, + BriefRubric, + BriefIssue, + BriefOptions, + BriefStatus, + EvaluatorVerdict, + AssertionCategory, + NegotiationResult, + AssertionResult, + OutputVerdict, + RunWithBriefOptions, 
+} from "./brief/types.js"; + // SDK types export type { Query, diff --git a/src/index.ts b/src/index.ts index 9deefca..92e93d5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -25,6 +25,14 @@ import type { LocalSession } from "./session.js"; // Imported dynamically below so the slim core has no static dependency on it — // users without voice get a clean install + a clear error if they try --voice. import { handlePluginCommand } from "./plugin-cli.js"; +import { + runBriefOrchestration, + listBriefs, + loadBriefFromFile, + displayBriefList, + displayBriefDetail, +} from "./brief/orchestrator.js"; +import { runWithBrief } from "./brief/runner.js"; import { context as otelContext } from "@opentelemetry/api"; import { initTelemetry, @@ -320,6 +328,148 @@ async function main(): Promise { return; } + // Handle brief subcommand: gitagent brief [--list|--view |--regenerate |--only] ["task"] + if (process.argv[2] === "brief") { + const briefArgs = process.argv.slice(3); + let agentDir = process.cwd(); + let briefModel: string | undefined; + let briefTask: string | undefined; + let briefCommand: "negotiate" | "list" | "view" | "regenerate" = "negotiate"; + let briefTarget: string | undefined; + let onlyMode = false; + let skipApproval = false; + let briefPath: string | undefined; + + for (let i = 0; i < briefArgs.length; i++) { + const arg = briefArgs[i]; + if ((arg === "--dir" || arg === "-d") && briefArgs[i + 1]) { + agentDir = briefArgs[++i]; + } else if ((arg === "--model" || arg === "-m") && briefArgs[i + 1]) { + briefModel = briefArgs[++i]; + } else if (arg === "--list") { + briefCommand = "list"; + } else if (arg === "--view" && briefArgs[i + 1]) { + briefCommand = "view"; + briefTarget = briefArgs[++i]; + } else if (arg === "--regenerate") { + briefCommand = "regenerate"; + if (briefArgs[i + 1] && !briefArgs[i + 1].startsWith("-")) { + briefTarget = briefArgs[++i]; + } + } else if ((arg === "--brief-path" || arg === "--briefPath") && briefArgs[i + 1]) { + briefPath 
= briefArgs[++i]; + } else if (arg === "--only") { + onlyMode = true; + } else if (arg === "--yes" || arg === "-y") { + skipApproval = true; + } else if (!arg.startsWith("-")) { + briefTask = arg; + } + } + + const resolvedDir = resolve(agentDir); + + if (briefCommand === "list") { + const briefs = await listBriefs(resolvedDir); + displayBriefList(briefs); + return; + } + + if (briefCommand === "view" && briefTarget) { + // Try as file path first, then as id + try { + let brief; + if (briefTarget.endsWith(".md")) { + brief = await loadBriefFromFile(resolve(briefTarget)); + } else { + const all = await listBriefs(resolvedDir); + brief = all.find(b => b.id === briefTarget); + } + if (!brief) { + console.error(red(`No brief found with id: ${briefTarget}`)); + process.exit(1); + } + displayBriefDetail(brief); + } catch (err: any) { + console.error(red(`Error loading brief: ${err.message}`)); + process.exit(1); + } + return; + } + + if (!briefTask) { + console.error(red('Usage: gitagent brief "task description" [--only] [--dir ]')); + console.error(dim(' gitagent brief "task" --brief-path .gitagent/briefs/task.md')); + console.error(dim(" gitagent brief --list [--dir ]")); + console.error(dim(" gitagent brief --view [--dir ]")); + console.error(dim(" gitagent brief --regenerate [--dir ]")); + process.exit(1); + } + + // Load agent manifest for name, model, and extends (parent agent) + let agentName = "agent"; + let resolvedBriefModel: string | undefined = briefModel; + let resolvedAgentModel: string | undefined; + let agentExtends: string | undefined; + try { + const loaded = await loadAgent(resolvedDir, briefModel); + agentName = loaded.manifest.name || "agent"; + resolvedAgentModel = loaded.manifest.model?.preferred; + // Brief model priority: CLI --model flag > manifest brief.model > agent model + resolvedBriefModel = briefModel ?? loaded.manifest.brief?.model ?? 
resolvedAgentModel; + agentExtends = loaded.manifest.extends; + } catch { + // ok if agent not loaded yet + } + + try { + const result = await runBriefOrchestration({ + task: briefTask, + agentDir: resolvedDir, + agentName, + agentExtends, + model: resolvedBriefModel, + options: { + briefPath: briefPath ? resolve(briefPath) : undefined, + regenerate: briefCommand === "regenerate", + skipApproval, + }, + }); + + if (result.skipped) { + console.log(dim("[brief] Brief not applied. Run without --only to execute, or approve the saved brief.")); + return; + } + + if (onlyMode) { + console.log(dim("[brief] Brief created and approved. Use it with: gitagent -p \"" + briefTask + "\" --dir " + resolvedDir)); + return; + } + + // Execute the task with the brief injected + output evaluator + console.log(bold(`\nRunning task with brief...\n`)); + for await (const msg of runWithBrief({ + prompt: briefTask, + dir: resolvedDir, + model: resolvedAgentModel, + briefModel: resolvedBriefModel, + brief: { briefPath: result.brief.file_path }, + maxRetries: 2, + })) { + if (msg.type === "assistant") { + process.stdout.write(msg.content + "\n"); + } + if (msg.type === "system" && (msg as any).metadata?.briefRetry) { + console.log(bold(`\n${msg.content}\n`)); + } + } + } catch (err: any) { + console.error(red(`[brief] Error: ${err.message}`)); + process.exit(1); + } + return; + } + const { model, dir: rawDir, prompt, env, sandbox: useSandbox, sandboxRepo, sandboxToken, repo, pat, session: sessionBranch, voice } = parseArgs(process.argv); // If --repo is given, derive a default dir from the repo URL (skip interactive prompt) diff --git a/src/loader.ts b/src/loader.ts index b8d193e..5f1bac5 100644 --- a/src/loader.ts +++ b/src/loader.ts @@ -55,6 +55,9 @@ export interface AgentManifest { delegation?: { mode: "auto" | "explicit" | "router"; router?: string }; compliance?: Record; plugins?: Record; + brief?: { + model?: string; + }; } async function readFileOr(path: string, fallback: string): 
Promise { diff --git a/src/schedule-runner.ts b/src/schedule-runner.ts index 9e677ce..e817da7 100644 --- a/src/schedule-runner.ts +++ b/src/schedule-runner.ts @@ -3,6 +3,8 @@ import { discoverSchedules, updateScheduleMeta, type ScheduleDefinition } from " import { mkdirSync, appendFileSync } from "fs"; import { join } from "path"; import type { ServerMessage } from "./adapter.js"; +import { findBrief, isBriefStale } from "./brief/storage.js"; +import { buildBriefSuffix } from "./brief/injector.js"; const dim = (s: string) => `\x1b[2m${s}\x1b[0m`; @@ -102,8 +104,24 @@ export async function executeScheduledJob(schedule: ScheduleDefinition, opts: Sc let result = ""; let success = true; + // Look for an approved brief for this scheduled task and fold it into the prompt + let promptToRun = schedule.prompt; try { - result = await opts.runPrompt(schedule.prompt); + const existing = await findBrief(opts.agentDir, schedule.prompt); + if (existing) { + const staleness = await isBriefStale(opts.agentDir, existing); + if (staleness.stale) { + console.log(dim(`[scheduler] ⚠ Brief for "${schedule.id}" may be stale: ${staleness.reason}`)); + } + promptToRun = `${schedule.prompt}\n\n${buildBriefSuffix(existing)}`; + console.log(dim(`[scheduler] Injecting brief "${existing.id}" into "${schedule.id}"`)); + } + } catch { + // Brief lookup failure is non-fatal + } + + try { + result = await opts.runPrompt(promptToRun); } catch (err: any) { result = err.message || "Unknown error"; success = false;