Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
590 changes: 590 additions & 0 deletions package-lock.json

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@
"@opentelemetry/semantic-conventions": "^1.40.0",
"@sinclair/typebox": "^0.34.41",
"js-yaml": "^4.1.0",
"jszip": "^3.10.1",
"mammoth": "^1.12.0",
"node-cron": "^3.0.3",
"pdf-parse": "^2.4.5",
"xlsx": "^0.18.5",
"yaml": "^2.8.2"
},
"peerDependencies": {
Expand All @@ -76,8 +80,10 @@
},
"devDependencies": {
"@types/js-yaml": "^4.0.9",
"@types/jszip": "^3.4.0",
"@types/node": "^22.0.0",
"@types/node-cron": "^3.0.11",
"@types/pdf-parse": "^1.1.5",
"typescript": "^5.7.0"
}
}
87 changes: 87 additions & 0 deletions src/compression/cache-aligner.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/**
* CacheAligner — stabilizes the system prompt prefix to maximize KV cache hits.
*
* Anthropic and OpenAI both cache the prefix of the system prompt. If the prefix
* is identical across requests, the provider reuses the cached KV state and
* bills those tokens at a reduced cached-input rate rather than full price. If
* anything changes at the top — even a timestamp or a dynamic memory line — the
* cache misses and you pay full price for the whole prompt.
*
* The fix: put all STATIC content first (SOUL.md, RULES.md, knowledge, skills),
* and all DYNAMIC content last (memory, current task, recent conversation).
*
* This module assembles a system prompt in that order and reports the stable
* prefix length so callers can log or track cache effectiveness.
*/

/**
* Raw text for each section of the system prompt, grouped by how often it changes.
* alignSystemPrompt renders every non-blank field under its own "# ..." header;
* a field that is empty or whitespace-only is omitted from the prompt.
*/
export interface SystemPromptParts {
/** Static — never changes between sessions (SOUL.md, RULES.md). Rendered under "# Identity". */
identity: string;
/** Static — knowledge files marked always_load. Rendered under "# Knowledge". */
knowledge: string;
/** Static — skill definitions loaded for this session. Rendered under "# Skills". */
skills: string;
/** Dynamic — changes every session (memory, conversation summary). Rendered under "# Memory". */
memory: string;
/** Dynamic — changes every turn. Rendered under "# Current Task". */
task: string;
}

/**
* Result of alignSystemPrompt: the assembled prompt plus size figures for its
* static (cacheable) and dynamic parts. Token counts are estimates computed as
* ceil(characters / 4), not real tokenizer output.
*/
export interface AlignedPrompt {
/** The full assembled system prompt */
prompt: string;
/**
* Character index where the static prefix ends and dynamic content begins.
* Equal to the static prefix's length, so prompt.slice(0, staticPrefixEnd) is the static prefix.
*/
staticPrefixEnd: number;
/** Estimated tokens in the static prefix (eligible for KV cache) */
staticTokens: number;
/** Estimated tokens in the dynamic suffix (never cached) */
dynamicTokens: number;
}

/**
 * Rough token estimate for a piece of text: one token per four characters,
 * rounded up. This is a heuristic, not a real tokenizer.
 */
function estimateTokens(s: string): number {
  const CHARS_PER_TOKEN = 4;
  return Math.ceil(s.length / CHARS_PER_TOKEN);
}

/**
 * Renders one prompt section as the header, a blank line, then the trimmed content.
 *
 * @param header - Heading line for the section, e.g. "# Identity".
 * @param content - Section text; surrounding whitespace is trimmed.
 * @returns The rendered section, or "" when the content is empty or whitespace-only.
 */
function section(header: string, content: string): string {
  const body = content.trim();
  return body.length === 0 ? "" : `${header}\n\n${body}`;
}

/**
 * Assembles a system prompt with static parts first and dynamic parts last, so
 * the leading portion stays identical across requests and is eligible for
 * provider-side KV cache reuse.
 *
 * Order: Identity, Knowledge, Skills (static), then Memory, Current Task
 * (dynamic). Blank parts are skipped; sections are separated by one blank line.
 *
 * @param parts - Raw text for each section.
 * @returns The assembled prompt, the character index at which the static
 *   prefix ends, and estimated token counts for the static and dynamic parts.
 */
export function alignSystemPrompt(parts: SystemPromptParts): AlignedPrompt {
  const separator = "\n\n";

  const staticPrefix = [
    section("# Identity", parts.identity),
    section("# Knowledge", parts.knowledge),
    section("# Skills", parts.skills),
  ]
    .filter(Boolean)
    .join(separator);

  const dynamicSuffix = [
    section("# Memory", parts.memory),
    section("# Current Task", parts.task),
  ]
    .filter(Boolean)
    .join(separator);

  // Join only the non-empty halves: with no static content the prompt must
  // start at the first dynamic section, not with a stray separator.
  const prompt = [staticPrefix, dynamicSuffix].filter(Boolean).join(separator);

  return {
    prompt,
    // The prompt always begins with the static prefix, so its length is the boundary.
    staticPrefixEnd: staticPrefix.length,
    staticTokens: estimateTokens(staticPrefix),
    dynamicTokens: estimateTokens(dynamicSuffix),
  };
}

/**
 * Fraction of the prompt's estimated tokens that sit in the static prefix.
 *
 * @param aligned - Result of alignSystemPrompt.
 * @returns A ratio in [0, 1]: 1.0 means fully cacheable, 0.0 means nothing is
 *   cacheable. An empty prompt (zero tokens in total) yields 0.
 */
export function cacheEfficiency(aligned: AlignedPrompt): number {
  const { staticTokens, dynamicTokens } = aligned;
  const total = staticTokens + dynamicTokens;
  return total === 0 ? 0 : staticTokens / total;
}
166 changes: 166 additions & 0 deletions src/compression/code-compressor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/**
* CodeCompressor — strips noise from source code before the LLM sees it.
*
* When an agent reads code files, most of the token cost is comments,
* docstrings, blank lines, and decorative whitespace. The LLM needs the
* structure and logic — not the annotations.
*
* Techniques applied:
* 1. Strip single-line comments (// and #)
* 2. Strip block comments (/* ... *\/ and """ ... """)
* 3. Collapse multiple blank lines into one
* 4. Trim trailing whitespace per line
*
* Does NOT strip:
* - String literals (could contain // or # that look like comments)
* - Type annotations (useful signal for the LLM)
* - Import statements
*/

/**
* Source-language families the compressor distinguishes. "unknown" marks files
* whose extension is not recognized; compressCode returns those unchanged.
*/
export type Language = "ts" | "js" | "py" | "go" | "rust" | "java" | "cpp" | "unknown";

/**
* Lookup from lowercase file extension (leading dot included) to language family.
* TSX/JSX map to their base language, and C sources and headers (.c, .h) are
* grouped with C++ under "cpp".
*/
const EXTENSION_MAP: Record<string, Language> = {
".ts": "ts",
".tsx": "ts",
".js": "js",
".jsx": "js",
".py": "py",
".go": "go",
".rs": "rust",
".java": "java",
".cpp": "cpp",
".cc": "cpp",
".c": "cpp",
".h": "cpp",
};

/**
 * Maps a file name (or path) to a language family using its final extension.
 *
 * The extension is everything from the last "." onward, compared
 * case-insensitively. A name without any "." yields its last character, which
 * never matches a table key, so such names resolve to "unknown".
 *
 * @param filename - File name or path to classify.
 * @returns The matching language, or "unknown" when the extension is not in the table.
 */
export function detectLanguage(filename: string): Language {
  const dotIndex = filename.lastIndexOf(".");
  const extension = filename.slice(dotIndex).toLowerCase();
  const language = EXTENSION_MAP[extension];
  return language ?? "unknown";
}

/**
 * Removes `//` line comments while leaving `//` inside string literals alone
 * (so URLs such as "https://..." in strings survive).
 *
 * The scanner walks each line tracking whether it is inside a '...', "..." or
 * `...` literal. A backslash inside a literal escapes the next character, so
 * an escaped backslash right before the closing quote still closes the literal.
 * Backtick literals may span lines and their state carries over; an
 * unterminated '...' or "..." is dropped at the end of its line.
 *
 * When a comment is removed, trailing whitespace before it is removed too.
 * Lines without a comment are returned untouched.
 *
 * Known limits: regex literals and `${...}` expressions inside template
 * literals are not parsed.
 *
 * @param code - Source text in a language with `//` comments.
 * @returns The text with line comments removed; the line count is unchanged.
 */
function stripSlashComments(code: string): string {
  // Quote character of the literal the scanner is currently inside, or null.
  let openQuote: string | null = null;
  const out: string[] = [];

  for (const line of code.split("\n")) {
    // Only backtick literals legitimately continue onto the next line.
    if (openQuote !== "`") openQuote = null;

    let commentAt = -1;
    for (let i = 0; i < line.length; i++) {
      const ch = line[i];
      if (openQuote !== null) {
        if (ch === "\\") {
          i++; // skip the escaped character, whatever it is
        } else if (ch === openQuote) {
          openQuote = null;
        }
        continue;
      }
      if (ch === '"' || ch === "'" || ch === "`") {
        openQuote = ch;
      } else if (ch === "/" && line[i + 1] === "/") {
        commentAt = i;
        break;
      }
    }

    out.push(commentAt === -1 ? line : line.slice(0, commentAt).trimEnd());
  }

  return out.join("\n");
}

/**
 * Removes `#` line comments while leaving `#` inside string literals alone.
 *
 * The scanner tracks '...' and "..." literals as well as triple-quoted
 * literals. A backslash inside a literal escapes the next character, so an
 * escaped backslash right before the closing quote still closes the literal.
 * Triple-quoted literals may span lines and their state carries over; an
 * unterminated single-quoted literal is dropped at the end of its line.
 *
 * When a comment is removed, trailing whitespace before it is removed too.
 * Lines without a comment are returned untouched. Any line starting with `#`
 * (including a shebang) becomes empty.
 *
 * @param code - Source text in a language with `#` comments (Python).
 * @returns The text with line comments removed; the line count is unchanged.
 */
function stripHashComments(code: string): string {
  // Delimiter of the literal the scanner is inside: one quote character,
  // three of them for a triple-quoted literal, or null when in code.
  let openQuote: string | null = null;
  const out: string[] = [];

  for (const line of code.split("\n")) {
    // Only triple-quoted literals legitimately continue onto the next line.
    if (openQuote !== null && openQuote.length === 1) openQuote = null;

    let commentAt = -1;
    for (let i = 0; i < line.length; i++) {
      const ch = line.charAt(i);
      if (openQuote !== null) {
        if (ch === "\\") {
          i++; // skip the escaped character, whatever it is
        } else if (line.startsWith(openQuote, i)) {
          i += openQuote.length - 1;
          openQuote = null;
        }
        continue;
      }
      if (ch === '"' || ch === "'") {
        const triple = ch.repeat(3);
        if (line.startsWith(triple, i)) {
          openQuote = triple;
          i += 2;
        } else {
          openQuote = ch;
        }
      } else if (ch === "#") {
        commentAt = i;
        break;
      }
    }

    out.push(commentAt === -1 ? line : line.slice(0, commentAt).trimEnd());
  }

  return out.join("\n");
}

/**
 * Removes C-style block comments (slash-star ... star-slash) from source text.
 *
 * Unlike a plain regex replace, the scanner skips over:
 *  - string literals ('...', "...", `...`), so text such as a glob pattern
 *    "src/**" + "/*.ts" inside a string is never mistaken for a comment;
 *  - `//` line comments, so a comment opener mentioned inside one cannot
 *    swallow the code on the following lines.
 *
 * A backslash inside a literal escapes the next character. '...' and "..."
 * literals end at a newline if left unterminated; backtick literals may span
 * lines. An unterminated block comment is left in place.
 *
 * @param code - Source text in a language with C-style comments.
 * @returns The text with every complete block comment removed.
 */
function stripBlockComments(code: string): string {
  const kept: string[] = [];
  let copyFrom = 0; // start of the span of `code` not yet emitted
  let openQuote: string | null = null; // quote char of the literal we are inside
  let i = 0;

  while (i < code.length) {
    const ch = code[i];

    if (openQuote !== null) {
      if (ch === "\\") {
        i += 2; // skip the escaped character
        continue;
      }
      if (ch === openQuote || (ch === "\n" && openQuote !== "`")) openQuote = null;
      i++;
      continue;
    }

    if (ch === '"' || ch === "'" || ch === "`") {
      openQuote = ch;
      i++;
      continue;
    }

    if (ch === "/" && code[i + 1] === "/") {
      // Line comment: jump past it so nothing inside is interpreted.
      const eol = code.indexOf("\n", i);
      if (eol === -1) break;
      i = eol + 1;
      continue;
    }

    if (ch === "/" && code[i + 1] === "*") {
      const close = code.indexOf("*/", i + 2);
      if (close === -1) break; // unterminated: keep the remainder verbatim
      kept.push(code.slice(copyFrom, i));
      copyFrom = close + 2;
      i = copyFrom;
      continue;
    }

    i++;
  }

  kept.push(code.slice(copyFrom));
  return kept.join("");
}

/**
 * Removes Python triple-quoted strings that stand alone as statements
 * (docstrings), using either double or single quote characters.
 *
 * A triple-quoted string is removed only when all of these hold:
 *  - nothing but whitespace precedes it on its line (and the previous line
 *    does not end with a backslash continuation);
 *  - nothing but whitespace or a `#` comment follows its closing quotes;
 *  - it is not inside (), [] or {} (i.e. it is not an argument or element).
 *
 * The text is scanned as a whole, skipping over every string literal and `#`
 * comment, so the closing delimiter of an assigned multi-line string is never
 * mistaken for the start of a docstring. Removal covers the string plus the
 * indentation and blank lines directly before it; the newline after it stays.
 *
 * Strings with a prefix (r, f, b, u) are not treated as docstrings.
 *
 * @param code - Python source text.
 * @returns The text with standalone triple-quoted strings removed.
 */
function stripPythonDocstrings(code: string): string {
  const kept: string[] = [];
  let copyFrom = 0; // start of the span of `code` not yet emitted
  let depth = 0; // open (), [] and {} outside strings and comments
  let i = 0;

  while (i < code.length) {
    const ch = code.charAt(i);

    if (ch === "#") {
      // Skip the comment so quotes and brackets inside it are inert.
      const eol = code.indexOf("\n", i);
      if (eol === -1) break;
      i = eol;
      continue;
    }
    if (ch === "(" || ch === "[" || ch === "{") {
      depth++;
      i++;
      continue;
    }
    if (ch === ")" || ch === "]" || ch === "}") {
      depth = Math.max(0, depth - 1);
      i++;
      continue;
    }
    if (ch !== '"' && ch !== "'") {
      i++;
      continue;
    }

    // Scan the complete string literal that starts at i.
    const tripleDelim = ch.repeat(3);
    const isTriple = code.startsWith(tripleDelim, i);
    const delim = isTriple ? tripleDelim : ch;
    let end = i + delim.length;
    let closed = false;
    while (end < code.length) {
      const c = code.charAt(end);
      if (c === "\\") {
        end += 2; // skip the escaped character
        continue;
      }
      if (!isTriple && c === "\n") break; // unterminated single-line literal
      if (code.startsWith(delim, end)) {
        end += delim.length;
        closed = true;
        break;
      }
      end++;
    }
    end = Math.min(end, code.length);

    if (isTriple && closed && depth === 0) {
      // Walk back over whitespace to the last code character before the string.
      let before = i;
      while (before > 0 && /\s/.test(code.charAt(before - 1))) before--;
      const startsStatement =
        before === 0 ||
        (code.charAt(before - 1) !== "\\" && code.slice(before, i).includes("\n"));

      let after = end;
      while (after < code.length && " \t\r".includes(code.charAt(after))) after++;
      const endsStatement =
        after === code.length || code.charAt(after) === "\n" || code.charAt(after) === "#";

      if (startsStatement && endsStatement) {
        // Cut from the first line start after the preceding code line.
        const cutFrom = before === 0 ? 0 : code.indexOf("\n", before) + 1;
        kept.push(code.slice(copyFrom, cutFrom));
        copyFrom = end;
      }
    }

    i = end;
  }

  kept.push(code.slice(copyFrom));
  return kept.join("");
}

/**
 * Collapses every run of three or more consecutive newline characters (i.e.
 * two or more empty lines in a row) into exactly two newlines, leaving a
 * single empty line. Lines that contain only spaces or tabs are not treated
 * as empty.
 */
function collapseBlankLines(code: string): string {
  const runsOfNewlines = /\n{3,}/;
  return code.split(runsOfNewlines).join("\n\n");
}

/**
 * Strips trailing whitespace from every line. Lines are split on "\n" and
 * re-joined with "\n", so the number of lines is unchanged.
 */
function trimTrailingWhitespace(code: string): string {
  const lines = code.split("\n");
  for (let idx = 0; idx < lines.length; idx++) {
    lines[idx] = lines[idx].trimEnd();
  }
  return lines.join("\n");
}

/**
 * Rough token estimate used to measure compression: the character count
 * divided by four, rounded up. A heuristic, not a real tokenizer.
 */
function estimateTokens(s: string): number {
  const charsPerToken = 4;
  const estimate = s.length / charsPerToken;
  return Math.ceil(estimate);
}

/**
* Outcome of compressCode. Token figures are estimates (ceil(characters / 4)).
*/
export interface CompressionResult {
/**
* The compressed source, or the original text unchanged when the language is
* unknown or compression did not lower the estimated token count.
*/
compressed: string;
/** Estimated tokens in the input text. */
originalTokens: number;
/** Estimated tokens in `compressed`; equals originalTokens when the original is returned. */
compressedTokens: number;
/** Token reduction as a percentage rounded to a whole number; 0 when the original is returned. */
reductionPct: number;
/** Language detected from the file name. */
language: Language;
}

/**
 * Compresses source code by removing comments and whitespace noise.
 *
 * Steps for a recognized language:
 *  1. Strip comments: docstrings and `#` comments for Python; block and `//`
 *     comments for every other supported language.
 *  2. Trim trailing whitespace on each line.
 *  3. Collapse runs of blank lines into a single blank line.
 *  4. Trim leading and trailing whitespace of the whole text.
 *
 * Trimming runs before collapsing so that whitespace-only lines left behind
 * by removed indented comments count as blank lines and are collapsed too.
 *
 * @param text - Source text to compress.
 * @param filename - File name or path; its extension selects the language.
 * @returns The compression result. The original text is returned unchanged
 *   (with reductionPct 0) when the language is unknown or when compression
 *   does not lower the estimated token count.
 */
export function compressCode(text: string, filename: string): CompressionResult {
  const language = detectLanguage(filename);
  const originalTokens = estimateTokens(text);
  const unchanged: CompressionResult = {
    compressed: text,
    originalTokens,
    compressedTokens: originalTokens,
    reductionPct: 0,
    language,
  };

  if (language === "unknown") return unchanged;

  let result = text;
  switch (language) {
    case "py":
      result = stripPythonDocstrings(result);
      result = stripHashComments(result);
      break;
    case "ts":
    case "js":
    case "go":
    case "rust":
    case "java":
    case "cpp":
      result = stripBlockComments(result);
      result = stripSlashComments(result);
      break;
  }

  // Trim first: a line holding only leftover indentation must become empty
  // before blank-line runs are collapsed, or it would break up the run.
  result = trimTrailingWhitespace(result);
  result = collapseBlankLines(result);
  result = result.trim();

  const compressedTokens = estimateTokens(result);
  if (compressedTokens >= originalTokens) return unchanged;

  const reductionPct = Math.round(((originalTokens - compressedTokens) / originalTokens) * 100);
  return { compressed: result, originalTokens, compressedTokens, reductionPct, language };
}

/**
 * Reports whether the compressor recognizes a file's extension.
 *
 * @param filename - File name or path to check.
 * @returns true when detectLanguage resolves the name to anything other than "unknown".
 */
export function isSourceFile(filename: string): boolean {
  const language = detectLanguage(filename);
  return language !== "unknown";
}
8 changes: 8 additions & 0 deletions src/compression/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
export { crushJson, isJson } from "./smart-crusher.js";
export type { CompressionResult as JsonCompressionResult } from "./smart-crusher.js";

export { compressCode, isSourceFile, detectLanguage } from "./code-compressor.js";
export type { CompressionResult as CodeCompressionResult, Language } from "./code-compressor.js";

export { alignSystemPrompt, cacheEfficiency } from "./cache-aligner.js";
export type { SystemPromptParts, AlignedPrompt } from "./cache-aligner.js";
Loading