Add ez-assistant and kerberos service folders

This commit is contained in:
kelin
2026-02-11 14:56:03 -05:00
parent e4e8ae1b87
commit 9ccfb36923
4471 changed files with 746463 additions and 0 deletions


@@ -0,0 +1,105 @@
# OpenProse Standard Library
Core programs that ship with OpenProse. Production-quality, well-tested programs for common tasks.
## Programs
### Evaluation & Improvement
| Program | Description |
|---------|-------------|
| `inspector.prose` | Post-run analysis for runtime fidelity and task effectiveness |
| `vm-improver.prose` | Analyzes inspections and proposes PRs to improve the VM |
| `program-improver.prose` | Analyzes inspections and proposes PRs to improve .prose source |
| `cost-analyzer.prose` | Token usage and cost pattern analysis |
| `calibrator.prose` | Validates light evaluations against deep evaluations |
| `error-forensics.prose` | Root cause analysis for failed runs |
### Memory
| Program | Description |
|---------|-------------|
| `user-memory.prose` | Cross-project persistent personal memory |
| `project-memory.prose` | Project-scoped institutional memory |
## The Improvement Loop
The evaluation programs form a recursive improvement cycle:
```
┌─────────────────────────────────────────────────────────────┐
│ │
│ Run Program ──► Inspector ──► VM Improver ──► PR │
│ ▲ │ │
│ │ ▼ │
│ │ Program Improver ──► PR │
│ │ │ │
│ └────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
```
Supporting analysis:
- **cost-analyzer** — Where does the money go? Optimization opportunities.
- **calibrator** — Are cheap evaluations reliable proxies for expensive ones?
- **error-forensics** — Why did a run fail? Root cause analysis.
## Usage
```bash
# Inspect a completed run
prose run lib/inspector.prose
# Inputs: run_path, depth (light|deep), target (vm|task|all)
# Propose VM improvements
prose run lib/vm-improver.prose
# Inputs: inspection_path, prose_repo
# Propose program improvements
prose run lib/program-improver.prose
# Inputs: inspection_path, run_path
# Analyze costs
prose run lib/cost-analyzer.prose
# Inputs: run_path, scope (single|compare|trend)
# Validate light vs deep evaluation
prose run lib/calibrator.prose
# Inputs: run_paths, sample_size
# Investigate failures
prose run lib/error-forensics.prose
# Inputs: run_path, focus (vm|program|context|external)
# Memory programs (recommend sqlite+ backend)
prose run lib/user-memory.prose --backend sqlite+
# Inputs: mode (teach|query|reflect), content
prose run lib/project-memory.prose --backend sqlite+
# Inputs: mode (ingest|query|update|summarize), content
```
## Memory Programs
The memory programs use persistent agents to accumulate knowledge:
**user-memory** (`persist: user`)
- Learns your preferences, decisions, patterns across all projects
- Remembers mistakes and lessons learned
- Answers questions from accumulated knowledge
**project-memory** (`persist: project`)
- Understands this project's architecture and decisions
- Tracks why things are the way they are
- Answers questions with project-specific context
Run both with `--backend sqlite+` for durable persistence.
## Design Principles
1. **Production-ready** — Tested, documented, handles edge cases
2. **Composable** — Can be imported via `use` in other programs
3. **User-scoped state** — Cross-project utilities use `persist: user`
4. **Minimal dependencies** — No external services required
5. **Clear contracts** — Well-defined inputs and outputs
6. **Incremental value** — Useful in simple mode, more powerful with depth


@@ -0,0 +1,215 @@
# Calibrator
# Validates that lightweight evaluations are reliable proxies for deep evaluations
#
# Usage:
# prose run @openprose/lib/calibrator
#
# Purpose:
# Run both light and deep inspections on the same runs, compare results,
# and build confidence (or identify gaps) in light evaluations.
#
# Inputs:
# run_paths: Paths to runs to calibrate on (comma-separated or glob)
# sample_size: How many runs to sample (if more available)
#
# Outputs:
# - Agreement rate between light and deep
# - Cases where they disagree
# - Recommendations for improving light evaluation
input run_paths: "Paths to runs (comma-separated, or 'recent' for latest)"
input sample_size: "Max runs to analyze (default: 10)"
# ============================================================
# Agents
# ============================================================
agent sampler:
model: sonnet
prompt: """
You select runs for calibration analysis.
Prefer diverse runs: different programs, outcomes, sizes.
"""
agent comparator:
model: opus
prompt: """
You compare light vs deep evaluation results with nuance.
Identify agreement, disagreement, and edge cases.
"""
agent statistician:
model: sonnet
prompt: """
You compute statistics and confidence intervals.
"""
agent advisor:
model: opus
prompt: """
You recommend improvements to evaluation criteria.
"""
# ============================================================
# Phase 1: Select Runs
# ============================================================
let selected_runs = session: sampler
prompt: """
Select runs for calibration.
Input: {run_paths}
Sample size: {sample_size}
If run_paths is "recent", find recent runs in .prose/runs/
If specific paths, use those.
Select a diverse sample:
- Different programs if possible
- Mix of successful and partial/failed if available
- Different sizes (small vs large runs)
Return list of run paths.
"""
# ============================================================
# Phase 2: Run Both Inspection Depths
# ============================================================
let calibration_data = selected_runs | map:
 # Run light then deep on each run (deep uses light's result, so the two can't run in parallel)
let light = session "Light inspection"
prompt: """
Run a LIGHT inspection on: {item}
Evaluate quickly:
- completion: did it finish cleanly?
- binding_integrity: do expected outputs exist?
- output_substance: do outputs have real content?
- goal_alignment: does output match program purpose?
Score each 1-10, give verdicts (pass/partial/fail).
Return JSON.
"""
let deep = session "Deep inspection"
prompt: """
Run a DEEP inspection on: {item}
Evaluate thoroughly:
- Read the full program source
- Trace execution step by step
- Check each binding's content
- Evaluate output quality in detail
- Assess fidelity (did VM follow program correctly?)
- Assess efficiency (reasonable steps for the job?)
Score each dimension 1-10, give verdicts.
Return JSON.
"""
context: light # Deep can see light's assessment
session "Package results"
prompt: """
Package the light and deep inspection results.
Run: {item}
Light: {light}
Deep: {deep}
Return:
{
"run_path": "...",
"light": { verdicts, scores },
"deep": { verdicts, scores },
"agreement": {
"vm_verdict": true/false,
"task_verdict": true/false,
"score_delta": { ... }
}
}
"""
context: { light, deep }
# ============================================================
# Phase 3: Statistical Analysis
# ============================================================
let statistics = session: statistician
prompt: """
Compute calibration statistics.
Data: {calibration_data}
Calculate:
- Overall agreement rate (how often do light and deep agree?)
- Agreement by verdict type (vm vs task)
- Score correlation (do light scores predict deep scores?)
- Disagreement patterns (when do they diverge?)
Return:
{
"sample_size": N,
"agreement_rate": { overall, vm, task },
"score_correlation": { ... },
"disagreements": [ { run, light_said, deep_said, reason } ],
"confidence": "high" | "medium" | "low"
}
"""
context: calibration_data
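The agreement arithmetic the statistician performs can be sketched in a few lines. This is a minimal illustration, assuming entries shaped like the JSON packaged in Phase 2; the field names are taken from that schema, and the helper name is hypothetical.

```python
def agreement_rates(entries):
    """Compute overall/vm/task agreement fractions over calibration entries.

    Each entry is assumed to carry an "agreement" dict with boolean
    "vm_verdict" and "task_verdict" fields, as packaged in Phase 2.
    """
    n = len(entries)
    vm = sum(1 for e in entries if e["agreement"]["vm_verdict"])
    task = sum(1 for e in entries if e["agreement"]["task_verdict"])
    # "Overall" here means light and deep agreed on both verdicts.
    both = sum(1 for e in entries
               if e["agreement"]["vm_verdict"] and e["agreement"]["task_verdict"])
    return {"overall": both / n, "vm": vm / n, "task": task / n}
```

With two entries, one agreeing on both verdicts and one only on the VM verdict, this yields an overall rate of 0.5.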
# ============================================================
# Phase 4: Recommendations
# ============================================================
let recommendations = session: advisor
prompt: """
Based on calibration results, recommend improvements.
Statistics: {statistics}
Raw data: {calibration_data}
If agreement is high (>90%):
- Light evaluation is reliable
- Note any edge cases to watch
If agreement is medium (70-90%):
- Identify patterns in disagreements
- Suggest criteria adjustments
If agreement is low (<70%):
- Light evaluation needs work
- Specific recommendations for improvement
Return:
{
"reliability_verdict": "reliable" | "mostly_reliable" | "needs_work",
"key_findings": [...],
"recommendations": [
{ "priority": 1, "action": "...", "rationale": "..." }
]
}
"""
context: { statistics, calibration_data }
# ============================================================
# Output
# ============================================================
output report = session "Format report"
prompt: """
Format calibration results as a report.
Statistics: {statistics}
Recommendations: {recommendations}
Include:
1. Summary: Is light evaluation reliable?
2. Agreement rates (table)
3. Disagreement cases (if any)
4. Recommendations
5. Confidence level in these results
Format as markdown.
"""
context: { statistics, recommendations, calibration_data }


@@ -0,0 +1,174 @@
# Cost Analyzer
# Analyzes runs for token usage and cost patterns
#
# Usage:
# prose run @openprose/lib/cost-analyzer
#
# Inputs:
# run_path: Path to run to analyze, or "recent" for latest runs
# scope: single | compare | trend
#
# Outputs:
# - Token usage breakdown by agent/phase
# - Model tier efficiency analysis
# - Cost hotspots
# - Optimization recommendations
input run_path: "Path to run, or 'recent' for latest runs in .prose/runs/"
input scope: "Scope: single (one run) | compare (multiple runs) | trend (over time)"
# ============================================================
# Agents
# ============================================================
agent collector:
model: sonnet
prompt: """
You collect and structure cost/token data from .prose runs.
Extract from run artifacts:
- Model used per session (haiku/sonnet/opus)
- Approximate token counts (estimate from content length)
- Session count per agent
- Parallel vs sequential execution
"""
agent analyzer:
model: opus
prompt: """
You analyze cost patterns and identify optimization opportunities.
Consider:
- Model tier appropriateness (is opus needed, or would sonnet suffice?)
- Token efficiency (are contexts bloated?)
- Parallelization (could sequential steps run in parallel?)
- Caching opportunities (repeated computations?)
"""
agent tracker:
model: haiku
persist: user
prompt: """
You track cost metrics across runs for trend analysis.
Store compactly: run_id, program, total_cost_estimate, breakdown.
"""
# ============================================================
# Phase 1: Collect Run Data
# ============================================================
let runs_to_analyze = session: collector
prompt: """
Find runs to analyze.
Input: {run_path}
Scope: {scope}
If run_path is a specific path, use that run.
If run_path is "recent", find the latest 5-10 runs in .prose/runs/
For scope=compare, find runs of the same program.
For scope=trend, find runs over time.
Return: list of run paths to analyze
"""
let run_data = runs_to_analyze | pmap:
session: collector
prompt: """
Extract cost data from run: {item}
Read state.md and bindings to determine:
1. Program name
2. Each session spawned:
- Agent name (or "anonymous")
- Model tier
- Estimated input tokens (context size)
- Estimated output tokens (binding size)
3. Parallel blocks (how many concurrent sessions)
4. Total session count
Estimate costs using rough rates:
- haiku: $0.25 / 1M input, $1.25 / 1M output
- sonnet: $3 / 1M input, $15 / 1M output
- opus: $15 / 1M input, $75 / 1M output
Return structured JSON.
"""
context: item
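The rough rates quoted in the prompt translate directly into an estimator. A minimal sketch; the rates are the illustrative figures from the prompt above (not live pricing), and the function name is an assumption.

```python
# Rough per-million-token rates as quoted in the collector prompt.
# These are illustrative only; real pricing changes over time.
RATES = {  # model: (input $/1M tokens, output $/1M tokens)
    "haiku": (0.25, 1.25),
    "sonnet": (3.00, 15.00),
    "opus": (15.00, 75.00),
}

def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
    """Return an estimated dollar cost for one session."""
    in_rate, out_rate = RATES[model]
    return (input_tokens * in_rate + output_tokens * out_rate) / 1_000_000

# Example: a sonnet session with 200k input and 8k output tokens
# comes to $0.60 + $0.12 = $0.72.
```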
# ============================================================
# Phase 2: Analyze
# ============================================================
let analysis = session: analyzer
prompt: """
Analyze cost patterns across these runs.
Data: {run_data}
Scope: {scope}
For single run:
- Break down cost by agent and phase
- Identify the most expensive operations
- Flag potential inefficiencies
For compare:
- Show cost differences between runs
- Identify which changes affected cost
- Note if cost increased/decreased
For trend:
- Show cost over time
- Identify if costs are stable, growing, or improving
- Flag anomalies
Always include:
- Model tier efficiency (are expensive models used appropriately?)
- Context efficiency (are contexts lean or bloated?)
- Specific optimization recommendations
Return structured JSON with:
{
"summary": { total_cost, session_count, by_model: {...} },
"hotspots": [ { agent, cost, percent, issue } ],
"recommendations": [ { priority, description, estimated_savings } ],
"details": { ... }
}
"""
context: run_data
# ============================================================
# Phase 3: Track for Trends
# ============================================================
resume: tracker
prompt: """
Record this cost analysis for future trend tracking.
{analysis.summary}
Add to your historical record.
"""
context: analysis
# ============================================================
# Output
# ============================================================
output report = session "Format report"
prompt: """
Format the cost analysis as a readable report.
Analysis: {analysis}
Include:
1. Executive summary (total cost, key finding)
2. Cost breakdown table
3. Hotspots (where money goes)
4. Recommendations (prioritized)
5. If scope=trend, include trend chart (ascii or description)
Format as markdown.
"""
context: analysis


@@ -0,0 +1,250 @@
# Error Forensics
# Deep investigation of failed or problematic runs
#
# Usage:
# prose run @openprose/lib/error-forensics
#
# Inputs:
# run_path: Path to the failed/problematic run
# focus: Optional focus area (vm | program | context | external)
#
# Outputs:
# - Root cause analysis
# - Error classification
# - Fix recommendations
# - Prevention suggestions
input run_path: "Path to the run to investigate"
input focus: "Optional focus: vm | program | context | external (default: auto-detect)"
# ============================================================
# Agents
# ============================================================
agent investigator:
model: opus
prompt: """
You are a forensic investigator for failed .prose runs.
You methodically trace execution to find root causes:
- Read state.md for execution trace
- Check each binding for errors or unexpected content
- Look for patterns: where did things go wrong?
- Distinguish symptoms from causes
"""
agent classifier:
model: sonnet
prompt: """
You classify errors into actionable categories:
VM errors: The OpenProse VM itself misbehaved
- State management bugs
- Incorrect control flow
- Context passing failures
Program errors: The .prose program has issues
- Logic errors
- Missing error handling
- Bad agent prompts
Context errors: Context degradation or bloat
- Information lost between agents
- Context too large
- Wrong context passed
External errors: Outside factors
- Tool failures
- Network issues
- Resource limits
"""
agent fixer:
model: opus
prompt: """
You propose specific fixes for identified issues.
Be concrete: show the change, not just describe it.
"""
# ============================================================
# Phase 1: Gather Evidence
# ============================================================
let evidence = session: investigator
prompt: """
Gather evidence from the failed run.
Run: {run_path}
Read and analyze:
1. state.md - What was the execution trace? Where did it stop?
2. bindings/ - Which bindings exist? Any with errors or empty?
3. program.prose - What was the program trying to do?
4. agents/ - Any agent memory files with clues?
Document:
- Last successful step
- First sign of trouble
- Error messages (if any)
- Unexpected states
Return structured evidence.
"""
# ============================================================
# Phase 2: Trace Execution
# ============================================================
let trace = session: investigator
prompt: """
Trace execution step by step to find the failure point.
Evidence: {evidence}
Walk through the execution:
1. What was the program supposed to do at each step?
2. What actually happened (according to state.md)?
3. Where do expected and actual diverge?
For the divergence point:
- What was the input to that step?
- What was the output (or lack thereof)?
- What should have happened?
Return:
{
"failure_point": { step, statement, expected, actual },
"chain_of_events": [...],
"contributing_factors": [...]
}
"""
context: evidence
# ============================================================
# Phase 3: Classify Error
# ============================================================
let classification = session: classifier
prompt: """
Classify this error.
Trace: {trace}
Evidence: {evidence}
Focus hint: {focus}
Determine:
- Primary category (vm | program | context | external)
- Subcategory (specific type within category)
- Severity (critical | major | minor)
- Reproducibility (always | sometimes | rare)
Return:
{
"category": "...",
"subcategory": "...",
"severity": "...",
"reproducibility": "...",
"confidence": "high" | "medium" | "low",
"reasoning": "..."
}
"""
context: { trace, evidence }
# ============================================================
# Phase 4: Root Cause Analysis
# ============================================================
let root_cause = session: investigator
prompt: """
Determine the root cause (not just symptoms).
Trace: {trace}
Classification: {classification}
Ask "why" repeatedly until you reach the root:
- Why did this step fail?
- Why was that input malformed?
- Why did that agent produce that output?
- ...
The root cause is the earliest point where an intervention
would have prevented the failure.
Return:
{
"root_cause": "...",
"causal_chain": ["step 1", "led to step 2", "which caused failure"],
"root_cause_category": "vm" | "program" | "context" | "external"
}
"""
context: { trace, classification }
# ============================================================
# Phase 5: Fix Recommendations
# ============================================================
let fixes = session: fixer
prompt: """
Propose fixes for this failure.
Root cause: {root_cause}
Classification: {classification}
Evidence: {evidence}
Provide:
1. Immediate fix (how to make this specific run work)
2. Permanent fix (how to prevent this class of error)
3. Detection (how to catch this earlier next time)
Be specific. If it's a code change, show the diff.
If it's a process change, describe the new process.
Return:
{
"immediate": { action, details },
"permanent": { action, details, files_to_change },
"detection": { action, details },
"prevention": "how to avoid this in future programs"
}
"""
context: { root_cause, classification, evidence }
# ============================================================
# Output
# ============================================================
output report = session "Format report"
prompt: """
Format the forensic analysis as a report.
Evidence: {evidence}
Trace: {trace}
Classification: {classification}
Root cause: {root_cause}
Fixes: {fixes}
Structure:
1. Executive Summary
- What failed
- Why it failed (root cause)
- How to fix it
2. Timeline
- Execution trace with failure point highlighted
3. Root Cause Analysis
- Causal chain
- Classification
4. Recommendations
- Immediate fix
- Permanent fix
- Prevention
5. Technical Details
- Evidence gathered
- Files examined
Format as markdown.
"""
context: { evidence, trace, classification, root_cause, fixes }


@@ -0,0 +1,196 @@
# Post-Run Inspector
# Analyzes completed .prose runs for runtime fidelity and task effectiveness
#
# Usage:
# prose run @openprose/lib/inspector
#
# Inputs:
# run_path: Path to the run to inspect (e.g., .prose/runs/20260119-100000-abc123)
# depth: light | deep
# target: vm | task | all
#
# Compounding: Each inspection builds on prior inspections via persistent index agent.
# The index agent uses `persist: user` so inspection history spans all projects.
input run_path: "Path to the run to inspect (e.g., .prose/runs/20260119-100000-abc123)"
input depth: "Inspection depth: light or deep"
input target: "Evaluation target: vm, task, or all"
# ============================================================
# Agents
# ============================================================
agent index:
model: haiku
persist: user
prompt: """
You maintain the inspection registry across all projects.
Track: target_run_id, depth, target, timestamp, verdict.
Return JSON when queried. Store compactly.
"""
agent extractor:
model: sonnet
prompt: """
You extract structured data from .prose run artifacts.
Read state.md, bindings/, and logs carefully.
Return clean JSON.
"""
agent evaluator:
model: opus
prompt: """
You evaluate .prose runs with intelligent judgment.
Rate 1-10 with specific rationale. Be concrete.
"""
agent synthesizer:
model: sonnet
prompt: """
You produce clear reports in requested formats.
"""
# ============================================================
# Phase 0: Check Prior Work
# ============================================================
let prior = resume: index
prompt: """
Any prior inspections for: {run_path}?
Return JSON: { "inspections": [...], "has_light": bool, "has_deep": bool }
"""
# ============================================================
# Phase 1: Extraction
# ============================================================
let extraction = session: extractor
prompt: """
Extract from run at: {run_path}
Depth: {depth}
Prior work: {prior}
ALWAYS get:
- run_id (from path)
- completed (did state.md show completion?)
- error_count (failures in state.md)
- binding_names (list all bindings/)
- output_names (bindings with kind: output)
IF depth=deep AND no prior deep inspection:
- program_source (contents of program.prose)
- execution_summary (key statements from state.md)
- binding_previews (first 300 chars of each binding)
IF prior deep exists, skip deep extraction and note "using cached".
Return JSON.
"""
context: prior
# ============================================================
# Phase 2: Evaluation
# ============================================================
let evaluation = session: evaluator
prompt: """
Evaluate this run.
Target: {target}
Depth: {depth}
Data: {extraction}
Prior findings: {prior}
FOR vm (if target=vm or all):
- completion (1-10): Clean finish?
- binding_integrity (1-10): Expected outputs exist with content?
- vm_verdict: pass/partial/fail
- vm_notes: 1-2 sentences
FOR task (if target=task or all):
- output_substance (1-10): Outputs look real, not empty/error?
- goal_alignment (1-10): Based on program name, does output fit?
- task_verdict: pass/partial/fail
- task_notes: 1-2 sentences
IF depth=deep, add:
- fidelity (1-10): Execution trace matches program structure?
- efficiency (1-10): Reasonable number of steps for the job?
Return JSON with all applicable fields.
"""
context: extraction
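One way to collapse 1-10 dimension scores into the pass/partial/fail verdicts used above can be sketched as follows. The thresholds are invented for illustration; the program leaves the mapping to the evaluator's judgment.

```python
def verdict(scores):
    """Map a dict of 1-10 dimension scores to pass/partial/fail.

    Thresholds are illustrative assumptions, not defined by inspector.prose:
    a pass needs a strong average with no weak dimension.
    """
    avg = sum(scores.values()) / len(scores)
    if avg >= 8 and min(scores.values()) >= 6:
        return "pass"
    if avg >= 5:
        return "partial"
    return "fail"
```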
# ============================================================
# Phase 3: Synthesis
# ============================================================
parallel:
verdict = session: synthesizer
prompt: """
Machine-readable verdict as JSON:
{
"run_id": "...",
"depth": "{depth}",
"target": "{target}",
"vm": { "verdict": "...", "scores": {...} },
"task": { "verdict": "...", "scores": {...} },
"flags": []
}
Data: {evaluation}
"""
context: evaluation
diagram = session: synthesizer
prompt: """
Simple mermaid flowchart of the run.
Show: inputs -> key steps -> outputs.
Use execution_summary if available, else infer from bindings.
Output only the mermaid code.
Data: {extraction}
"""
context: extraction
report = session: synthesizer
prompt: """
2-paragraph markdown summary:
1. What was inspected, key metrics
2. Findings and any recommendations
Data: {extraction}, {evaluation}
"""
context: { extraction, evaluation }
# ============================================================
# Phase 4: Register
# ============================================================
resume: index
prompt: """
Register this inspection:
run_path: {run_path}
depth: {depth}
target: {target}
verdict: {verdict}
Update your memory with this entry.
"""
context: verdict
# ============================================================
# Output
# ============================================================
output inspection = session: synthesizer
prompt: """
Combine into final output structure:
verdict_json: {verdict}
mermaid: {diagram}
summary: {report}
Return as JSON with these three fields.
"""
context: { verdict, diagram, report }


@@ -0,0 +1,460 @@
# Profiler
# Analyzes OpenProse runs for cost, tokens, and time using actual API data
#
# Usage:
# prose run @openprose/lib/profiler
#
# Inputs:
# run_path: Path to run to analyze, or "recent" for latest runs
# scope: single | compare | trend
#
# Outputs:
# - Cost breakdown (VM vs subagents, by agent, by model)
# - Time breakdown (wall-clock, per-session, parallelism effectiveness)
# - Token usage patterns
# - Efficiency metrics ($/second, tokens/second)
# - Bottleneck identification
# - Optimization recommendations
#
# Data Sources:
# Primary: Claude Code's jsonl files in ~/.claude/projects/{project}/{session}/
# - Main session: {session}.jsonl (VM orchestration)
# - Subagents: subagents/agent-*.jsonl (OpenProse sessions)
#
# From each assistant message:
# - Tokens: input_tokens, output_tokens, cache_creation_input_tokens, cache_read_input_tokens
# - Model: message.model
# - Timestamps: for duration calculations
#
# Pricing: Fetched live from Anthropic's pricing page
#
# Supported Tools:
# - Claude Code (~/.claude) - full support
# - OpenCode, Amp, Codex - may have different structures, will warn
input run_path: "Path to run, or 'recent' for latest runs in .prose/runs/"
input scope: "Scope: single (one run) | compare (multiple runs) | trend (over time)"
const PRICING_URL = "https://platform.claude.com/docs/en/about-claude/pricing#model-pricing"
# ============================================================
# Agents
# ============================================================
agent detector:
model: haiku
prompt: """
You detect which AI coding tool was used and find its data files.
Check for:
1. ~/.claude/projects/ - Claude Code (full support)
2. ~/.opencode/ - OpenCode (may differ)
3. ~/.amp/ - Amp (may differ)
4. ~/.codex/ - Codex (may differ)
If not Claude Code, warn the user that analysis may be incomplete.
"""
agent collector:
model: sonnet
prompt: """
You locate and inventory AI coding tool session files.
For Claude Code (~/.claude/projects/{project}/{session}/):
1. Main session file: {session}.jsonl - VM orchestration
2. Subagent files: subagents/agent-*.jsonl - OpenProse sessions
Your job is to FIND the files, not process them.
Return file paths for the calculator agent to process.
"""
agent calculator:
model: sonnet
prompt: """
You calculate metrics by writing and executing inline Python scripts.
CRITICAL RULES:
1. NEVER do math in your head - always use Python
2. NEVER create standalone .py files - use inline scripts only
3. Run scripts with heredoc style: python3 << 'EOF' ... EOF
4. MUST process ALL files: main_jsonl AND EVERY file in subagent_jsonls[]
BEFORE CALCULATING:
Fetch current pricing from the pricing URL provided in your prompt.
Extract per-million-token rates for each Claude model.
YOUR PYTHON SCRIPT MUST:
1. Process the main_jsonl file (VM orchestration data)
2. Process EVERY file in subagent_jsonls[] (subagent session data)
- This is critical! There may be 10-20+ subagent files
- Each contains token usage that MUST be counted
3. For each file, read line by line and extract from type="assistant":
- usage.input_tokens, usage.output_tokens
- usage.cache_creation_input_tokens, usage.cache_read_input_tokens
- message.model (for pricing tier)
- timestamp (for duration calculation)
4. From Task prompts in subagent files, extract:
- Agent name: regex `You are the "([^"]+)" agent`
- Binding name: regex `/bindings/([^.]+)\\.md`
5. Calculate costs using the pricing you fetched
6. Calculate durations from first to last timestamp per file
7. Output structured JSON with VM and subagent data SEPARATELY
VALIDATION: If subagents.total.cost is 0 but subagent_jsonls has files,
your script has a bug - fix it before outputting.
"""
permissions:
network: [PRICING_URL]
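The per-file pass the calculator's inline script performs can be sketched like this. The `usage` field names and the agent-name regex come from the rules above; treat the exact jsonl layout (`type`, `message.content`) as an assumption about Claude Code's session format.

```python
import json
import re

# Agent name appears in the Task prompt, per the extraction rule above.
AGENT_RE = re.compile(r'You are the "([^"]+)" agent')

def tally_jsonl(lines):
    """Return (token totals, agent name or None) for one session file."""
    totals = {"input": 0, "output": 0, "cache_write": 0, "cache_read": 0}
    agent = None
    for line in lines:
        msg = json.loads(line)
        if msg.get("type") == "assistant":
            usage = msg.get("message", {}).get("usage", {})
            totals["input"] += usage.get("input_tokens", 0)
            totals["output"] += usage.get("output_tokens", 0)
            totals["cache_write"] += usage.get("cache_creation_input_tokens", 0)
            totals["cache_read"] += usage.get("cache_read_input_tokens", 0)
        elif msg.get("type") == "user" and agent is None:
            content = msg.get("message", {}).get("content", "")
            if isinstance(content, str):
                m = AGENT_RE.search(content)
                if m:
                    agent = m.group(1)
    return totals, agent
```

The real script must run this over the main jsonl and every subagent jsonl, keeping VM and subagent totals separate.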
agent analyzer:
model: opus
prompt: """
You analyze profiling data and identify optimization opportunities.
You receive pre-calculated data (computed by Python, not estimated).
Your job is interpretation and recommendations, not calculation.
COST ANALYSIS:
- VM overhead vs subagent costs (percentage split)
- Per-agent costs (which agents are most expensive?)
- Per-binding costs (which outputs cost the most?)
- Model tier usage (is opus used where sonnet would suffice?)
- Cache efficiency (cache_read vs cache_write ratio)
TIME ANALYSIS:
- Wall-clock duration vs sum of session durations
- Parallelism effectiveness (ratio shows how much parallelization helped)
- Per-agent time (which agents are slowest?)
- Bottlenecks (sequential operations that blocked progress)
EFFICIENCY ANALYSIS:
- Cost per second ($/s)
- Tokens per second (throughput)
- Cost vs time correlation (expensive but fast? cheap but slow?)
RECOMMENDATIONS:
- Model tier downgrades where appropriate
- Parallelization opportunities (sequential ops that could be parallel)
- Batching opportunities (many small sessions that could consolidate)
- Context trimming if input tokens seem excessive
"""
agent tracker:
model: haiku
persist: user
prompt: """
You track profiling metrics across runs for trend analysis.
Store: run_id, program, timestamp, total_cost, total_time, vm_cost, subagent_cost, by_model.
Compare against historical data when available.
"""
# ============================================================
# Phase 1: Detect Tool and Find Data
# ============================================================
let tool_detection = session: detector
prompt: """
Detect which AI coding tool was used for this OpenProse run.
Run path: {run_path}
1. If run_path is in .prose/runs/, extract the run timestamp
2. Look for corresponding session in:
- ~/.claude/projects/ (Claude Code) - check subfolders for sessions
- Other tool directories as fallback
3. If found in ~/.claude:
- Return the full session path
- List the main jsonl file and subagent files
- This is the primary data source
4. If NOT found in ~/.claude:
- Check for opencode/amp/codex directories
- WARN: "Non-Claude Code tool detected. Token data structure may differ."
5. If no tool data found:
- Return tool="not-found" with clear error
- Do NOT attempt estimation
Return JSON:
{
"tool": "claude-code" | "opencode" | "amp" | "codex" | "not-found",
"session_path": "/path/to/session/" | null,
"main_jsonl": "/path/to/session.jsonl" | null,
"subagent_jsonls": [...] | [],
"error": null | "Error message",
"warnings": []
}
"""
# ============================================================
# Guard: Exit if no data available
# ============================================================
assert tool_detection.tool != "not-found":
"""
ERROR: Profiling requires actual data from AI tool session files.
Could not find session data for this run. This can happen if:
1. The run was not executed with Claude Code (or supported tool)
2. The Claude Code session has been deleted or moved
3. The run path does not correspond to an existing session
Supported tools: Claude Code (~/.claude)
Partial support: OpenCode, Amp, Codex (structure may differ)
"""
# ============================================================
# Phase 2: Locate Session Files
# ============================================================
let runs_to_analyze = session: collector
prompt: """
Find runs to analyze and locate their session files.
Input: {run_path}
Scope: {scope}
Tool detection: {tool_detection}
If run_path is a specific path, use that run.
If run_path is "recent", find the latest 5-10 runs in .prose/runs/
For each run, locate:
1. The .prose/runs/{run_id}/ directory
2. The corresponding Claude Code session
3. List all jsonl files (main session + subagents/)
Return JSON array:
[
{
"run_id": "...",
"prose_run_path": "/path/to/.prose/runs/xxx/",
"session_path": "/path/to/claude/session/",
"main_jsonl": "/path/to/session.jsonl",
"subagent_jsonls": [...]
}
]
"""
context: tool_detection
# ============================================================
# Phase 3: Calculate Metrics (single Python pass per run)
# ============================================================
let metrics = runs_to_analyze | pmap:
session: calculator
prompt: """
Calculate all metrics for: {item}
STEP 1: Fetch current pricing from {PRICING_URL}
Note the per-million-token rates for each model (input, output, cache).
STEP 2: Write and execute an inline Python script that processes:
- Main jsonl: {item.main_jsonl}
- Subagent jsonls: {item.subagent_jsonls}
EXTRACT FROM EACH ASSISTANT MESSAGE:
- usage.input_tokens, usage.output_tokens
- usage.cache_creation_input_tokens, usage.cache_read_input_tokens
- model (for pricing tier)
- timestamp (for duration calculation)
EXTRACT FROM TASK PROMPTS (user messages in subagent files):
- Agent name: regex `You are the "([^"]+)" agent`
- Binding name: regex `/bindings/([^.]+)\.md`
CALCULATE:
- Cost: tokens * pricing rates you fetched
- Duration: time between first and last message per session
- Wall-clock: total run duration
OUTPUT JSON:
{
"run_id": "...",
"program": "...",
"wall_clock_seconds": N,
"vm_orchestration": {
"tokens": { "input": N, "output": N, "cache_write": N, "cache_read": N },
"cost": 0.00,
"duration_seconds": N,
"model": "...",
"message_count": N
},
"subagents": {
"total": { "tokens": {...}, "cost": 0.00, "duration_seconds": N },
"by_agent": {
"agent_name": {
"tokens": {...},
"cost": 0.00,
"duration_seconds": N,
"sessions": N,
"model": "..."
}
},
"by_binding": {
"binding_name": { "tokens": {...}, "cost": 0.00, "duration_seconds": N, "agent": "..." }
}
},
"by_model": {
"opus": { "tokens": {...}, "cost": 0.00 },
"sonnet": { "tokens": {...}, "cost": 0.00 },
"haiku": { "tokens": {...}, "cost": 0.00 }
},
"total": {
"tokens": { "input": N, "output": N, "cache_write": N, "cache_read": N, "total": N },
"cost": 0.00,
"duration_seconds": N
},
"efficiency": {
"cost_per_second": 0.00,
"tokens_per_second": N,
"parallelism_factor": N // sum(session_durations) / wall_clock
}
}
"""
context: item
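The extraction-and-pricing pass described in STEP 2 can be sketched as a small Python function (illustrative only: the flat line shape, placeholder per-million rates, and field names mirror the usage schema quoted in the prompt, not fetched pricing or the exact session-file layout):

```python
import json
import re

# Placeholder per-million-token rates; a real run fetches current
# pricing from PRICING_URL instead of hard-coding values.
RATES = {"input": 3.00, "output": 15.00, "cache_write": 3.75, "cache_read": 0.30}

# Agent attribution regex, as given in the prompt above.
AGENT_RE = re.compile(r'You are the "([^"]+)" agent')

def summarize(jsonl_lines):
    """Sum token usage and cost over one session's JSONL lines."""
    tokens = {"input": 0, "output": 0, "cache_write": 0, "cache_read": 0}
    agents = set()
    for line in jsonl_lines:
        msg = json.loads(line)
        usage = msg.get("usage")
        if usage:  # assistant messages carry a usage block
            tokens["input"] += usage.get("input_tokens", 0)
            tokens["output"] += usage.get("output_tokens", 0)
            tokens["cache_write"] += usage.get("cache_creation_input_tokens", 0)
            tokens["cache_read"] += usage.get("cache_read_input_tokens", 0)
        for m in AGENT_RE.finditer(msg.get("content", "")):
            agents.add(m.group(1))
    # Cost = tokens * per-million rate, per the CALCULATE step.
    cost = sum(tokens[k] * RATES[k] for k in tokens) / 1_000_000
    return {"tokens": tokens, "cost": round(cost, 4), "agents": sorted(agents)}

lines = [
    json.dumps({"usage": {"input_tokens": 1000, "output_tokens": 500}}),
    json.dumps({"content": 'You are the "calculator" agent.'}),
]
print(summarize(lines))  # cost 0.0105, agents ["calculator"]
```

The real calculator agent writes a script like this inline per run; the sketch just shows the arithmetic the prompt asks for.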
# ============================================================
# Phase 4: Analyze
# ============================================================
let analysis = session: analyzer
prompt: """
Analyze the profiling data.
Pre-calculated metrics: {metrics}
Scope: {scope}
All numbers were calculated by Python. Trust them - focus on insights.
FOR SINGLE RUN:
1. COST ATTRIBUTION
- VM overhead vs subagent costs (percentage)
- Rank agents by cost
- Flag expensive models on simple tasks
2. TIME ATTRIBUTION
- Wall-clock vs sum of session durations
- Parallelism factor interpretation:
- Factor near 1.0 = fully sequential
- Factor > 2.0 = good parallelization
- Factor > 5.0 = excellent parallelization
- Identify slowest agents/bindings
3. EFFICIENCY
- Cost per second (is expensive time well-spent?)
- Tokens per second (throughput)
- Correlation: expensive-and-fast vs cheap-and-slow
4. CACHE EFFICIENCY
- Read/write ratio
- Assessment: good (>5:1), fair (2-5:1), poor (<2:1)
5. HOTSPOTS
- Top 5 by cost
- Top 5 by time
- Note any that appear in both lists
6. RECOMMENDATIONS
- Model downgrades (specific: "agent X could use sonnet")
- Parallelization opportunities (specific sequential ops)
- Batching opportunities (many small similar sessions)
- Context trimming if input >> output
FOR COMPARE (multiple runs):
- Show cost and time differences
- Identify what changed between runs
- Note improvements or regressions
FOR TREND (over time):
- Show cost and time progression
- Identify trend direction
- Flag anomalies
Return structured JSON with all analysis sections.
"""
context: metrics
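The parallelism-factor and cache-ratio thresholds the analyzer applies reduce to simple arithmetic; a hypothetical sketch of the two checks:

```python
def parallelism_factor(session_durations, wall_clock_seconds):
    """Sum of per-session durations over wall-clock time:
    ~1.0 is fully sequential, >2.0 good, >5.0 excellent."""
    return sum(session_durations) / wall_clock_seconds

def cache_assessment(cache_read, cache_write):
    """Bucket the read/write ratio: good (>5:1), fair (2-5:1), poor (<2:1)."""
    ratio = cache_read / cache_write if cache_write else float("inf")
    if ratio > 5:
        return "good"
    if ratio >= 2:
        return "fair"
    return "poor"

print(parallelism_factor([30, 30, 30], 20))  # → 4.5 (good parallelization)
print(cache_assessment(12_000, 1_000))       # → good
```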
# ============================================================
# Phase 5: Track for Trends
# ============================================================
resume: tracker
prompt: """
Record this profiling data for trend tracking.
Run: {metrics[0].run_id}
Program: {metrics[0].program}
Total cost: {analysis.summary.total_cost}
Total time: {analysis.summary.total_time}
Efficiency: {analysis.summary.efficiency}
Add to your historical record with timestamp.
If you have previous runs of the same program, note the trend.
"""
context: analysis
# ============================================================
# Output
# ============================================================
output report = session "Format profiler report"
prompt: """
Format the profiling analysis as a professional report.
Analysis: {analysis}
Tool: {tool_detection.tool}
## Report Structure:
### 1. Executive Summary
- Total cost and wall-clock time
- Key finding (most significant insight)
- Tool used
### 2. Cost Attribution
| Category | Cost | % of Total |
|----------|------|------------|
| VM Orchestration | $X.XX | XX% |
| Subagent Execution | $X.XX | XX% |
| **Total** | $X.XX | 100% |
### 3. Time Attribution
| Category | Time | % of Wall-Clock |
|----------|------|-----------------|
| VM Orchestration | Xs | XX% |
| Subagent Execution | Xs | XX% |
| **Wall-Clock** | Xs | - |
| **Sum of Sessions** | Xs | - |
| **Parallelism Factor** | X.Xx | - |
### 4. By Agent
| Agent | Model | Sessions | Cost | Time | $/s |
|-------|-------|----------|------|------|-----|
### 5. By Model Tier
| Model | Cost | % | Tokens | % |
|-------|------|---|--------|---|
### 6. Cache Efficiency
- Read/write ratio and assessment
### 7. Hotspots
**By Cost:**
1. ...
**By Time:**
1. ...
### 8. Efficiency Analysis
- Cost per second
- Tokens per second
- Parallelism effectiveness
### 9. Recommendations
Prioritized list with estimated impact
Format as clean markdown with tables.
"""
context: analysis


@@ -0,0 +1,275 @@
# Program Improver
# Analyzes inspection reports and proposes improvements to .prose source code
#
# Usage:
# prose run @openprose/lib/program-improver
#
# Inputs:
# inspection_path: Path to inspection binding
# run_path: Path to the inspected run (to find program.prose)
#
# Output: PR to source repo if accessible, otherwise proposal file
input inspection_path: "Path to inspection output (bindings/inspection.md)"
input run_path: "Path to the inspected run directory"
# ============================================================
# Agents
# ============================================================
agent locator:
model: sonnet
prompt: """
You find the source location of .prose programs.
Check:
- Registry reference in program header (e.g., @handle/slug)
- Local file paths
- Whether source repo is accessible for PRs
"""
agent analyst:
model: opus
prompt: """
You analyze OpenProse inspection reports for program improvement opportunities.
Look for:
- Wrong model tier (using opus where sonnet suffices, or vice versa)
- Missing error handling (no try/catch around risky operations)
- Suboptimal control flow (sequential where parallel would work)
- Context passing issues (passing too much, or missing context)
- Unnecessary complexity (over-engineered for the task)
- Missing parallelization (independent operations run sequentially)
- Agent prompt issues (vague, missing constraints, wrong role)
Be specific. Quote evidence from the inspection report.
"""
agent implementer:
model: opus
prompt: """
You improve .prose programs while preserving their intent.
Rules:
- Keep the same overall structure
- Make minimal, targeted changes
- Follow OpenProse idioms
- Preserve comments and documentation
- One logical improvement per change
"""
agent pr_author:
model: sonnet
prompt: """
You create branches and pull requests or write proposal files.
"""
# ============================================================
# Phase 1: Locate Program Source
# ============================================================
let source_info = session: locator
prompt: """
Find the source of the inspected program.
Run path: {run_path}
Steps:
1. Read {run_path}/program.prose
2. Check header for registry reference (e.g., # from: @handle/slug)
3. Check if it's a lib/ program (part of OpenProse)
4. Determine if we can create a PR
Return JSON:
{
"program_name": "name from header or filename",
"registry_ref": "@handle/slug or null",
"source_type": "lib" | "local" | "registry" | "unknown",
"source_path": "path to original source or null",
"source_repo": "git repo URL or null",
"can_pr": true/false,
"program_content": "full program source"
}
"""
context: run_path
# ============================================================
# Phase 2: Analyze for Improvements
# ============================================================
let analysis = session: analyst
prompt: """
Analyze this program and its inspection for improvement opportunities.
Program source:
{source_info.program_content}
Inspection report: {inspection_path}
For each opportunity:
- category: model-tier | error-handling | flow | context | complexity | parallel | prompts
- description: what could be better
- severity: low | medium | high
- location: which part of program (agent name, phase, line range)
- evidence: what in the inspection suggests this
- proposed_fix: brief description of the change
Return JSON:
{
"program_name": "{source_info.program_name}",
"opportunities": [...],
"priority_order": [indices by impact]
}
"""
context: { source_info, inspection_path }
if **no actionable opportunities found**:
output result = {
status: "no-improvements-needed",
source_info: source_info,
analysis: analysis,
message: "Program executed well, no obvious improvements"
}
# ============================================================
# Phase 3: User Selection
# ============================================================
input selection: """
## Program Improvement Opportunities
Program: {source_info.program_name}
Source: {source_info.source_type} ({source_info.source_path})
Can PR: {source_info.can_pr}
### Opportunities Found:
{analysis.opportunities}
---
Which improvements should I implement?
- List by number
- Or "all" for everything
- Or "none" to skip
"""
if **user selected none or wants to skip**:
output result = {
status: "skipped",
source_info: source_info,
analysis: analysis
}
let selected = session "Parse selection"
prompt: "Extract selected opportunity indices"
context: { selection, analysis }
# ============================================================
# Phase 4: Implement Changes
# ============================================================
let implementation = session: implementer
prompt: """
Implement the selected improvements to this program.
Original program:
{source_info.program_content}
Selected opportunities: {selected}
Full analysis: {analysis}
Write the improved program. Make all selected changes.
Return JSON:
{
"improved_program": "full .prose source with improvements",
"changes_made": [
{
"opportunity_index": N,
"description": "what was changed",
"lines_affected": "before/after summary"
}
],
"branch_name": "program/{program_name}-improvements"
}
"""
context: { source_info, selected, analysis }
# ============================================================
# Phase 5: Create PR or Proposal
# ============================================================
if **source_info.can_pr is true**:
let pr = session: pr_author
prompt: """
Create a PR for this program improvement.
Source path: {source_info.source_path}
Source repo: {source_info.source_repo}
Branch: {implementation.branch_name}
Changes: {implementation.changes_made}
Improved program: {implementation.improved_program}
Steps:
1. cd to repo containing source
2. Create branch
3. Write improved program to source path
4. Commit with clear message
5. Push and create PR
PR body should explain each improvement.
Return: { pr_url, branch, title }
"""
context: { source_info, implementation }
permissions:
bash: allow
write: ["**/*.prose"]
output result = {
status: "pr-created",
source_info: source_info,
analysis: analysis,
implementation: implementation,
pr: pr
}
else:
# Write proposal file since we can't PR
let proposal_path = session: pr_author
prompt: """
Write a proposal file for this improvement.
Since we can't create a PR directly, write a proposal to:
.prose/proposals/{source_info.program_name}-improvements.md
Include:
# Improvement Proposal: {source_info.program_name}
## Original Source
{source_info.source_path or source_info.registry_ref}
## Changes Proposed
{implementation.changes_made}
## Improved Program
```prose
{implementation.improved_program}
```
## How to Apply
Instructions for manually applying or submitting upstream.
Return: { proposal_path }
"""
context: { source_info, implementation }
permissions:
write: [".prose/proposals/*.md"]
output result = {
status: "proposal-written",
source_info: source_info,
analysis: analysis,
implementation: implementation,
proposal: proposal_path
}


@@ -0,0 +1,118 @@
# Project Memory
# A persistent agent that understands this specific project
#
# Usage:
# prose run @openprose/lib/project-memory --backend sqlite+
#
# Recommended backend: sqlite+ (for durable project-scoped persistence)
#
# Modes:
# ingest: Read and understand content (code, docs, history)
# query: Ask questions about the project
# update: Record decisions, changes, or learnings
# summarize: Get an overview of the project
#
# The memory agent builds understanding over time. Ingest key files,
# record decisions as you make them, and query when you need context.
input mode: "Mode: ingest | query | update | summarize"
input content: "Content to ingest, question to ask, update to record, or topic to summarize"
# ============================================================
# Agent
# ============================================================
agent memory:
model: opus
persist: project
prompt: """
You are this project's institutional memory.
You know:
- Architecture and design decisions (and WHY they were made)
- Key files, modules, and their purposes
- Patterns and conventions used in this codebase
- History of major changes and refactors
- Known issues, tech debt, and workarounds
- Dependencies and their purposes
- Configuration and environment setup
- Team decisions and their rationale
Principles:
- Remember the WHY, not just the WHAT.
- Track evolution—how things changed over time.
- Note uncertainty and gaps in your knowledge.
- Reference specific files, commits, or discussions when possible.
- Keep knowledge structured and retrievable.
"""
# ============================================================
# Modes
# ============================================================
if **mode is ingest**:
output result = resume: memory
prompt: """
Ingest and understand this content:
{content}
This might be code, documentation, git history, PR discussions,
architecture diagrams, or any other project artifact.
Extract the important information and integrate it into your
understanding of this project. Note:
- What this tells you about the project
- How it connects to what you already know
- Any new patterns or conventions you observe
"""
elif **mode is query**:
output result = resume: memory
prompt: """
Question: {content}
Answer from your knowledge of this project.
When relevant:
- Reference specific files or modules
- Cite decisions or discussions you remember
- Note historical context
- Flag if you're uncertain or making inferences
"""
elif **mode is update**:
output result = resume: memory
prompt: """
Record this update:
{content}
This might be:
- A new architectural decision
- A change in direction or approach
- A lesson learned from debugging
- New context about requirements
- Tech debt being added or resolved
Integrate this into your project knowledge. Note the date
and how this relates to previous understanding.
"""
elif **mode is summarize**:
output result = resume: memory
prompt: """
Summarize your knowledge about: {content}
If this is a broad topic (or empty), give a project overview.
If specific, focus on that area.
Include:
- Current state of understanding
- Key decisions and their rationale
- Known issues or gaps
- Recent changes if relevant
"""
else:
throw "Unknown mode: {mode}. Use: ingest, query, update, or summarize"


@@ -0,0 +1,93 @@
# User Memory
# A persistent agent that learns and remembers across all your projects
#
# Usage:
# prose run @openprose/lib/user-memory --backend sqlite+
#
# Recommended backend: sqlite+ (for durable cross-project persistence)
#
# Modes:
# teach: Add new knowledge
# query: Ask questions
# reflect: Summarize what you know about a topic
#
# The memory agent accumulates knowledge over time. Each interaction
# builds on previous ones. Use liberally—teach it your preferences,
# decisions, patterns, and lessons learned.
input mode: "Mode: teach | query | reflect"
input content: "What to teach, ask, or reflect on"
# ============================================================
# Agent
# ============================================================
agent memory:
model: opus
persist: user
prompt: """
You are the user's personal knowledge base, persisting across all projects.
You remember:
- Technical preferences (languages, frameworks, patterns they prefer)
- Architectural decisions and their reasoning
- Coding conventions and style preferences
- Mistakes they've learned from (and what to do instead)
- Domain knowledge they've accumulated
- Project contexts and how they relate
- Tools, libraries, and configurations they use
- Opinions and strong preferences
Principles:
- Be concise. Store knowledge efficiently.
- Prioritize actionable knowledge over trivia.
- Note confidence levels when uncertain.
- Update previous knowledge when new info contradicts it.
- Connect related pieces of knowledge.
"""
# ============================================================
# Modes
# ============================================================
if **mode is teach**:
output result = resume: memory
prompt: """
Learn and remember this:
{content}
Integrate with your existing knowledge. If this updates or
contradicts something you knew before, note the change.
Respond with a brief confirmation of what you learned.
"""
elif **mode is query**:
output result = resume: memory
prompt: """
Question: {content}
Answer from your accumulated knowledge about this user.
If you know relevant context, share it.
If you're uncertain, say so.
If you don't know, say that clearly.
"""
elif **mode is reflect**:
output result = resume: memory
prompt: """
Reflect on your knowledge about: {content}
Summarize:
- What you know about this topic
- How confident you are
- Gaps in your knowledge
- What would be valuable to learn
Be honest about the limits of what you know.
"""
else:
throw "Unknown mode: {mode}. Use: teach, query, or reflect"


@@ -0,0 +1,243 @@
# VM Improver
# Analyzes inspection reports and proposes improvements to the OpenProse VM
#
# Usage:
# prose run @openprose/lib/vm-improver
#
# Inputs:
# inspection_path: Path to inspection binding (e.g., .prose/runs/.../bindings/inspection.md)
# prose_repo: Path to prose submodule (default: current project's prose/)
#
# Output: One or more PRs to the prose repo, or proposals if no git access
input inspection_path: "Path to inspection output (bindings/inspection.md)"
input prose_repo: "Path to prose skill directory (e.g., prose/skills/open-prose)"
# ============================================================
# Agents
# ============================================================
agent analyst:
model: opus
prompt: """
You analyze OpenProse inspection reports for VM improvement opportunities.
Look for evidence of:
- Execution inefficiencies (too many steps, redundant spawns)
- Context bloat (VM passing full values instead of references)
- State management issues (missing bindings, path errors)
- Error handling gaps (uncaught failures, unclear errors)
- Missing features that would help this class of program
- Spec ambiguities that led to incorrect execution
Be concrete. Reference specific inspection findings.
"""
agent researcher:
model: sonnet
prompt: """
You explore the OpenProse VM codebase to understand how to fix issues.
Read files, understand structure, find the right places to change.
"""
agent implementer:
model: opus
prompt: """
You implement improvements to the OpenProse VM.
Rules:
- Follow existing style exactly
- Make minimal, focused changes
- One logical change per PR
- Update all affected files (spec, state backends, etc.)
"""
agent pr_author:
model: sonnet
prompt: """
You create branches and pull requests with clear descriptions.
Explain the problem, the solution, and how to test it.
"""
# ============================================================
# Phase 1: Analyze Inspection for VM Issues
# ============================================================
let analysis = session: analyst
prompt: """
Read the inspection report and identify VM improvement opportunities.
Inspection: {inspection_path}
For each opportunity, specify:
- category: efficiency | context | state | error | feature | spec
- description: what's wrong
- severity: low | medium | high
- evidence: quote from inspection that shows this
- hypothesis: what VM behavior likely caused this
Return JSON:
{
"target_run": "run ID that was inspected",
"opportunities": [...],
"priority_order": [indices sorted by impact]
}
If the inspection shows clean execution with no issues, return empty opportunities.
"""
context: inspection_path
if **no actionable opportunities found**:
output result = {
status: "no-improvements-needed",
analysis: analysis,
message: "Inspection showed clean VM execution"
}
# ============================================================
# Phase 2: Research VM Codebase
# ============================================================
let research = session: researcher
prompt: """
For each opportunity, find the relevant VM code.
Prose repo: {prose_repo}
Opportunities: {analysis.opportunities}
Key files to check:
- prose.md (main VM spec)
- state/filesystem.md, state/sqlite.md, state/postgres.md
- primitives/session.md
- compiler.md
- SKILL.md
Return JSON:
{
"findings": [
{
"opportunity_index": N,
"relevant_files": ["path/to/file.md"],
"current_behavior": "how it works now",
"change_location": "specific section or line range"
}
]
}
"""
context: { analysis, prose_repo }
# ============================================================
# Phase 3: User Selection
# ============================================================
input selection: """
## VM Improvement Opportunities
Based on inspection of: {analysis.target_run}
### Opportunities Found:
{analysis.opportunities}
### Research:
{research.findings}
---
Which improvements should I implement as PRs?
- List by number (e.g., "1, 3")
- Or "all" for everything
- Or "none" to skip
"""
if **user selected none or wants to skip**:
output result = {
status: "skipped",
analysis: analysis,
research: research
}
let selected = session "Parse selection"
prompt: "Extract selected opportunity indices from user input"
context: { selection, analysis }
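Index extraction of the kind this "Parse selection" session performs (accepting "1, 3", "all", or "none") can be sketched in Python; this is an illustrative stand-in, since the program delegates the parsing to a session rather than fixed code:

```python
import re

def parse_selection(raw, n_opportunities):
    """Map user input like "1, 3", "all", or "none" to zero-based indices."""
    text = raw.strip().lower()
    if text == "none":
        return []
    if text == "all":
        return list(range(n_opportunities))
    # Pull out each number and convert 1-based display order to indices.
    return [int(tok) - 1 for tok in re.findall(r"\d+", text)]

print(parse_selection("1, 3", 4))  # → [0, 2]
print(parse_selection("all", 3))   # → [0, 1, 2]
```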
# ============================================================
# Phase 4: Implement Changes
# ============================================================
let implementations = selected | map:
session: implementer
prompt: """
Implement this VM improvement.
Opportunity: {analysis.opportunities[item]}
Research: {research.findings[item]}
Prose repo: {prose_repo}
1. Read the current file content
2. Design the minimal change
3. Write the improved content
Return JSON:
{
"opportunity_index": N,
"branch_name": "vm/short-description",
"title": "PR title",
"files": [
{
"path": "relative/path.md",
"action": "modify",
"description": "what changed"
}
],
"summary": "2-3 sentence explanation"
}
Actually write the changes to the files.
"""
context: item
permissions:
read: ["{prose_repo}/**"]
write: ["{prose_repo}/**"]
# ============================================================
# Phase 5: Create PRs
# ============================================================
let prs = implementations | map:
session: pr_author
prompt: """
Create a PR for this VM improvement.
Implementation: {item}
Prose repo: {prose_repo}
Steps:
1. cd to prose repo
2. Create branch: {item.branch_name}
3. Stage changed files
4. Commit with clear message
5. Push branch
6. Create PR via gh cli
PR body should include:
- Problem: what inspection revealed
- Solution: what this changes
- Testing: how to verify
Return: { pr_url, branch, title }
"""
context: item
permissions:
bash: allow
# ============================================================
# Output
# ============================================================
output result = {
status: "complete",
target_run: analysis.target_run,
opportunities_found: analysis.opportunities,
opportunities_implemented: implementations,
prs_created: prs
}