test: replay sanitized protocol fixtures

ebursztein · ebursztein · commit 4f9125a91460 · 2026-06-12T00:59:19.000-04:00
diff --git a/scripts/protocol_fixture_recorder.py b/scripts/protocol_fixture_recorder.py
@@ -51,6 +51,17 @@ class ProtocolFixture(BaseModel):
     substitutions: dict[str, str] = Field(default_factory=dict)
 
 
+class ReplayResult(BaseModel):
+    name: str
+    protocol_family: ProtocolFamily
+    status_matches: bool
+    visible_bytes_match: bool
+    expected_status_code: int
+    actual_status_code: int
+    expected_visible_bytes: int
+    actual_visible_bytes: int
+
+
 def _substitution_for(secret: str) -> str:
     digest = blake3.blake3(secret.encode("utf-8")).hexdigest()
     return f"credential:blake3:{digest}"
@@ -312,10 +323,41 @@ def record_debug_upstream(
     return written
 
 
+def replay_fixtures(base_url: str, fixture_paths: list[str | Path]) -> list[ReplayResult]:
+    results: list[ReplayResult] = []
+    for path in fixture_paths:
+        fixture = ProtocolFixture.model_validate_json(Path(path).read_text())
+        exchange, visible_bytes, _substitutions = _http_exchange(
+            base_url,
+            fixture.exchange.method,
+            fixture.exchange.path,
+            headers=dict(fixture.exchange.request_headers),
+            body=fixture.exchange.request_body,
+        )
+        results.append(
+            ReplayResult(
+                name=fixture.name,
+                protocol_family=fixture.protocol_family,
+                status_matches=exchange.status_code == fixture.exchange.status_code,
+                visible_bytes_match=visible_bytes == fixture.expected_visible_bytes,
+                expected_status_code=fixture.exchange.status_code,
+                actual_status_code=exchange.status_code,
+                expected_visible_bytes=fixture.expected_visible_bytes,
+                actual_visible_bytes=visible_bytes,
+            )
+        )
+    return results
+
+
 def main() -> int:
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument("--base-url", required=True, help="capsem-debug-upstream base URL")
     parser.add_argument("--out-dir", required=True, type=Path, help="fixture output directory")
+    parser.add_argument(
+        "--replay",
+        action="store_true",
+        help="replay written fixtures after recording and include replay results",
+    )
     parser.add_argument(
         "--scenario",
         action="append",
@@ -328,7 +370,12 @@ def main() -> int:
         args.out_dir,
         scenarios=set(args.scenarios) if args.scenarios else None,
     )
-    print(json.dumps({"written": [str(path) for path in written]}, indent=2))
+    output: dict[str, Any] = {"written": [str(path) for path in written]}
+    if args.replay:
+        output["replay"] = [
+            result.model_dump() for result in replay_fixtures(args.base_url, written)
+        ]
+    print(json.dumps(output, indent=2))
     return 0
 
 
diff --git a/sprints/1.3-release-correction/tracker.md b/sprints/1.3-release-correction/tracker.md
@@ -240,8 +240,17 @@ next one, and stage only the files for that slice.
   - Proof: `uv run python -m pytest tests/test_protocol_fixture_recorder.py
     -q` (`1 passed in 1.81s`); `uv run ruff check
     scripts/protocol_fixture_recorder.py tests/test_protocol_fixture_recorder.py`.
-- [ ] RED/GREEN: replay covers Claude/Anthropic, OpenAI/Codex-compatible,
+- [x] RED/GREEN: replay covers Claude/Anthropic, OpenAI/Codex-compatible,
   Gemini/AGY-compatible, Ollama/OpenAI-compatible, MCP, and credential flows.
+  - 2026-06-12 progress: the recorder now exposes `replay_fixtures()`, which
+    reissues recorded fixtures against the local lab and reports whether the
+    response status and the stable visible-byte count match the recording. The
+    test records and replays Claude/Anthropic-shaped, Codex/OpenAI-compatible,
+    AGY/Gemini-shaped, Ollama/OpenAI-compatible, OAuth, MCP tools/list, MCP
+    tools/call, and credential-capture fixtures without public network access.
+  - Proof: `uv run python -m pytest tests/test_protocol_fixture_recorder.py
+    -q` (`2 passed in 0.92s`); `uv run ruff check
+    scripts/protocol_fixture_recorder.py tests/test_protocol_fixture_recorder.py`.
 - [ ] RED/GREEN: live-local Ollama probe uses host `gemma4:latest` through the
   Capsem-routed path and records/replays the resulting native Ollama and
   OpenAI-compatible traffic without installing Ollama in the guest.
diff --git a/tests/test_protocol_fixture_recorder.py b/tests/test_protocol_fixture_recorder.py
@@ -65,3 +65,26 @@ def test_protocol_fixture_recorder_uses_debug_upstream_and_sanitizes(tmp_path):
         assert fixture.auth_mode in {"none", "bearer", "api_key", "oauth_code"}
         assert fixture.expected_ledger_rows
         assert fixture.expected_visible_bytes >= 0
+
+
+def test_protocol_fixture_replay_covers_recorded_flows(tmp_path):
+    recorder = _load_recorder()
+    subprocess.run(
+        ["cargo", "build", "-p", "capsem-debug-upstream"],
+        cwd=PROJECT_ROOT,
+        check=True,
+    )
+    proc = None
+    try:
+        proc, ready = start_debug_upstream()
+        written = recorder.record_debug_upstream(ready["base_url"], tmp_path)
+        results = recorder.replay_fixtures(ready["base_url"], written)
+    finally:
+        stop_process(proc)
+
+    assert {result.name for result in results} == {path.stem for path in written}
+    assert all(result.status_matches for result in results)
+    assert all(result.visible_bytes_match for result in results)
+    assert {
+        result.protocol_family for result in results
+    } == {"model", "oauth", "mcp", "credential"}