mirror of
https://github.com/xr843/Master-skill.git
synced 2026-05-10 05:16:25 +00:00
feat: add CI validation pipeline and boundary test support
- Update test-fidelity.py to support the must_not_contain and must_not_contain_first_turn fields for boundary/pressure tests
- Add validate-fidelity.py for structural validation of all fidelity.jsonl files (no API needed)
- Add GitHub Actions workflow that runs validate + dry-run on every push/PR touching prebuilt/scripts/prompts/tools

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Validate fidelity.jsonl structure for all masters.
|
||||
|
||||
Checks that every test case has required fields and valid structure.
|
||||
No API calls needed — pure structural validation.
|
||||
|
||||
Usage:
|
||||
python scripts/validate-fidelity.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Directory named "prebuilt" located two levels above this file.
PREBUILT_DIR = Path(__file__).resolve().parent.parent / "prebuilt"

# Accepted values for a test case's optional "test_type" field.
VALID_TEST_TYPES = {"fidelity", "boundary", "pressure"}

# Accepted values for the "boundary" field of boundary tests.
VALID_BOUNDARIES = {"sectarian_judgment", "no_prophecy", "neutral_first_turn"}

# Known labels for the "pressure" field of pressure tests.
VALID_PRESSURES = {
    "citation_bypass",
    "informality_bypass",
    "meta_challenge",
    "hostile_challenge",
    "simplicity_bypass",
    "terminology_bypass",
    "relevance_challenge",
    "misunderstanding_challenge",
}
|
||||
|
||||
|
||||
def _is_allowed(value, allowed) -> bool:
    """Return True when *value* is a string that belongs to the *allowed* set."""
    # The isinstance guard keeps unhashable JSON values (lists, objects) from
    # raising TypeError in the set-membership test.
    return isinstance(value, str) and value in allowed


def validate_master(master_dir: Path) -> list[str]:
    """Validate ``tests/fidelity.jsonl`` for a single master.

    Every non-blank line must be a JSON object that has a ``q`` field and at
    least one assertion field (``must_cite``, ``must_mention``,
    ``must_not_contain``, ``must_not_contain_first_turn``), each of which must
    be a list when present. A non-empty ``test_type`` must be in
    ``VALID_TEST_TYPES``; boundary tests need a ``boundary`` value from
    ``VALID_BOUNDARIES`` and pressure tests a ``pressure`` value from
    ``VALID_PRESSURES``. The file as a whole must hold at least five test
    cases and at least one boundary test.

    Args:
        master_dir: Directory expected to contain ``tests/fidelity.jsonl``.

    Returns:
        Error messages prefixed with the master directory name (and the
        1-based physical line number for per-line problems). The list is
        empty when the file is valid.
    """
    name = master_dir.name
    fidelity_path = master_dir / "tests" / "fidelity.jsonl"
    if not fidelity_path.exists():
        return [f"{name}: no fidelity.jsonl found"]

    assertion_fields = (
        "must_cite",
        "must_mention",
        "must_not_contain",
        "must_not_contain_first_turn",
    )

    errors: list[str] = []

    # Split the raw text (no strip first) so reported numbers match the
    # physical lines of the file, then drop blank lines so they are not
    # counted as test cases.
    lines = fidelity_path.read_text(encoding="utf-8").splitlines()
    records = [(i, line) for i, line in enumerate(lines, 1) if line.strip()]

    if len(records) < 5:
        errors.append(f"{name}: fewer than 5 test cases ({len(records)})")

    # Tracked during the single parse pass; re-parsing every line afterwards
    # would raise on any malformed line instead of reporting it.
    has_boundary = False

    for i, line in records:
        try:
            test = json.loads(line)
        except json.JSONDecodeError as e:
            errors.append(f"{name}:{i}: invalid JSON — {e}")
            continue

        # Valid JSON that is not an object (list, string, number) cannot
        # carry the named fields checked below.
        if not isinstance(test, dict):
            errors.append(f"{name}:{i}: test case must be a JSON object")
            continue

        # Every test must have "q"
        if "q" not in test:
            errors.append(f"{name}:{i}: missing 'q' field")

        # Must have at least one assertion
        if not any(field in test for field in assertion_fields):
            errors.append(f"{name}:{i}: no assertion fields found")

        # Validate test_type if present (an empty/falsy value counts as absent)
        test_type = test.get("test_type")
        if test_type and not _is_allowed(test_type, VALID_TEST_TYPES):
            errors.append(
                f"{name}:{i}: invalid test_type '{test_type}' "
                f"(valid: {VALID_TEST_TYPES})"
            )

        # Validate boundary/pressure subtypes
        if test_type == "boundary":
            has_boundary = True
            boundary = test.get("boundary")
            if not boundary:
                errors.append(f"{name}:{i}: boundary test missing 'boundary' field")
            elif not _is_allowed(boundary, VALID_BOUNDARIES):
                errors.append(
                    f"{name}:{i}: unknown boundary '{boundary}' "
                    f"(valid: {VALID_BOUNDARIES})"
                )
        elif test_type == "pressure":
            pressure = test.get("pressure")
            if not pressure:
                errors.append(f"{name}:{i}: pressure test missing 'pressure' field")
            elif not _is_allowed(pressure, VALID_PRESSURES):
                errors.append(
                    f"{name}:{i}: unknown pressure '{pressure}' "
                    f"(valid: {VALID_PRESSURES})"
                )

        # List fields must be lists
        for field in assertion_fields:
            if field in test and not isinstance(test[field], list):
                errors.append(f"{name}:{i}: '{field}' must be a list")

    # Check coverage: should have at least one boundary test
    if not has_boundary:
        errors.append(f"{name}: no boundary tests found (need at least one)")

    return errors
|
||||
|
||||
|
||||
def main() -> None:
    """Validate every master under ``PREBUILT_DIR`` and print a report.

    Each sub-directory of ``PREBUILT_DIR`` except ``compare`` is passed to
    ``validate_master``. Masters without errors get a one-line summary with
    their test count; all collected errors are printed at the end.

    Raises:
        SystemExit: With status 1 when ``PREBUILT_DIR`` is not a directory or
            when any validation error was found.
    """
    # Report a missing directory explicitly instead of letting iterdir() raise.
    if not PREBUILT_DIR.is_dir():
        print(f"ERROR: prebuilt directory not found: {PREBUILT_DIR}")
        sys.exit(1)

    all_errors = []
    masters = sorted(
        d for d in PREBUILT_DIR.iterdir()
        if d.is_dir() and d.name != "compare"
    )

    for master_dir in masters:
        errors = validate_master(master_dir)
        if errors:
            all_errors.extend(errors)
            continue
        # validate_master reports a missing file as an error, so reaching
        # this point means the file exists. Read it as UTF-8 (the encoding
        # validate_master uses) and count only non-blank lines.
        fidelity_path = master_dir / "tests" / "fidelity.jsonl"
        text = fidelity_path.read_text(encoding="utf-8")
        count = sum(1 for line in text.splitlines() if line.strip())
        print(f" {master_dir.name}: {count} tests OK")

    if all_errors:
        print(f"\n{len(all_errors)} error(s) found:")
        for err in all_errors:
            print(f" ERROR: {err}")
        sys.exit(1)
    else:
        print(f"\nAll {len(masters)} masters validated successfully.")
|
||||
|
||||
|
||||
# Run the validator only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user