From bd04ede81757e288cf5bb34cbcff80ea43b73d81 Mon Sep 17 00:00:00 2001 From: xianren Date: Thu, 16 Apr 2026 13:45:56 +0800 Subject: [PATCH] chore(governance): add ETHICS.md, CHANGELOG.md, and PR fidelity smoke MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three governance-tier additions to convert Master-skill from a demo into a defensible long-lived project: **ETHICS.md** — mandatory governance document: - AI transparency: outputs are AI-synthesized, not masters' own words - Copyright tiers A (public domain, current 8), B (in-copyright, needs license), C (never admit: living masters, Buddhas/bodhisattvas, apocryphal figures), D (case-by-case) - Religious boundary: AI must refuse precept transmission, awakening certification, karmic diagnosis, spirit-medium framing, etc. - Dual-track content license: code MIT, master content CC BY-NC-SA 4.0, prompts CC BY 4.0 - Takedown + appeal channel with 48h / 7d SLAs **CHANGELOG.md** — Keep a Changelog format: - [Unreleased] captures the current governance + community + npm work - [0.3.0] retroactively documents the architectural rebuild (provenance, fidelity, NPX, multi-platform, HARD-GATE, two-stage review) - [0.2.0], [0.1.0] historical sections **CI fidelity smoke** — make HARD-GATE a real gate, not just documentation: - New `fidelity-smoke` job runs one basic-difficulty fixture against one master per PR; picks the master touched by the diff, else rotates by day-of-year for uniform coverage - Cost cap ≈ $0.05/PR (~10k-token system prompt × 1 request × Sonnet 4.6 pricing). 
Forks without ANTHROPIC_API_KEY get an advisory pass so external PRs can still land - `scripts/test-fidelity.py` gains `--max-tests N` flag; when capping, it sorts by difficulty (basic → intermediate → advanced) so smoke runs hit the reliable floor, not stress cases - Old `fidelity` job renamed to `fidelity-full` (still workflow_dispatch) README 声明 section now links to ETHICS.md so every reader sees the AI disclosure and boundary rules before copying master content. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/validate-and-test.yml | 73 +++++++++- CHANGELOG.md | 98 ++++++++++++++ ETHICS.md | 171 ++++++++++++++++++++++++ README.md | 2 + scripts/test-fidelity.py | 27 +++- 5 files changed, 367 insertions(+), 4 deletions(-) create mode 100644 CHANGELOG.md create mode 100644 ETHICS.md diff --git a/.github/workflows/validate-and-test.yml b/.github/workflows/validate-and-test.yml index d7ae908..6e6a073 100644 --- a/.github/workflows/validate-and-test.yml +++ b/.github/workflows/validate-and-test.yml @@ -38,8 +38,77 @@ jobs: - name: Dry-run fidelity tests run: python scripts/test-fidelity.py --all --dry-run - fidelity: - name: Fidelity tests (API) + fidelity-smoke: + name: Fidelity smoke (1 master × 1 fixture) + runs-on: ubuntu-latest + needs: validate + # Cost cap: one basic-difficulty question per PR, Sonnet 4.6 pricing + # with ~10k-token system prompt → under $0.05/PR. Forks have no secret + # access — treat missing secret as advisory pass so external PRs can land. + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: pip install anthropic requests pypinyin + + - name: Pick smoke target + id: pick + run: | + # If the PR touches a specific prebuilt master, smoke-test that one. + # Otherwise rotate by day-of-year so coverage stays uniform across 8. 
+ BASE="${{ github.base_ref || 'main' }}" + CHANGED=$(git diff --name-only "origin/${BASE}...HEAD" 2>/dev/null \ + | grep -oP 'prebuilt/\K[^/]+' | grep -v '^compare$' | head -n1 || true) + if [ -z "$CHANGED" ]; then + MASTERS=(xuanzang kumarajiva huineng zhiyi fazang yinguang ouyi xuyun) + IDX=$(( 10#$(date +%j) % ${#MASTERS[@]} )) # 10# forces base 10: %j is zero-padded, and values like 008/009 are invalid octal in bash arithmetic + CHANGED=${MASTERS[$IDX]} + fi + echo "master=$CHANGED" >> "$GITHUB_OUTPUT" + echo "Smoke target: $CHANGED" + + - name: Run fidelity smoke + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + run: | + if [ -z "${ANTHROPIC_API_KEY:-}" ]; then + echo "::warning::ANTHROPIC_API_KEY not configured — smoke is advisory on this run (expected on forks)." + echo '{"skipped": true, "reason": "no_api_key"}' > fidelity-smoke.json + exit 0 + fi + python scripts/test-fidelity.py \ + --master "${{ steps.pick.outputs.master }}" \ + --max-tests 1 \ + --json > fidelity-smoke.json + python - <<'PY' + import json, sys + with open("fidelity-smoke.json") as f: + data = json.load(f) + # --master returns a single-element list + r = data[0] if isinstance(data, list) else data + failed = r.get("failed", 0) + total = r.get("total", 0) + print(f"Fidelity smoke: {total - failed}/{total} passed for {r.get('master', '?')}") + sys.exit(1 if failed else 0) + PY + + - name: Upload smoke results + if: always() + uses: actions/upload-artifact@v4 + with: + name: fidelity-smoke-${{ github.run_id }} + path: fidelity-smoke.json + if-no-files-found: ignore + + fidelity-full: + name: Fidelity tests — full suite (manual only) runs-on: ubuntu-latest if: github.event_name == 'workflow_dispatch' needs: validate diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..94f7125 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,98 @@ +# Changelog + +All notable changes to Master-skill are documented in this file. + +The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +Sections marked **Ethics** track changes to `ETHICS.md`, content licensing, or boundary rules — these are governance-level changes and require the public-review process documented in `ETHICS.md §7`. + +--- + +## [Unreleased] + +### Added +- `ETHICS.md` — AI transparency, copyright tier (A/B/C/D), religious boundary, dual-track content license, takedown channel. +- `CONTRIBUTING.md`, `CODE_OF_CONDUCT.md`, `SECURITY.md` — community infrastructure. +- `.github/ISSUE_TEMPLATE/` — bug report, feature request, new-master proposal, boundary-violation. +- `.github/PULL_REQUEST_TEMPLATE.md`. +- `.github/workflows/npm-publish.yml` — tag-triggered npm release. +- CI `fidelity-smoke` job — runs a single master × single fixture on every PR with a hard $0.05 cost cap, enforces HARD-GATE beyond dry-run. +- `package.json`: `engines.node`, `scripts.test`, `scripts.validate`, `publishConfig`. + +### Ethics +- Establish copyright tiers A–D; current 8 prebuilt masters confirmed Tier A (Public Domain in CN/TW as of 2026). +- Declare dual-track content licensing: code MIT, master content CC BY-NC-SA 4.0, prompts CC BY 4.0. + +--- + +## [0.3.0] — 2026-04-10 + +**Architectural rebuild around provenance, fidelity, and multi-platform delivery.** + +### Added +- **Provenance frontmatter** — every `prebuilt/<master>/SKILL.md` now carries `sources:` (CBETA ID + FoJin text ID), `citation_format:`, `verified_by:`, `verified_at:`. +- **Offline sutra excerpts** — `prebuilt/<master>/sources/` ships canonical passages so masters remain useful when FoJin is unreachable. +- **Progressive disclosure architecture** — SKILL.md is decision-tree + quick-ref; heavyweight `references/teaching.md`, `references/voice.md`, and `sources/` load on demand. +- **Fidelity tests** — `prebuilt/<master>/tests/fidelity.jsonl`, 5 Q&A per master, verifying citations (`must_cite`), terminology (`must_mention`), and boundary rules (`must_not_contain_first_turn`). 
+- **NPX installer** — `npx master-skill install <master>` / `list` / `uninstall` / `--all`; `bin/cli.mjs`. +- **Multi-platform plugin support** — unified `prebuilt/` reused by Claude Code, Cursor, Codex CLI, OpenCode, Gemini CLI; per-platform hooks in `hooks/`, `.claude-plugin/`, `.cursor-plugin/`, `.codex/`, `.opencode/`, `gemini-extension.json`. +- **Session-start hook** — auto-injects the list of installed masters so the user does not re-issue `/list` each session. +- **HARD-GATE enforcement** — no CBETA citation → no dogmatic assertion; fabricated CBETA IDs rejected by `scripts/validate.py`; no persona for fictional / unattested figures. +- **Two-stage independent review** — `/create-master` pipeline runs doctrine-accuracy pass followed by voice-consistency pass, auto-fix up to 2 rounds. +- **Offline tooling** — `scripts/cite.py` (CBETA citation lookup), `scripts/query.py` (offline semantic search), `scripts/validate.py` (SKILL.md frontmatter linter), `scripts/validate-fidelity.py`, `scripts/test-fidelity.py`. +- **CI pipeline** (`.github/workflows/validate-and-test.yml`) — lint, fidelity structure validation, dry-run fidelity on every push/PR; full API-backed fidelity on `workflow_dispatch`. +- **Weekly link verification** (`.github/workflows/verify-links.yml`) — cron'd `tools/verify_sources.py` opens an issue when FoJin URLs or CBETA IDs drift. +- **`/compare-masters` meta-skill** — multi-master side-by-side answering with smart master selection, divergence radar, labeled differences, classic debate templates. +- **Cross-reference tool** (`tools/cross_reference.py`) for inter-master dialogue. +- **Browser-first onboarding** — README now directs non-CLI users to `fojin.app/chat` 法师模式 first; per-master `starter_questions` added. +- **Prebuilt masters** (8): 玄奘 (Xuanzang), 鸠摩罗什 (Kumārajīva), 慧能 (Huineng), 智顗 (Zhiyi), 法藏 (Fazang), 印光 (Yinguang), 蕅益 (Ouyi), 虚云 (Xuyun). 
+ +### Changed +- Project renamed `buddha-skill` → `Buddha-skill` → **`Master-skill`** to match AgentSkills naming conventions and emphasize teaching-persona framing. +- Focus narrowed to **汉传 (Chinese Mahāyāna)** — 南传 / 藏传 sections removed from PRD, prompts, and prebuilt set. Cross-tradition `compare` still possible via `/create-master` but not shipped. +- Per-master RAG queries in `/compare-masters` now enforce tradition-specific terminology to prevent cross-tradition drift. +- Smart master selection: keywords expanded 6 → 24 per master; first-turn identity-neutral (masters no longer assume user identity on first message). +- FoJin URL format corrected for juan paths; 186 FoJin URLs verified and updated from CBETA IDs to real internal `text_id`s. + +### Fixed +- `fix(ci)`: `verify-links.yml` uses `context.repo.repo` instead of non-existent `context.repo.name`. +- `fix(lint)`: meta-skills (`compare-masters`) exempted from `lineage` / `sources` frontmatter checks. +- `fix`: `slugify` lowercases English names and handles spaces. +- `fix`: robust tool path resolution + precise selection feedback in `/compare-masters`. +- `fix`: escape `text_id` placeholder in `SKILL_MD_TEMPLATE` to survive Python `.format()`. + +### Removed +- Early prebuilt masters **宗喀巴 (Tsongkhapa, Gelug)** and **Ajahn Chah (Thai Forest)** — retracted when scope refocused to 汉传 on 2026-04-04. Will return only via a future `Master-skill-beyond-chinese` branch with native-speaker reviewers. + +### Documentation +- README: hero section with Diamond Sutra epigraph, badges, navigation; EN README synced to v0.3 parity. +- PRD (`docs/PRD.md`) refocused on 汉传. +- Plugin metadata synced across Cursor / Codex / OpenCode / Gemini extensions. + +--- + +## [0.2.0] — 2026-04-05 (historical, no release tag) + +Iteration layer between initial skeleton and full v0.3 rebuild. Highlights: +- `/compare-masters` skill first draft (P1). +- Graceful degradation when FoJin API is unavailable. 
+- Complete FoJin API reference for ad-hoc LLM queries. +- First-turn identity-neutral rule. +- Expanded flow control and error handling in SKILL.md. +- Community section added to README (linux.do link). + +--- + +## [0.1.0] — 2026-04-04 (initial skeleton) + +- Project skeleton, directory layout, prompt templates. +- FoJin data bridge (`tools/fojin_bridge.py`) with full API coverage. +- Version manager, skill writer, sutra collector, master builder orchestrator. +- Initial prebuilt masters (later expanded): 印光, Ajahn Chah, 宗喀巴, 玄奘, 鸠摩罗什, 慧能, 智顗, 法藏, 虚云, 蕅益. +- Source verification tool. +- Chinese + English README, PRD v1.0.0. + +--- + +[Unreleased]: https://github.com/xr843/Master-skill/compare/v0.3.0...HEAD +[0.3.0]: https://github.com/xr843/Master-skill/releases/tag/v0.3.0 diff --git a/ETHICS.md b/ETHICS.md new file mode 100644 index 0000000..f62c152 --- /dev/null +++ b/ETHICS.md @@ -0,0 +1,171 @@ +# Master-skill 伦理与版权声明 (Ethics & Copyright) + +> **本声明是 Master-skill 项目的强制性约束。** 任何使用、派生、贡献行为,均需遵守本文档所载的 AI 透明度、版权分级、教界使用边界、内容授权条款。与代码仓库中其它文档冲突时,本文档优先。 + +--- + +## 1. AI 透明度声明 (AI Disclosure) + +**所有通过 Master-skill 生成的对话、文本、回答,均为 AI 合成内容,不是真实祖师的著作、开示或教言。** + +- 每位预置法师的回答均由大型语言模型基于 CBETA 经典文献 + `teaching.md` / `voice.md` 合成,**不代表**历史上该法师的原话、亲口开示或亲笔著作 +- AI 角色对祖师表达风格的还原是**近似**,非权威:语言选词、句式节奏、比喻用法由模型生成,不可直接引用为"某法师说过" +- 所有引经据典的 CBETA 编号来自 `sources/` 离线片段或 FoJin 实时检索,但**回答中的文义阐释**是 AI 组合生成,可能与祖师原文含义有偏差 +- 使用时请始终默认:"这是基于文献的 AI 学习辅助",不是"与祖师对话"。前者是工具,后者是误解 + +如你在任何公开场合引用、转发、发表由本项目生成的文本,**必须明确标注 AI 生成属性与原始出处**(CBETA 经号 / FoJin 链接)。将 AI 生成内容作为祖师原话传播,既违反本协议,也违背佛教"不妄语"的基本戒律。 + +--- + +## 2. 
版权分级 (Copyright Tiers) + +不同法师的教法与著作处于不同的版权状态。本项目**仅收录版权状态明确允许的法师**。 + +### Tier A — 公有领域 (Public Domain, 可直接收录) + +适用于圆寂已超过各主要司法辖区著作权保护期的历代祖师。本项目当前 8 位预置法师均属此类: + +| 法师 | 生卒 | 圆寂距今 | 状态 | +|------|------|---------|------| +| 鸠摩罗什 | 344–413 | > 1600 年 | 公有领域 | +| 智顗 | 538–597 | > 1400 年 | 公有领域 | +| 玄奘 | 602–664 | > 1360 年 | 公有领域 | +| 慧能 | 638–713 | > 1310 年 | 公有领域 | +| 法藏 | 643–712 | > 1310 年 | 公有领域 | +| 蕅益 | 1599–1655 | > 370 年 | 公有领域 | +| 印光 | 1861–1940 | > 85 年 | 中国 / 台湾著作权已过期(死后 50 年);美国部分早期文集已过期 | +| 虚云 | 1840–1959 | > 65 年 | 中国 / 台湾著作权已过期(死后 50 年)| + +**Tier A 收录要求:** +- `teaching.md` / `voice.md` 基于 CBETA(公有领域学术版)或同等公开学术版本 +- `sources/` 引用须附 CBETA 经号,URL 指向 FoJin 或 CBETA 官方 +- 不得直接大段复制在世出版商的白话译注、现代校注者独立创作的学术评论 + +### Tier B — 版权期内 (In-Copyright, 需权利人授权) + +圆寂距今不满 50 年(中国大陆 / 台湾 / 香港)或 70 年(日本 / 韩国 / 欧盟 / 多数英语国家)的法师,其著作仍受著作权保护。 + +**典型例子(非穷举):** + +| 法师 | 生卒 | 版权到期时间(估算,以中国大陆 50 年为例)| +|------|------|----------------------------------------| +| 太虚 | 1890–1947 | 1997 年已过期 → 可视为 Tier A | +| 弘一 | 1880–1942 | 1992 年已过期 → 可视为 Tier A | +| 宣化 | 1918–1995 | 约 2045 年 | +| 印顺 | 1906–2005 | 约 2055 年 | +| 圣严 | 1930–2009 | 约 2059 年 | +| 净空 | 1927–2022 | 约 2072 年 | +| 一行禅师 (Thich Nhat Hanh) | 1926–2022 | 约 2092 年(越南 50 年 / 法 70 年 / 美 95 年 post-pub)| + +**Tier B 收录规则:** +- **默认不收录。** 未获得法师本人、法师所属机构、或遗著权利继承人的明确书面授权,不得提交此类 PR +- 如已获授权,PR 必须在 `prebuilt/{slug}/LICENSE.md` 附上授权证明(scan / 邮件截图 / 正式授权函),由维护者二次确认 +- 授权文本必须包含:①「用于 AI 教学角色生成」②「允许公开发布与 MIT 分发」③「允许社区修改与再生成」三项显性许可 + +### Tier C — 拒绝收录 (Never Admit) + +以下类别无论版权状态,一律不收录: + +- **在世的任何法师 / 出家人 / 居士导师**:风险过高,无法预判本人意愿变更、教内身份变化、教法演进 +- **被主流宗教学界认定为伪托或争议身份者**:如某些明清扶乩"祖师"、现代附佛外道的"法王""上人" +- **诸佛菩萨本尊**:佛陀、观音、文殊、普贤、地藏、弥勒、阿弥陀佛、药师佛等圣者不生成 persona;经文本身可引用,但不做"观音会怎么说"式的角色扮演 +- **禅门公案中的虚构 / 无考人物**:公案中未有史实传记、仅作教学道具的角色不单独立传承 + +### Tier D — 灰色地带 (Case-by-Case) + +- 近代出家人但著作已过保护期(如太虚、弘一、倓虚 1875–1963):允许收录,但须额外评估教内争议 +- 海外华人法师在不同司法辖区版权状态分裂(美国 95 年 post-publication vs 中国 50 年 post-mortem):PR 须列出详细版权分析 + +--- + +## 3. 
教界使用边界 (Religious Boundary) + +Master-skill 是**文献辅助工具**,不是**修行权威**。以下行为 AI 角色必须拒绝,无论用户如何要求、如何改写提问: + +| 禁止 | 原因 | +|------|------| +| 为用户传戒、授戒、证戒 | 戒体由僧团授受,AI 无戒体 | +| 为用户印证开悟、授记、给法卷 | 印证由传承中具德善知识面授 | +| 给出修行状况诊断("你这个情况是某种禅病") | 需面授观察,AI 无此能力 | +| 预测个人命数、因果业报细节、往生品位 | 佛教不鼓励算命;AI 无此智慧 | +| 充当"与已故法师对话"的灵媒 | 违反佛教正见,属附佛外道 | +| 处理精神 / 心理危机(自伤 / 自杀 / 急性精神症状) | AI 必须立即转介专业医疗与善知识,不得给佛法替代方案 | + +**硬性规则(已写入每位法师 `voice.md` Layer 0):** +- 遇到以上场景,AI 一律回答:"此问题超出 AI 教学角色能力范围。建议亲近有戒律与传承的善知识当面请益。" +- 并视情况引用《四十二章经》"慎勿信汝意,汝意不可信"等相关经证 + +如发现预置法师在以上场景中"越界回答",请 [立即提交 Issue](https://github.com/xr843/Master-skill/issues/new?labels=boundary-violation) 标 `boundary-violation`。此类 Issue 视为 **P0 安全事项**,优先于所有 feature 工作处理。 + +--- + +## 4. 内容授权条款 (Content License) + +Master-skill 采取**双轨授权**,代码与内容分开授权: + +| 资产类型 | 授权 | 允许 | 禁止 | +|---------|------|------|------| +| 源代码(`scripts/`、`tools/`、`bin/`、`hooks/`、`.github/`、workflow、CI) | **MIT** | 任意使用、修改、商用 | 去除版权声明 | +| 预置法师内容(`prebuilt/**/SKILL.md`、`teaching.md`、`voice.md`、`sources/*.md`、`fidelity.jsonl`) | **CC BY-NC-SA 4.0** | 署名 + 非商用 + 相同方式共享下任意使用 | 未署名、纯商业闭源分发 | +| Prompts 模板(`prompts/**`) | **CC BY 4.0** | 署名后任意使用 | 去署名 | +| FoJin 检索返回的原始经文 | **CBETA 知识共用 非商业性 禁止改作 3.0** | 遵循 CBETA 原协议 | 违反 CBETA 协议 | + +**商业化使用(含但不限于):** +- SaaS 付费问答服务嵌入 Master-skill 法师内容 +- 打包法师 persona 作为付费 App 卖点 +- 基于法师回答生成付费订阅课程 + +均需单独联系 xianren843@protonmail.com 获得授权。非商业研究、教学、个人修学自由使用。 + +--- + +## 5. 数据来源透明 (Data Provenance) + +每位法师 `SKILL.md` frontmatter 必须声明: + +```yaml +sources: + - title: {经典名称} + cbeta_id: {CBETA 编号,如 T30n1579} + fojin_text_id: {FoJin 内部 ID} +citation_format: "【《{title}》卷{juan},{cbeta_id}】" +verified_by: {维护者 GitHub 用户名} +verified_at: {YYYY-MM-DD} +``` + +**HARD-GATE 铁律:** +- 无 CBETA 经号的教义断言不得写入 `teaching.md` +- 不得捏造 CBETA 编号(CI `scripts/validate.py` 会对照 FoJin 反查) +- 不得为虚构人物、合成 persona、无史实记载者建立 `prebuilt/` + +违反以上任一,PR 将被自动驳回。 + +--- + +## 6. 
举报与申诉 (Report & Appeal) + +如你是: + +- **版权所有人** / 法师所属机构 / 遗著继承人,认为本项目某个法师的内容侵犯你的权益 +- **教内大德** / 僧团负责人,认为某个法师 persona 的回答违背教理或存在越界 +- **学界人士**,认为某处引证 / 断句 / 解读存在学术错误 + +请通过以下任一方式联系: + +1. **GitHub Issue**:标 `ethics` 或 `copyright-concern` 标签 → https://github.com/xr843/Master-skill/issues/new +2. **邮件**:xianren843@protonmail.com(收件后 7 日内回复) +3. **紧急下架请求**:邮件标题注明 `[URGENT TAKEDOWN]`,将在 48 小时内处理 + +维护者承诺:**一切版权 / 教界合规性申诉优先于功能开发**。 + +--- + +## 7. 版本与修订 (Revisions) + +- 本文档自 v0.4.0 起随项目版本一同演进 +- 任何对**Tier 边界、硬性规则、授权条款**的修改,必须发 PR + 标 `ethics-change` 标签 + 至少 7 日公示期 + 维护者显式批准 +- 修改历史见 `CHANGELOG.md` 中 `### Ethics` 小节 + +--- + +*合十。愿此工具如实利益学人,不违三宝本怀。* diff --git a/README.md b/README.md index af42d3b..db5fe5d 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,8 @@ 本项目本着对佛教传统的尊重而建立。所有内容均依据佛教经典文献生成,不做教义评判,不代表任何宗派权威。生成内容仅供学习参考,如需正式修行指导,请亲近善知识。 +> **⚠️ 所有通过 Master-skill 生成的对话均为 AI 合成内容**,不代表历史上祖师的亲口开示、亲笔著作。项目遵守严格的版权分级与教界边界——详见 **[ETHICS.md](ETHICS.md)**(AI 透明度、版权 Tier A–D、禁止行为、内容双轨授权、紧急下架通道)。 + --- ## 特性 diff --git a/scripts/test-fidelity.py b/scripts/test-fidelity.py index a53ce4b..aaf0a1a 100644 --- a/scripts/test-fidelity.py +++ b/scripts/test-fidelity.py @@ -110,7 +110,12 @@ def check_response(response: str, test_case: dict, is_first_turn: bool = True) - } -def run_tests(master_name: str, dry_run: bool = False, model: str = "claude-sonnet-4-6") -> dict: +def run_tests( + master_name: str, + dry_run: bool = False, + model: str = "claude-sonnet-4-6", + max_tests: int | None = None, +) -> dict: """Run fidelity tests for a master. Returns summary.""" master_dir = PREBUILT_DIR / master_name if not master_dir.exists(): @@ -120,6 +125,16 @@ def run_tests(master_name: str, dry_run: bool = False, model: str = "claude-sonn if not tests: return {"error": f"No fidelity.jsonl found for '{master_name}'"} + if max_tests is not None and max_tests > 0: + # Prefer easier/basic tests when capping — smoke suite should hit + # the reliable floor, not the advanced stress cases. 
+ tests = sorted( + tests, + key=lambda t: {"basic": 0, "intermediate": 1, "advanced": 2}.get( + t.get("difficulty", "intermediate"), 1 + ), + )[:max_tests] + results: list[dict] = [] if dry_run: @@ -218,6 +233,12 @@ def main(): parser.add_argument("--dry-run", action="store_true", help="Show test cases without calling API") parser.add_argument("--model", type=str, default="claude-sonnet-4-6", help="Claude model to use") parser.add_argument("--json", action="store_true", help="Output as JSON") + parser.add_argument( + "--max-tests", + type=int, + default=None, + help="Cap the number of fixtures per master (smoke runs in CI use 1)", + ) args = parser.parse_args() if not args.master and not args.all: @@ -236,7 +257,9 @@ def main(): print(f"\n{'='*50}") print(f"Testing: {master}") print(f"{'='*50}") - result = run_tests(master, dry_run=args.dry_run, model=args.model) + result = run_tests( + master, dry_run=args.dry_run, model=args.model, max_tests=args.max_tests + ) all_results.append(result) if not args.json and "error" not in result: