feat: 首轮身份中立原则 — masters no longer assume user identity on first turn

- Add Layer 0 hard rule to all 8 masters' voice.md: the first turn must use a neutral address (您/汝/你/问者); forbidden terms include 居士/行者/学人/善男子/善女人/出家人/师父/大众/道友/善信
- From turn 2 onward, masters adapt to the user's self-disclosed or question-inferred identity, restoring each master's historical address style
- Reorganize Layer 2 开场方式/称呼方式 into 首轮中立 / 身份已知后 tiers
- Update the voice_builder.md and voice_analyzer.md templates so future /create-master runs inherit this rule
- Add tools/sync_skill_from_voice.py to keep SKILL.md PART B in sync with voice.md
- Add 48 regression tests in test_voice_rules.py (all 79 tests pass)
2026-05-10 05:16:25 +00:00 · 2026-04-05 08:44:35 +08:00
parent e0afab5c9f
commit 02df9344b5
20 changed files with 530 additions and 67 deletions
@@ -0,0 +1,129 @@
+"""Tests for voice.md identity-neutral rules (首轮身份中立原则).
+
+Verifies:
+1. Every voice.md contains the 首轮身份中立原则 rule (a Layer 0 rule; presence is checked file-wide)
+2. Every voice.md tiers 开场方式 into 首轮中立开场 / 后续开场 and 称呼方式 into 首轮中立称呼 / 身份已知后
+3. Neither first-turn section (首轮中立开场, 首轮中立称呼) contains identity-assuming address terms
+4. SKILL.md contains the voice.md body (title line excluded), i.e. the two stay in sync
+"""
+
+import re
+from pathlib import Path
+import pytest
+
+# The prebuilt/ folder that sits beside this test file's directory; one sub-folder per master.
+PREBUILT_DIR = Path(__file__).parent.parent / "prebuilt"
+
+# Forms of address that presume who the user is; first-turn sections must avoid them.
+IDENTITY_TERMS = [
+    "居士", "善信", "行者", "学人", "善男子",
+    "善女人", "出家人", "师父", "大众", "道友",
+]
+
+# Sorted names of the PREBUILT_DIR sub-folders that ship a voice.md (read once, at import).
+MASTER_SLUGS = sorted(
+    entry.name for entry in PREBUILT_DIR.iterdir()
+    if entry.is_dir() and (entry / "voice.md").exists()
+)
+
+
+@pytest.fixture(params=MASTER_SLUGS)
+def slug(request):  # parametrized fixture: yields one entry of MASTER_SLUGS per run
+    return request.param
+
+
+@pytest.fixture
+def voice_content(slug):  # full UTF-8 text of <PREBUILT_DIR>/<slug>/voice.md
+    return PREBUILT_DIR.joinpath(slug, "voice.md").read_text(encoding="utf-8")
+
+
+@pytest.fixture
+def skill_content(slug):  # full UTF-8 text of <PREBUILT_DIR>/<slug>/SKILL.md
+    return PREBUILT_DIR.joinpath(slug, "SKILL.md").read_text(encoding="utf-8")
+
+
+def test_layer0_contains_neutrality_rule(slug, voice_content):
+    """Require voice.md to mention 首轮身份中立原则 (substring check; position is not verified)."""
+    rule_name = "首轮身份中立原则"
+    failure = f"{slug}/voice.md missing 首轮身份中立原则 rule in Layer 0"
+    assert rule_name in voice_content, failure
+
+
+def test_opening_section_is_tiered(slug, voice_content):
+    """Require both opening tiers, 首轮中立开场 and 后续开场, to appear in voice.md."""
+    # Plain substring search over the whole file; tier names are not tied to 开场方式.
+    # The first-turn tier comes first so it is the one reported when both are absent.
+    for tier in ("首轮中立开场", "后续开场"):
+        assert tier in voice_content, (
+            f"{slug}/voice.md 开场方式 missing {tier} subsection"
+        )
+
+
+def test_address_section_is_tiered(slug, voice_content):
+    """Require both address tiers, 首轮中立称呼 and 身份已知后, to appear in voice.md."""
+    # Plain substring search over the whole file; tier names are not tied to 称呼方式.
+    # The first-turn tier comes first so it is the one reported when both are absent.
+    for tier in ("首轮中立称呼", "身份已知后"):
+        assert tier in voice_content, (
+            f"{slug}/voice.md 称呼方式 missing {tier} subsection"
+        )
+
+
+def _extract_section(content: str, start_marker: str, end_marker: str) -> str:
+    """Return content from start_marker (inclusive) to the next end_marker (exclusive).
+    Missing start_marker gives ""; missing end_marker extends the slice to the end."""
+    begin = content.find(start_marker)
+    if begin < 0:
+        return ""
+    # Look for the end marker only past the start marker, so earlier occurrences are ignored.
+    stop = content.find(end_marker, begin + len(start_marker))
+    return content[begin:] if stop < 0 else content[begin:stop]
+
+
+def test_neutral_opening_has_no_identity_terms(slug, voice_content):
+    """Check that the first-turn opening block uses none of IDENTITY_TERMS.
+
+    The block runs from the bold **首轮中立开场** header up to the bold **后续开场**
+    header; if the closing header is absent it runs to the end of voice.md.
+    """
+    first_turn = _extract_section(voice_content, "**首轮中立开场**", "**后续开场**")
+    assert first_turn, f"{slug}: could not extract 首轮中立开场 section"
+
+    found = [term for term in IDENTITY_TERMS if term in first_turn]
+    assert not found, (
+        f"{slug}/voice.md 首轮中立开场 contains forbidden identity terms: {found}\n"
+        f"Section content:\n{first_turn}"
+    )
+
+
+def test_neutral_address_has_no_identity_terms(slug, voice_content):
+    """Check that the first-turn address block uses none of IDENTITY_TERMS.
+
+    The block runs from the bold **首轮中立称呼** header up to the bold **身份已知后**
+    header; if the closing header is absent it runs to the end of voice.md.
+    """
+    first_turn = _extract_section(voice_content, "**首轮中立称呼**", "**身份已知后**")
+    assert first_turn, f"{slug}: could not extract 首轮中立称呼 section"
+
+    found = [term for term in IDENTITY_TERMS if term in first_turn]
+    assert not found, (
+        f"{slug}/voice.md 首轮中立称呼 contains forbidden identity terms: {found}\n"
+        f"Section content:\n{first_turn}"
+    )
+
+
+def test_skill_md_contains_voice_body(slug, voice_content, skill_content):
+    """Check that SKILL.md embeds voice.md verbatim, ignoring voice.md's title line.
+
+    Only a leading "# " heading on the very first line is dropped, together with the
+    blank lines right after it; trailing whitespace is ignored in either case.
+    """
+    first_line, _, rest = voice_content.partition("\n")
+    body = rest.lstrip("\n") if first_line.startswith("# ") else voice_content
+    body = body.rstrip()
+
+    out_of_sync = (
+        f"{slug}/SKILL.md PART B is out of sync with voice.md. "
+        f"Run: python3 tools/sync_skill_from_voice.py --slug {slug}"
+    )
+    assert body in skill_content, out_of_sync