feat: 首轮身份中立原则 — masters no longer assume user identity on first turn

- Add Layer 0 hard rule to all 8 masters' voice.md: first turn must use
  neutral address (您/汝/你/问者), forbidden terms include 居士/行者/学人/
  善男子/善女人/出家人/师父/大众/道友/善信
- From turn 2+: masters adapt to user's self-disclosed or question-inferred
  identity, restoring each master's historical address style
- Layer 2 开场方式/称呼方式 reorganized into 首轮中立 / 身份已知后 tiers
- Update voice_builder.md and voice_analyzer.md templates so future
  /create-master runs inherit this rule
- Add tools/sync_skill_from_voice.py to keep SKILL.md PART B in sync
- Add 48 regression tests in test_voice_rules.py (all 79 tests pass)
This commit is contained in:
xianren
2026-04-05 08:44:35 +08:00
parent e0afab5c9f
commit 02df9344b5
20 changed files with 530 additions and 67 deletions
+129
View File
@@ -0,0 +1,129 @@
"""Tests for voice.md identity-neutral rules (首轮身份中立原则).
Verifies:
1. Every voice.md contains the 首轮身份中立原则 rule in Layer 0
2. Every voice.md's 开场方式 section is tiered into 首轮中立开场 / 后续开场, and its 称呼方式 section into 首轮中立称呼 / 身份已知后
3. The 首轮中立 section does NOT contain identity-assuming address terms
4. voice.md and SKILL.md PART B stay in sync
"""
import re
from pathlib import Path
import pytest
# Directory named "prebuilt" located two levels above this test file.
PREBUILT_DIR = Path(__file__).parent.parent.joinpath("prebuilt")

# Address terms that presuppose who the user is; none of them may appear in
# the first-turn (首轮中立) sections of a voice.md.
IDENTITY_TERMS = [
    "居士",
    "善信",
    "行者",
    "学人",
    "善男子",
    "善女人",
    "出家人",
    "师父",
    "大众",
    "道友",
]
# Slugs of every master to test: names of the sub-directories of
# PREBUILT_DIR that contain a voice.md, in sorted order. Evaluated once at
# import (collection) time.
MASTER_SLUGS = sorted(
    entry.name
    for entry in PREBUILT_DIR.iterdir()
    if entry.is_dir() and entry.joinpath("voice.md").exists()
)
@pytest.fixture(params=MASTER_SLUGS)
def slug(request):
    """Provide one master slug per parametrized run.

    The fixture is parametrized over MASTER_SLUGS, so any test requesting it
    (directly or via a dependent fixture) is executed once for each slug.
    """
    current_slug = request.param
    return current_slug
@pytest.fixture
def voice_content(slug):
    """Return the UTF-8 text of ``PREBUILT_DIR/<slug>/voice.md``."""
    voice_path = PREBUILT_DIR.joinpath(slug, "voice.md")
    return voice_path.read_text(encoding="utf-8")
@pytest.fixture
def skill_content(slug):
    """Return the UTF-8 text of ``PREBUILT_DIR/<slug>/SKILL.md``."""
    skill_path = PREBUILT_DIR.joinpath(slug, "SKILL.md")
    return skill_path.read_text(encoding="utf-8")
def test_layer0_contains_neutrality_rule(slug, voice_content):
    """Check that voice.md mentions the 首轮身份中立原则 rule.

    This is a whole-file substring check: it confirms the rule name appears
    somewhere in voice.md, not that it sits inside the Layer 0 section.
    """
    rule_is_present = "首轮身份中立原则" in voice_content
    failure_hint = f"{slug}/voice.md missing 首轮身份中立原则 rule in Layer 0"
    assert rule_is_present, failure_hint
def test_opening_section_is_tiered(slug, voice_content):
    """Check that voice.md names both opening tiers.

    Both sub-header names, 首轮中立开场 and 后续开场, must occur somewhere in
    the file; they are checked in that order and the first one missing fails
    the test.
    """
    for subsection in ("首轮中立开场", "后续开场"):
        assert subsection in voice_content, (
            f"{slug}/voice.md 开场方式 missing {subsection} subsection"
        )
def test_address_section_is_tiered(slug, voice_content):
    """Check that voice.md names both address tiers.

    Both sub-header names, 首轮中立称呼 and 身份已知后, must occur somewhere in
    the file; they are checked in that order and the first one missing fails
    the test.
    """
    for subsection in ("首轮中立称呼", "身份已知后"):
        assert subsection in voice_content, (
            f"{slug}/voice.md 称呼方式 missing {subsection} subsection"
        )
def _extract_section(content: str, start_marker: str, end_marker: str) -> str:
"""Extract text between two markers."""
start = content.find(start_marker)
if start == -1:
return ""
end = content.find(end_marker, start + len(start_marker))
if end == -1:
return content[start:]
return content[start:end]
def test_neutral_opening_has_no_identity_terms(slug, voice_content):
    """Check the 首轮中立开场 section for identity-assuming address terms.

    The section is the text from the ``**首轮中立开场**`` marker up to the
    ``**后续开场**`` marker. The test fails if that section cannot be
    extracted, or if any entry of IDENTITY_TERMS occurs inside it.
    """
    opening_text = _extract_section(voice_content, "**首轮中立开场**", "**后续开场**")
    assert opening_text, f"{slug}: could not extract 首轮中立开场 section"

    # Collect every forbidden term so the failure message lists them all.
    found_terms = []
    for term in IDENTITY_TERMS:
        if term in opening_text:
            found_terms.append(term)

    assert not found_terms, (
        f"{slug}/voice.md 首轮中立开场 contains forbidden identity terms: {found_terms}\n"
        f"Section content:\n{opening_text}"
    )
def test_neutral_address_has_no_identity_terms(slug, voice_content):
    """Check the 首轮中立称呼 section for identity-assuming address terms.

    The section is the text from the ``**首轮中立称呼**`` marker up to the
    ``**身份已知后**`` marker. The test fails if that section cannot be
    extracted, or if any entry of IDENTITY_TERMS occurs inside it.
    """
    address_text = _extract_section(voice_content, "**首轮中立称呼**", "**身份已知后**")
    assert address_text, f"{slug}: could not extract 首轮中立称呼 section"

    # Collect every forbidden term so the failure message lists them all.
    found_terms = []
    for term in IDENTITY_TERMS:
        if term in address_text:
            found_terms.append(term)

    assert not found_terms, (
        f"{slug}/voice.md 首轮中立称呼 contains forbidden identity terms: {found_terms}\n"
        f"Section content:\n{address_text}"
    )
def test_skill_md_contains_voice_body(slug, voice_content, skill_content):
    """Check that SKILL.md contains the body of voice.md verbatim.

    The body is voice.md without its leading ``# Title`` line (when the first
    line is one), with the blank lines directly after the title and any
    trailing whitespace removed. The test fails when that body is empty or
    when it is not a substring of SKILL.md.
    """
    # Split off the first line; it is dropped only if it is a "# " heading.
    first_line, _, remainder = voice_content.partition("\n")
    if first_line.startswith("# "):
        voice_body = remainder.lstrip("\n")
    else:
        voice_body = voice_content
    voice_body = voice_body.rstrip()

    # "" is a substring of every string, so without this guard a voice.md
    # with no body would make the sync check below pass vacuously.
    assert voice_body, (
        f"{slug}/voice.md has no body text to compare against SKILL.md PART B"
    )

    assert voice_body in skill_content, (
        f"{slug}/SKILL.md PART B is out of sync with voice.md. "
        f"Run: python3 tools/sync_skill_from_voice.py --slug {slug}"
    )