"""Tests for voice.md identity-neutral rules (首轮身份中立原则).

Verifies:
1. Every voice.md contains the 首轮身份中立原则 rule in Layer 0
2. Every voice.md's 开场方式 and 称呼方式 sections are tiered: 首轮中立开场 / 后续开场 and 首轮中立称呼 / 身份已知后
3. The 首轮中立 section does NOT contain identity-assuming address terms
4. voice.md and SKILL.md PART B stay in sync
"""

import re
from pathlib import Path
import pytest

# The "prebuilt" directory that sits beside this file's parent directory;
# it is scanned below for one sub-directory per master.
PREBUILT_DIR = Path(__file__).parent.parent.joinpath("prebuilt")

# Identity-assuming address terms that the first-turn (首轮中立) sections
# of a voice.md must not contain.
IDENTITY_TERMS = [
    "居士",
    "善信",
    "行者",
    "学人",
    "善男子",
    "善女人",
    "出家人",
    "师父",
    "大众",
    "道友",
]

# Sorted names of every sub-directory of PREBUILT_DIR that holds a voice.md.
# Evaluated at import time, so PREBUILT_DIR must exist when this module is
# collected.
MASTER_SLUGS = sorted(
    entry.name
    for entry in PREBUILT_DIR.iterdir()
    if entry.is_dir() and (entry / "voice.md").exists()
)


@pytest.fixture(params=MASTER_SLUGS)
def slug(request):
    """Provide one master slug per parametrized run.

    The fixture is parametrized over ``MASTER_SLUGS``, so any test that
    requests it (directly or through another fixture) runs once per slug.
    """
    current_slug = request.param
    return current_slug


@pytest.fixture
def voice_content(slug):
    """Return the UTF-8 decoded text of ``PREBUILT_DIR/<slug>/voice.md``."""
    voice_path = PREBUILT_DIR.joinpath(slug, "voice.md")
    return voice_path.read_text(encoding="utf-8")


@pytest.fixture
def skill_content(slug):
    """Return the UTF-8 decoded text of ``PREBUILT_DIR/<slug>/SKILL.md``.

    Slugs are selected by the presence of voice.md only, so a missing
    SKILL.md surfaces here as a read error rather than as a test failure.
    """
    skill_path = PREBUILT_DIR.joinpath(slug, "SKILL.md")
    return skill_path.read_text(encoding="utf-8")


def test_layer0_contains_neutrality_rule(slug, voice_content):
    """Check that voice.md mentions the 首轮身份中立原则 rule.

    This is a plain substring search over the whole file; it does not
    verify that the rule is located inside the Layer 0 section.
    """
    rule_name = "首轮身份中立原则"
    rule_present = rule_name in voice_content
    assert rule_present, (
        f"{slug}/voice.md missing 首轮身份中立原则 rule in Layer 0"
    )


def test_opening_section_is_tiered(slug, voice_content):
    """Check that voice.md names both tiers of the 开场方式 section.

    Both ``首轮中立开场`` and ``后续开场`` must occur somewhere in the file;
    the search is not restricted to the 开场方式 section itself.
    """
    # Checked in this order so the first missing tier is the one reported.
    for tier_name in ("首轮中立开场", "后续开场"):
        assert tier_name in voice_content, (
            f"{slug}/voice.md 开场方式 missing {tier_name} subsection"
        )


def test_address_section_is_tiered(slug, voice_content):
    """Check that voice.md names both tiers of the 称呼方式 section.

    Both ``首轮中立称呼`` and ``身份已知后`` must occur somewhere in the file;
    the search is not restricted to the 称呼方式 section itself.
    """
    # Checked in this order so the first missing tier is the one reported.
    for tier_name in ("首轮中立称呼", "身份已知后"):
        assert tier_name in voice_content, (
            f"{slug}/voice.md 称呼方式 missing {tier_name} subsection"
        )


def _extract_section(content: str, start_marker: str, end_marker: str) -> str:
    """Return the slice of *content* that begins at *start_marker*.

    The slice starts at the first occurrence of *start_marker* (the marker
    itself is included) and stops just before the first *end_marker* found
    after it. If *end_marker* does not occur after the start marker, the
    slice runs to the end of *content*. If *start_marker* is absent, an
    empty string is returned.
    """
    try:
        begin = content.index(start_marker)
    except ValueError:
        # Start marker not present at all.
        return ""

    # Search for the end marker only past the start marker, so an earlier
    # occurrence of the end marker is ignored.
    stop = content.find(end_marker, begin + len(start_marker))
    return content[begin:] if stop < 0 else content[begin:stop]


def test_neutral_opening_has_no_identity_terms(slug, voice_content):
    """Check that the 首轮中立开场 block is free of identity-assuming terms.

    The block is the text from the ``**首轮中立开场**`` marker up to the
    ``**后续开场**`` marker, as returned by ``_extract_section``. Terms are
    matched as plain substrings against ``IDENTITY_TERMS``.
    """
    neutral_block = _extract_section(
        voice_content, "**首轮中立开场**", "**后续开场**"
    )
    # An empty result means the start marker was not found.
    assert neutral_block, f"{slug}: could not extract 首轮中立开场 section"

    offenders = [term for term in IDENTITY_TERMS if term in neutral_block]
    report = (
        f"{slug}/voice.md 首轮中立开场 contains forbidden identity terms: {offenders}\n"
        f"Section content:\n{neutral_block}"
    )
    assert not offenders, report


def test_neutral_address_has_no_identity_terms(slug, voice_content):
    """Check that the 首轮中立称呼 block is free of identity-assuming terms.

    The block is the text from the ``**首轮中立称呼**`` marker up to the
    ``**身份已知后**`` marker, as returned by ``_extract_section``. Terms are
    matched as plain substrings against ``IDENTITY_TERMS``.
    """
    neutral_block = _extract_section(
        voice_content, "**首轮中立称呼**", "**身份已知后**"
    )
    # An empty result means the start marker was not found.
    assert neutral_block, f"{slug}: could not extract 首轮中立称呼 section"

    offenders = [term for term in IDENTITY_TERMS if term in neutral_block]
    report = (
        f"{slug}/voice.md 首轮中立称呼 contains forbidden identity terms: {offenders}\n"
        f"Section content:\n{neutral_block}"
    )
    assert not offenders, report


def test_skill_md_contains_voice_body(slug, voice_content, skill_content):
    """Check that SKILL.md embeds the body of voice.md verbatim.

    The body is voice.md without its leading ``# `` title line (when the
    first line is one), without the blank lines directly after that title,
    and without trailing whitespace. It is searched for anywhere in
    SKILL.md; the PART B region is not isolated.

    An empty body is rejected explicitly: ``"" in skill_content`` is always
    true, so without this guard an empty or title-only voice.md would pass
    the sync check no matter what SKILL.md contains.
    """
    # Split off the first line; ``remainder`` is everything after the first
    # newline (or "" when the file has a single line).
    first_line, _, remainder = voice_content.partition("\n")
    if first_line.startswith("# "):
        voice_body = remainder.lstrip("\n")
    else:
        voice_body = voice_content
    voice_body = voice_body.rstrip()

    assert voice_body, (
        f"{slug}/voice.md has no body to compare against SKILL.md"
    )

    assert voice_body in skill_content, (
        f"{slug}/SKILL.md PART B is out of sync with voice.md. "
        f"Run: python3 tools/sync_skill_from_voice.py --slug {slug}"
    )