mirror of
https://github.com/xr843/Master-skill.git
synced 2026-05-10 13:26:25 +00:00
126 lines
3.9 KiB
Python
126 lines
3.9 KiB
Python
"""
|
|
Sutra Collector — gathers Buddhist texts and metadata for a specific teacher.
|
|
|
|
Orchestrates FoJin Bridge calls to collect:
|
|
1. Teacher's KG entity and relations (lineage, school, texts)
|
|
2. Core texts associated with the teacher
|
|
3. Text content (selected juans)
|
|
4. Dictionary terms related to the teacher's tradition
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
from typing import Optional
|
|
|
|
from fojin_bridge import FojinBridge, create_bridge
|
|
|
|
|
|
def collect_teacher_data(
|
|
teacher_name: str,
|
|
tradition: Optional[str] = None,
|
|
bridge: Optional[FojinBridge] = None,
|
|
) -> dict:
|
|
"""Collect all available data about a teacher from FoJin."""
|
|
if bridge is None:
|
|
bridge = create_bridge()
|
|
|
|
result = {
|
|
"entity": None,
|
|
"lineage": [],
|
|
"texts": [],
|
|
"content_samples": [],
|
|
"terms": [],
|
|
}
|
|
|
|
# Step 1: Find teacher in KG
|
|
kg_results = bridge.search_kg_entities(teacher_name, entity_type="person")
|
|
if kg_results.get("results"):
|
|
entity = kg_results["results"][0]
|
|
result["entity"] = entity
|
|
detail = bridge.get_kg_entity(entity["id"])
|
|
result["entity"] = detail
|
|
for rel in detail.get("relations", []):
|
|
if rel["predicate"] in (
|
|
"teacher_of", "student_of", "lineage_holder",
|
|
"transmitted_to", "received_from",
|
|
):
|
|
result["lineage"].append(rel)
|
|
|
|
# Step 2: Search for associated texts
|
|
text_results = bridge.search_texts(teacher_name, size=50)
|
|
if text_results.get("results"):
|
|
result["texts"] = text_results["results"]
|
|
|
|
# Step 3: Collect content samples from top texts
|
|
for text in result["texts"][:5]:
|
|
text_id = text.get("id")
|
|
if not text_id:
|
|
continue
|
|
try:
|
|
content = bridge.get_text_content(text_id, juan_num=1)
|
|
result["content_samples"].append({
|
|
"text_id": text_id,
|
|
"title": text.get("title_zh", ""),
|
|
"content": content.get("content", "")[:3000],
|
|
})
|
|
except Exception:
|
|
continue
|
|
|
|
# Step 4: Collect tradition-specific terms
|
|
if tradition:
|
|
tradition_terms = {
|
|
"汉传": ["净土", "禅", "般若", "菩提", "念佛"],
|
|
"南传": ["vipassana", "satipatthana", "anicca", "dukkha", "anatta"],
|
|
"藏传": ["菩提道次第", "空性", "菩提心", "止观", "三主要道"],
|
|
}
|
|
for term in tradition_terms.get(tradition, []):
|
|
dict_results = bridge.search_dictionary(term, size=5)
|
|
if dict_results.get("results"):
|
|
result["terms"].extend(dict_results["results"])
|
|
|
|
return result
|
|
|
|
|
|
def collect_specific_texts(
|
|
cbeta_ids: list,
|
|
bridge: Optional[FojinBridge] = None,
|
|
) -> list:
|
|
"""Collect full content for specific texts by CBETA ID."""
|
|
if bridge is None:
|
|
bridge = create_bridge()
|
|
|
|
texts = []
|
|
id_map = bridge.lookup_cbeta_ids(",".join(cbeta_ids))
|
|
|
|
for cbeta_id, text_id in id_map.items():
|
|
if not text_id:
|
|
continue
|
|
text_meta = bridge.get_text(text_id)
|
|
juan_list = bridge.get_text_juans(text_id)
|
|
|
|
text_data = {
|
|
"cbeta_id": cbeta_id,
|
|
"text_id": text_id,
|
|
"title": text_meta.get("title_zh", ""),
|
|
"juans": [],
|
|
}
|
|
|
|
for juan in juan_list.get("juans", [])[:10]:
|
|
content = bridge.get_text_content(text_id, juan["juan_num"])
|
|
text_data["juans"].append({
|
|
"juan_num": juan["juan_num"],
|
|
"content": content.get("content", ""),
|
|
})
|
|
|
|
texts.append(text_data)
|
|
|
|
return texts
|
|
|
|
|
|
def save_collected_data(data: dict, output_path: str) -> str:
|
|
"""Save collected data to JSON file."""
|
|
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
|
with open(output_path, "w", encoding="utf-8") as f:
|
|
json.dump(data, f, ensure_ascii=False, indent=2)
|
|
return os.path.abspath(output_path)
|