Files
Master-skill/tools/rag_query.py
T
xianren 9873d0514d feat: add --brief flag to rag_query.py to reduce output verbosity
- New --brief flag for semantic/search subcommands outputs one-line-per-result
  (title + link + 80-char snippet) instead of full 500-char excerpts
- Reduces output from ~60 lines to ~7 lines per query (3 results)
- /compare-masters now uses --brief by default (multiple queries no longer
  flood the terminal with long excerpts)
- Full output (default) preserved for single-master /ask and debugging
2026-04-05 09:32:55 +08:00

278 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
RAG Query — runtime FoJin retrieval for teacher skills.
Usage:
python3 tools/rag_query.py search "如何念佛" --sources cbeta --top_k 5
python3 tools/rag_query.py semantic "什么是空性" --top_k 5
python3 tools/rag_query.py dict "般若"
python3 tools/rag_query.py kg "印光" --type person
"""
import argparse
import sys
import os
# Ensure tools/ is on the path so we can import fojin_bridge
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from fojin_bridge import create_bridge, FojinUnavailableError
def format_search_results(data: dict, brief: bool = False) -> str:
"""Format keyword search results for LLM consumption."""
items = data.get("items") or data.get("results") or []
if not items:
return "未找到相关结果。"
if brief:
total = data.get("total", len(items))
lines = [f"关键词搜索 {total} 条,显示 {len(items)}"]
for i, item in enumerate(items, 1):
title = item.get("title", "无标题")
text_id = item.get("text_id", item.get("id", ""))
link = f"https://fojin.app/texts/{text_id}" if text_id else ""
snippet = item.get("highlight", item.get("snippet", item.get("content", "")))
snippet_str = str(snippet).strip().replace("\n", " ")[:80]
lines.append(f"{i}. {title}{link}")
if snippet_str:
lines.append(f" {snippet_str}...")
return "\n".join(lines)
lines = []
for i, item in enumerate(items, 1):
title = item.get("title", "无标题")
source = item.get("source", item.get("collection", ""))
score = item.get("score", item.get("relevance", ""))
snippet = item.get("highlight", item.get("snippet", item.get("content", "")))
text_id = item.get("text_id", item.get("id", ""))
lines.append(f"── 结果 {i} ──")
lines.append(f"标题: {title}")
if source:
lines.append(f"来源: {source}")
if score:
lines.append(f"相关度: {score}")
if text_id:
lines.append(f"FoJin链接: https://fojin.app/texts/{text_id}")
if snippet:
snippet_str = str(snippet)
if len(snippet_str) > 500:
snippet_str = snippet_str[:500] + "..."
lines.append(f"摘要: {snippet_str}")
lines.append("")
total = data.get("total", len(items))
lines.insert(0, f"共找到 {total} 条结果,显示前 {len(items)} 条:\n")
return "\n".join(lines)
def format_semantic_results(data: dict, brief: bool = False) -> str:
"""Format semantic search results for LLM consumption.
Args:
data: API response dict
brief: If True, output one-line-per-result (compact mode for meta-skills
like /compare-masters). If False, output full content excerpts.
"""
items = data.get("items") or data.get("results") or []
if not items:
return "语义检索未找到相关经文。"
if brief:
lines = [f"语义检索 {len(items)} 条:"]
for i, item in enumerate(items, 1):
title = item.get("title", "无标题")
score = item.get("score", item.get("similarity", ""))
content = item.get("content", item.get("snippet", item.get("text", "")))
text_id = item.get("text_id", item.get("id", ""))
juan = item.get("juan_num", item.get("juan", ""))
link = f"https://fojin.app/texts/{text_id}" if text_id else ""
if text_id and juan:
link += f"/juan/{juan}"
score_str = f" (score={score:.2f})" if isinstance(score, (int, float)) else ""
snippet = str(content).strip().replace("\n", " ")[:80]
lines.append(f"{i}. {title}{score_str}{link}")
if snippet:
lines.append(f" {snippet}...")
return "\n".join(lines)
lines = [f"语义检索返回 {len(items)} 条相关经文:\n"]
for i, item in enumerate(items, 1):
title = item.get("title", "无标题")
source = item.get("source", item.get("collection", ""))
score = item.get("score", item.get("similarity", ""))
content = item.get("content", item.get("snippet", item.get("text", "")))
text_id = item.get("text_id", item.get("id", ""))
juan = item.get("juan_num", item.get("juan", ""))
lines.append(f"── 经文 {i} ──")
lines.append(f"标题: {title}")
if source:
lines.append(f"来源: {source}")
if score:
lines.append(f"相似度: {score}")
if text_id:
link = f"https://fojin.app/texts/{text_id}"
if juan:
link += f"/juan/{juan}"
lines.append(f"FoJin链接: {link}")
if content:
content_str = str(content)
if len(content_str) > 500:
content_str = content_str[:500] + "..."
lines.append(f"经文内容: {content_str}")
lines.append("")
return "\n".join(lines)
def format_dict_results(data: dict) -> str:
"""Format dictionary search results for LLM consumption."""
items = data.get("items") or data.get("results") or []
if not items:
return "词典中未找到该术语。"
lines = [f"词典检索返回 {len(items)} 条释义:\n"]
for i, item in enumerate(items, 1):
headword = item.get("headword", item.get("term", item.get("word", "")))
definition = item.get("definition", item.get("content", item.get("meaning", "")))
source_dict = item.get("source", item.get("dictionary", item.get("dict_name", "")))
lines.append(f"── 释义 {i} ──")
if headword:
lines.append(f"词条: {headword}")
if source_dict:
lines.append(f"出处词典: {source_dict}")
if definition:
def_str = str(definition)
if len(def_str) > 800:
def_str = def_str[:800] + "..."
lines.append(f"释义: {def_str}")
lines.append("")
return "\n".join(lines)
def format_kg_results(data: dict) -> str:
"""Format knowledge graph entity results for LLM consumption."""
items = data.get("items") or data.get("results") or data.get("entities") or []
if not items:
return "知识图谱中未找到相关实体。"
lines = [f"知识图谱检索返回 {len(items)} 个实体:\n"]
for i, item in enumerate(items, 1):
name = item.get("name", item.get("label", ""))
etype = item.get("entity_type", item.get("type", ""))
desc = item.get("description", item.get("summary", ""))
relations = item.get("relations", item.get("edges", []))
entity_id = item.get("id", item.get("entity_id", ""))
lines.append(f"── 实体 {i} ──")
if name:
lines.append(f"名称: {name}")
if etype:
lines.append(f"类型: {etype}")
if entity_id:
lines.append(f"FoJin链接: https://fojin.app/kg/entities/{entity_id}")
if desc:
desc_str = str(desc)
if len(desc_str) > 500:
desc_str = desc_str[:500] + "..."
lines.append(f"描述: {desc_str}")
if relations:
lines.append("关系:")
for rel in relations[:10]:
predicate = rel.get("predicate", rel.get("relation", ""))
target = rel.get("target", rel.get("object", rel.get("name", "")))
if predicate and target:
lines.append(f" - {predicate}{target}")
lines.append("")
return "\n".join(lines)
def cmd_search(args):
bridge = create_bridge()
result = bridge.search_texts(args.query, sources=args.sources, size=args.top_k)
print(format_search_results(result, brief=args.brief))
def cmd_semantic(args):
bridge = create_bridge()
result = bridge.semantic_search(args.query, top_k=args.top_k)
print(format_semantic_results(result, brief=args.brief))
def cmd_dict(args):
bridge = create_bridge()
result = bridge.search_dictionary(args.query)
print(format_dict_results(result))
def cmd_kg(args):
bridge = create_bridge()
result = bridge.search_kg_entities(args.query, entity_type=args.type)
print(format_kg_results(result))
def main():
parser = argparse.ArgumentParser(
description="RAG Query — FoJin 佛教文献实时检索工具",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=__doc__,
)
subparsers = parser.add_subparsers(dest="command", required=True)
# search
p_search = subparsers.add_parser("search", help="关键词搜索经文")
p_search.add_argument("query", help="搜索关键词")
p_search.add_argument("--sources", default=None, help="限定来源,如 cbeta")
p_search.add_argument("--top_k", type=int, default=5, help="返回条数 (默认 5)")
p_search.add_argument("--brief", action="store_true", help="简洁输出(一行一条)")
p_search.set_defaults(func=cmd_search)
# semantic
p_sem = subparsers.add_parser("semantic", help="语义向量检索")
p_sem.add_argument("query", help="语义查询")
p_sem.add_argument("--top_k", type=int, default=5, help="返回条数 (默认 5)")
p_sem.add_argument("--brief", action="store_true", help="简洁输出(一行一条)")
p_sem.set_defaults(func=cmd_semantic)
# dict
p_dict = subparsers.add_parser("dict", help="佛学词典查询")
p_dict.add_argument("query", help="查询术语")
p_dict.set_defaults(func=cmd_dict)
# kg
p_kg = subparsers.add_parser("kg", help="知识图谱实体搜索")
p_kg.add_argument("query", help="实体名称")
p_kg.add_argument("--type", default=None, help="实体类型,如 person, text, school")
p_kg.set_defaults(func=cmd_kg)
args = parser.parse_args()
try:
args.func(args)
except FojinUnavailableError:
print("[FoJin API 当前不可用]")
print("无法检索真实经文。法师将仅基于预置 teaching.md 回答。")
print("建议:")
print("- 稍后重试")
print("- 检查网络连接")
print("- 或在 fojin.app 直接查阅原典")
sys.exit(0)
except ConnectionError as e:
print(f"[错误] 无法连接 FoJin API: {e}", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(f"[错误] 检索失败: {e}", file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()