"""
意图分类服务 — 基于 4060 节点 Qwen3.5-4B
将用户输入快速分类为不同的意图类型,用于路由到对应的处理逻辑。
"""
from __future__ import annotations
import json
import re
import httpx
from app.core.config import settings
# Intent -> Dify App route mapping (extensible).
# Keys are the intent labels the classifier may return; values are the route
# names that classify_intent() places in the "route" field of its result.
INTENT_ROUTES = {
    "crm": "dify_agent",  # CRM operations: customers, orders, products, shipping
    "finance": "dify_agent",  # finance, reimbursement and receipt/voucher operations
    "knowledge": "dify_agent",  # knowledge-base Q&A (could later point to a dedicated RAG app)
    "general": "dify_agent",  # general chit-chat
    "report": "dify_workflow_report",  # weekly / monthly report generation
}
# System prompt sent as the "system" message of the classification request.
# It lists the five intent categories (crm, finance, report, knowledge, general)
# and instructs the model to answer with a JSON object of the form
# {"intent": ..., "confidence": ...} and nothing else.
# NOTE: this text is runtime data sent to the model; do not edit it casually.
SYSTEM_PROMPT = """你是一个意图分类器。根据用户输入,判断它属于以下哪个意图类别。只返回 JSON 格式 {"intent": "xxx", "confidence": 0.xx}。
意图类别:
- crm: 客户管理(查询/新建客户)、订单管理(查询/下单)、产品/库存、发货物流
- finance: 报销、票据、发票、财务审批
- report: 生成周报、月报、工作汇报
- knowledge: 产品知识、技术问答、公司规章制度
- general: 日常闲聊、问候、与业务无关的问题
只输出 JSON,不要解释。"""
# Removes inline <think>...</think> reasoning blocks that the model may embed
# in its reply. The previous pattern (r'.*?') matched *everything* under
# re.sub on Python 3.7+, wiping the text and disabling JSON extraction.
_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)

# Matches the first flat (non-nested) JSON object in a piece of text.
_JSON_OBJECT_RE = re.compile(r"\{[^}]+\}")

# Route used whenever an intent has no entry in INTENT_ROUTES.
_DEFAULT_ROUTE = "dify_agent"

# Keyword fallback table, evaluated in order; the first intent with any
# keyword present in the model output wins.
_KEYWORD_INTENTS = (
    ("crm", ("crm", "customer", "客户", "订单", "order")),
    ("finance", ("finance", "报销", "发票", "票据", "财务")),
    ("report", ("report", "周报", "月报", "汇报")),
)


def _parse_intent_json(text: str) -> dict | None:
    """Extract ``{"intent", "confidence", "route"}`` from model output.

    Strips ``<think>`` blocks, takes the first flat JSON object in the
    remaining text and normalizes it. Returns ``None`` when no usable JSON
    object is found so the caller can try another source.
    """
    cleaned = _THINK_BLOCK_RE.sub("", text).strip()
    found = _JSON_OBJECT_RE.search(cleaned)
    if found is None:
        return None
    try:
        parsed = json.loads(found.group())
    except json.JSONDecodeError:
        return None

    intent = parsed.get("intent", "general")
    if not isinstance(intent, str):
        # A non-string intent (e.g. a list) cannot be looked up in the route table.
        return None
    try:
        confidence = float(parsed.get("confidence", 0.0))
    except (TypeError, ValueError):
        # Keep a valid intent even if the model produced a malformed confidence.
        confidence = 0.0
    return {
        "intent": intent,
        "confidence": confidence,
        "route": INTENT_ROUTES.get(intent, _DEFAULT_ROUTE),
    }


def _infer_intent_from_keywords(text: str) -> str | None:
    """Return the first intent whose keywords occur in ``text`` (lower-cased), else ``None``."""
    for intent, keywords in _KEYWORD_INTENTS:
        if any(kw in text for kw in keywords):
            return intent
    return None


async def classify_intent(message: str) -> dict:
    """Classify a user message into an intent via the Ollama chat endpoint.

    Sends ``message`` together with ``SYSTEM_PROMPT`` to
    ``{settings.OLLAMA_4060_BASE_URL}/api/chat`` and interprets the reply.

    Resolution order:
      1. A JSON object found in ``message.content`` of the response.
      2. A JSON object found in ``message.thinking`` of the response.
      3. A keyword heuristic over the combined thinking + content text
         (confidence fixed at 0.7). Note that this scans the *model output*,
         not the user's message.
      4. The fallback result.

    Args:
        message: Raw user input to classify.

    Returns:
        A dict ``{"intent": str, "confidence": float, "route": str}``.
        On any failure (endpoint not configured, non-200 status, timeout,
        unparsable reply, unexpected exception) the fallback
        ``{"intent": "general", "confidence": 0.0, "route": "dify_agent"}``
        is returned; this function does not raise.
    """
    fallback = {"intent": "general", "confidence": 0.0, "route": _DEFAULT_ROUTE}

    if not settings.OLLAMA_4060_BASE_URL:
        return fallback

    url = f"{settings.OLLAMA_4060_BASE_URL}/api/chat"
    payload = {
        "model": settings.OLLAMA_4060_MODEL,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": message},
        ],
        "stream": False,
        "options": {
            "temperature": 0.1,
            # The model's chain-of-thought consumes a fair number of tokens.
            "num_predict": 500,
        },
    }

    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(url, json=payload)

        if resp.status_code != 200:
            print(f"[IntentGateway] 4060 返回 {resp.status_code}: {resp.text[:200]}")
            return fallback

        data = resp.json()
        # The reasoning trace is read from message.thinking and the final
        # answer from message.content. `or` guards against explicit nulls,
        # which would otherwise break the string concatenation below.
        reply = data.get("message") or {}
        content = reply.get("content") or ""
        thinking = reply.get("thinking") or ""

        # Prefer JSON from content; fall back to JSON inside the thinking trace.
        for text_source in (content, thinking):
            if not text_source:
                continue
            parsed = _parse_intent_json(text_source)
            if parsed is not None:
                print(
                    f"[IntentGateway] intent={parsed['intent']}, "
                    f"confidence={parsed['confidence']:.2f}, route={parsed['route']}"
                )
                return parsed

        # Heuristic inference for replies where the JSON was never completed
        # (e.g. generation was cut off by num_predict).
        combined = (thinking + " " + content).lower()
        guessed = _infer_intent_from_keywords(combined)
        if guessed is not None:
            print(f"[IntentGateway] 启发式推断: {guessed}")
            return {
                "intent": guessed,
                "confidence": 0.7,
                "route": INTENT_ROUTES.get(guessed, _DEFAULT_ROUTE),
            }

        print(f"[IntentGateway] JSON 解析失败, 内容长度: content={len(content)}, thinking={len(thinking)}")
        return fallback
    except httpx.TimeoutException:
        print("[IntentGateway] 4060 超时,降级到默认路由")
        return fallback
    except Exception as e:
        # Deliberate best-effort boundary: classification must never break
        # the caller, so any unexpected error degrades to the default route.
        print(f"[IntentGateway] 错误: {e}")
        return fallback