""" 意图分类服务 — 基于 4060 节点 Qwen3.5-4B 将用户输入快速分类为不同的意图类型,用于路由到对应的处理逻辑。 """ from __future__ import annotations import json import re import httpx from app.core.config import settings # 意图 → Dify App 路由映射(可扩展) INTENT_ROUTES = { "crm": "dify_agent", # 客户、订单、产品、发货等 CRM 操作 "finance": "dify_agent", # 财务、报销、票据操作 "knowledge": "dify_agent", # 知识库问答(未来可以指向独立的 RAG App) "general": "dify_agent", # 通用闲聊 "report": "dify_workflow_report", # 周报/月报生成 } SYSTEM_PROMPT = """你是一个意图分类器。根据用户输入,判断它属于以下哪个意图类别。只返回 JSON 格式 {"intent": "xxx", "confidence": 0.xx}。 意图类别: - crm: 客户管理(查询/新建客户)、订单管理(查询/下单)、产品/库存、发货物流 - finance: 报销、票据、发票、财务审批 - report: 生成周报、月报、工作汇报 - knowledge: 产品知识、技术问答、公司规章制度 - general: 日常闲聊、问候、与业务无关的问题 只输出 JSON,不要解释。""" async def classify_intent(message: str) -> dict: """ 调用 4060 上的 Qwen3.5-4B 做快速意图分类。 返回 {"intent": "crm", "confidence": 0.95, "route": "dify_agent"} 如果分类失败,默认路由到 dify_agent。 """ fallback = {"intent": "general", "confidence": 0.0, "route": "dify_agent"} if not settings.OLLAMA_4060_BASE_URL: return fallback url = f"{settings.OLLAMA_4060_BASE_URL}/api/chat" payload = { "model": settings.OLLAMA_4060_MODEL, "messages": [ {"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": message}, ], "stream": False, "options": { "temperature": 0.1, "num_predict": 500, # Qwen3.5 的 CoT thinking 会消耗较多 token }, } try: async with httpx.AsyncClient(timeout=10.0) as client: resp = await client.post(url, json=payload) if resp.status_code != 200: print(f"[IntentGateway] 4060 返回 {resp.status_code}: {resp.text[:200]}") return fallback data = resp.json() # Qwen3.5 的 CoT 推理放在 message.thinking 字段,最终结果在 message.content content = data.get("message", {}).get("content", "") thinking = data.get("message", {}).get("thinking", "") # 优先从 content 提取 JSON,回退到 thinking for text_source in [content, thinking]: if not text_source: continue # 去掉 ... 块 cleaned = re.sub(r'.*?', '', text_source, flags=re.DOTALL).strip() json_match = re.search(r'\{[^}]+\}', cleaned) if json_match: try: result = json.loads(json_match.group()) intent = result.get("intent", "general") confidence = float(result.get("confidence", 0.0)) route = INTENT_ROUTES.get(intent, "dify_agent") print(f"[IntentGateway] intent={intent}, confidence={confidence:.2f}, route={route}") return {"intent": intent, "confidence": confidence, "route": route} except json.JSONDecodeError: continue # 从 thinking 内容中启发式推断意图(当 JSON 未生成完成时) combined = (thinking + " " + content).lower() if any(kw in combined for kw in ["crm", "customer", "客户", "订单", "order"]): print(f"[IntentGateway] 启发式推断: crm") return {"intent": "crm", "confidence": 0.7, "route": "dify_agent"} if any(kw in combined for kw in ["finance", "报销", "发票", "票据", "财务"]): print(f"[IntentGateway] 启发式推断: finance") return {"intent": "finance", "confidence": 0.7, "route": "dify_agent"} if any(kw in combined for kw in ["report", "周报", "月报", "汇报"]): print(f"[IntentGateway] 启发式推断: report") return {"intent": "report", "confidence": 0.7, "route": "dify_workflow_report"} print(f"[IntentGateway] JSON 解析失败, 内容长度: content={len(content)}, thinking={len(thinking)}") return fallback except httpx.TimeoutException: print("[IntentGateway] 4060 超时,降级到默认路由") return fallback except Exception as e: print(f"[IntentGateway] 错误: {e}") return fallback