""" test_export.py 不依赖数据库、不启动 FastAPI,直接测试 export_service 的核心导出功能。 运行方式(在 ax-backend 根目录,已激活 conda 环境): python test_export.py """ import asyncio import os import sys from pathlib import Path # 把项目根目录加入 sys.path,保证 app 包可被导入 sys.path.insert(0, str(Path(__file__).parent)) from app.services.export_service import markdown_to_docx_bytes, export_doc # ------------------------------------------------------------------ # # 测试数据(与文档示例完全一致) # ------------------------------------------------------------------ # TEST_PAYLOAD = { "fileName": "2026年Q2季度报告", "format": "doc", "documentId": "doc-abc123", # 无 DB 时会被忽略 "content": ( "# 2026年Q2季度报告\n\n" "## 一、背景\n\n" "本季度整体营收同比增长15%。\n\n" "## 二、核心数据\n\n" "### 2.1 收入\n\n" "| 月份 | 收入(万元) |\n" "|------|------|\n" "| 4月 | 120 |\n" "| 5月 | 135 |\n\n" "### 2.2 用户增长\n\n" "- 新增用户:12,000\n" "- 月活用户:85,000\n\n" "## 三、下季度计划\n\n" "1. 推进产品 A 上线\n" "2. 扩充销售团队\n" ), } # ------------------------------------------------------------------ # # 测试 1:markdown_to_docx_bytes —— 检验 Markdown 能否转换成字节流 # ------------------------------------------------------------------ # def test_markdown_to_docx_bytes(): print("=" * 50) print("测试 1:markdown_to_docx_bytes()") print("=" * 50) content = TEST_PAYLOAD["content"] doc_bytes = markdown_to_docx_bytes(content) assert isinstance(doc_bytes, bytes), "返回值应为 bytes" assert len(doc_bytes) > 0, "字节流不应为空" # .docx 文件头魔数:PK\x03\x04(ZIP 格式) assert doc_bytes[:2] == b"PK", f"文件头异常,前两字节为: {doc_bytes[:2]!r}" print(f" ✅ 转换成功,文件大小: {len(doc_bytes):,} bytes") # ------------------------------------------------------------------ # # 测试 2:export_doc —— 检验文件写入临时目录和返回结构 # ------------------------------------------------------------------ # async def test_export_doc(): print() print("=" * 50) print("测试 2:export_doc()") print("=" * 50) result = await export_doc( file_name=TEST_PAYLOAD["fileName"], content=TEST_PAYLOAD["content"], ) # 校验返回字段 assert "download_url" in result, "缺少 download_url" assert "file_name" in result, "缺少 file_name" assert "expires_at" in result, "缺少 expires_at" print(f" download_url : {result['download_url']}") print(f" file_name : {result['file_name']}") print(f" expires_at : {result['expires_at']}") # 校验文件真实存在 # download_url 格式:http://localhost:8000/api/v1/files/ filename = result["download_url"].split("/files/")[-1] from app.config import settings file_path = Path(settings.temp_dir) / filename assert file_path.exists(), f"导出文件不存在: {file_path}" file_size = file_path.stat().st_size print(f" 文件路径 : {file_path.resolve()}") print(f" 文件大小 : {file_size:,} bytes") assert file_size > 0, "导出文件为空" print(" ✅ 文件写入成功") return file_path # ------------------------------------------------------------------ # # 测试 3:打开生成的文件,验证内容结构(不依赖 Word,用 python-docx 读) # ------------------------------------------------------------------ # def test_docx_content(file_path: Path): print() print("=" * 50) print("测试 3:验证 .doc 文件内容结构") print("=" * 50) from docx import Document doc = Document(str(file_path)) headings = [] paragraphs = [] tables = [] for para in doc.paragraphs: style_name = para.style.name text = para.text.strip() if not text: continue if "Heading" in style_name or "heading" in style_name: headings.append((style_name, text)) else: paragraphs.append((style_name, text)) for table in doc.tables: rows = [[cell.text.strip() for cell in row.cells] for row in table.rows] tables.append(rows) print(f"\n 标题块(共 {len(headings)} 个):") for style, text in headings: print(f" [{style}] {text}") print(f"\n 正文段落(共 {len(paragraphs)} 个,含列表):") for style, text in paragraphs[:10]: # 最多显示 10 条 print(f" [{style}] {text}") print(f"\n 表格(共 {len(tables)} 个):") for i, table in enumerate(tables): print(f" 表格 {i + 1}:") for row in table: print(f" {row}") # 断言基本结构 heading_texts = [t for _, t in headings] assert "2026年Q2季度报告" in heading_texts, "缺少 H1 标题" assert any("背景" in t for t in heading_texts), "缺少 H2 '背景'" assert any("核心数据" in t for t in heading_texts), "缺少 H2 '核心数据'" assert any("下季度计划" in t for t in heading_texts), "缺少 H2 '下季度计划'" assert len(tables) >= 1, "缺少表格" header_row = tables[0][0] assert "月份" in header_row, f"表头缺少'月份',实际: {header_row}" assert any("收入" in cell for cell in header_row), f"表头缺少'收入',实际: {header_row}" print("\n ✅ 内容结构验证通过") # ------------------------------------------------------------------ # # 主入口 # ------------------------------------------------------------------ # async def main(): print("\n🚀 开始导出功能测试\n") try: test_markdown_to_docx_bytes() file_path = await test_export_doc() test_docx_content(file_path) print("\n🎉 全部测试通过!\n") except AssertionError as e: print(f"\n❌ 断言失败:{e}\n") sys.exit(1) except Exception as e: import traceback print(f"\n❌ 测试异常:{e}\n") traceback.print_exc() sys.exit(1) if __name__ == "__main__": asyncio.run(main())