| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193 |
- """
- test_export.py
- 不依赖数据库、不启动 FastAPI,直接测试 export_service 的核心导出功能。
- 运行方式(在 ax-backend 根目录,已激活 conda 环境):
- python test_export.py
- """
- import asyncio
- import os
- import sys
- from pathlib import Path
- # 把项目根目录加入 sys.path,保证 app 包可被导入
- sys.path.insert(0, str(Path(__file__).parent))
- from app.services.export_service import markdown_to_docx_bytes, export_doc
# ------------------------------------------------------------------ #
# Test data (identical to the example in the documentation)
# ------------------------------------------------------------------ #
# Markdown body of the sample report. It contains H1/H2/H3 headings, one
# table, one bullet list and one numbered list.
_SAMPLE_MARKDOWN = (
    "# 2026年Q2季度报告\n\n"
    "## 一、背景\n\n"
    "本季度整体营收同比增长15%。\n\n"
    "## 二、核心数据\n\n"
    "### 2.1 收入\n\n"
    "| 月份 | 收入(万元) |\n"
    "|------|------|\n"
    "| 4月 | 120 |\n"
    "| 5月 | 135 |\n\n"
    "### 2.2 用户增长\n\n"
    "- 新增用户:12,000\n"
    "- 月活用户:85,000\n\n"
    "## 三、下季度计划\n\n"
    "1. 推进产品 A 上线\n"
    "2. 扩充销售团队\n"
)

# Sample export request shared by the export tests in this file.
TEST_PAYLOAD = {
    "fileName": "2026年Q2季度报告",
    "format": "doc",
    "documentId": "doc-abc123",  # ignored when there is no DB
    "content": _SAMPLE_MARKDOWN,
}
# ------------------------------------------------------------------ #
# Test 1: markdown_to_docx_bytes -- check that Markdown converts into a
# non-empty byte stream that is a readable ZIP-based document package
# ------------------------------------------------------------------ #
def test_markdown_to_docx_bytes():
    """Convert the sample Markdown and sanity-check the returned bytes.

    Checks that ``markdown_to_docx_bytes`` returns non-empty ``bytes`` that
    start with the 4-byte ZIP local-file-header signature and can be opened
    as a ZIP archive containing ``[Content_Types].xml``.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    # Function-scope imports, like the other point-of-use imports in this
    # script; only this test needs them.
    import io
    import zipfile

    print("=" * 50)
    print("测试 1:markdown_to_docx_bytes()")
    print("=" * 50)

    doc_bytes = markdown_to_docx_bytes(TEST_PAYLOAD["content"])

    assert isinstance(doc_bytes, bytes), "返回值应为 bytes"
    assert len(doc_bytes) > 0, "字节流不应为空"

    # Compare the full local-file-header signature. A bare b"PK" prefix also
    # matches the empty-archive (PK\x05\x06) and spanned-archive markers, so
    # two bytes are not enough to tell a populated package apart.
    assert doc_bytes[:4] == b"PK\x03\x04", f"文件头异常,前四字节为: {doc_bytes[:4]!r}"

    # The signature only covers the first entry; make sure the archive as a
    # whole is readable and carries the part every OPC package must contain.
    buffer = io.BytesIO(doc_bytes)
    assert zipfile.is_zipfile(buffer), "字节流不是合法的 ZIP 包"
    with zipfile.ZipFile(buffer) as package:
        assert "[Content_Types].xml" in package.namelist(), "ZIP 包中缺少 [Content_Types].xml"

    print(f" ✅ 转换成功,文件大小: {len(doc_bytes):,} bytes")
# ------------------------------------------------------------------ #
# Test 2: export_doc -- check that the file is written to the temp
# directory and that the returned structure is complete
# ------------------------------------------------------------------ #
async def test_export_doc():
    """Export the sample document and verify the result and the file on disk.

    Awaits ``export_doc`` with the sample file name and content, checks that
    the result contains ``download_url``, ``file_name`` and ``expires_at``,
    then resolves the exported file under ``settings.temp_dir`` and checks
    that it exists and is non-empty.

    Returns:
        Path: location of the exported file inside ``settings.temp_dir``.

    Raises:
        AssertionError: if a result key is missing, the download URL has no
            ``/files/`` segment, or the exported file is missing or empty.
    """
    from urllib.parse import unquote, urlsplit

    print()
    print("=" * 50)
    print("测试 2:export_doc()")
    print("=" * 50)

    result = await export_doc(
        file_name=TEST_PAYLOAD["fileName"],
        content=TEST_PAYLOAD["content"],
    )

    # Validate the returned fields.
    for key in ("download_url", "file_name", "expires_at"):
        assert key in result, f"缺少 {key}"

    print(f" download_url : {result['download_url']}")
    print(f" file_name : {result['file_name']}")
    print(f" expires_at : {result['expires_at']}")

    # Verify that the file really exists.
    # Expected download_url shape: http://localhost:8000/api/v1/files/<filename>
    # Parse the URL instead of splitting the raw string so that a query
    # string or fragment never leaks into the file name, and fail with a
    # clear message when the "/files/" segment is absent (otherwise the whole
    # URL would silently be used as the file name).
    url_path = urlsplit(result["download_url"]).path
    assert "/files/" in url_path, f"download_url 格式异常: {result['download_url']}"
    # Percent-decoding makes an encoded (e.g. non-ASCII) name match the name
    # on disk; an unencoded name passes through unchanged.
    filename = unquote(url_path.split("/files/")[-1])

    from app.config import settings

    file_path = Path(settings.temp_dir) / filename
    assert file_path.exists(), f"导出文件不存在: {file_path}"

    file_size = file_path.stat().st_size
    print(f" 文件路径 : {file_path.resolve()}")
    print(f" 文件大小 : {file_size:,} bytes")
    assert file_size > 0, "导出文件为空"

    print(" ✅ 文件写入成功")
    return file_path
# ------------------------------------------------------------------ #
# Test 3: open the generated file and verify its content structure
# (read with python-docx, so Word itself is not required)
# ------------------------------------------------------------------ #
def test_docx_content(file_path: Path):
    """Read the exported document and assert its headings and first table.

    Non-empty paragraphs are split into headings (style name contains
    "Heading" or "heading") and everything else; each table is flattened
    into rows of stripped cell texts. The collected structure is printed
    and then checked against the sample report.

    Args:
        file_path: path of the exported document to open.

    Raises:
        AssertionError: if an expected heading, the table, or an expected
            header cell is missing.
    """
    divider = "=" * 50
    print()
    print(divider)
    print("测试 3:验证 .doc 文件内容结构")
    print(divider)

    from docx import Document

    document = Document(str(file_path))

    headings = []
    paragraphs = []
    for para in document.paragraphs:
        style_name = para.style.name
        text = para.text.strip()
        if text:  # blank paragraphs carry no structure worth checking
            is_heading = any(tag in style_name for tag in ("Heading", "heading"))
            bucket = headings if is_heading else paragraphs
            bucket.append((style_name, text))

    # One list of rows per table, each row a list of stripped cell texts.
    tables = [
        [[cell.text.strip() for cell in row.cells] for row in table.rows]
        for table in document.tables
    ]

    print(f"\n 标题块(共 {len(headings)} 个):")
    for style, text in headings:
        print(f" [{style}] {text}")

    print(f"\n 正文段落(共 {len(paragraphs)} 个,含列表):")
    for style, text in paragraphs[:10]:  # show at most 10 entries
        print(f" [{style}] {text}")

    print(f"\n 表格(共 {len(tables)} 个):")
    for number, rows in enumerate(tables, start=1):
        print(f" 表格 {number}:")
        for row in rows:
            print(f" {row}")

    # Assert the basic structure.
    heading_texts = [text for _, text in headings]
    assert "2026年Q2季度报告" in heading_texts, "缺少 H1 标题"
    for keyword in ("背景", "核心数据", "下季度计划"):
        assert any(keyword in text for text in heading_texts), f"缺少 H2 '{keyword}'"

    assert len(tables) >= 1, "缺少表格"
    header_row = tables[0][0]
    assert "月份" in header_row, f"表头缺少'月份',实际: {header_row}"
    assert any("收入" in cell for cell in header_row), f"表头缺少'收入',实际: {header_row}"

    print("\n ✅ 内容结构验证通过")
# ------------------------------------------------------------------ #
# Main entry point
# ------------------------------------------------------------------ #
async def main():
    """Run the three export tests in order; exit with status 1 on failure.

    An ``AssertionError`` is reported as a failed assertion. Any other
    exception is reported together with its traceback. When every test
    passes the coroutine returns normally.
    """
    print("\n🚀 开始导出功能测试\n")
    try:
        test_markdown_to_docx_bytes()
        exported_file = await test_export_doc()
        test_docx_content(exported_file)
        print("\n🎉 全部测试通过!\n")
    except AssertionError as failure:
        print(f"\n❌ 断言失败:{failure}\n")
    except Exception as error:
        import traceback

        print(f"\n❌ 测试异常:{error}\n")
        traceback.print_exc()
    else:
        return

    # Reached only when one of the handlers above ran.
    sys.exit(1)


if __name__ == "__main__":
    asyncio.run(main())
|