test_export.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
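"""
test_export.py
Exercises the core export functions of export_service directly, without
depending on a database and without starting FastAPI.
How to run (from the ax-backend root directory, with the conda environment
already activated):
    python test_export.py
"""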
  7. import asyncio
  8. import os
  9. import sys
  10. from pathlib import Path
  11. # 把项目根目录加入 sys.path,保证 app 包可被导入
  12. sys.path.insert(0, str(Path(__file__).parent))
  13. from app.services.export_service import markdown_to_docx_bytes, export_doc
  14. # ------------------------------------------------------------------ #
  15. # 测试数据(与文档示例完全一致)
  16. # ------------------------------------------------------------------ #
  17. TEST_PAYLOAD = {
  18. "fileName": "2026年Q2季度报告",
  19. "format": "doc",
  20. "documentId": "doc-abc123", # 无 DB 时会被忽略
  21. "content": (
  22. "# 2026年Q2季度报告\n\n"
  23. "## 一、背景\n\n"
  24. "本季度整体营收同比增长15%。\n\n"
  25. "## 二、核心数据\n\n"
  26. "### 2.1 收入\n\n"
  27. "| 月份 | 收入(万元) |\n"
  28. "|------|------|\n"
  29. "| 4月 | 120 |\n"
  30. "| 5月 | 135 |\n\n"
  31. "### 2.2 用户增长\n\n"
  32. "- 新增用户:12,000\n"
  33. "- 月活用户:85,000\n\n"
  34. "## 三、下季度计划\n\n"
  35. "1. 推进产品 A 上线\n"
  36. "2. 扩充销售团队\n"
  37. ),
  38. }
  39. # ------------------------------------------------------------------ #
  40. # 测试 1:markdown_to_docx_bytes —— 检验 Markdown 能否转换成字节流
  41. # ------------------------------------------------------------------ #
  42. def test_markdown_to_docx_bytes():
  43. print("=" * 50)
  44. print("测试 1:markdown_to_docx_bytes()")
  45. print("=" * 50)
  46. content = TEST_PAYLOAD["content"]
  47. doc_bytes = markdown_to_docx_bytes(content)
  48. assert isinstance(doc_bytes, bytes), "返回值应为 bytes"
  49. assert len(doc_bytes) > 0, "字节流不应为空"
  50. # .docx 文件头魔数:PK\x03\x04(ZIP 格式)
  51. assert doc_bytes[:2] == b"PK", f"文件头异常,前两字节为: {doc_bytes[:2]!r}"
  52. print(f" ✅ 转换成功,文件大小: {len(doc_bytes):,} bytes")
  53. # ------------------------------------------------------------------ #
  54. # 测试 2:export_doc —— 检验文件写入临时目录和返回结构
  55. # ------------------------------------------------------------------ #
  56. async def test_export_doc():
  57. print()
  58. print("=" * 50)
  59. print("测试 2:export_doc()")
  60. print("=" * 50)
  61. result = await export_doc(
  62. file_name=TEST_PAYLOAD["fileName"],
  63. content=TEST_PAYLOAD["content"],
  64. )
  65. # 校验返回字段
  66. assert "download_url" in result, "缺少 download_url"
  67. assert "file_name" in result, "缺少 file_name"
  68. assert "expires_at" in result, "缺少 expires_at"
  69. print(f" download_url : {result['download_url']}")
  70. print(f" file_name : {result['file_name']}")
  71. print(f" expires_at : {result['expires_at']}")
  72. # 校验文件真实存在
  73. # download_url 格式:http://localhost:8000/api/v1/files/<filename>
  74. filename = result["download_url"].split("/files/")[-1]
  75. from app.config import settings
  76. file_path = Path(settings.temp_dir) / filename
  77. assert file_path.exists(), f"导出文件不存在: {file_path}"
  78. file_size = file_path.stat().st_size
  79. print(f" 文件路径 : {file_path.resolve()}")
  80. print(f" 文件大小 : {file_size:,} bytes")
  81. assert file_size > 0, "导出文件为空"
  82. print(" ✅ 文件写入成功")
  83. return file_path
  84. # ------------------------------------------------------------------ #
  85. # 测试 3:打开生成的文件,验证内容结构(不依赖 Word,用 python-docx 读)
  86. # ------------------------------------------------------------------ #
  87. def test_docx_content(file_path: Path):
  88. print()
  89. print("=" * 50)
  90. print("测试 3:验证 .doc 文件内容结构")
  91. print("=" * 50)
  92. from docx import Document
  93. doc = Document(str(file_path))
  94. headings = []
  95. paragraphs = []
  96. tables = []
  97. for para in doc.paragraphs:
  98. style_name = para.style.name
  99. text = para.text.strip()
  100. if not text:
  101. continue
  102. if "Heading" in style_name or "heading" in style_name:
  103. headings.append((style_name, text))
  104. else:
  105. paragraphs.append((style_name, text))
  106. for table in doc.tables:
  107. rows = [[cell.text.strip() for cell in row.cells] for row in table.rows]
  108. tables.append(rows)
  109. print(f"\n 标题块(共 {len(headings)} 个):")
  110. for style, text in headings:
  111. print(f" [{style}] {text}")
  112. print(f"\n 正文段落(共 {len(paragraphs)} 个,含列表):")
  113. for style, text in paragraphs[:10]: # 最多显示 10 条
  114. print(f" [{style}] {text}")
  115. print(f"\n 表格(共 {len(tables)} 个):")
  116. for i, table in enumerate(tables):
  117. print(f" 表格 {i + 1}:")
  118. for row in table:
  119. print(f" {row}")
  120. # 断言基本结构
  121. heading_texts = [t for _, t in headings]
  122. assert "2026年Q2季度报告" in heading_texts, "缺少 H1 标题"
  123. assert any("背景" in t for t in heading_texts), "缺少 H2 '背景'"
  124. assert any("核心数据" in t for t in heading_texts), "缺少 H2 '核心数据'"
  125. assert any("下季度计划" in t for t in heading_texts), "缺少 H2 '下季度计划'"
  126. assert len(tables) >= 1, "缺少表格"
  127. header_row = tables[0][0]
  128. assert "月份" in header_row, f"表头缺少'月份',实际: {header_row}"
  129. assert any("收入" in cell for cell in header_row), f"表头缺少'收入',实际: {header_row}"
  130. print("\n ✅ 内容结构验证通过")
  131. # ------------------------------------------------------------------ #
  132. # 主入口
  133. # ------------------------------------------------------------------ #
  134. async def main():
  135. print("\n🚀 开始导出功能测试\n")
  136. try:
  137. test_markdown_to_docx_bytes()
  138. file_path = await test_export_doc()
  139. test_docx_content(file_path)
  140. print("\n🎉 全部测试通过!\n")
  141. except AssertionError as e:
  142. print(f"\n❌ 断言失败:{e}\n")
  143. sys.exit(1)
  144. except Exception as e:
  145. import traceback
  146. print(f"\n❌ 测试异常:{e}\n")
  147. traceback.print_exc()
  148. sys.exit(1)
  149. if __name__ == "__main__":
  150. asyncio.run(main())