Files
akmon/doc_bus/convert_to_pdf.py
2026-01-20 08:04:15 +08:00

179 lines
5.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
融资计划书转换工具
Convert Markdown financing plan to Word/PDF with professional formatting
"""
import subprocess
import os
import sys
import datetime
import platform
from pathlib import Path
def check_installed_modules():
    """Print a cheat sheet of commands for inspecting installed Python modules.

    Nothing is inspected or executed here: the function only writes a fixed,
    numbered list of example ``pip`` / ``python -c`` command lines to stdout
    so the user can diagnose the environment by hand.
    """
    # An empty entry renders as the blank separator line between sections.
    help_lines = (
        "=== Python模块检查命令 ===",
        "1. 查看所有已安装模块:",
        " pip list",
        " pip freeze",
        "",
        "2. 查看特定模块详细信息:",
        " pip show 模块名",
        " 例: pip show pandas",
        "",
        "3. 检查模块是否可导入:",
        " python -c \"import 模块名; print('模块已安装')\"",
        " 例: python -c \"import datetime; print('datetime已安装')\"",
        "",
        "4. 查看模块版本:",
        " python -c \"import 模块名; print(模块名.__version__)\"",
        " 例: python -c \"import pandas; print(pandas.__version__)\"",
        "",
        "5. 搜索包含关键词的模块:",
        " pip list | grep 关键词",
        " 例: pip list | grep pandas",
        "",
    )
    # One print of the joined text emits the same bytes as one print per line.
    print("\n".join(help_lines))
def check_pandoc():
    """Report whether the ``pandoc`` executable can be run.

    Returns:
        True when ``pandoc --version`` exits successfully; False when the
        executable cannot be found or exits with a non-zero status.
    """
    probe = ['pandoc', '--version']
    try:
        # Output is captured only to keep the version banner off the console.
        subprocess.run(probe, check=True, capture_output=True)
    except (FileNotFoundError, subprocess.CalledProcessError):
        return False
    else:
        return True
def check_latex_engine():
    """Report whether the ``xelatex`` LaTeX engine can be run.

    Returns:
        True when ``xelatex -version`` exits successfully; False when the
        executable cannot be found or exits with a non-zero status.
    """
    probe = ['xelatex', '-version']
    try:
        # Output is captured only to keep the version banner off the console.
        subprocess.run(probe, check=True, capture_output=True)
    except (FileNotFoundError, subprocess.CalledProcessError):
        return False
    else:
        return True
def convert_to_word(input_file, output_file):
    """Convert a Markdown file to a Word (.docx) document with pandoc.

    The document gets a table of contents (depth 3), numbered sections,
    ``tango`` code highlighting and fixed title/author metadata. The date
    metadata is today's local date.

    Args:
        input_file: Path of the Markdown source.
        output_file: Path of the .docx file to write.

    Returns:
        The ``subprocess.CompletedProcess`` of the pandoc invocation.

    Raises:
        subprocess.CalledProcessError: pandoc exited with a non-zero status.
        FileNotFoundError: the ``pandoc`` executable is not on PATH.
    """
    # The original format string stopped at "%d", producing "2026年01月20"
    # without the closing "日" of a Chinese date.
    today = datetime.datetime.now().strftime('%Y年%m月%d日')
    cmd = [
        'pandoc',
        input_file,
        '-o', output_file,
        '--from', 'markdown',
        '--to', 'docx',
        '--highlight-style', 'tango',
        '--table-of-contents',
        '--toc-depth', '3',
        '--number-sections',
        '--metadata', 'title=骅锋科技股份有限公司融资计划书',
        '--metadata', 'author=骅锋科技股份有限公司',
        '--metadata', 'date=' + today,
    ]
    # Use a custom style template when one exists. The path is relative, so
    # it is looked up in the current working directory.
    template_path = Path('template.docx')
    if template_path.exists():
        cmd.extend(['--reference-doc', str(template_path)])
    return subprocess.run(cmd, check=True)
def convert_to_pdf(input_file, output_file):
    """Convert a Markdown file to PDF with pandoc and the xelatex engine.

    The document gets a table of contents (depth 3), numbered sections,
    ``tango`` code highlighting, fixed title/author metadata, today's date,
    the ``report`` document class, A4 paper with 2 cm margins, 12 pt text
    and Microsoft YaHei as both the main and the CJK font.

    Args:
        input_file: Path of the Markdown source.
        output_file: Path of the PDF to write; it should end in ``.pdf`` so
            that pandoc runs the PDF engine on the generated LaTeX.

    Returns:
        The ``subprocess.CompletedProcess`` of the pandoc invocation, with
        stdout/stderr captured.

    Raises:
        subprocess.CalledProcessError: pandoc exited with a non-zero status;
            the captured stderr is available on the exception.
        FileNotFoundError: the ``pandoc`` executable is not on PATH.
    """
    # Same date format as the Word output so both documents agree.
    today = datetime.datetime.now().strftime('%Y年%m月%d日')
    cmd = [
        'pandoc',
        input_file,
        '-o', output_file,
        '--from', 'markdown',
        # "pdf" is not a pandoc writer: pandoc rejects "--to pdf" and asks
        # for "-t latex" together with a .pdf output file instead.
        '--to', 'latex',
        '--pdf-engine', 'xelatex',
        '--highlight-style', 'tango',
        '--table-of-contents',
        '--toc-depth', '3',
        '--number-sections',
        '--metadata', 'title=骅锋科技股份有限公司融资计划书',
        '--metadata', 'author=骅锋科技股份有限公司',
        '--metadata', 'date=' + today,
        '--metadata', 'documentclass=report',
        '--metadata', 'geometry=a4paper,margin=2cm',
        '--metadata', 'fontsize=12pt',
        '--metadata', 'mainfont=Microsoft YaHei',
        '--metadata', 'CJKmainfont=Microsoft YaHei',
    ]
    return subprocess.run(cmd, check=True, capture_output=True)
def convert_docx_to_pdf_windows(docx_path, pdf_path):
    """[Windows fallback] Convert a DOCX file to PDF by automating MS Word.

    Runs a PowerShell script that opens the document through the
    ``Word.Application`` COM object and saves it as PDF. On any platform
    other than Windows the function does nothing and returns False.

    Args:
        docx_path: Path of the existing .docx file.
        pdf_path: Path of the PDF to write.

    Returns:
        True when the PowerShell script exited successfully, False otherwise
        (not on Windows, PowerShell missing, or the script failed).
    """
    if platform.system() != "Windows":
        return False

    print("... 正在尝试使用MS Word将DOCX转换为PDF作为备选方案 ...")

    def _ps_literal(path):
        # Inside a single-quoted PowerShell string the only special character
        # is the quote itself, escaped by doubling it. Without this a path
        # containing "'" would terminate the literal and break the script.
        return "'" + os.path.abspath(path).replace("'", "''") + "'"

    # $ErrorActionPreference = 'Stop' turns a failed Open/SaveAs into a
    # script-terminating error so PowerShell exits non-zero instead of
    # carrying on and reporting success. The try/finally blocks make sure the
    # document is closed and the hidden Word process is quit even on failure.
    script = f"""
$ErrorActionPreference = 'Stop'
$word = New-Object -ComObject Word.Application
try {{
    $word.Visible = $false
    $doc = $word.Documents.Open({_ps_literal(docx_path)})
    try {{
        $doc.SaveAs({_ps_literal(pdf_path)}, 17) # 17 corresponds to wdFormatPDF
    }} finally {{
        $doc.Close()
    }}
}} finally {{
    $word.Quit()
}}
"""
    try:
        subprocess.run(["powershell", "-Command", script], check=True, capture_output=True)
        print(f"✅ PDF文档已通过Word生成: {pdf_path}")
        return True
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        print("❌ 使用Word转换PDF失败。请确保已安装Microsoft Word。")
        print(f" 错误详情: {e}")
        # The output was captured, so surface PowerShell's own error text.
        stderr = getattr(e, "stderr", None)
        if stderr:
            print(stderr.decode(errors="replace").strip())
        return False
def main():
    """Convert the financing-plan Markdown file to Word and then to PDF.

    Steps:
      1. Verify pandoc is available; otherwise print install hints.
      2. Verify the Markdown input file exists.
      3. Generate the Word document.
      4. Generate the PDF: with pandoc + xelatex when xelatex is available,
         otherwise via the MS Word fallback (Windows only).

    Returns:
        0 on success, 1 when pandoc or the input file is missing or a
        conversion raised. A failed Word-based PDF fallback is reported on
        stdout but does not change the exit code.
    """
    # Pandoc is required for every conversion path.
    if not check_pandoc():
        print("错误:未找到Pandoc")
        print("请从以下网址安装Pandoc: https://pandoc.org/installing.html")
        check_installed_modules()  # show the module-inspection cheat sheet
        return 1

    input_file = 'rongzi_deepseek.md'
    word_output = 'rongzi_deepseek_plan.docx'
    pdf_output = 'rongzi_deepseek_plan.pdf'

    if not os.path.exists(input_file):
        print(f"错误:找不到输入文件 {input_file}")
        return 1

    try:
        print(f"正在将 {input_file} 转换为Word格式...")
        convert_to_word(input_file, word_output)
        print(f"✅ Word文档已生成: {word_output}")

        if check_latex_engine():
            print(f"正在将 {input_file} 转换为PDF格式...")
            convert_to_pdf(input_file, pdf_output)
            print(f"✅ PDF文档已生成: {pdf_output}")
        else:
            print("⚠️ 未找到xelatex引擎。PDF将通过备选方案生成。")
            print(" 为了获得最佳PDF输出质量建议安装LaTeX发行版 (如 MiKTeX for Windows)。")
            # The fallback converts the Word file that was just produced.
            if not convert_docx_to_pdf_windows(word_output, pdf_output):
                print(" 无法生成PDF文件。")
        print("\n转换完成!")
    except subprocess.CalledProcessError as e:
        print(f"转换失败: {e}")
        # convert_to_pdf captures output, so show pandoc/LaTeX diagnostics.
        if e.stderr:
            detail = e.stderr
            if isinstance(detail, bytes):
                detail = detail.decode(errors="replace")
            print(detail.strip())
        return 1
    except Exception as e:
        # Top-level boundary: report anything unexpected and exit non-zero.
        print(f"未知错误: {e}")
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())