38 lines
1.1 KiB
Python
38 lines
1.1 KiB
Python
import PyPDF2
|
|
import os
|
|
import sys
|
|
|
|
# Default input PDF and output Markdown locations used when run as a script.
pdf_path = r'D:\Code\SecMPS\doc\对接文档\MC4.0对外API.pdf'
output_path = r'D:\Code\SecMPS\doc\对接文档\MC4.0对外API.md'


def build_markdown(page_texts):
    """Assemble the Markdown document from per-page extracted text.

    Args:
        page_texts: Sequence with one entry per PDF page, in page order.
            An entry may be ``None``, empty, or whitespace-only for a page
            from which no text was extracted.

    Returns:
        The complete Markdown document as a single string: a title, a note
        with the total page count, then one headed section per page that
        has text.
    """
    parts = [
        '# MC4.0对外API文档\n',
        f'\n> 原文档页数: {len(page_texts)} 页\n',
        '---\n\n',
    ]
    # Page numbers are 1-based and always refer to the position in the PDF,
    # so skipped (empty) pages leave gaps in the numbering on purpose.
    for page_no, text in enumerate(page_texts, start=1):
        if text and text.strip():
            parts.append(f'## 第 {page_no} 页\n')
            parts.append(text)
            parts.append('\n---\n\n')
    return '\n'.join(parts)


def convert_pdf_to_markdown(src=pdf_path, dst=output_path):
    """Extract the text of every page of *src* and write it to *dst* as Markdown.

    Args:
        src: Path of the PDF file to read.
        dst: Path of the Markdown file to write (UTF-8, overwritten).

    Raises:
        OSError: If *src* cannot be opened or *dst* cannot be written.
        Exception: Whatever PyPDF2 raises while parsing or extracting text.
    """
    print(f'正在读取: {src}')

    with open(src, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        num_pages = len(pdf_reader.pages)
        print(f'PDF总页数: {num_pages}')
        # Extraction must happen while the file is still open: the reader
        # pulls page data from the underlying stream on demand.
        page_texts = [page.extract_text() for page in pdf_reader.pages]

    # Build the whole document before opening the output so a failed read
    # never truncates an existing Markdown file.
    markdown = build_markdown(page_texts)

    with open(dst, 'w', encoding='utf-8') as md_file:
        md_file.write(markdown)

    print(f'转换完成!已保存至: {dst}')


def main():
    """Run the conversion with the default paths; exit with status 1 on failure."""
    try:
        convert_pdf_to_markdown(pdf_path, output_path)
    except Exception as e:
        # Top-level boundary: report on stderr so the message is not mixed
        # with the progress output on stdout, then signal failure.
        print(f'错误: {e}', file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
|