# 用 Python 批量清理删除 .doc 和 .docx 文件的页眉和页脚:不区分内容,只要文件包含页眉或页脚,就直接清理掉。
import os
import shutil

from docx import Document
from win32com import client as win32
def clear_paragraphs(paragraphs):
    """Remove all content from each paragraph, in place.

    Each item is expected to be a python-docx ``Paragraph`` (anything that
    exposes a ``clear()`` method works).

    ``Paragraph.clear()`` is used instead of clearing the runs one by one:
    ``paragraph.runs`` only yields runs that are direct children of the
    paragraph, so content wrapped in other elements (hyperlinks, simple
    fields such as page numbers) would otherwise be left behind.
    Paragraph-level formatting such as the style is preserved.

    Args:
        paragraphs: Iterable of paragraph objects to empty.
    """
    for paragraph in paragraphs:
        paragraph.clear()
def remove_header_footer_docx(doc):
    """Remove every header and footer from a python-docx document, in place.

    For each section, all six header/footer variants (primary, first-page
    and even-page) are marked as linked to the previous section. In
    python-docx, assigning ``True`` to ``is_linked_to_previous`` drops that
    section's own definition; once no section has a definition there is
    nothing left to inherit, so the document ends up with no header or
    footer content at all. This also removes tables and other non-paragraph
    content that clearing paragraph runs would leave behind.

    Args:
        doc: A python-docx ``Document`` (any object exposing ``sections``).
            The caller is responsible for saving it afterwards.
    """
    part_names = (
        "header",
        "footer",
        "first_page_header",
        "first_page_footer",
        "even_page_header",
        "even_page_footer",
    )
    for section in doc.sections:
        for part_name in part_names:
            getattr(section, part_name).is_linked_to_previous = True
def remove_header_footer_doc(filename):
    """Delete all header and footer content from a Word file and save it in place.

    The file is opened through Word's COM automation interface, so this
    only works on Windows with Microsoft Word installed.

    Args:
        filename: Path of the document to modify. It is converted to an
            absolute path because Word does not share Python's working
            directory.
    """
    # WdHeaderFooterIndex values: primary (1), first page (2), even pages (3).
    header_footer_kinds = (1, 2, 3)

    word_app = win32.Dispatch('Word.Application')
    try:
        doc = word_app.Documents.Open(os.path.abspath(filename))
        try:
            for section in doc.Sections:
                for kind in header_footer_kinds:
                    for part in (section.Headers(kind), section.Footers(kind)):
                        # First-page / even-page parts only exist when the
                        # document enables them; skip the ones that do not.
                        if part.Exists:
                            part.Range.Delete()
            doc.Save()
        finally:
            # Already saved on success; on failure discard the partial edit.
            doc.Close(False)
    finally:
        # Shut Word down so batch runs do not leave hidden WINWORD processes
        # behind, but leave it alone if other documents are still open in
        # this instance.
        if word_app.Documents.Count == 0:
            word_app.Quit()
def batch_remove_headers_footers(input_folder, output_folder):
    """Strip headers and footers from every .doc/.docx file in a folder.

    Cleaned files are written to ``output_folder`` under their original
    names; the files in ``input_folder`` are left untouched (unless both
    folders are the same, in which case files are cleaned in place).
    Extensions are matched case-insensitively. Sub-directories, files of
    other types, and Word's ``~$`` lock files are skipped.

    Args:
        input_folder: Directory containing the documents to process.
        output_folder: Directory receiving the cleaned documents; created
            (including parents) if it does not exist.
    """
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        input_path = os.path.join(input_folder, filename)
        # "~$name.docx" files are the lock files Word keeps next to open
        # documents; they are not real documents and cannot be parsed.
        if filename.startswith("~$") or not os.path.isfile(input_path):
            continue

        output_path = os.path.join(output_folder, filename)
        extension = os.path.splitext(filename)[1].lower()

        if extension == ".docx":
            doc = Document(input_path)
            remove_header_footer_docx(doc)
            doc.save(output_path)
        elif extension == ".doc":
            # Word edits the file it opens in place, so work on a copy in the
            # output folder rather than modifying and moving the original.
            target_path = os.path.abspath(output_path)
            if os.path.abspath(input_path) != target_path:
                shutil.copy2(input_path, target_path)
            remove_header_footer_doc(target_path)
if __name__ == "__main__":
    # Placeholder values: replace both with real folder paths before running.
    input_folder, output_folder = "输入文件夹的路径", "输出文件夹的路径"
    batch_remove_headers_footers(
        input_folder=input_folder,
        output_folder=output_folder,
    )