import itertools from zipfile import ZipFile import shutil
# Script fragment: pull every embedded image out of the .docx files found in
# ``doc_path`` and ``doc_path/temp_dir``, then convert them all to JPEG.
# ``doc_path`` and ``temp_dir`` are expected to be defined before this point.

# Start from an empty scratch directory for the raw (unconverted) images.
raw_dir = Path(doc_path) / temp_dir / "imgs"
if raw_dir.exists():
    shutil.rmtree(raw_dir)
raw_dir.mkdir(parents=True)

# Images are numbered continuously across all documents: 1.png, 2.jpeg, ...
i = 1
for filename in itertools.chain(Path(doc_path).glob("*.docx"),
                                (Path(doc_path) / temp_dir).glob("*.docx")):
    if filename.name.startswith("~$"):
        # Word lock file left behind by an open document; not a real archive.
        continue
    print(filename)
    with ZipFile(filename) as zip_file:
        for names in zip_file.namelist():
            if not names.startswith("word/media/image"):
                continue
            numbered = f"{i}{os.path.splitext(names)[1]}"
            # Stream the member straight to its destination instead of
            # extracting into ``doc_path/word`` and deleting that directory
            # afterwards: the old approach crashed for documents without
            # images and would wipe an unrelated ``word`` folder.
            with zip_file.open(names) as src, open(raw_dir / numbered, "wb") as dst:
                shutil.copyfileobj(src, dst)
            print("\t", names, numbered)
            i += 1

# The JPEG output directory is not created anywhere else, so make sure it exists.
jpeg_dir = Path(doc_path) / "imgs"
jpeg_dir.mkdir(parents=True, exist_ok=True)
for filename in raw_dir.glob("*"):
    with Image.open(str(filename)) as im:
        # JPEG has no alpha channel or palette mode, hence the RGB conversion.
        im.convert('RGB').save(str(jpeg_dir / f"{filename.stem}.jpg"), 'jpeg')
import itertools import os import shutil from pathlib import Path from zipfile import ZipFile
from PIL import Image from win32com import client as wc # 导入模块
def _convert_doc_to_docx(src_dir, dest_dir):
    """Save every legacy ``*.doc`` file in *src_dir* as ``.docx`` inside *dest_dir*.

    The conversion is done by Microsoft Word through COM; Word is always shut
    down again, even if a document fails to convert.
    """
    word = wc.Dispatch("Word.Application")  # launch the Word application
    try:
        for doc_file in src_dir.glob("*.doc"):
            if doc_file.name.startswith("~$"):
                # Lock file created by Word for a document that is open elsewhere.
                continue
            dest_file = dest_dir / (doc_file.name + "x")
            print(doc_file, dest_file)
            # Word runs in its own process, so a path relative to this
            # script's working directory may not resolve there; hand it
            # absolute paths instead.
            doc = word.Documents.Open(str(doc_file.resolve()))  # open the Word file
            try:
                # Save a copy with the ".docx" suffix; 12 selects the docx format.
                doc.SaveAs(str(dest_file.resolve()), 12)
            finally:
                doc.Close(0)  # 0 == do not save changes; releases the file
    finally:
        word.Quit()


def _extract_media(docx_files, media_dir):
    """Copy every ``word/media/image*`` member of *docx_files* into *media_dir*.

    Files are numbered continuously across all documents (``1.png``,
    ``2.jpeg``, ...) and keep their original extension.
    """
    counter = 1
    for docx_file in docx_files:
        if docx_file.name.startswith("~$"):
            # Word lock file, not a real zip archive.
            continue
        print(docx_file)
        with ZipFile(docx_file) as archive:
            for member in archive.namelist():
                if not member.startswith("word/media/image"):
                    continue
                numbered = f"{counter}{os.path.splitext(member)[1]}"
                # Stream the member directly to its destination. Extracting
                # into ``doc_path/word`` and removing that directory afterwards
                # failed for documents without images and could delete an
                # unrelated ``word`` folder.
                with archive.open(member) as src, open(media_dir / numbered, "wb") as dst:
                    shutil.copyfileobj(src, dst)
                print("\t", member, numbered)
                counter += 1


def _convert_to_jpeg(media_dir, jpeg_dir):
    """Re-encode every file in *media_dir* as ``<stem>.jpg`` inside *jpeg_dir*."""
    # Nothing else creates the output directory, so do it here.
    jpeg_dir.mkdir(parents=True, exist_ok=True)
    for image_file in media_dir.glob("*"):
        with Image.open(str(image_file)) as im:
            # JPEG has no alpha channel or palette mode, hence the RGB conversion.
            im.convert('RGB').save(str(jpeg_dir / f"{image_file.stem}.jpg"), 'jpeg')


def word_img_extract(doc_path, temp_dir):
    """Extract all embedded images from the Word documents in *doc_path* as JPEGs.

    Steps:
      1. Recreate the scratch directory ``doc_path/temp_dir`` from scratch.
      2. Convert each ``*.doc`` file in *doc_path* to ``.docx`` (via Word/COM),
         storing the copies in the scratch directory.
      3. Copy the embedded images of every ``.docx`` (originals and converted
         copies) into ``doc_path/temp_dir/imgs``, numbered from 1.
      4. Convert those images to JPEG in ``doc_path/imgs``.

    Args:
        doc_path: Directory containing the ``.doc`` / ``.docx`` files.
        temp_dir: Name of the scratch sub-directory of *doc_path*; any existing
            content in it is deleted.

    Returns:
        None. Results are written to ``doc_path/imgs``.
    """
    root = Path(doc_path)
    work_dir = root / temp_dir
    if work_dir.exists():
        shutil.rmtree(work_dir)
    work_dir.mkdir()

    _convert_doc_to_docx(root, work_dir)

    # The scratch directory was just recreated, so this is always a fresh folder.
    media_dir = work_dir / "imgs"
    media_dir.mkdir(parents=True, exist_ok=True)

    docx_files = itertools.chain(root.glob("*.docx"), work_dir.glob("*.docx"))
    _extract_media(docx_files, media_dir)

    _convert_to_jpeg(media_dir, root / "imgs")