python读取word

时间：2020-02-21 20:41:06 阅读：102 评论：0 收藏：0 [点我收藏+]

from docx import Document

# 读取全文本

# document = Document(r'C:\Users\13375\Desktop\python\长恨歌.docx')
# all_paragraphs = document.paragraphs
# for paragraph in all_paragraphs:
#     print(paragraph.text)

# #     读取表格中的文字
# document = Document(r'C:\Users\13375\Desktop\python\长恨歌2.docx')
# all_tables = document.tables
# for table in all_tables:
#     for row in table.rows:
#         for cell in row.cells:
#             print(cell.text)

# Read a Word document that mixes tables and body text by opening the .docx
# as a zip archive and parsing its word/document.xml part directly.
import zipfile
import xml.etree.ElementTree as ET

# Sample document read when this file is run as a script.
DOCX_PATH = 'C:/Users/13375/Desktop/python/长恨歌3.docx'

# Fully-qualified (Clark notation) tag of a WordprocessingML text element,
# i.e. what appears as <w:t> in the raw XML.
W_TEXT_TAG = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}t'


def extract_text_runs(document_xml):
    """Return the text of every ``<w:t>`` element, in document order.

    The XML is parsed rather than split on the literal string ``<w:t>``, so
    elements carrying attributes (``<w:t xml:space="preserve">``) are found,
    similarly named tags such as ``<w:tbl>`` or ``<w:tab/>`` are ignored, and
    entities such as ``&amp;`` are decoded.

    Args:
        document_xml: Contents of ``word/document.xml`` as ``str`` or ``bytes``.

    Returns:
        A list of strings, one per ``<w:t>`` element. Text inside tables is
        included because every ``<w:t>`` descendant of the root is visited.
        An empty element contributes ``''``.

    Raises:
        xml.etree.ElementTree.ParseError: If *document_xml* is not
            well-formed XML.
    """
    root = ET.fromstring(document_xml)
    # Element.text is None for an empty element; normalise to '' so the
    # result can always be passed to str.join.
    return [node.text or '' for node in root.iter(W_TEXT_TAG)]


def main():
    """Print the raw XML, the list of text runs, and the joined text."""
    # The context manager closes the archive even if reading fails.
    with zipfile.ZipFile(DOCX_PATH) as word:
        document_xml = word.read('word/document.xml').decode('utf-8')
    print(document_xml)

    text_list = extract_text_runs(document_xml)
    print(text_list)

    text = "".join(text_list)
    print(text)


if __name__ == "__main__":
    main()

python读取word

原文：https://www.cnblogs.com/tomhu/p/12342930.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)