用 Python 对 Word 文档表格里面的内容进行翻译
import json
import re
import time

import docx
import requests

# Endpoint and headers used to imitate a mobile browser request.
TRANSLATE_URL = "http://fanyi.baidu.com/basetrans"
REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"}
# Without a timeout, requests waits forever on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 10

# Matches a run of CJK ideographs; compiled once instead of once per cell.
CHINESE_PATTERN = re.compile(r"[\u4e00-\u9fa5]+")


def trans(str):  # parameter name kept as-is so keyword callers keep working
    """Translate Chinese text to English through the web translation endpoint.

    Args:
        str: The text to translate (sent as the ``query`` form field).

    Returns:
        The ``dst`` field of every entry in the response's ``trans`` list,
        joined with newlines so multi-segment input keeps one segment per
        line.

    Raises:
        requests.RequestException: On connection failure, timeout or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON lacks the ``trans`` list or a ``dst`` field.
    """
    payload = {
        "query": str,
        "from": "zh",
        "to": "en"}
    response = requests.post(
        TRANSLATE_URL,
        data=payload,
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    result = json.loads(response.content.decode())
    # The original looked up result[trans] (the function object, not the
    # string key) and found the last segment via .index(), which misfires
    # when two segments are identical. join() puts a separator between
    # segments only, never after the last one.
    return "\n".join(segment["dst"] for segment in result["trans"])


def _translate_cell(cell, seen):
    """Append the English translation to one table cell if it needs it.

    Args:
        cell: A python-docx table cell; its ``text`` is read and rewritten.
        seen: Texts already handled in the current row; updated in place.
    """
    original = cell.text
    if not CHINESE_PATTERN.search(original):
        return
    # The same cell content can show up more than once while walking
    # row.cells. Once translated, its text is "<original>\n<translation>",
    # which still starts with the original, so a prefix match detects it.
    if any(original.startswith(previous) for previous in seen):
        return
    # Recorded before the request (as the original did) so a failing cell
    # is not retried for every repeat within the same row.
    seen.append(original)
    translated = trans(original)
    cell.text = original + "\n" + translated


def translate_tables(src_path, dst_path, delay=0.0):
    """Translate every Chinese table cell of a .docx file and save a copy.

    Args:
        src_path: Path of the document to read.
        dst_path: Path the modified document is written to.
        delay: Optional pause in seconds after each cell, to throttle
            requests. The default of 0 means no pause.
    """
    doc = docx.Document(src_path)
    for table in doc.tables:
        for row in table.rows:
            # Reset per row: repeats are suppressed inside one row, while
            # the same text in another row is still translated.
            seen = []
            for cell in row.cells:
                try:
                    _translate_cell(cell, seen)
                except Exception as e:
                    # Best effort: report the failure and carry on so one
                    # bad cell or request does not abort the document.
                    print(e)
                if delay:
                    time.sleep(delay)
    doc.save(dst_path)


# Runs at top level, exactly like the original flat script.
translate_tables("222.docx", "test_pudate.docx")
下一篇:
【C++】常见关键字的使用场景及用法