本文非原创,只是整理了下代码,原代码出自:http://blog.chiefleo.me/archives/429 。原文如下:

普通的读取批注信息方法:

/**
 * Prints the id, text and author of every comment in a .docx file to standard output.
 *
 * <p>This is the plain approach: it lists the comments themselves but cannot tell which
 * part of the document body each comment is attached to.
 *
 * <p>Failures are reported on standard error via {@code printStackTrace()} and are not
 * propagated to the caller.
 *
 * @param fileName path of the .docx file to read
 */
public void readWordDocxComments(String fileName) {
    XWPFDocument document = null;
    try {
        document = new XWPFDocument(POIXMLDocument.openPackage(fileName));
        for (XWPFComment comment : document.getComments()) {
            System.out.println("Id= " + comment.getId());
            System.out.println("Text= " + comment.getText());
            System.out.println("Author= " + comment.getAuthor());
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (document != null) {
            // The document is only read here, so release the underlying package with
            // revert(), which closes it without saving anything back to the file.
            document.getPackage().revert();
        }
    }
}
不能获取批注对应的正文信息,修改后的代码如下:

import java.io.File; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLDocumentPart; import org.apache.poi.xwpf.usermodel.XWPFComment; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.poi.xwpf.usermodel.XWPFRelation; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CommentsDocument; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CommentsDocument.Factory; import org.w3c.dom.Node; import org.w3c.dom.NodeList; public class POI_读取批注_S4_Test { private File file; /** Word document */ private XWPFDocument docx; /** 批注内容数组 */ private XWPFComment[] comments;// /** 批注引用正文map,结构-<批注Id,正文text> */ private Map<String, String> commentRefs;// /** 日期格式化类型 */ private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); /** 批注所引用正文装配Map完毕标识 */ private static final String COMMENT_REF_FILLED_OK = "OK"; /** 批注最大下标 */ private String maxCommentIndex; /* * @param filePath Word文件路径 */ public POI_读取批注_S4_Test(String filePath) throws Exception { file = new File(filePath); initAttributes(); } /* * 初始化成员变量 * @throws Exception Word缺陷导入异常 */ private void initAttributes() throws Exception { try { docx = new XWPFDocument(POIXMLDocument.openPackage(file .getCanonicalPath())); comments = docx.getComments(); maxCommentIndex = String.valueOf(comments.length - 1); commentRefs = new HashMap<String, String>(); fillCommentRef(docx.getDocument().getDomNode(), new StringBuilder(), new StringBuilder(), new StringBuilder(), commentRefs); } catch (Exception e) { throw new Exception(new StringBuilder().append("Word文件格式错误") .append("-").append(e.getMessage()).toString(), e); } } /* * 获取批注内容 */ public XWPFComment[] getComments() { return comments; } public Map<String, String> 
getCommentRefs() { return commentRefs; } /* * 获取日期格式化类型 */ public SimpleDateFormat getSdf() { return sdf; } /* 获取批注日期List */ public List<Date> getSubmitDateList() { Map<String, Date> dateMap = new HashMap<String, Date>(); List<Date> dateList = new ArrayList<Date>(); try { Iterator<POIXMLDocumentPart> iter = docx.getRelations().iterator(); do { if (!iter.hasNext()) break; POIXMLDocumentPart p = (POIXMLDocumentPart) iter.next(); String relation = p.getPackageRelationship() .getRelationshipType(); if (relation.equals(XWPFRelation.COMMENT.getRelation())) { CommentsDocument cmntdoc; cmntdoc = Factory .parse(p.getPackagePart().getInputStream()); List<CTComment> commentList = cmntdoc.getComments() .getCommentList(); int len = commentList.size(); int j = 0; while (j < len) { CTComment ctcomment = commentList.get(j); dateMap.put(ctcomment.getId().toString(), ctcomment .getDate().getTime()); j++; } } } while (true); } catch (Exception e) { } if (dateMap != null) { for (XWPFComment comment : comments) { dateList.add(dateMap.get(comment.getId())); } } return dateList; } /* * 获取批注作者List */ public List<String> getSubmitterList() { List<String> list = new ArrayList<String>(); for (XWPFComment comment : comments) { list.add(comment.getAuthor().trim()); } return list; } /* * 组装批注引用文本Map,Map结构-<commentId,text> * @param node WordProcessingML node * @param id 批注ID * @param value 批注引用正文文本 * @param convertOK 正文组装完毕标识 ,组装完毕 = "OK" * @param map 要填充的目标Map */ private void fillCommentRef(Node node, StringBuilder id, StringBuilder value, StringBuilder convertOK, Map<String, String> map) throws Exception { // fillCommentRef方法要求所有参数不能为null,如果为null,抛出异常 if (!insureNotNull(node, id, value, convertOK, map)) { throw new IllegalArgumentException(new StringBuilder() .append(this.getClass().getName()) .append("fillCommentRef(").append(node).append(",") .append(id).append(",").append(value).append(",") .append(convertOK).append(",").append(map).append(")") .toString()); } /* * 
docx文件批注所引用的正文保存在document.xml中,可以通过重命名xx.docx为xx.zip来查看 * 其中如果某段正文文本内容有批注,那么会在document.xml这样保存 <w:commentRangeStart w:id="0" /> * <w:t>正文文本</w:t> </w:r> <w:commentRangeEnd w:id="0" /> * 如果被批注的是在图片上加批注,那么会在document * .xml中这样保存(仅限真正docx文件,如果是doc文件另存为docx文件,<wp:docPr节点中是没有属性的) * <w:commentRangeStart w:id="1" /> <wp:docPr id="1" name="xxx" * descr="yyy.png" /> <w:commentRangeEnd w:id="1" /> * * 1)id初始值为空,如果解析到节点w:commentRangeStart,就代表是有批注的部分,需要把参数id设为节点的id属性值 * 2)顺次解析下面节点 * ,如果此时的id不为空,就代表进入批注引用部分,w:t是文本内容,直接append;wp:docPr是图片内容,用"[xxx]" * 来区分是图片,然后append. * 3)如果解析到节点w:commentRangeEnd,就代表一个批注引用完毕,这时需要向Map中put(id,value)值; * 判断当前的批注Id是不是最大 * ,如果为最大批注Id,convertOK置为"OK",用此标识来说明批注引用提取完毕,退出节点for循环?例如一个很大的Word文件 * ,只在第2页做了一个批注,前面的做法会很有用; * 同时还要做好一条批注引用解析完毕的收尾工作:id清空,代表下面节点又是无批注的部分;value清空,待下次新的批注append. */ if ("w:t".equals(node.getNodeName()) && id.length() > 0) { value.append(node.getFirstChild().getNodeValue()); } else if ("wp:docPr".equals(node.getNodeName()) && id.length() > 0) { value.append("[").append(getAttribute(node, "name")).append("]"); } else if ("w:commentRangeStart".equals(node.getNodeName())) { id.setLength(0); id.append(getAttribute(node, "w:id")); value.setLength(0); } else if ("w:commentRangeEnd".equals(node.getNodeName()) && id.length() > 0) { if (id.toString().equals(getAttribute(node, "w:id"))) { map.put(id.toString(), value.toString()); if (id.toString().equals(maxCommentIndex)) { convertOK.setLength(0); convertOK.append(COMMENT_REF_FILLED_OK); id.setLength(0); value.setLength(0); } } } if (node.hasChildNodes()) { NodeList temp = node.getChildNodes(); for (int i = 0; i < temp.getLength(); i++) { if (convertOK.toString().endsWith(COMMENT_REF_FILLED_OK)) { break; } fillCommentRef(temp.item(i), id, value, convertOK, map); } } } /*** * @param node * 当前的Node * @param attName * 要获取的属性名 * @return 属性值,没有该属性时返回null */ private static String getAttribute(Node node, String attName) { return (node.hasAttributes() && node.getAttributes().getNamedItem( attName) != 
null) ? node.getAttributes().getNamedItem(attName) .getNodeValue() : null; } /* * 确保此方法的所有参数均不为空 * @param objects 对象参数 * @return 所有参数均不为空返回true 否则为false */ private boolean insureNotNull(Object... objects) { for (Object object : objects) { if (object == null) { return false; } } return true; } public static void main(String[] args) throws Exception { StringBuffer value = new StringBuffer(); POI_读取批注_S4_Test wh = new POI_读取批注_S4_Test( "f:/saveFile/temp/sys_comment_07.docx"); XWPFComment[] comments = wh.getComments(); Map<String, String> commenRefMap = wh.getCommentRefs(); List<Date> l = wh.getSubmitDateList(); SimpleDateFormat sdf = wh.getSdf(); XWPFComment comment; for (int i = 0; i < comments.length; i++) { comment = comments[i]; value.append("批注Id:").append(comment.getId()).append(", ") .append("批注作者:").append(comment.getAuthor()).append(", ") .append("批注日期:").append(sdf.format(l.get(i))).append(", ") .append("批注内容:").append(comment.getText()).append(", ") .append("批注引用正文:") .append(commenRefMap.get(comment.getId())); value.append("\n"); } System.out.println(value); } }
结果为:


全文完。

原文:http://53873039oycg.iteye.com/blog/2157927