<?xml version="1.0" encoding="UTF-8"?>
<!-- xml解析测试文件 -->
<LData>
<ldata>
<issue>123456</issue>
<date>20200115235900</date>
<nil></nil>
<nil2> </nil2>
<numbers>
<number>1</number>
<number>2</number>
<number>3</number>
<number>4</number>
<number>5</number>
<number>6</number>
</numbers>
</ldata>
<ldata>
<issue>222223</issue>
<date>2020-01-14 23:59:00</date>
<numbers>
<number>11</number>
</numbers>
</ldata>
</LData>
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Small DOM-parsing demo: loads an XML document and prints the names of the
 * child nodes of its first {@code <ldata>} element.
 *
 * <p>{@code Book} is not declared in this file; it is presumably a project
 * type declared elsewhere.
 */
public class DOM {
    /** Factory from which a fresh {@link DocumentBuilder} is obtained for every parse. */
    private static final DocumentBuilderFactory DOC_BUILDER_FACTORY = DocumentBuilderFactory.newInstance();

    /**
     * Matches whitespace at the very start or end of the input, and runs of
     * whitespace that sit directly between a closing {@code >} and an opening
     * {@code <}. Whitespace inside tags or inside text content is not matched.
     */
    private static final Pattern INTER_TAG_BLANKS = Pattern.compile("\\A\\s+|(?<=>)\\s+(?=<)|\\s+\\z");

    /**
     * Removes the formatting whitespace (line breaks, indentation) between XML
     * tags so that a DOM parser does not turn it into {@code #text} nodes.
     *
     * <p>Only whitespace between {@code >} and {@code <}, plus leading and
     * trailing whitespace, is removed. Spaces inside the XML declaration,
     * between attributes, and inside element text (for example
     * {@code 2020-01-14 23:59:00}) are kept, so the result stays well-formed.
     * The filtered XML is also printed to standard output.
     *
     * @param xml the XML text to filter; must not be {@code null}
     * @return the XML text without inter-tag whitespace
     * @throws NullPointerException if {@code xml} is {@code null}
     */
    public static String filterBlankXMl(String xml) {
        Matcher m = INTER_TAG_BLANKS.matcher(xml);
        xml = m.replaceAll("");
        System.out.println("接受到的XML:\n" + xml);
        return xml;
    }

    /**
     * Parses the XML document at the given URI and prints the node name of
     * every direct child of the first {@code <ldata>} element.
     *
     * <p>Whitespace between elements is part of the DOM, so the printed names
     * include {@code #text} entries for the line breaks between the child
     * elements. Callers that only want elements can test
     * {@code node.getNodeType() == Node.ELEMENT_NODE}.
     *
     * @param fileURL URI of the XML document to parse
     * @return a new list; this method does not add any element to it
     * @throws Exception if the parser cannot be configured or the document
     *         cannot be read or parsed
     */
    public static List<Book> getXml(String fileURL) throws Exception {
        // Build the parser here so a configuration failure reaches the caller
        // instead of surfacing later as a NullPointerException.
        DocumentBuilder builder = DOC_BUILDER_FACTORY.newDocumentBuilder();
        Document document = builder.parse(fileURL);
        List<Book> books = new ArrayList<Book>();

        NodeList ldataList = document.getElementsByTagName("ldata");
        if (ldataList.getLength() == 0) {
            // No <ldata> element: nothing to print.
            return books;
        }

        // Only the first <ldata> element is inspected.
        NodeList children = ldataList.item(0).getChildNodes();
        for (int j = 0; j < children.getLength(); j++) {
            System.out.println(children.item(j).getNodeName());
        }
        return books;
    }

    /**
     * Runs the demo against {@code file:///XmlDemo/test.xml} and prints the
     * stack trace of any failure.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String fileURL = "file:///XmlDemo/test.xml";
        try {
            DOM.getXml(fileURL);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
发现
System.out.println(numberList.item(j).getNodeName());
输出的结果:
#text
issue
#text
date
#text
nil
#text
nil2
#text
numbers
#text
这个#text
一度让我很迷茫,从检查xml是否写错到检查读取xml的方法是否有问题,最后还是善用了搜索引擎:原来#text是DOM解析器把标签之间的空白(换行、缩进)当作文本节点解析出来的结果。解决办法有两种:要么在解析前去掉标签之间的空白,要么在遍历时跳过#text节点。
正则匹配xml空格换行然后替换:
/**
 * Removes the formatting whitespace (line breaks, indentation) between XML
 * tags so that a DOM parser does not turn it into "#text" nodes.
 *
 * Only whitespace between '>' and '<', plus leading and trailing whitespace,
 * is removed. Spaces inside the XML declaration, between attributes, and
 * inside element text are kept, so the result stays well-formed.
 * The filtered XML is also printed to standard output.
 *
 * @param xml the XML text to filter; must not be null
 * @return the XML text without inter-tag whitespace
 */
public static String filterBlankXMl(String xml) {
    // \A\s+ and \s+\z trim the ends; the lookbehind/lookahead pair matches
    // only whitespace runs that sit directly between two tags.
    Pattern p = Pattern.compile("\\A\\s+|(?<=>)\\s+(?=<)|\\s+\\z");
    Matcher m = p.matcher(xml);
    xml = m.replaceAll("");
    System.out.println("接受到的XML:\n" + xml);
    return xml;
}
// Alternative to pre-filtering the XML: skip text nodes while iterating.
// The loop header is elided here; it is the same index loop over numberList.
for ...{
// A node named "#text" is a text node; in the sample XML these come from the
// line breaks between the child elements, so they are skipped.
if(!"#text".equals(numberList.item(j).getNodeName())){
//handle the nodes that passed the filter
System.out.println(numberList.item(j).getNodeName());
}
}
当然还可以使用dom4j等其他第三方jar
原文:https://www.cnblogs.com/zzerx/p/12198193.html