首先需要導入的包
import javax.xml.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import java.io.*;
然後通過DocumentBuilderFactory建立一個DocumentBuilder對象:
// Obtain a factory instance, then ask it for a DocumentBuilder (the DOM parser object).
// newDocumentBuilder() declares ParserConfigurationException, so the caller must handle it.
DocumentBuilderFactory docBuilderFactory=DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder=docBuilderFactory.newDocumentBuilder();
DocumentBuilder的parse方法可以解析一個XML文件,並返回Document類型的對象:
// Parse the XML source into an in-memory DOM tree.
// xmlFile is not declared in this snippet; in the full program below it is a java.io.File.
Document document=docBuilder.parse(xmlFile);
之後就可以通過Document提供的方法訪問和修改其中的節點了。
假設有一個XML格式為
<entailment-corpus>
<pair id="001" contradiction="YES" type="negation">
<t>Tariq Aziz was not considered a member of Saddam's innermost circle.</t>
<h>Tariq Aziz was in Saddam's inner circle.</h>
</pair>
<pair id="002" contradiction="YES" type="lexical">
<t>Tariq Aziz kept outside the closed circle of Saddam's Sunni Moslem cronies.</t>
<h>Tariq Aziz was in Saddam's inner circle.</h>
</pair>
</entailment-corpus>
下面的程序把該XML修改為
<entailment-corpus>
<pair id="001" contradiction="YES" type="negation">
<t>Tariq Aziz was not considered a member of Saddam's innermost circle.</t>
<h>Tariq Aziz was in Saddam's inner circle.</h>
<addElement1>addText1</addElement1>
<addElement2>addText2</addElement2>
</pair>
<pair id="002" contradiction="YES" type="lexical">
<t>Tariq Aziz kept outside the closed circle of Saddam's Sunni Moslem cronies.</t>
<h>Tariq Aziz was in Saddam's inner circle.</h>
<addElement1>addText1</addElement1>
<addElement2>addText2</addElement2>
</pair>
</entailment-corpus>
import javax.xml.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import java.io.*;
import java.util.*;
/**
 * Small demonstration of reading and modifying an XML file with the JAXP DOM API.
 *
 * <p>For every {@code <pair>} element under the document root, the text of each
 * child element is printed to standard output and two new child elements,
 * {@code <addElement1>addText1</addElement1>} and
 * {@code <addElement2>addText2</addElement2>}, are appended. The modified
 * document is then serialized to an output file.
 */
public class TestDom {

    /** Destination used by {@link #modifyXML(File)} when no output file is given. */
    private static final String DEFAULT_OUTPUT_PATH = "E:/我的文檔/tmp/modify.xml";

    /** Input document used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_INPUT_PATH = "E:/我的文檔/tmp/數據/real_contradiction.xml";

    /**
     * Modifies {@code xmlFile} and writes the result to the default output path.
     *
     * @param xmlFile the XML document to read
     */
    public void modifyXML(File xmlFile) {
        modifyXML(xmlFile, new File(DEFAULT_OUTPUT_PATH));
    }

    /**
     * Parses {@code xmlFile}, prints the text of every child element of each
     * {@code <pair>}, appends the two demo elements to each {@code <pair>} and
     * writes the resulting document to {@code outputFile}.
     *
     * <p>Parser, I/O and transformer failures are reported with
     * {@code printStackTrace()} and not rethrown, as in the original
     * single-argument method.
     *
     * @param xmlFile    the XML document to read
     * @param outputFile the file the modified document is written to
     */
    public void modifyXML(File xmlFile, File outputFile) {
        try {
            DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
            Document document = docBuilder.parse(xmlFile);
            Element root = document.getDocumentElement();

            // Appending the new children below never adds or removes <pair> elements,
            // so indexing into this list while modifying the tree is safe.
            NodeList pairNodeList = root.getElementsByTagName("pair");
            for (int i = 0; i < pairNodeList.getLength(); i++) {
                Node pairNode = pairNodeList.item(i); // a <pair> element
                printChildElementText(pairNode);
                appendTextElement(document, pairNode, "addElement1", "addText1");
                appendTextElement(document, pairNode, "addElement2", "addText2");
            }

            writeDocument(document, outputFile);
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (TransformerException e) {
            e.printStackTrace();
        }
    }

    /** Prints the trimmed text content of every element that is a direct child of {@code parent}. */
    private static void printChildElementText(Node parent) {
        // getChildNodes() also returns the whitespace between tags as Text nodes,
        // so the list is usually longer than the number of child elements.
        NodeList children = parent.getChildNodes();
        for (int j = 0; j < children.getLength(); j++) {
            Node child = children.item(j);
            if (child instanceof Element) { // skip whitespace text, comments, etc.
                // getTextContent() yields "" for an empty element and ignores comments,
                // whereas casting getFirstChild() to Text fails for <h/> (null) or for
                // an element whose first child is not a Text node.
                String text = child.getTextContent().trim();
                System.out.println(text);
            }
        }
    }

    /** Creates {@code <tagName>text</tagName>} and appends it as the last child of {@code parent}. */
    private static void appendTextElement(Document document, Node parent, String tagName, String text) {
        Element element = document.createElement(tagName);
        element.setTextContent(text);
        parent.appendChild(element);
    }

    /** Serializes {@code document} to {@code outputFile} with a default (identity) transformer. */
    private static void writeDocument(Document document, File outputFile)
            throws TransformerException, IOException {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        // Open the stream ourselves rather than handing the File to StreamResult, so the
        // write does not depend on how the transformer turns a file URI back into a path.
        // NOTE(review): believed to matter for paths with non-ASCII characters on some
        // transformer implementations - confirm against the JDK in use.
        OutputStream out = new FileOutputStream(outputFile);
        try {
            transformer.transform(new DOMSource(document), new StreamResult(out));
        } finally {
            out.close();
        }
    }

    /**
     * Runs the demo.
     *
     * @param args optional: {@code args[0]} is the input XML path and
     *             {@code args[1]} the output path; the built-in defaults
     *             are used for whichever is missing
     */
    public static void main(String[] args) {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        TestDom testDom = new TestDom();
        if (args.length > 1) {
            testDom.modifyXML(new File(inputPath), new File(args[1]));
        } else {
            testDom.modifyXML(new File(inputPath));
        }
    }
}
注意getChildNodes()會把標籤之間的空白(空格、換行)也當做一個Text類型的Node返回,因此在程序中要判斷pairChildNode是不是instanceof Element。
如果是Element,那麼就可以獲得其中的內容了。