一、使用 dom4j 解析 XML
需要两个 jar 包：dom4j 的 jar 和 jaxen（beta 版）的 jar
view plainprint?
/** * java解析xml文件各个节点信息* * @author Jeelon * @param string * 解析的文件名*/ private static void getXmlInfo(String string) { SAXReader reader = new SAXReader()InputStream in = ThreadcurrentThread()getContextClassLoader()getResourceAsStream(string)try { Document doc = readerread(in)Element driverNameEls = (Element) docselectObject(/config/dbinfo/drivername)Element urlEls = (Element) docselectObject(/config/dbinfo/url)Element userNameEls = (Element) docselectObject(/config/dbinfo/username)Element passwordEls = (Element) docselectObject(/config/dbinfo/password)
String driverName = driverNameElsgetStringValue()String url = urlElsgetStringValue()String userName = userNameElsgetStringValue()String password = passwordElsgetStringValue()
Systemoutprintln(====================================)Systemoutprintln(驱动名 + driverName)Systemoutprintln(URL地址 + url)Systemoutprintln(用户名 + userName)Systemoutprintln(密码 + password)Systemoutprintln(====================================)} catch (DocumentException e) { eprintStackTrace()}
}
二、Java 解析 HTML，需要的 jar 包：jsoup.jar
view plainprint?
/** * 提取HTML文件的文本内容* * @author Jeelon * @param html * 提取的html文件名* @return 返回提取内容String */ private static String getDocument(File html) { String text = try { // 设置编码集orgjsoupnodesDocument doc = Jsoupparse(html UTF)// 提取标题信息Elements title = docselect(title)for (orgjsoupnodesElement link title) { text += linktext() + } // 提取table中的文本信息Elements links = docselect(table)for (orgjsoupnodesElement link links) { text += linktext() + } // 提取div中的文本信息Elements divs = docselect(div[class=post])for (orgjsoupnodesElement link divs) { text += linktext() + } } catch (IOException e) { eprintStackTrace()}
return text}
view plainprint?
Element element = nullFile f = new File(axml)DocumentBuilder db = null // documentBuilder为抽象不能直接实例化(将XML文件转换为DOM文件)
DocumentBuilderFactory dbf = nulltry {
dbf = DocumentBuilderFactorynewInstance() // 返回documentBuilderFactory对象db = dbfnewDocumentBuilder()// 返回db对象用documentBuilderFatory对象获得返回documentBuildr对象
Document dt = dbparse(f) // 得到一个DOM并返回给document对象element = dtgetDocumentElement()// 得到一个elment根元素
Systemoutprintln(根元素 + elementgetNodeName()) // 获得根节点
NodeList childNodes = elementgetChildNodes() // 获得根元素下的子节点
for (int i = i < childNodesgetLength() i++) {// 遍历这些子节点Node node = em(i) // em(i)// 获得每个对应位置i的结点if (Accountequals(nodegetNodeName())) { // 如果节点的名称为Account则输出Account元素属性type Systemoutprintln(\r\n找到一篇账号 所属区域 + nodegetAttributes()getNamedItem(type)getNodeValue() + )NodeList nodeDetail = nodegetChildNodes() // 获得<Accounts>下的节点for (int j = j < nodeDetailgetLength() j++) { // 遍历<Accounts>下的节点Node detail = em(j) // 获得<Accounts>元素每一个节点if (codeequals(detailgetNodeName())) // 输出code Systemoutprintln(卡号 + detailgetTextContent())else if (passequals(detailgetNodeName())) // 输出pass Systemoutprintln(密码 + detailgetTextContent())else if (nameequals(detailgetNodeName())) // 输出name Systemoutprintln(姓名 + detailgetTextContent())else if (moneyequals(detailgetNodeName())) // 输出money Systemoutprintln(余额 + detailgetTextContent())
}
} } catch (Exception e) { Systemoutprintln(e)}