對於現在越來越輕量級、越來越講究速度和接近用戶的應用來說,XML確實有點複雜了。解析起來不僅耗內存,而且很複雜。這就好像花了幾千塊錢買了個MS Office,但是80%的feature都用不著,還白白地耗著CPU和內存。個人覺得,設置文件用XML其實挺好,因為設置文件一般並不太大,而且要求可讀性強,還有很多亂七八糟的需求,可以利用XML的力量。昨天搞Chrome的設置,發現Chrome的設置文件也是使用的JSON,讀起來也是輕鬆愉快。前陣子做了個程序,需要解析豆瓣API調用返回的XML。真想說一句,豆瓣你別用XML了。至少,提供個JSON版的API調用吧。(以上僅代表個人觀點)
解析豆瓣返回的XML,實在是不想用DOM這個重量級的玩意。DOM這個玩意,說它強大好還是說它官僚好呢。我傾向於使用SAX解析。但是現在面臨的一個問題是,我需要根據XML節點的名字和屬性值(一個或者多個)來決定當前的值是不是我想要的。這就麻煩一點點。第一反應是考慮XPath。後來覺得不如自己做一個得了,權當是按需定制一個輕量級的XPath。
首先定義XMLSearchUnit類,這個類的實例用來描述一個需要在XML中搜索的值,值可以是XML節點的值,或者是節點的屬性。
package com.deepnighttwo.resourceresolver.douban.resolver.utils;

import java.util.HashMap;
import java.util.Map;

import org.xml.sax.Attributes;
/**
 * Describes one value to look for while SAX-parsing an XML document.
 *
 * <p>A unit matches an element when the element's path equals
 * {@link #getXMLPath()} and every attribute registered through
 * {@link #addAttributeValidation(String, String)} is present on the element
 * with the expected value (compared ignoring case). The value to extract is
 * either the element's text or, when {@link #getExpectedAttr()} is set, the
 * value of that attribute.
 *
 * <p>{@code equals} and {@code hashCode} are computed from mutable state.
 * Finish configuring an instance before using it as a key in a hash-based
 * collection, and do not change it afterwards.
 *
 * @author mzang
 */
public class XMLSearchUnit {

    // Attribute name -> value the element must carry for this unit to match.
    private Map<String, String> attributeMatchValidation = new HashMap<String, String>();

    // Name of the attribute whose value is wanted; null means the element's
    // own text is the target.
    private String expectedAttr;

    // Element path in the form /node_name/node_name/...
    private final String xmlPath;

    /**
     * Creates a search unit for the given element path.
     *
     * @param xmlPath element path in the form {@code /node_name/node_name/...}
     */
    public XMLSearchUnit(String xmlPath) {
        this.xmlPath = xmlPath;
    }

    /**
     * Tells whether the current element satisfies this search unit.
     *
     * @param path       path of the current element
     * @param attributes attributes of the current element; {@code null} is
     *                   treated as "no attributes"
     * @return {@code true} when the path equals this unit's path and every
     *         registered attribute is present with the expected value
     *         (ignoring case); a registered expectation of {@code null} is
     *         never satisfied
     */
    public boolean match(String path, Attributes attributes) {
        if (xmlPath == null || !xmlPath.equals(path)) {
            return false;
        }
        for (Map.Entry<String, String> expectation : attributeMatchValidation.entrySet()) {
            String expected = expectation.getValue();
            String actual = (attributes == null) ? null
                    : attributes.getValue(expectation.getKey());
            if (expected == null || !expected.equalsIgnoreCase(actual)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the live map of attribute expectations (name to expected value).
     *
     * @return the internal map, never {@code null}
     */
    public Map<String, String> getAttributeMatchValidation() {
        return attributeMatchValidation;
    }

    /**
     * Requires the matched element to carry the given attribute value.
     *
     * @param key   attribute name as it appears in the document
     * @param value expected attribute value, compared ignoring case
     */
    public void addAttributeValidation(String key, String value) {
        attributeMatchValidation.put(key, value);
    }

    /**
     * Returns the element path this unit searches for.
     *
     * @return the path given at construction time
     */
    public String getXMLPath() {
        return xmlPath;
    }

    /**
     * Replaces all attribute expectations.
     *
     * @param attributeMatchValidation attribute name to expected value;
     *                                 {@code null} clears all expectations
     */
    public void setAttributeMatchValidation(
            Map<String, String> attributeMatchValidation) {
        // Keep the field non-null so match() can always iterate it.
        this.attributeMatchValidation = (attributeMatchValidation != null)
                ? attributeMatchValidation
                : new HashMap<String, String>();
    }

    /**
     * Returns the name of the attribute whose value is wanted.
     *
     * @return the attribute name, or {@code null} when the element text is
     *         the target
     */
    public String getExpectedAttr() {
        return expectedAttr;
    }

    /**
     * Selects what to extract from a matching element.
     *
     * @param expectedAttr attribute name when the target is an attribute
     *                     value, or {@code null} when the target is the
     *                     element's text
     */
    public void setExpectedAttr(String expectedAttr) {
        this.expectedAttr = expectedAttr;
    }

    /**
     * Hash code derived from the attribute expectations, the expected
     * attribute name and the path. It is recomputed on every call because
     * the first two can still change.
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + attributeMatchValidation.hashCode();
        result = prime * result
                + ((expectedAttr == null) ? 0 : expectedAttr.hashCode());
        result = prime * result + ((xmlPath == null) ? 0 : xmlPath.hashCode());
        return result;
    }

    /**
     * Two units are equal when they have the same attribute expectations,
     * the same expected attribute name and the same path.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        XMLSearchUnit other = (XMLSearchUnit) obj;
        return attributeMatchValidation.equals(other.attributeMatchValidation)
                && sameOrBothNull(expectedAttr, other.expectedAttr)
                && sameOrBothNull(xmlPath, other.xmlPath);
    }

    // Null-safe equality for the optional string fields.
    private static boolean sameOrBothNull(String left, String right) {
        return (left == null) ? right == null : left.equals(right);
    }
}
這個類比較簡單。就是用一個HashMap保存待匹配的attribute鍵值對,用一個字符串表示期待的attribute name,用一個字符串表示期待的node path。
然後就是如何在SAX裡用這個類的實例去搜索了。
package com.deepnighttwo.resourceresolver.douban.resolver.utils;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
- /**
- *
- * SAXP parser working with XMLSearchUnit.
- *
- * @author mzang
- */
- public class DoubanSearchParser extends DefaultHandler {
- // create and initial search units
- public static final XMLSearchUnit DETAILS_LINK_API_PATH = new XMLSearchUnit(
- "/feed/entry/id");
- public static final XMLSearchUnit DETAILS_CONTENT_PATH = new XMLSearchUnit(
- "/entry/summary");
- public static final XMLSearchUnit DETAILS_TITLE_PATH = new XMLSearchUnit(
- "/entry/title");
- public static final XMLSearchUnit DETAILS_CHINESE_NAME_PATH = new XMLSearchUnit(
- "/entry/db:attribute");
- public static final XMLSearchUnit DETAILS_RATINGE_PATH = new XMLSearchUnit(
- "/entry/gd:rating");
- public static final XMLSearchUnit DETAILS_RATINGE_RATER_COUNT_PATH = new XMLSearchUnit(
- "/entry/gd:rating");
- public static final XMLSearchUnit DETAILS_LINK_URL_PATH = new XMLSearchUnit(
- "/feed/entry/link");
- static {
- DETAILS_LINK_URL_PATH.addAttributeValidation("rel", "alternate");
- DETAILS_LINK_URL_PATH.setExpectedAttr("href");
- DETAILS_CHINESE_NAME_PATH.addAttributeValidation("lang", "zh_CN");
- DETAILS_CHINESE_NAME_PATH.addAttributeValidation("name", "aka");
- DETAILS_RATINGE_PATH.setExpectedAttr("average");
- DETAILS_RATINGE_RATER_COUNT_PATH.setExpectedAttr("numRaters");
- }
- // a map to store the XMLSearchUnit and value
- private Map<XMLSearchUnit, String> results = new HashMap<XMLSearchUnit, String>();
- // a counter of search unit. if it is 0, then all search unit finds a match
- // value and the result of the XML will be skipped.
- private int count = 0;
- private StringBuilder path = new StringBuilder();
- private static final String pathSeparater = "/";
- private XMLSearchUnit[] searchUnits;
- List<XMLSearchUnit> foundItems = new ArrayList<XMLSearchUnit>();
- /**
- * constructor, accept XML input stream, 0 or more search unit instances.
- *
- * @param input
- * @param expectedPath
- * @return
- */
- public Map<XMLSearchUnit, String> parseResults(InputStream input,
- XMLSearchUnit... expectedPath) {
- for (XMLSearchUnit search : expectedPath) {
- results.put(search, null);
- }
- searchUnits = expectedPath;
- count = expectedPath.length;
- XMLReader XMLReader = null;
- try {
- SAXParserFactory spfactory = SAXParserFactory.newInstance();
- spfactory.setValidating(false);
- SAXParser saxParser = spfactory.newSAXParser();
- XMLReader = saxParser.getXMLReader();
- XMLReader.setContentHandler(this);
- XMLReader.parse(new InputSource(input));
- } catch (Exception e) {
- System.err.println(e);
- System.exit(1);
- }
- return results;
- }
- private void addToPath(String addPath) {
- path.append(pathSeparater).append(addPath.toLowerCase());
- }
- private void popPath() {
- int index = path.lastIndexOf(pathSeparater);
- // String removedPath = path.substring(index);
- path.delete(index, path.length());
- }
- @Override
- public void startElement(String uri, String localName, String qName,
- Attributes attributes) throws SAXException {
- foundItems.clear();
- if (count == 0) {
- return;
- }
- // update path
- addToPath(qName);
- List<XMLSearchUnit> foundAttrItems = null;
- // check if current node matches search units. if it is a node value
- // search, then store it in a member variable named foundItems because
- // the value of the node is known only when reaches the end of the
- // node.but for attribute search, it value is known here. So then are
- // put in a local variable list named foundAttrItems.
- for (XMLSearchUnit unit : searchUnits) {
- if (unit.match(path.toString(), attributes) == true) {
- if (unit.getExpectedAttr() == null) {
- foundItems.add(unit);
- } else {
- if (foundAttrItems == null) {
- foundAttrItems = new ArrayList<XMLSearchUnit>();
- }
- foundAttrItems.add(unit);
- }
- }
- }
- // if no attribute match, return.
- if (foundAttrItems == null) {
- return;
- }
- // fill search unit value using attribute value. update count.
- for (XMLSearchUnit attrUnit : foundAttrItems) {
- String attrValue = attributes.getValue(attrUnit.getExpectedAttr());
- if (results.get(attrUnit) == null) {
- count--;
- }
- results.put(attrUnit, attrValue);
- count--;
- }
- }
- /**
- * if current node matches, the the node value is useful, store it.
- */
- @Override
- public void characters(char[] ch, int start, int length)
- throws SAXException {
- if (count == 0) {
- return;
- }
- if (foundItems.size() == 0) {
- return;
- }
- for (XMLSearchUnit unit : foundItems) {
- String content = new String(ch, start, length);
- if (results.get(unit) == null) {
- count--;
- }
- results.put(unit, content);
- }
- }
- @Override
- public void endElement(String uri, String localName, String qName)
- throws SAXException {
- foundItems.clear();
- if (count == 0) {
- return;
- }
- popPath();
- }
- }