001 package org.maltparser.core.feature.spec.reader; 002 003 import java.io.IOException; 004 import java.net.URL; 005 006 import javax.xml.parsers.DocumentBuilder; 007 import javax.xml.parsers.DocumentBuilderFactory; 008 import javax.xml.parsers.ParserConfigurationException; 009 010 import org.maltparser.core.exception.MaltChainedException; 011 import org.maltparser.core.feature.FeatureException; 012 import org.maltparser.core.feature.spec.SpecificationModels; 013 import org.w3c.dom.Element; 014 import org.w3c.dom.NodeList; 015 import org.xml.sax.SAXException; 016 /** 017 * 018 * 019 * @author Johan Hall 020 */ 021 public class XmlReader implements FeatureSpecReader{ 022 023 public XmlReader() { } 024 025 public void load(URL specModelURL, SpecificationModels featureSpecModels) throws MaltChainedException { 026 try { 027 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 028 DocumentBuilder db = dbf.newDocumentBuilder(); 029 Element root = null; 030 031 root = db.parse(specModelURL.openStream()).getDocumentElement(); 032 033 if (root == null) { 034 throw new FeatureException("The feature specification file '"+specModelURL.getFile()+"' cannot be found. "); 035 } 036 037 readFeatureModels(root, featureSpecModels); 038 } catch (IOException e) { 039 throw new FeatureException("The feature specification file '"+specModelURL.getFile()+"' cannot be found. ", e); 040 } catch (ParserConfigurationException e) { 041 throw new FeatureException("Problem parsing the file "+specModelURL.getFile()+". ", e); 042 } catch (SAXException e) { 043 throw new FeatureException("Problem parsing the file "+specModelURL.getFile()+". ", e); 044 } 045 } 046 047 private void readFeatureModels(Element featuremodels, SpecificationModels featureSpecModels) throws MaltChainedException { 048 NodeList featureModelList = featuremodels.getElementsByTagName("featuremodel"); 049 for (int i = 0; i < featureModelList.getLength(); i++) { 050 readFeatureModel((Element)featureModelList.item(i), featureSpecModels); 051 } 052 } 053 054 private void readFeatureModel(Element featuremodel, SpecificationModels featureSpecModels) throws MaltChainedException { 055 int specModelIndex = featureSpecModels.getNextIndex(); 056 NodeList submodelList = featuremodel.getElementsByTagName("submodel"); 057 if (submodelList.getLength() == 0) { 058 NodeList featureList = featuremodel.getElementsByTagName("feature"); 059 for (int i = 0; i < featureList.getLength(); i++) { 060 String featureText = ((Element)featureList.item(i)).getTextContent().trim(); 061 if (featureText.length() > 1) { 062 featureSpecModels.add(specModelIndex, featureText); 063 } 064 } 065 } else { 066 for (int i = 0; i < submodelList.getLength(); i++) { 067 String name = ((Element)submodelList.item(i)).getAttribute("name"); 068 NodeList featureList = ((Element)submodelList.item(i)).getElementsByTagName("feature"); 069 for (int j = 0; j < featureList.getLength(); j++) { 070 String featureText = ((Element)featureList.item(j)).getTextContent().trim(); 071 if (featureText.length() > 1) { 072 featureSpecModels.add(specModelIndex, name, featureText); 073 } 074 } 075 } 076 } 077 } 078 }