001 package org.maltparser.core.io.dataformat; 002 003 import java.net.URL; 004 import java.util.HashSet; 005 import java.util.SortedMap; 006 import java.util.TreeMap; 007 008 import javax.xml.parsers.DocumentBuilder; 009 import javax.xml.parsers.DocumentBuilderFactory; 010 import javax.xml.parsers.ParserConfigurationException; 011 012 import org.maltparser.core.exception.MaltChainedException; 013 import org.maltparser.core.helper.SystemLogger; 014 import org.maltparser.core.helper.Util; 015 import org.maltparser.core.symbol.SymbolTableHandler; 016 import org.w3c.dom.Element; 017 import org.w3c.dom.NodeList; 018 import org.xml.sax.SAXException; 019 020 /** 021 * 022 * 023 * @author Johan Hall 024 * @since 1.0 025 **/ 026 public class DataFormatSpecification { 027 public enum DataStructure { 028 DEPENDENCY, // Dependency structure 029 PHRASE, // Phrase structure 030 }; 031 private int entryPositionCounter; 032 private String dataFormatName; 033 private DataStructure dataStructure; 034 private final SortedMap<String, DataFormatEntry> entries; 035 private final HashSet<Dependency> dependencies; 036 // private final HashSet<SyntaxGraphReader> supportedReaders; 037 // private final HashSet<SyntaxGraphWriter> supportedWriters; 038 039 public DataFormatSpecification() { 040 entries = new TreeMap<String, DataFormatEntry>(); 041 entryPositionCounter = 0; 042 dependencies = new HashSet<Dependency>(); 043 // supportedReaders = new HashSet<SyntaxGraphReader>(); 044 // supportedWriters = new HashSet<SyntaxGraphWriter>(); 045 } 046 047 public DataFormatInstance createDataFormatInstance(SymbolTableHandler symbolTables, String nullValueStrategy, String rootLabel) throws MaltChainedException { 048 return new DataFormatInstance(entries, symbolTables, nullValueStrategy, rootLabel, this); 049 050 } 051 052 public void parseDataFormatXMLfile(String fileName) throws MaltChainedException { 053 URL url = Util.findURL(fileName); 054 if (url == null) { 055 throw new DataFormatException("The data format specifcation file '"+fileName+"'cannot be found. "); 056 } 057 parseDataFormatXMLfile(url); 058 } 059 060 public HashSet<Dependency> getDependencies() { 061 return dependencies; 062 } 063 064 public void parseDataFormatXMLfile(URL url) throws MaltChainedException { 065 if (url == null) { 066 throw new DataFormatException("The data format specifcation file cannot be found. "); 067 } 068 069 if (SystemLogger.logger().isInfoEnabled()) { 070 int index = url.toString().indexOf('!'); 071 if (index == -1) { 072 SystemLogger.logger().debug(" Data Format : "+url.toString()+"\n"); 073 } else { 074 SystemLogger.logger().debug(" Data Format : "+url.toString().substring(index+1)+"\n"); 075 } 076 } 077 078 try { 079 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 080 DocumentBuilder db = dbf.newDocumentBuilder(); 081 082 Element root = db.parse(url.openStream()).getDocumentElement(); 083 if (root.getNodeName().equals("dataformat")) { 084 dataFormatName = root.getAttribute("name"); 085 if (root.getAttribute("datastructure").length() > 0) { 086 dataStructure = DataStructure.valueOf(root.getAttribute("datastructure").toUpperCase()); 087 } else { 088 dataStructure = DataStructure.DEPENDENCY; 089 } 090 } else { 091 throw new DataFormatException("Data format specification file must contain one 'dataformat' element. "); 092 } 093 NodeList cols = root.getElementsByTagName("column"); 094 Element col = null; 095 for (int i = 0, n = cols.getLength(); i < n; i++) { 096 col = (Element)cols.item(i); 097 DataFormatEntry entry = new DataFormatEntry(i, col.getAttribute("name"), col.getAttribute("category"),col.getAttribute("type"), col.getAttribute("default")); 098 entries.put(entry.getDataFormatEntryName(), entry); 099 } 100 NodeList deps = root.getElementsByTagName("dependencies"); 101 if (deps.getLength() > 0) { 102 NodeList dep = ((Element)deps.item(0)).getElementsByTagName("dependency"); 103 for (int i = 0, n = dep.getLength(); i < n; i++) { 104 Element e = (Element)dep.item(i); 105 dependencies.add(new Dependency(e.getAttribute("name"), e.getAttribute("url"), e.getAttribute("map"), e.getAttribute("urlmap"))); 106 } 107 } 108 } catch (java.io.IOException e) { 109 throw new DataFormatException("Cannot find the file "+url.toString()+". ", e); 110 } catch (ParserConfigurationException e) { 111 throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e); 112 } catch (SAXException e) { 113 throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e); 114 } 115 } 116 117 public void addEntry(String dataFormatEntryName, String category, String type, String defaultOutput) { 118 DataFormatEntry entry = new DataFormatEntry(entryPositionCounter++, dataFormatEntryName, category, type, defaultOutput); 119 entries.put(entry.getDataFormatEntryName(), entry); 120 } 121 122 public DataFormatEntry getEntry(String dataFormatEntryName) { 123 return entries.get(dataFormatEntryName); 124 } 125 126 public String getDataFormatName() { 127 return dataFormatName; 128 } 129 130 public DataStructure getDataStructure() { 131 return dataStructure; 132 } 133 134 public String toString() { 135 final StringBuilder sb = new StringBuilder(); 136 sb.append("Data format specification: "); 137 sb.append(dataFormatName); 138 sb.append('\n'); 139 for (DataFormatEntry dfe : entries.values()) { 140 sb.append(dfe); 141 sb.append('\n'); 142 } 143 return sb.toString(); 144 } 145 146 public class Dependency { 147 protected String dependentOn; 148 protected String urlString; 149 protected String map; 150 protected String mapUrl; 151 152 public Dependency(String dependentOn, String urlString, String map, String mapUrl) { 153 setDependentOn(dependentOn); 154 setUrlString(urlString); 155 setMap(map); 156 setMapUrl(mapUrl); 157 } 158 159 public String getDependentOn() { 160 return dependentOn; 161 } 162 protected void setDependentOn(String dependentOn) { 163 this.dependentOn = dependentOn; 164 } 165 166 public String getUrlString() { 167 return urlString; 168 } 169 170 public void setUrlString(String urlString) { 171 this.urlString = urlString; 172 } 173 174 public String getMap() { 175 return map; 176 } 177 protected void setMap(String map) { 178 this.map = map; 179 } 180 181 public String getMapUrl() { 182 return mapUrl; 183 } 184 185 public void setMapUrl(String mapUrl) { 186 this.mapUrl = mapUrl; 187 } 188 } 189 }