package org.maltparser.core.syntaxgraph.reader;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

import org.maltparser.core.helper.Util;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.SymbolTableHandler;

/**
 * Holds the header information of a TigerXML corpus (corpus id/version, meta
 * data, declared features and edge labels) and renders the opening part of a
 * TigerXML document via {@link #toTigerXML()}.
 *
 * <p>Declared features, edge labels and secondary edge labels are collected by
 * this class, but {@link #toTigerXML()} currently emits an empty
 * {@code <annotation/>} element instead of writing them out.</p>
 *
 * @author Johan Hall
 */
public class TigerXMLHeader {
	public enum Domain {
		T, // feature for terminal nodes
		NT, // feature for nonterminal nodes
		FREC, // feature for both
		EL, // edge label (same as "edgelabel" in TigerXML schema)
		SEL // secondary edge label (same as "secedgelabel" in TigerXML schema)
	}

	/** Marker stored as the description of a value that has no description. */
	private static final String NO_DESCRIPTION = "\t";
	/** Date pattern used when the caller does not supply one. */
	private static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
	/** Corpus id and meta name written when none has been set. */
	private static final String DEFAULT_NAME = "GeneratedByMaltParser";

	private String corpusID;
	private String corpusVersion;
	private String external;
	private String metaName;
	private String metaAuthor;
	private String metaDescription;
	private String metaInDate;
	private String metaFormat;
	private String metaHistory;
	private SymbolTableHandler symbolTableHandler;
	private FeatureEdgeLabel edgeLabels;
	private FeatureEdgeLabel secEdgeLabels;
	// Insertion-ordered so features keep the order in which they were declared.
	private LinkedHashMap<String,FeatureEdgeLabel> features;

	/**
	 * Creates an empty header.
	 *
	 * @param symbolTableHandler the symbol table handler to associate with this
	 *        header; it is stored and exposed through
	 *        {@link #getSymbolTableHandler()}
	 */
	public TigerXMLHeader(SymbolTableHandler symbolTableHandler) {
		setSymbolTableHandler(symbolTableHandler);
		features = new LinkedHashMap<String,FeatureEdgeLabel>();
	}

	/**
	 * Tells whether this header can be written as TigerXML.
	 *
	 * @return always {@code true}
	 */
	public boolean isTigerXMLWritable() {
		return true;
		//return features.size() > 0;
	}

	/**
	 * Declares a feature unless a feature with the same name already exists.
	 *
	 * @param featureName the name of the feature
	 * @param domainName the name of a {@link Domain} constant
	 * @throws IllegalArgumentException if the feature is new and
	 *         {@code domainName} is not the name of a {@link Domain} constant
	 */
	public void addFeature(String featureName, String domainName) {
		if (!features.containsKey(featureName)) {
			features.put(featureName, new FeatureEdgeLabel(featureName, domainName));
		}
	}

	/**
	 * Adds a value without description to an already declared feature.
	 *
	 * @param featureName the name of the feature
	 * @param name the value to add
	 */
	public void addFeatureValue(String featureName, String name) {
		addFeatureValue(featureName, name, NO_DESCRIPTION);
	}

	/**
	 * Adds a value to an already declared feature. Nothing happens when no
	 * feature named {@code featureName} has been declared.
	 *
	 * @param featureName the name of the feature
	 * @param name the value to add
	 * @param desc the description of the value; {@code null} or an empty string
	 *        means that the value has no description
	 */
	public void addFeatureValue(String featureName, String name, String desc) {
		final FeatureEdgeLabel feature = features.get(featureName);
		if (feature != null) {
			feature.addValue(name, normalizeDescription(desc));
		}
	}

	/**
	 * Adds an edge label without description.
	 *
	 * @param name the edge label
	 */
	public void addEdgeLabelValue(String name) {
		addEdgeLabelValue(name, NO_DESCRIPTION);
	}

	/**
	 * Adds an edge label; the edge label container is created on first use.
	 *
	 * @param name the edge label
	 * @param desc the description of the label; {@code null} or an empty string
	 *        means that the label has no description
	 */
	public void addEdgeLabelValue(String name, String desc) {
		if (edgeLabels == null) {
			edgeLabels = new FeatureEdgeLabel("edgelabel", Domain.EL);
		}
		edgeLabels.addValue(name, normalizeDescription(desc));
	}

	/**
	 * Adds a secondary edge label without description.
	 *
	 * @param name the secondary edge label
	 */
	public void addSecEdgeLabelValue(String name) {
		addSecEdgeLabelValue(name, NO_DESCRIPTION);
	}

	/**
	 * Adds a secondary edge label; the container is created on first use.
	 *
	 * @param name the secondary edge label
	 * @param desc the description of the label; {@code null} or an empty string
	 *        means that the label has no description
	 */
	public void addSecEdgeLabelValue(String name, String desc) {
		if (secEdgeLabels == null) {
			secEdgeLabels = new FeatureEdgeLabel("secedgelabel", Domain.SEL);
		}
		secEdgeLabels.addValue(name, normalizeDescription(desc));
	}

	/**
	 * Maps a missing description ({@code null} or empty) to the internal
	 * "no description" marker and returns every other description unchanged.
	 */
	private static String normalizeDescription(String desc) {
		return (desc == null || desc.length() == 0) ? NO_DESCRIPTION : desc;
	}

	public String getCorpusID() {
		return corpusID;
	}

	public void setCorpusID(String corpusID) {
		this.corpusID = corpusID;
	}

	public String getCorpusVersion() {
		return corpusVersion;
	}

	public void setCorpusVersion(String corpusVersion) {
		this.corpusVersion = corpusVersion;
	}

	public void setExternal(String external) {
		this.external = external;
	}

	public String getExternal() {
		return external;
	}

	/**
	 * Sets one of the meta fields by element name. The recognized names are
	 * "name", "author", "description", "date", "format" and "history"; any other
	 * name is ignored.
	 *
	 * @param metaElement the name of the meta element, must not be {@code null}
	 * @param value the value to store
	 */
	public void setMeta(String metaElement, String value) {
		if (metaElement.equals("name")) {
			setMetaName(value);
		} else if (metaElement.equals("author")) {
			setMetaAuthor(value);
		} else if (metaElement.equals("description")) {
			setMetaDescription(value);
		} else if (metaElement.equals("date")) {
			setMetaInDate(value);
		} else if (metaElement.equals("format")) {
			setMetaFormat(value);
		} else if (metaElement.equals("history")) {
			setMetaHistory(value);
		}
	}

	public String getMetaName() {
		return metaName;
	}

	public void setMetaName(String metaName) {
		this.metaName = metaName;
	}

	public String getMetaAuthor() {
		return metaAuthor;
	}

	public void setMetaAuthor(String metaAuthor) {
		this.metaAuthor = metaAuthor;
	}

	public String getMetaDescription() {
		return metaDescription;
	}

	public void setMetaDescription(String metaDescription) {
		this.metaDescription = metaDescription;
	}

	public String getMetaInDate() {
		return metaInDate;
	}

	/**
	 * Returns the current date and time formatted as "yyyy-MM-dd HH:mm:ss".
	 *
	 * @return the formatted current date and time
	 */
	public String getMetaCurrentDate() {
		return getMetaCurrentDate(DEFAULT_DATE_FORMAT);
	}

	/**
	 * Returns the current date and time formatted with the given pattern.
	 *
	 * @param format a {@link SimpleDateFormat} pattern; {@code null} or an empty
	 *        string selects the default pattern "yyyy-MM-dd HH:mm:ss"
	 * @return the formatted current date and time
	 * @throws IllegalArgumentException if {@code format} is not a valid
	 *         {@link SimpleDateFormat} pattern
	 */
	public String getMetaCurrentDate(String format) {
		// The pattern argument used to be ignored; honor it, but keep the default
		// for callers that pass nothing useful.
		final String pattern = (format == null || format.length() == 0) ? DEFAULT_DATE_FORMAT : format;
		return new SimpleDateFormat(pattern).format(new Date());
	}

	public void setMetaInDate(String metaInDate) {
		this.metaInDate = metaInDate;
	}

	public String getMetaFormat() {
		return metaFormat;
	}

	public void setMetaFormat(String metaFormat) {
		this.metaFormat = metaFormat;
	}

	public String getMetaHistory() {
		return metaHistory;
	}

	public void setMetaHistory(String metaHistory) {
		this.metaHistory = metaHistory;
	}

	public SymbolTableHandler getSymbolTableHandler() {
		return symbolTableHandler;
	}

	protected void setSymbolTableHandler(SymbolTableHandler symbolTableHandler) {
		this.symbolTableHandler = symbolTableHandler;
	}

	/**
	 * Renders the opening part of a TigerXML document: the {@code <corpus>}
	 * start tag, the complete {@code <head>} element and the {@code <body>}
	 * start tag. The author is always "MaltParser", the date is the current date
	 * and the description is a fixed notice; the stored author, date,
	 * description, format and history values are not written.
	 *
	 * @return the header as an XML fragment
	 */
	public String toTigerXML() {
		final StringBuilder sb = new StringBuilder();

		// NOTE(review): corpus id and version are written into attributes without
		// escaping, unlike the meta name below - confirm they can never contain
		// XML special characters.
		sb.append("<corpus id=\"");
		sb.append((getCorpusID() == null) ? DEFAULT_NAME : getCorpusID());
		if (getCorpusVersion() != null) {
			sb.append("\" version=\"");
			sb.append(getCorpusVersion());
		}
		sb.append("\">\n");

		sb.append(" <head>\n");
		sb.append(" <meta>\n");
		sb.append(" <name>");
		sb.append((getMetaName() == null) ? DEFAULT_NAME : Util.xmlEscape(getMetaName()));
		sb.append("</name>\n");
		sb.append(" <author>MaltParser</author>\n");
		sb.append(" <date>");
		sb.append(getMetaCurrentDate());
		sb.append("</date>\n");

		sb.append(" <description>");
		sb.append(Util.xmlEscape("Unfortunately, you have to add the annotations header data yourself. Maybe in later releases this will be fixed. "));
		sb.append("</description>\n");

//		if (getMetaDescription() != null) {
//			sb.append(" <description>");
//			sb.append(Util.xmlEscape(getMetaDescription()));
//			sb.append("</description>\n");
//		}
//		if (getMetaFormat() != null) {
//			sb.append(" <format>");
//			sb.append(Util.xmlEscape(getMetaFormat()));
//			sb.append("</format>\n");
//		}
//		if (getMetaHistory() != null) {
//			sb.append(" <history>");
//			sb.append(Util.xmlEscape(getMetaHistory()));
//			sb.append("</history>\n");
//		}
		sb.append(" </meta>\n");
		sb.append(" <annotation/>\n");
//		sb.append(" <annotation>\n");
//		for (String name : features.keySet()) {
//			sb.append(features.get(name).toTigerXML());
//		}
//		if (edgeLabels != null) {
//			sb.append(edgeLabels.toTigerXML());
//		}
//		if (secEdgeLabels != null) {
//			sb.append(secEdgeLabels.toTigerXML());
//		}
//		sb.append(" </annotation>\n");
		sb.append(" </head>\n");
		sb.append(" <body>\n");
		return sb.toString();
	}

	/**
	 * @return the same text as {@link #toTigerXML()}
	 */
	@Override
	public String toString() {
		return toTigerXML();
	}

	/**
	 * A named feature, edge label set or secondary edge label set together with
	 * its values, kept sorted by value name.
	 */
	protected class FeatureEdgeLabel {
		private String name;
		private Domain domain;
		// values: key mapped to \t (tab) indicates that the description part is missing
		private SortedMap<String, String> values;
		private SymbolTable table;

		/**
		 * @param name the name of the feature
		 * @param domainName the name of a {@link Domain} constant
		 * @throws IllegalArgumentException if {@code domainName} is not the name
		 *         of a {@link Domain} constant
		 */
		public FeatureEdgeLabel(String name, String domainName) {
			setName(name);
			setDomain(domainName);
		}

		/**
		 * @param name the name of the feature
		 * @param domain the domain of the feature
		 */
		public FeatureEdgeLabel(String name, Domain domain) {
			setName(name);
			setDomain(domain);
		}

		public String getName() {
			return name;
		}

		public void setName(String name) {
			this.name = name;
		}

		/**
		 * Sets the domain from the name of a {@link Domain} constant.
		 *
		 * @param domainName the name of a {@link Domain} constant
		 * @throws IllegalArgumentException if no constant has that name
		 */
		public void setDomain(String domainName) {
			domain = Domain.valueOf(domainName);
		}

		public void setDomain(Domain domain) {
			this.domain = domain;
		}

		public String getDomainName() {
			return domain.toString();
		}

		public Domain getDomain() {
			return domain;
		}

		public SymbolTable getTable() {
			return table;
		}

		public void setTable(SymbolTable table) {
			this.table = table;
		}

		/**
		 * Adds a value without description.
		 *
		 * @param name the value to add
		 */
		public void addValue(String name) {
			addValue(name, NO_DESCRIPTION);
		}

		/**
		 * Adds a value, replacing the description of an existing value with the
		 * same name. The value map is created on first use.
		 *
		 * @param name the value to add
		 * @param desc the description of the value; a single tab marks a missing
		 *        description
		 */
		public void addValue(String name, String desc) {
			if (values == null) {
				values = new TreeMap<String,String>();
			}
			values.put(name, desc);
		}

		/**
		 * Renders this declaration as a {@code <feature>}, {@code <edgelabel>} or
		 * {@code <secedgelabel>} element, depending on the domain. Without values
		 * the element is written in self-closing form.
		 *
		 * @return the declaration as an XML fragment
		 */
		public String toTigerXML() {
			final StringBuilder sb = new StringBuilder();
			final boolean hasValues = values != null;
			final boolean isFeature = domain == Domain.T || domain == Domain.FREC || domain == Domain.NT;

			// Opening (or self-closing) tag.
			if (isFeature) {
				sb.append(" <feature domain=\"");
				sb.append(getDomainName());
				sb.append("\" name=\"");
				sb.append(getName());
				sb.append(hasValues ? "\">\n" : "\" />\n");
			} else if (domain == Domain.EL) {
				sb.append(hasValues ? " <edgelabel>\n" : " <edgelabel />\n");
			} else if (domain == Domain.SEL) {
				sb.append(hasValues ? " <secedgelabel>\n" : " <secedgelabel />\n");
			}

			if (hasValues) {
				for (Map.Entry<String, String> entry : values.entrySet()) {
					final String description = entry.getValue();
					sb.append(" <value name=\"");
					sb.append(entry.getKey());
					// A null description can only come from a direct addValue(name, null)
					// call; treat it like the tab marker instead of failing here.
					if (description == null || NO_DESCRIPTION.equals(description)) {
						sb.append("\" />\n");
					} else {
						sb.append("\">");
						sb.append(Util.xmlEscape(description));
						sb.append("</value>\n");
					}
				}

				// Closing tag, only needed when the element was not self-closed.
				if (isFeature) {
					sb.append(" </feature>\n");
				} else if (domain == Domain.EL) {
					sb.append(" </edgelabel>\n");
				} else if (domain == Domain.SEL) {
					sb.append(" </secedgelabel>\n");
				}
			}
			return sb.toString();
		}

		/**
		 * @return the same text as {@link #toTigerXML()}
		 */
		@Override
		public String toString() {
			return toTigerXML();
		}
	}
}