001 package org.maltparser.core.symbol.trie; 002 003 import java.io.BufferedReader; 004 import java.io.BufferedWriter; 005 import java.io.IOException; 006 import java.util.Set; 007 import java.util.SortedMap; 008 import java.util.TreeMap; 009 010 import org.apache.log4j.Logger; 011 import org.maltparser.core.exception.MaltChainedException; 012 import org.maltparser.core.io.dataformat.ColumnDescription; 013 import org.maltparser.core.symbol.SymbolException; 014 import org.maltparser.core.symbol.SymbolTable; 015 import org.maltparser.core.symbol.nullvalue.InputNullValues; 016 import org.maltparser.core.symbol.nullvalue.NullValues; 017 import org.maltparser.core.symbol.nullvalue.OutputNullValues; 018 import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId; 019 /** 020 021 @author Johan Hall 022 @since 1.0 023 */ 024 public class TrieSymbolTable implements SymbolTable { 025 private final String name; 026 private final Trie trie; 027 private final SortedMap<Integer, TrieNode> codeTable; 028 private int columnCategory; 029 private NullValues nullValues; 030 private int valueCounter; 031 /** Cache the hash code for the symbol table */ 032 private int cachedHash; 033 034 public TrieSymbolTable(String name, Trie trie, int columnCategory, String nullValueStrategy) throws MaltChainedException { 035 this.name = name; 036 this.trie = trie; 037 this.columnCategory = columnCategory; 038 codeTable = new TreeMap<Integer, TrieNode>(); 039 if (columnCategory == ColumnDescription.INPUT) { 040 nullValues = new InputNullValues(nullValueStrategy, this); 041 } else if (columnCategory == ColumnDescription.DEPENDENCY_EDGE_LABEL) { 042 nullValues = new OutputNullValues(nullValueStrategy, this, null); 043 } else { 044 nullValues = new InputNullValues(nullValueStrategy, this); 045 } 046 valueCounter = nullValues.getNextCode(); 047 } 048 049 public TrieSymbolTable(String name, Trie trie, int columnCategory, String nullValueStrategy, String rootLabel) throws MaltChainedException { 050 this.name = name; 051 this.trie = trie; 052 this.columnCategory = columnCategory; 053 codeTable = new TreeMap<Integer, TrieNode>(); 054 if (columnCategory == ColumnDescription.INPUT) { 055 nullValues = new InputNullValues(nullValueStrategy, this); 056 } else if (columnCategory == ColumnDescription.DEPENDENCY_EDGE_LABEL) { 057 nullValues = new OutputNullValues(nullValueStrategy, this, rootLabel); 058 } 059 valueCounter = nullValues.getNextCode(); 060 } 061 062 public TrieSymbolTable(String name, Trie trie) { 063 this.name = name; 064 this.trie = trie; 065 codeTable = new TreeMap<Integer, TrieNode>(); 066 nullValues = new InputNullValues("one", this); 067 //nullValues = null; 068 valueCounter = 1; 069 } 070 071 public int addSymbol(String symbol) throws MaltChainedException { 072 if (nullValues == null || !nullValues.isNullValue(symbol)) { 073 final TrieNode node = trie.addValue(symbol, this, -1); 074 final int code = node.getEntry(this).getCode(); 075 if (!codeTable.containsKey(code)) { 076 codeTable.put(code, node); 077 } 078 return code; 079 } else { 080 return nullValues.symbolToCode(symbol); 081 } 082 } 083 084 public int addSymbol(StringBuilder symbol) throws MaltChainedException { 085 if (nullValues == null || !nullValues.isNullValue(symbol)) { 086 final TrieNode node = trie.addValue(symbol, this, -1); 087 final int code = node.getEntry(this).getCode(); 088 if (!codeTable.containsKey(code)) { 089 codeTable.put(code, node); 090 } 091 return code; 092 } else { 093 return nullValues.symbolToCode(symbol); 094 } 095 } 096 097 public String getSymbolCodeToString(int code) throws MaltChainedException { 098 if (code >= 0) { 099 if (nullValues == null || !nullValues.isNullValue(code)) { 100 if (trie == null) { 101 throw new SymbolException("The symbol table is corrupt. "); 102 } 103 return trie.getValue(codeTable.get(code), this); 104 } else { 105 return nullValues.codeToSymbol(code); 106 } 107 } else { 108 throw new SymbolException("The symbol code '"+code+"' cannot be found in the symbol table. "); 109 } 110 } 111 112 public int getSymbolStringToCode(String symbol) throws MaltChainedException { 113 if (symbol != null) { 114 if (nullValues == null || !nullValues.isNullValue(symbol)) { 115 if (trie == null) { 116 throw new SymbolException("The symbol table is corrupt. "); 117 } 118 final TrieEntry entry = trie.getEntry(symbol, this); 119 if (entry == null) { 120 throw new SymbolException("Could not find the symbol '"+symbol+"' in the symbol table. "); 121 } 122 return entry.getCode(); 123 } else { 124 return nullValues.symbolToCode(symbol); 125 } 126 } else { 127 throw new SymbolException("The symbol code '"+symbol+"' cannot be found in the symbol table. "); 128 } 129 } 130 131 public String getNullValueStrategy() { 132 if (nullValues == null) { 133 return null; 134 } 135 return nullValues.getNullValueStrategy(); 136 } 137 138 139 public int getColumnCategory() { 140 return columnCategory; 141 } 142 143 public boolean getKnown(int code) { 144 if (code >= 0) { 145 if (nullValues == null || !nullValues.isNullValue(code)) { 146 return codeTable.get(code).getEntry(this).isKnown(); 147 } else { 148 return true; 149 } 150 } else { 151 return false; 152 } 153 } 154 155 public boolean getKnown(String symbol) { 156 if (nullValues == null || !nullValues.isNullValue(symbol)) { 157 final TrieEntry entry = trie.getEntry(symbol, this); 158 if (entry == null) { 159 return false; 160 } 161 return entry.isKnown(); 162 } else { 163 return true; 164 } 165 } 166 167 public void makeKnown(int code) { 168 if (code >= 0) { 169 if (nullValues == null || !nullValues.isNullValue(code)) { 170 codeTable.get(code).getEntry(this).setKnown(true); 171 } 172 } 173 } 174 175 public void printSymbolTable(Logger logger) throws MaltChainedException { 176 for (Integer code : codeTable.keySet()) { 177 logger.info(code+"\t"+trie.getValue(codeTable.get(code), this)+"\n"); 178 } 179 } 180 181 public void saveHeader(BufferedWriter out) throws MaltChainedException { 182 try { 183 out.append('\t'); 184 out.append(getName()); 185 out.append('\t'); 186 out.append(Integer.toString(getColumnCategory())); 187 out.append('\t'); 188 out.append(getNullValueStrategy()); 189 out.append('\t'); 190 if (nullValues instanceof OutputNullValues && ((OutputNullValues)nullValues).getRootLabel() != null) { 191 out.append(((OutputNullValues)nullValues).getRootLabel()); 192 } else { 193 out.append("#DUMMY#"); 194 } 195 out.append('\n'); 196 } catch (IOException e) { 197 throw new SymbolException("Could not save the symbol table. ", e); 198 } 199 } 200 201 public int size() { 202 return codeTable.size(); 203 } 204 205 public void save(BufferedWriter out) throws MaltChainedException { 206 try { 207 out.write(name); 208 out.write('\n'); 209 for (Integer code : codeTable.keySet()) { 210 out.write(code+""); 211 out.write('\t'); 212 out.write(trie.getValue(codeTable.get(code), this)); 213 out.write('\n'); 214 } 215 out.write('\n'); 216 } catch (IOException e) { 217 throw new SymbolException("Could not save the symbol table. ", e); 218 } 219 } 220 221 public void load(BufferedReader in) throws MaltChainedException { 222 int max = 0; 223 int index = 0; 224 String fileLine; 225 try { 226 while ((fileLine = in.readLine()) != null) { 227 if (fileLine.length() == 0 || (index = fileLine.indexOf('\t')) == -1) { 228 setValueCounter(max+1); 229 break; 230 } 231 int code = Integer.parseInt(fileLine.substring(0,index)); 232 final String str = fileLine.substring(index+1); 233 final TrieNode node = trie.addValue(str, this, code); 234 codeTable.put(node.getEntry(this).getCode(), node); 235 if (max < code) { 236 max = code; 237 } 238 } 239 } catch (NumberFormatException e) { 240 throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the first column. ", e); 241 } catch (IOException e) { 242 throw new SymbolException("Could not load the symbol table. ", e); 243 } 244 } 245 246 public String getName() { 247 return name; 248 } 249 250 public int getValueCounter() { 251 return valueCounter; 252 } 253 254 private void setValueCounter(int valueCounter) { 255 this.valueCounter = valueCounter; 256 } 257 258 protected void updateValueCounter(int code) { 259 if (code > valueCounter) { 260 valueCounter = code; 261 } 262 } 263 264 protected int increaseValueCounter() { 265 return valueCounter++; 266 } 267 268 public int getNullValueCode(NullValueId nullValueIdentifier) throws MaltChainedException { 269 if (nullValues == null) { 270 throw new SymbolException("The symbol table does not have any null-values. "); 271 } 272 return nullValues.nullvalueToCode(nullValueIdentifier); 273 } 274 275 public String getNullValueSymbol(NullValueId nullValueIdentifier) throws MaltChainedException { 276 if (nullValues == null) { 277 throw new SymbolException("The symbol table does not have any null-values. "); 278 } 279 return nullValues.nullvalueToSymbol(nullValueIdentifier); 280 } 281 282 public boolean isNullValue(String symbol) throws MaltChainedException { 283 if (nullValues != null) { 284 return nullValues.isNullValue(symbol); 285 } 286 return false; 287 } 288 289 public boolean isNullValue(int code) throws MaltChainedException { 290 if (nullValues != null) { 291 return nullValues.isNullValue(code); 292 } 293 return false; 294 } 295 296 public void copy(SymbolTable fromTable) throws MaltChainedException { 297 final SortedMap<Integer, TrieNode> fromCodeTable = ((TrieSymbolTable)fromTable).getCodeTable(); 298 int max = getValueCounter()-1; 299 for (Integer code : fromCodeTable.keySet()) { 300 final String str = trie.getValue(fromCodeTable.get(code), this); 301 final TrieNode node = trie.addValue(str, this, code); 302 codeTable.put(node.getEntry(this).getCode(), node); 303 if (max < code) { 304 max = code; 305 } 306 } 307 setValueCounter(max+1); 308 } 309 310 public SortedMap<Integer, TrieNode> getCodeTable() { 311 return codeTable; 312 } 313 314 public Set<Integer> getCodes() { 315 return codeTable.keySet(); 316 } 317 318 protected Trie getTrie() { 319 return trie; 320 } 321 322 public boolean equals(Object obj) { 323 if (this == obj) 324 return true; 325 if (obj == null) 326 return false; 327 if (getClass() != obj.getClass()) 328 return false; 329 return ((name == null) ? ((TrieSymbolTable)obj).name == null : name.equals(((TrieSymbolTable)obj).name)); 330 } 331 332 public int hashCode() { 333 if (cachedHash == 0) { 334 cachedHash = 31 * 7 + (null == name ? 0 : name.hashCode()); 335 } 336 return cachedHash; 337 } 338 339 public String toString() { 340 final StringBuilder sb = new StringBuilder(); 341 sb.append(name); 342 sb.append(" "); 343 sb.append(valueCounter); 344 return sb.toString(); 345 } 346 }