package org.maltparser.core.syntaxgraph.feature;

import java.util.LinkedHashMap;
import java.util.Map;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.feature.function.AddressFunction;
import org.maltparser.core.feature.function.FeatureFunction;
import org.maltparser.core.feature.value.AddressValue;
import org.maltparser.core.feature.value.FeatureValue;
import org.maltparser.core.feature.value.SingleFeatureValue;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId;
import org.maltparser.core.syntaxgraph.SyntaxGraphException;
import org.maltparser.core.syntaxgraph.node.DependencyNode;

/**
 * Feature function that reports the bucketed distance between two addressed nodes.
 *
 * <p>The distance is the absolute difference of the two node indices. It is mapped onto one of
 * the thresholds given in the normalization argument (for example <code>0|1|2|5</code>): the
 * feature value is the symbol <code>"&gt;=t"</code> where <code>t</code> is the largest threshold
 * that does not exceed the distance. If either address is missing, or either node is a root
 * node, the corresponding null value of the symbol table is used instead.
 */
public class DistanceFeature implements FeatureFunction {
    protected AddressFunction addressFunction1;
    protected AddressFunction addressFunction2;
    protected SymbolTableHandler tableHandler;
    protected SymbolTable table;
    protected SingleFeatureValue featureValue;
    /** The raw third argument of {@link #initialize(Object[])}, e.g. <code>0|1|2|5</code>. */
    protected String normalizationString;
    /** Threshold to symbol (<code>"&gt;=threshold"</code>), kept in ascending insertion order. */
    protected Map<Integer,String> normalization;

    /**
     * Creates an uninitialized distance feature function.
     *
     * @param tableHandler the symbol table handler used to create the "DISTANCE" symbol table
     * @throws MaltChainedException
     */
    public DistanceFeature(SymbolTableHandler tableHandler) throws MaltChainedException {
        super();
        featureValue = new SingleFeatureValue(this);
        setTableHandler(tableHandler);
        normalization = new LinkedHashMap<Integer,String>();
    }

    /**
     * Initialize the distance feature function.
     *
     * <p>The normalization argument is parsed and validated completely before the symbol table
     * or the threshold map is touched, so a rejected argument leaves no partial state behind.
     * Calling this method again replaces the thresholds of the previous call.
     *
     * @param arguments an array of arguments with the type returned by getParameterTypes():
     *        two address functions followed by the normalization string
     * @throws MaltChainedException if the number or type of the arguments is wrong, or if the
     *         normalization string is not a strictly increasing list of integers that are
     *         separated with | and start with 0
     */
    public void initialize(Object[] arguments) throws MaltChainedException {
        if (arguments == null || arguments.length != 3) {
            throw new SyntaxGraphException("Could not initialize DistanceFeature: number of arguments is not correct. ");
        }
        // Checks that the first two arguments are address functions
        if (!(arguments[0] instanceof AddressFunction)) {
            throw new SyntaxGraphException("Could not initialize DistanceFeature: the first argument is not an address function. ");
        }
        if (!(arguments[1] instanceof AddressFunction)) {
            throw new SyntaxGraphException("Could not initialize DistanceFeature: the second argument is not an address function. ");
        }
        if (!(arguments[2] instanceof java.lang.String)) {
            throw new SyntaxGraphException("Could not initialize DistanceFeature: the third argument is not a string. ");
        }
        setAddressFunction1((AddressFunction)arguments[0]);
        setAddressFunction2((AddressFunction)arguments[1]);
        // Set before parsing because the error messages below embed toString()
        normalizationString = (String)arguments[2];

        final int[] thresholds = parseThresholds(normalizationString);

        // Creates a symbol table called "DISTANCE" using one null value
        setSymbolTable(tableHandler.addSymbolTable("DISTANCE", ColumnDescription.INPUT, "one"));

        // Drop thresholds of an earlier initialization; stale entries would corrupt the lookup in update()
        normalization.clear();
        for (int threshold : thresholds) {
            final String symbol = ">=" + threshold;
            normalization.put(threshold, symbol);
            table.addSymbol(symbol);
        }
    }

    /**
     * Parses the normalization string into a strictly increasing array of thresholds.
     *
     * @param spec integers separated with |; the first element must be the literal 0
     * @return the parsed thresholds in the order given
     * @throws MaltChainedException if the first element is not 0, an element is not an integer,
     *         or the elements are not strictly increasing
     */
    private int[] parseThresholds(String spec) throws MaltChainedException {
        final String[] items = spec.split("\\|");

        if (items.length <= 0 || !items[0].equals("0")) {
            throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a list of integer values separated with | and the first element must be 0.");
        }
        final int[] thresholds = new int[items.length];
        for (int i = 0; i < items.length; i++) {
            try {
                thresholds[i] = Integer.parseInt(items[i]);
            } catch (NumberFormatException e) {
                throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |", e);
            }
            // Each element must be strictly greater than its predecessor
            if (i > 0 && thresholds[i - 1] >= thresholds[i]) {
                throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |");
            }
        }
        return thresholds;
    }

    /**
     * Returns an array of class types used by the feature extraction system to invoke initialize with
     * correct arguments.
     *
     * @return an array of class types
     */
    public Class<?>[] getParameterTypes() {
        Class<?>[] paramTypes = { AddressFunction.class, AddressFunction.class, String.class };
        return paramTypes;
    }

    /**
     * Returns the string representation of the integer <code>code</code> according to the distance feature function.
     *
     * @param code the integer representation of the symbol
     * @return the string representation of the integer <code>code</code> according to the distance feature function.
     * @throws MaltChainedException
     */
    public String getSymbol(int code) throws MaltChainedException {
        return table.getSymbolCodeToString(code);
    }

    /**
     * Returns the integer representation of the string <code>symbol</code> according to the distance feature function.
     *
     * @param symbol the string representation of the symbol
     * @return the integer representation of the string <code>symbol</code> according to the distance feature function.
     * @throws MaltChainedException
     */
    public int getCode(String symbol) throws MaltChainedException {
        return table.getSymbolStringToCode(symbol);
    }

    /**
     * Cause the distance feature function to update the cardinality of the feature value
     * from the value counter of the symbol table.
     */
    public void updateCardinality() {
        featureValue.setCardinality(table.getValueCounter());
    }

    /**
     * Cause the feature function to update the feature value.
     *
     * <p>Sets a NO_NODE null value if either address is missing, a ROOT_NODE null value if
     * either node is a root node, and otherwise the threshold symbol for the absolute
     * difference of the two node indices.
     *
     * @throws MaltChainedException
     */
    public void update() throws MaltChainedException {
        // Retrieve the address values
        final AddressValue arg1 = addressFunction1.getAddressValue();
        final AddressValue arg2 = addressFunction2.getAddressValue();
        final Object address1 = arg1.getAddress();
        final Object address2 = arg2.getAddress();

        // if arg1 or arg2 is null, then set a NO_NODE null value as feature value
        if (address1 == null || address2 == null) {
            setNullFeatureValue(NullValueId.NO_NODE);
            return;
        }

        // A plain cast is used on purpose: the original author noted that checking with
        // arg1.getAddressClass().asSubclass(DependencyNode.class) takes a lot of time.
        final DependencyNode node1 = (DependencyNode)address1;
        final DependencyNode node2 = (DependencyNode)address2;

        // if node1 or node2 is a root node, set a ROOT_NODE null value as feature value
        if (node1.isRoot() || node2.isRoot()) {
            setNullFeatureValue(NullValueId.ROOT_NODE);
            return;
        }

        // Calculates the distance and maps it onto its threshold symbol
        final int distance = Math.abs(node1.getIndex() - node2.getIndex());
        final String symbol = thresholdSymbol(distance);
        featureValue.setCode(table.getSymbolStringToCode(symbol));
        featureValue.setSymbol(symbol);

        // Tells the feature value that the feature is known and is not a null value
        featureValue.setKnown(true);
        featureValue.setNullValue(false);
    }

    /**
     * Stores the given null value of the symbol table as the current (known) feature value.
     *
     * @param nullValueId the kind of null value to store
     * @throws MaltChainedException
     */
    private void setNullFeatureValue(NullValueId nullValueId) throws MaltChainedException {
        featureValue.setCode(table.getNullValueCode(nullValueId));
        featureValue.setSymbol(table.getNullValueSymbol(nullValueId));
        featureValue.setKnown(true);
        featureValue.setNullValue(true);
    }

    /**
     * Returns the symbol of the largest threshold that does not exceed <code>distance</code>.
     *
     * @param distance the distance to normalize
     * @return the symbol registered for the matching threshold; the symbol of the last
     *         threshold if the distance is at or beyond it
     */
    private String thresholdSymbol(int distance) {
        // Thresholds are iterated in ascending order; [lower, upper) is the current interval
        int lower = -1;
        for (Integer upper : normalization.keySet()) {
            if (distance >= lower && distance < upper) {
                return normalization.get(lower);
            }
            lower = upper;
        }
        // No interval matched: fall back to the last threshold seen
        return normalization.get(lower);
    }

    /**
     * Returns the feature value
     *
     * @return the feature value
     */
    public FeatureValue getFeatureValue() {
        return featureValue;
    }

    /**
     * Returns the symbol table used by the distance feature function
     *
     * @return the symbol table used by the distance feature function
     */
    public SymbolTable getSymbolTable() {
        return table;
    }

    /**
     * Returns the address function 1 (argument 1)
     *
     * @return the address function 1 (argument 1)
     */
    public AddressFunction getAddressFunction1() {
        return addressFunction1;
    }

    /**
     * Sets the address function 1 (argument 1)
     *
     * @param addressFunction1 an address function 1 (argument 1)
     */
    public void setAddressFunction1(AddressFunction addressFunction1) {
        this.addressFunction1 = addressFunction1;
    }

    /**
     * Returns the address function 2 (argument 2)
     *
     * @return the address function 2 (argument 2)
     */
    public AddressFunction getAddressFunction2() {
        return addressFunction2;
    }

    /**
     * Sets the address function 2 (argument 2)
     *
     * @param addressFunction2 an address function 2 (argument 2)
     */
    public void setAddressFunction2(AddressFunction addressFunction2) {
        this.addressFunction2 = addressFunction2;
    }

    /**
     * Returns symbol table handler
     *
     * @return a symbol table handler
     */
    public SymbolTableHandler getTableHandler() {
        return tableHandler;
    }

    /**
     * Sets the symbol table handler
     *
     * @param tableHandler a symbol table handler
     */
    public void setTableHandler(SymbolTableHandler tableHandler) {
        this.tableHandler = tableHandler;
    }

    /**
     * Sets the symbol table used by the distance feature function
     *
     * @param table the symbol table to use
     */
    public void setSymbolTable(SymbolTable table) {
        this.table = table;
    }

    /**
     * Two distance feature functions are equal if they are of the same class and have the
     * same string representation.
     *
     * @param obj the object to compare with
     * @return true if <code>obj</code> is equal to this feature function, otherwise false
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        return obj.toString().equals(this.toString());
    }

    /**
     * Returns a hash code derived from the string representation, consistent with equals.
     *
     * @return the hash code
     */
    @Override
    public int hashCode() {
        return 217 + toString().hashCode();
    }

    /**
     * Returns <code>Distance(arg1, arg2, normalization)</code>. Arguments that have not been
     * set yet are rendered as <code>null</code> instead of raising a NullPointerException.
     *
     * @return the string representation of this feature function
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("Distance(");
        // append(Object) is null-safe, unlike calling toString() on a possibly unset field
        sb.append(addressFunction1);
        sb.append(", ");
        sb.append(addressFunction2);
        sb.append(", ");
        sb.append(normalizationString);
        sb.append(')');
        return sb.toString();
    }
}