package org.maltparser.core.feature.map;

import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.feature.FeatureException;
import org.maltparser.core.feature.function.FeatureFunction;
import org.maltparser.core.feature.function.FeatureMapFunction;
import org.maltparser.core.feature.value.FeatureValue;
import org.maltparser.core.feature.value.FunctionValue;
import org.maltparser.core.feature.value.MultipleFeatureValue;
import org.maltparser.core.feature.value.SingleFeatureValue;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.SymbolTableHandler;

/**
 * Feature map function that splits the symbol(s) produced by a parent feature
 * with a regular-expression separator and exposes the resulting parts as a
 * {@link MultipleFeatureValue}.
 *
 * <p>The parts are registered in a dedicated symbol table named
 * {@code "SPLIT_" + <parent table name>}, which is created in
 * {@link #initialize(Object[])}.
 *
 * @author Johan Hall
 */
public class SplitFeature implements FeatureMapFunction {
    /** Feature whose value is split. Set by {@link #initialize(Object[])}. */
    protected FeatureFunction parentFeature;
    /** Value object that is refilled on every call to {@link #update()}. */
    protected MultipleFeatureValue multipleFeatureValue;
    /** Handler used to create the symbol table of the split parts. */
    protected SymbolTableHandler tableHandler;
    /** Symbol table that holds the split parts. */
    protected SymbolTable table;
    /** Separator regular expression, as given by the caller. */
    protected String separators;
    /** Compiled form of {@link #separators}. */
    protected Pattern separatorsPattern;

    /**
     * Creates an uninitialized split feature bound to the given table handler.
     * {@link #initialize(Object[])} must be called before the feature is used.
     *
     * @param tableHandler handler used later to create the symbol table
     * @throws MaltChainedException declared for callers; see the called project code
     */
    public SplitFeature(SymbolTableHandler tableHandler) throws MaltChainedException {
        super();
        setTableHandler(tableHandler);
        multipleFeatureValue = new MultipleFeatureValue(this);
    }

    /**
     * Initializes the feature from exactly two arguments: the parent
     * {@link FeatureFunction} and the separator regular expression.
     *
     * @param arguments {@code [FeatureFunction parent, String separators]}
     * @throws MaltChainedException a {@link FeatureException} if the arguments
     *         have the wrong number or types, or if the separators are not a
     *         valid regular expression
     */
    public void initialize(Object[] arguments) throws MaltChainedException {
        // A null array is reported the same way as a wrong argument count
        // instead of surfacing as a NullPointerException.
        if (arguments == null || arguments.length != 2) {
            throw new FeatureException("Could not initialize SplitFeature: number of arguments are not correct. ");
        }
        if (!(arguments[0] instanceof FeatureFunction)) {
            throw new FeatureException("Could not initialize SplitFeature: the first argument is not a feature. ");
        }
        if (!(arguments[1] instanceof String)) {
            throw new FeatureException("Could not initialize SplitFeature: the second argument is not a string. ");
        }
        setParentFeature((FeatureFunction) arguments[0]);
        try {
            // Pattern.compile is the only place a PatternSyntaxException can
            // originate, so it is translated here rather than at split time.
            setSeparators((String) arguments[1]);
        } catch (PatternSyntaxException e) {
            throw new FeatureException("Could not initialize SplitFeature: the separators '" + arguments[1]
                    + "' are not a valid regular expression. ", e);
        }
        setSymbolTable(tableHandler.addSymbolTable(
                "SPLIT_" + parentFeature.getSymbolTable().getName(), parentFeature.getSymbolTable()));
    }

    /**
     * Returns the argument types expected by {@link #initialize(Object[])}.
     *
     * @return {@code { FeatureFunction.class, String.class }}
     */
    public Class<?>[] getParameterTypes() {
        Class<?>[] paramTypes = { FeatureFunction.class, String.class };
        return paramTypes;
    }

    /**
     * Returns the value object that {@link #update()} fills.
     *
     * @return the multiple feature value of this feature
     */
    public FeatureValue getFeatureValue() {
        return multipleFeatureValue;
    }

    /**
     * Looks up the symbol for a code in this feature's symbol table.
     *
     * @param code the symbol code
     * @return the symbol string reported by the table
     * @throws MaltChainedException if the table lookup fails
     */
    public String getSymbol(int code) throws MaltChainedException {
        return table.getSymbolCodeToString(code);
    }

    /**
     * Looks up the code for a symbol in this feature's symbol table.
     *
     * @param symbol the symbol string
     * @return the code reported by the table
     * @throws MaltChainedException if the table lookup fails
     */
    public int getCode(String symbol) throws MaltChainedException {
        return table.getSymbolStringToCode(symbol);
    }

    /**
     * Recomputes the split value: resets the current value, updates the parent
     * feature and splits every symbol of the parent's value.
     *
     * <p>If the parent value is a null value, its (first) symbol is forwarded
     * unsplit with the code from the parent's symbol table and this value is
     * marked as null. Parent values that are neither single nor multiple
     * feature values leave the value empty.
     *
     * @throws MaltChainedException if the parent update or a symbol table
     *         operation fails
     */
    public void update() throws MaltChainedException {
        multipleFeatureValue.reset();
        parentFeature.update();
        final FunctionValue value = parentFeature.getFeatureValue();

        if (value instanceof SingleFeatureValue) {
            final String symbol = ((SingleFeatureValue) value).getSymbol();
            if (((FeatureValue) value).isNullValue()) {
                addParentNullSymbol(symbol);
            } else {
                addSplitItems(symbol);
                multipleFeatureValue.setNullValue(false);
            }
        } else if (value instanceof MultipleFeatureValue) {
            final MultipleFeatureValue parentValue = (MultipleFeatureValue) value;
            if (parentValue.isNullValue()) {
                addParentNullSymbol(parentValue.getFirstSymbol());
            } else {
                for (String symbol : parentValue.getSymbols()) {
                    addSplitItems(symbol);
                    multipleFeatureValue.setNullValue(false);
                }
            }
        }
    }

    /** Forwards the parent's null symbol unsplit and marks this value as null. */
    private void addParentNullSymbol(String symbol) throws MaltChainedException {
        multipleFeatureValue.addFeatureValue(
                parentFeature.getSymbolTable().getSymbolStringToCode(symbol), symbol, true);
        multipleFeatureValue.setNullValue(true);
    }

    /**
     * Splits one symbol and adds every non-empty part to the value. Empty
     * parts (e.g. the leading one when the symbol starts with a separator)
     * are skipped for single- and multi-valued parents alike.
     */
    private void addSplitItems(String symbol) throws MaltChainedException {
        for (String item : separatorsPattern.split(symbol)) {
            if (item.length() > 0) {
                multipleFeatureValue.addFeatureValue(table.addSymbol(item), item, table.getKnown(item));
            }
        }
    }

    /**
     * Updates the parent's cardinality and sets this value's cardinality to
     * the value counter of this feature's symbol table.
     *
     * @throws MaltChainedException if the parent update fails
     */
    public void updateCardinality() throws MaltChainedException {
        parentFeature.updateCardinality();
        multipleFeatureValue.setCardinality(table.getValueCounter());
    }

    /**
     * Two split features are equal when they have the same class and the same
     * {@link #toString()} representation.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        return obj.toString().equals(this.toString());
    }

    /**
     * Hash code derived from {@link #toString()}, so that objects that are
     * equal according to {@link #equals(Object)} share a hash code. The value
     * changes if the parent feature or the separators are changed.
     */
    @Override
    public int hashCode() {
        return toString().hashCode();
    }

    /** @return the feature whose value is split */
    public FeatureFunction getParentFeature() {
        return parentFeature;
    }

    /** @param parentFeature the feature whose value is split */
    public void setParentFeature(FeatureFunction parentFeature) {
        this.parentFeature = parentFeature;
    }

    /** @return the separator regular expression */
    public String getSeparators() {
        return separators;
    }

    /**
     * Stores the separator regular expression and compiles it.
     *
     * @param separators the separator regular expression
     * @throws PatternSyntaxException (unchecked) if the expression is invalid
     */
    public void setSeparators(String separators) {
        this.separators = separators;
        separatorsPattern = Pattern.compile(separators);
    }

    /** @return the symbol table holding the split parts */
    public SymbolTable getSymbolTable() {
        return table;
    }

    /** @param table the symbol table holding the split parts */
    public void setSymbolTable(SymbolTable table) {
        this.table = table;
    }

    /** @return the symbol table handler */
    public SymbolTableHandler getTableHandler() {
        return tableHandler;
    }

    /** @param tableHandler the symbol table handler */
    public void setTableHandler(SymbolTableHandler tableHandler) {
        this.tableHandler = tableHandler;
    }

    /**
     * Returns {@code "Split(<parent>, <separators>)"}. Safe to call before
     * {@link #initialize(Object[])}: an unset parent is rendered as
     * {@code null} instead of causing a NullPointerException.
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("Split(");
        sb.append(parentFeature);
        sb.append(", ");
        sb.append(separators);
        sb.append(')');
        return sb.toString();
    }
}