001 package org.maltparser.ml.libsvm; 002 003 import java.io.BufferedReader; 004 import java.io.BufferedWriter; 005 import java.io.File; 006 import java.io.FileNotFoundException; 007 import java.io.IOException; 008 import java.io.InputStream; 009 import java.io.InputStreamReader; 010 import java.io.OutputStreamWriter; 011 import java.io.PrintStream; 012 import java.text.DecimalFormat; 013 import java.text.DecimalFormatSymbols; 014 import java.util.ArrayList; 015 import java.util.HashMap; 016 import java.util.Map; 017 import java.util.Set; 018 import java.util.jar.JarEntry; 019 import java.util.regex.Pattern; 020 import java.util.regex.PatternSyntaxException; 021 022 import libsvm.svm; 023 import libsvm.svm_model; 024 import libsvm.svm_node; 025 import libsvm.svm_parameter; 026 import libsvm.svm_problem; 027 028 import org.maltparser.core.exception.MaltChainedException; 029 import org.maltparser.core.feature.FeatureVector; 030 import org.maltparser.core.feature.function.FeatureFunction; 031 import org.maltparser.core.feature.value.FeatureValue; 032 import org.maltparser.core.feature.value.MultipleFeatureValue; 033 import org.maltparser.core.feature.value.SingleFeatureValue; 034 import org.maltparser.core.helper.NoPrintStream; 035 import org.maltparser.core.syntaxgraph.DependencyStructure; 036 import org.maltparser.ml.LearningMethod; 037 import org.maltparser.ml.liblinear.LiblinearException; 038 import org.maltparser.ml.libsvm.LibsvmException; 039 import org.maltparser.parser.DependencyParserConfig; 040 import org.maltparser.parser.guide.instance.InstanceModel; 041 import org.maltparser.parser.history.action.SingleDecision; 042 import org.maltparser.parser.history.kbest.KBestList; 043 import org.maltparser.parser.history.kbest.ScoredKBestList; 044 045 /** 046 Implements an interface to the LIBSVM learner (currently the LIBSVM 2.91 is used). More information 047 about LIBSVM can be found at 048 <a href="http://www.csie.ntu.edu.tw/~cjlin/libsvm/" target="_blank">LIBSVM -- A Library for Support Vector Machines</a>. 049 050 @author Johan Hall 051 @since 1.0 052 */ 053 public class Libsvm implements LearningMethod { 054 public final static String LIBSVM_VERSION = "2.91"; 055 public enum Verbostity { 056 SILENT, ERROR, ALL 057 } 058 protected InstanceModel owner; 059 protected int learnerMode; 060 protected String name; 061 protected int numberOfInstances; 062 protected boolean saveInstanceFiles; 063 protected boolean excludeNullValues; 064 protected String pathExternalSVMTrain = null; 065 private int[] cardinalities; 066 067 /** 068 * Instance output stream writer 069 */ 070 private BufferedWriter instanceOutput = null; 071 /** 072 * LIBSVM svm_model object, only used during classification. 073 */ 074 private svm_model model = null; 075 076 /** 077 * LIBSVM svm_parameter object 078 */ 079 private svm_parameter svmParam; 080 /** 081 * Parameter string 082 */ 083 private String paramString; 084 /** 085 * An array of LIBSVM svm_node objects, only used during classification. 086 */ 087 private ArrayList<svm_node> xlist = null; 088 089 private Verbostity verbosity; 090 /** 091 * Constructs a LIBSVM learner. 092 * 093 * @param owner the guide model owner 094 * @param learnerMode the mode of the learner TRAIN or CLASSIFY 095 */ 096 public Libsvm(InstanceModel owner, Integer learnerMode) throws MaltChainedException { 097 setOwner(owner); 098 setLearningMethodName("libsvm"); 099 setLearnerMode(learnerMode.intValue()); 100 setNumberOfInstances(0); 101 verbosity = Verbostity.SILENT; 102 initSvmParam(getConfiguration().getOptionValue("libsvm", "libsvm_options").toString()); 103 initSpecialParameters(); 104 if (learnerMode == BATCH) { 105 // if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) { 106 // if (pathExternalSVMTrain != null) { 107 // owner.getGuide().getConfiguration().getConfigLogger().info(" Learner : LIBSVM external "+ getParamString() + "\n"); 108 // } else { 109 // owner.getGuide().getConfiguration().getConfigLogger().info(" Learner : LIBSVM "+LIBSVM_VERSION+" "+ getParamString() + "\n"); 110 // } 111 // } 112 instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins")); 113 } 114 // else { 115 // if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) { 116 // owner.getGuide().getConfiguration().getConfigLogger().info(" Classifier : LIBSVM "+LIBSVM_VERSION+" "+ getParamString()+ "\n"); 117 // } 118 // } 119 } 120 121 122 public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException { 123 if (featureVector == null) { 124 throw new LibsvmException("The feature vector cannot be found"); 125 } else if (decision == null) { 126 throw new LibsvmException("The decision cannot be found"); 127 } 128 try { 129 instanceOutput.write(decision.getDecisionCode()+"\t"); 130 for (int i = 0; i < featureVector.size(); i++) { 131 FeatureValue featureValue = featureVector.get(i).getFeatureValue(); 132 if (excludeNullValues == true && featureValue.isNullValue()) { 133 instanceOutput.write("-1"); 134 } else { 135 if (featureValue instanceof SingleFeatureValue) { 136 instanceOutput.write(((SingleFeatureValue)featureValue).getCode()+""); 137 } else if (featureValue instanceof MultipleFeatureValue) { 138 Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes(); 139 int j=0; 140 for (Integer value : values) { 141 instanceOutput.write(value.toString()); 142 if (j != values.size()-1) { 143 instanceOutput.write("|"); 144 } 145 j++; 146 } 147 } 148 } 149 if (i != featureVector.size()) { 150 instanceOutput.write('\t'); 151 } 152 } 153 154 instanceOutput.write('\n'); 155 instanceOutput.flush(); 156 increaseNumberOfInstances(); 157 } catch (IOException e) { 158 throw new LibsvmException("The LIBSVM learner cannot write to the instance file. ", e); 159 } 160 } 161 162 public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException { } 163 164 /* (non-Javadoc) 165 * @see org.maltparser.ml.LearningMethod#noMoreInstances() 166 */ 167 public void noMoreInstances() throws MaltChainedException { 168 closeInstanceWriter(); 169 } 170 171 172 /* (non-Javadoc) 173 * @see org.maltparser.ml.LearningMethod#train(org.maltparser.parser.guide.feature.FeatureVector) 174 */ 175 public void train(FeatureVector featureVector) throws MaltChainedException { 176 if (featureVector == null) { 177 throw new LibsvmException("The feature vector cannot be found. "); 178 } else if (owner == null) { 179 throw new LibsvmException("The parent guide model cannot be found. "); 180 } 181 cardinalities = getCardinalities(featureVector); 182 if (pathExternalSVMTrain == null) { 183 try { 184 final svm_problem prob = readProblemMaltSVMFormat(getInstanceInputStreamReader(".ins"), cardinalities, svmParam); 185 if(svm.svm_check_parameter(prob, svmParam) != null) { 186 throw new LibsvmException(svm.svm_check_parameter(prob, svmParam)); 187 } 188 owner.getGuide().getConfiguration().getConfigLogger().info("Creating LIBSVM model "+getFile(".mod").getName()+"\n"); 189 final PrintStream out = System.out; 190 final PrintStream err = System.err; 191 System.setOut(NoPrintStream.NO_PRINTSTREAM); 192 System.setErr(NoPrintStream.NO_PRINTSTREAM); 193 194 svm.svm_save_model(getFile(".mod").getAbsolutePath(), svm.svm_train(prob, svmParam)); 195 System.setOut(err); 196 System.setOut(out); 197 if (!saveInstanceFiles) { 198 getFile(".ins").delete(); 199 } 200 } catch (OutOfMemoryError e) { 201 throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e); 202 } catch (IllegalArgumentException e) { 203 throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e); 204 } catch (SecurityException e) { 205 throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e); 206 } catch (IOException e) { 207 throw new LibsvmException("The LIBSVM learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e); 208 } 209 } else { 210 trainExternal(featureVector); 211 } 212 saveCardinalities(getInstanceOutputStreamWriter(".car"), cardinalities); 213 } 214 215 216 @Override 217 public double crossValidate(FeatureVector featureVector, int nrOfSplits) 218 throws MaltChainedException { 219 if (featureVector == null) { 220 throw new LibsvmException("The feature vector cannot be found. "); 221 } else if (owner == null) { 222 throw new LibsvmException("The parent guide model cannot be found. "); 223 } 224 cardinalities = getCardinalities(featureVector); 225 //TODO Implement support for externial SVM for cross validation 226 //if (pathExternalSVMTrain == null) { 227 228 double crossValidationAccuracy = 0.0; 229 230 try { 231 final svm_problem prob = readProblemMaltSVMFormat(getInstanceInputStreamReader(".ins"), cardinalities, svmParam); 232 if(svm.svm_check_parameter(prob, svmParam) != null) { 233 throw new LibsvmException(svm.svm_check_parameter(prob, svmParam)); 234 } 235 owner.getGuide().getConfiguration().getConfigLogger().info("Doing cross validation\n"); 236 final PrintStream out = System.out; 237 final PrintStream err = System.err; 238 System.setOut(NoPrintStream.NO_PRINTSTREAM); 239 System.setErr(NoPrintStream.NO_PRINTSTREAM); 240 241 //svm.svm_save_model(getFile(".mod").getAbsolutePath(), svm.svm_train(prob, svmParam)); 242 243 double[] target = new double[prob.l]; 244 245 svm.svm_cross_validation(prob, svmParam, nrOfSplits, target); 246 247 System.setOut(err); 248 System.setOut(out); 249 if (!saveInstanceFiles) { 250 getFile(".ins").delete(); 251 } 252 253 254 double total_correct = 0.0; 255 256 for(int i=0;i<prob.l;i++) 257 if(target[i] == prob.y[i]) 258 ++total_correct; 259 260 if(total_correct>0) 261 crossValidationAccuracy = 100.0*total_correct/prob.l; 262 263 264 } catch (OutOfMemoryError e) { 265 throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e); 266 } catch (IllegalArgumentException e) { 267 throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e); 268 } catch (SecurityException e) { 269 throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e); 270 } 271 //} else { 272 // trainExternal(featureVector); 273 //} 274 //saveCardinalities(getInstanceOutputStreamWriter(".car"), cardinalities); 275 276 277 278 return crossValidationAccuracy; 279 } 280 281 282 private void trainExternal(FeatureVector featureVector) throws MaltChainedException { 283 try { 284 maltSVMFormat2OriginalSVMFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"), cardinalities); 285 owner.getGuide().getConfiguration().getConfigLogger().info("Creating LIBSVM model (svm-train) "+getFile(".mod").getName()); 286 287 final ArrayList<String> commands = new ArrayList<String>(); 288 commands.add(pathExternalSVMTrain); 289 final String[] params = getSVMParamStringArray(svmParam); 290 for (int i=0; i < params.length; i++) { 291 commands.add(params[i]); 292 } 293 commands.add(getFile(".ins.tmp").getAbsolutePath()); 294 commands.add(getFile(".mod").getAbsolutePath()); 295 String[] arrayCommands = commands.toArray(new String[commands.size()]); 296 297 if (verbosity == Verbostity.ALL) { 298 owner.getGuide().getConfiguration().getConfigLogger().info('\n'); 299 } 300 final Process child = Runtime.getRuntime().exec(arrayCommands); 301 final InputStream in = child.getInputStream(); 302 final InputStream err = child.getErrorStream(); 303 int c; 304 while ((c = in.read()) != -1){ 305 if (verbosity == Verbostity.ALL) { 306 owner.getGuide().getConfiguration().getConfigLogger().info((char)c); 307 } 308 } 309 while ((c = err.read()) != -1){ 310 if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) { 311 owner.getGuide().getConfiguration().getConfigLogger().info((char)c); 312 } 313 } 314 if (child.waitFor() != 0) { 315 owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")"); 316 } 317 in.close(); 318 err.close(); 319 if (!saveInstanceFiles) { 320 getFile(".ins").delete(); 321 getFile(".ins.tmp").delete(); 322 } 323 owner.getGuide().getConfiguration().getConfigLogger().info('\n'); 324 } catch (InterruptedException e) { 325 throw new LibsvmException("SVM-trainer is interrupted. ", e); 326 } catch (IllegalArgumentException e) { 327 throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e); 328 } catch (SecurityException e) { 329 throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e); 330 } catch (IOException e) { 331 throw new LibsvmException("The LIBSVM learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e); 332 } catch (OutOfMemoryError e) { 333 throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e); 334 } 335 } 336 337 private int[] getCardinalities(FeatureVector featureVector) { 338 int[] cardinalities = new int[featureVector.size()]; 339 int i = 0; 340 for (FeatureFunction feature : featureVector) { 341 cardinalities[i++] = feature.getFeatureValue().getCardinality(); 342 } 343 return cardinalities; 344 } 345 346 private void saveCardinalities(OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException { 347 final BufferedWriter out = new BufferedWriter(osw); 348 try { 349 for (int i = 0, n = cardinalities.length; i < n; i++) { 350 out.write(Integer.toString(cardinalities[i])); 351 if (i < n - 1) { 352 out.write(','); 353 } 354 } 355 out.write('\n'); 356 out.close(); 357 } catch (IOException e) { 358 throw new LibsvmException("Couldn't save the cardinalities to file. ", e); 359 } 360 } 361 362 private int[] loadCardinalities(InputStreamReader isr) throws MaltChainedException { 363 int[] cardinalities = null; 364 try { 365 final BufferedReader in = new BufferedReader(isr); 366 String line; 367 if ((line = in.readLine()) != null) { 368 String[] items = line.split(","); 369 cardinalities = new int[items.length]; 370 for (int i = 0; i < items.length; i++) { 371 cardinalities[i] = Integer.parseInt(items[i]); 372 } 373 } 374 in.close(); 375 } catch (IOException e) { 376 throw new LibsvmException("The cardinalities cannot be read because wrongly formatted. ", e); 377 } catch (NumberFormatException e) { 378 throw new LibsvmException("Couldn't load the cardinalities from file. ", e); 379 } 380 return cardinalities; 381 } 382 383 /* (non-Javadoc) 384 * @see org.maltparser.ml.LearningMethod#moveAllInstances(org.maltparser.ml.LearningMethod, org.maltparser.core.feature.function.FeatureFunction, java.util.ArrayList) 385 */ 386 public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException { 387 if (method == null) { 388 throw new LibsvmException("The learning method cannot be found. "); 389 } else if (divideFeature == null) { 390 throw new LibsvmException("The divide feature cannot be found. "); 391 } 392 try { 393 final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins")); 394 final BufferedWriter out = method.getInstanceWriter(); 395 final StringBuilder sb = new StringBuilder(6); 396 int l = in.read(); 397 char c; 398 int j = 0; 399 while(true) { 400 if (l == -1) { 401 sb.setLength(0); 402 break; 403 } 404 405 c = (char)l; 406 l = in.read(); 407 if (c == '\t') { 408 if (divideFeatureIndexVector.contains(j-1)) { 409 out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode())); 410 out.write('\t'); 411 } 412 out.write(sb.toString()); 413 j++; 414 out.write('\t'); 415 sb.setLength(0); 416 } else if (c == '\n') { 417 if (sb.length() > 0) { 418 out.write(sb.toString()); 419 } 420 if (divideFeatureIndexVector.contains(j-1)) { 421 if (sb.length() > 0) { 422 out.write('\t'); 423 } 424 out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode())); 425 } 426 out.write('\n'); 427 sb.setLength(0); 428 method.increaseNumberOfInstances(); 429 this.decreaseNumberOfInstances(); 430 j = 0; 431 } else { 432 sb.append(c); 433 } 434 } 435 in.close(); 436 getFile(".ins").delete(); 437 } catch (SecurityException e) { 438 throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e); 439 } catch (NullPointerException e) { 440 throw new LibsvmException("The instance file cannot be found. ", e); 441 } catch (FileNotFoundException e) { 442 throw new LibsvmException("The instance file cannot be found. ", e); 443 } catch (IOException e) { 444 throw new LibsvmException("The LIBSVM learner read from the instance file. ", e); 445 } 446 } 447 448 /* (non-Javadoc) 449 * @see org.maltparser.ml.LearningMethod#predict(org.maltparser.parser.guide.feature.FeatureVector, org.maltparser.ml.KBestList) 450 */ 451 public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException { 452 if (model == null) { 453 try { 454 model = svm.svm_load_model(new BufferedReader(getInstanceInputStreamReaderFromConfigFile(".mod"))); 455 } catch (IOException e) { 456 throw new LibsvmException("The model cannot be loaded. ", e); 457 } 458 } 459 if (cardinalities == null) { 460 if (getConfigFileEntry(".car") != null) { 461 cardinalities = loadCardinalities(getInstanceInputStreamReaderFromConfigFile(".car")); 462 } else { 463 cardinalities = getCardinalities(featureVector); 464 } 465 } 466 if (xlist == null) { 467 xlist = new ArrayList<svm_node>(featureVector.size()); 468 } 469 if (model == null) { 470 throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the learning model cannot be found. "); 471 } else if (featureVector == null) { 472 throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the feature vector cannot be found. "); 473 } 474 int j = 0; 475 int offset = 0; 476 int i = 0; 477 for (FeatureFunction feature : featureVector) { 478 final FeatureValue featureValue = feature.getFeatureValue(); 479 if (!(excludeNullValues == true && featureValue.isNullValue())) { 480 if (featureValue instanceof SingleFeatureValue) { 481 if (((SingleFeatureValue)featureValue).getCode() < cardinalities[i]) { 482 if (j >= xlist.size()) { 483 svm_node x = new svm_node(); 484 x.value = 1; 485 xlist.add(j,x); 486 } 487 xlist.get(j++).index = ((SingleFeatureValue)featureValue).getCode() + offset; 488 } 489 } else if (featureValue instanceof MultipleFeatureValue) { 490 for (Integer value : ((MultipleFeatureValue)featureValue).getCodes()) { 491 if (value < cardinalities[i]) { 492 // if (((MultipleFeatureValue)featureValue).isKnown(value)) { 493 if (j >= xlist.size()) { 494 svm_node x = new svm_node(); 495 x.value = 1; 496 xlist.add(j,x); 497 } 498 xlist.get(j++).index = value + offset; 499 } 500 } 501 } 502 } 503 offset += cardinalities[i]; 504 i++; 505 } 506 507 svm_node[] xarray = new svm_node[j]; 508 for (int k = 0; k < j; k++) { 509 xarray[k] = xlist.get(k); 510 } 511 try { 512 if (decision.getKBestList().getK() == 1 || svm.svm_get_svm_type(model) == svm_parameter.ONE_CLASS || 513 svm.svm_get_svm_type(model) == svm_parameter.EPSILON_SVR || 514 svm.svm_get_svm_type(model) == svm_parameter.NU_SVR) { 515 decision.getKBestList().add((int)svm.svm_predict(model, xarray)); 516 } else { 517 svm_predict_with_kbestlist(model, xarray, decision.getKBestList()); 518 } 519 520 } catch (OutOfMemoryError e) { 521 throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e); 522 } 523 524 return true; 525 } 526 527 528 public void terminate() throws MaltChainedException { 529 closeInstanceWriter(); 530 model = null; 531 svmParam = null; 532 xlist = null; 533 owner = null; 534 } 535 536 public BufferedWriter getInstanceWriter() { 537 return instanceOutput; 538 } 539 540 protected void closeInstanceWriter() throws MaltChainedException { 541 try { 542 if (instanceOutput != null) { 543 instanceOutput.flush(); 544 instanceOutput.close(); 545 instanceOutput = null; 546 } 547 } catch (IOException e) { 548 throw new LibsvmException("The LIBSVM learner cannot close the instance file. ", e); 549 } 550 } 551 552 /** 553 * Initialize the LIBSVM according to the parameter string 554 * 555 * @param paramString the parameter string to configure the LIBSVM learner. 556 * @throws MaltChainedException 557 */ 558 protected void initSvmParam(String paramString) throws MaltChainedException { 559 this.paramString = paramString; 560 svmParam = new svm_parameter(); 561 initParameters(svmParam); 562 parseParameters(paramString, svmParam); 563 } 564 565 /** 566 * Returns the parameter string for used for configure LIBSVM 567 * 568 * @return the parameter string for used for configure LIBSVM 569 */ 570 public String getParamString() { 571 return paramString; 572 } 573 574 public InstanceModel getOwner() { 575 return owner; 576 } 577 578 protected void setOwner(InstanceModel owner) { 579 this.owner = owner; 580 } 581 582 public int getLearnerMode() { 583 return learnerMode; 584 } 585 586 public void setLearnerMode(int learnerMode) throws MaltChainedException { 587 this.learnerMode = learnerMode; 588 } 589 590 public String getLearningMethodName() { 591 return name; 592 } 593 594 /** 595 * Returns the current configuration 596 * 597 * @return the current configuration 598 * @throws MaltChainedException 599 */ 600 public DependencyParserConfig getConfiguration() throws MaltChainedException { 601 return owner.getGuide().getConfiguration(); 602 } 603 604 public int getNumberOfInstances() throws MaltChainedException { 605 if(numberOfInstances!=0) 606 return numberOfInstances; 607 else{ 608 //Do a line count of the instance file and return that 609 610 BufferedReader reader = new BufferedReader( getInstanceInputStreamReader(".ins")); 611 try { 612 while(reader.readLine()!=null){ 613 numberOfInstances++; 614 owner.increaseFrequency(); 615 } 616 617 reader.close(); 618 } catch (IOException e) { 619 throw new MaltChainedException("No instances found in file",e); 620 } 621 622 623 624 return numberOfInstances; 625 626 } 627 } 628 629 public void increaseNumberOfInstances() { 630 numberOfInstances++; 631 owner.increaseFrequency(); 632 } 633 634 public void decreaseNumberOfInstances() { 635 numberOfInstances--; 636 owner.decreaseFrequency(); 637 } 638 639 protected void setNumberOfInstances(int numberOfInstances) { 640 this.numberOfInstances = 0; 641 } 642 643 protected void setLearningMethodName(String name) { 644 this.name = name; 645 } 646 647 protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException { 648 return getConfiguration().getConfigurationDir().getAppendOutputStreamWriter(owner.getModelName()+getLearningMethodName()+suffix); 649 } 650 651 protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException { 652 return getConfiguration().getConfigurationDir().getInputStreamReader(owner.getModelName()+getLearningMethodName()+suffix); 653 } 654 655 protected InputStreamReader getInstanceInputStreamReaderFromConfigFile(String suffix) throws MaltChainedException { 656 return getConfiguration().getConfigurationDir().getInputStreamReaderFromConfigFile(owner.getModelName()+getLearningMethodName()+suffix); 657 } 658 659 protected File getFile(String suffix) throws MaltChainedException { 660 return getConfiguration().getConfigurationDir().getFile(owner.getModelName()+getLearningMethodName()+suffix); 661 } 662 663 protected JarEntry getConfigFileEntry(String suffix) throws MaltChainedException { 664 return getConfiguration().getConfigurationDir().getConfigFileEntry(owner.getModelName()+getLearningMethodName()+suffix); 665 } 666 667 /** 668 * Reads an instance file into a svm_problem object according to the Malt-SVM format, which is column fixed format (tab-separated). 669 * 670 * @param isr the instance stream reader for the instance file 671 * @param cardinalities a array containing the number of distinct values for a particular column. 672 * @param param a svm_parameter object 673 * @throws LibsvmException 674 */ 675 public final svm_problem readProblemMaltSVMFormat(InputStreamReader isr, int[] cardinalities, svm_parameter param) throws MaltChainedException { 676 final svm_problem prob = new svm_problem(); 677 try { 678 final BufferedReader fp = new BufferedReader(isr); 679 int max_index = 0; 680 if (xlist == null) { 681 xlist = new ArrayList<svm_node>(); 682 } 683 prob.l = getNumberOfInstances(); 684 prob.x = new svm_node[prob.l][]; 685 prob.y = new double[prob.l]; 686 int i = 0; 687 final Pattern tabPattern = Pattern.compile("\t"); 688 final Pattern pipePattern = Pattern.compile("\\|"); 689 while(true) { 690 String line = fp.readLine(); 691 if(line == null) break; 692 String[] columns = tabPattern.split(line); 693 694 if (columns.length == 0) { 695 continue; 696 } 697 698 int offset = 0; 699 int j = 0; 700 try { 701 prob.y[i] = (double)Integer.parseInt(columns[j]); 702 int p = 0; 703 for(j = 1; j < columns.length; j++) { 704 final String[] items = pipePattern.split(columns[j]); 705 for (int k = 0; k < items.length; k++) { 706 try { 707 if (Integer.parseInt(items[k]) != -1) { 708 xlist.add(p, new svm_node()); 709 xlist.get(p).value = 1; 710 xlist.get(p).index = Integer.parseInt(items[k])+offset; 711 p++; 712 } 713 } catch (NumberFormatException e) { 714 throw new LibsvmException("The instance file contain a non-integer value '"+items[k]+"'", e); 715 } 716 } 717 offset += cardinalities[j-1]; 718 } 719 prob.x[i] = xlist.subList(0, p).toArray(new svm_node[0]); 720 if(columns.length > 1) { 721 max_index = Math.max(max_index, xlist.get(p-1).index); 722 } 723 i++; 724 xlist.clear(); 725 } catch (ArrayIndexOutOfBoundsException e) { 726 throw new LibsvmException("Cannot read from the instance file. ", e); 727 } 728 } 729 fp.close(); 730 if (param.gamma == 0) { 731 param.gamma = 1.0/max_index; 732 } 733 xlist = null; 734 } catch (IOException e) { 735 throw new LibsvmException("Cannot read from the instance file. ", e); 736 } 737 return prob; 738 } 739 740 protected void initSpecialParameters() throws MaltChainedException { 741 if (getConfiguration().getOptionValue("singlemalt", "null_value") != null && getConfiguration().getOptionValue("singlemalt", "null_value").toString().equalsIgnoreCase("none")) { 742 excludeNullValues = true; 743 } else { 744 excludeNullValues = false; 745 } 746 saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("libsvm", "save_instance_files")).booleanValue(); 747 748 if (!getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().equals("")) { 749 try { 750 if (!new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).exists()) { 751 throw new LibsvmException("The path to the external LIBSVM trainer 'svm-train' is wrong."); 752 } 753 if (new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).isDirectory()) { 754 throw new LibsvmException("The option --libsvm-libsvm_external points to a directory, the path should point at the 'svm-train' file or the 'svm-train.exe' file"); 755 } 756 if (!(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train") || getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train.exe"))) { 757 throw new LibsvmException("The option --libsvm-libsvm_external does not specify the path to 'svm-train' file or the 'svm-train.exe' file. "); 758 } 759 pathExternalSVMTrain = getConfiguration().getOptionValue("libsvm", "libsvm_external").toString(); 760 } catch (SecurityException e) { 761 throw new LibsvmException("Access denied to the file specified by the option --libsvm-libsvm_external. ", e); 762 } 763 } 764 if (getConfiguration().getOptionValue("libsvm", "verbosity") != null) { 765 verbosity = Verbostity.valueOf(getConfiguration().getOptionValue("libsvm", "verbosity").toString().toUpperCase()); 766 } 767 } 768 769 /** 770 * Assign a default value to all svm parameters 771 * 772 * @param param a svm_parameter object 773 */ 774 protected void initParameters(svm_parameter param) throws MaltChainedException { 775 if (param == null) { 776 throw new LibsvmException("Svm-parameters cannot be found. "); 777 } 778 param.svm_type = svm_parameter.C_SVC; 779 param.kernel_type = svm_parameter.POLY; 780 param.degree = 2; 781 param.gamma = 0.2; // 1/k 782 param.coef0 = 0; 783 param.nu = 0.5; 784 param.cache_size = 100; 785 param.C = 1; 786 param.eps = 1.0; 787 param.p = 0.1; 788 param.shrinking = 1; 789 param.probability = 0; 790 param.nr_weight = 0; 791 param.weight_label = new int[0]; 792 param.weight = new double[0]; 793 } 794 795 /** 796 * Returns a string containing all svm-parameters of interest 797 * 798 * @param param a svm_parameter object 799 * @return a string containing all svm-parameters of interest 800 */ 801 public String toStringParameters(svm_parameter param) { 802 if (param == null) { 803 throw new IllegalArgumentException("Svm-parameters cannot be found. "); 804 } 805 final StringBuffer sb = new StringBuffer(); 806 807 final String[] svmtypes = {"C_SVC", "NU_SVC","ONE_CLASS","EPSILON_SVR","NU_SVR"}; 808 final String[] kerneltypes = {"LINEAR", "POLY","RBF","SIGMOID","PRECOMPUTED"}; 809 final DecimalFormat dform = new DecimalFormat("#0.0#"); 810 final DecimalFormatSymbols sym = new DecimalFormatSymbols(); 811 sym.setDecimalSeparator('.'); 812 dform.setDecimalFormatSymbols(sym); 813 sb.append("LIBSVM SETTINGS\n"); 814 sb.append(" SVM type : " + svmtypes[param.svm_type] + " (" + param.svm_type + ")\n"); 815 sb.append(" Kernel : " + kerneltypes[param.kernel_type] + " (" + param.kernel_type + ")\n"); 816 if (param.kernel_type == svm_parameter.POLY) { 817 sb.append(" Degree : " + param.degree + "\n"); 818 } 819 if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.RBF || param.kernel_type == svm_parameter.SIGMOID) { 820 sb.append(" Gamma : " + dform.format(param.gamma) + "\n"); 821 if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.SIGMOID) { 822 sb.append(" Coef0 : " + dform.format(param.coef0) + "\n"); 823 } 824 } 825 if (param.svm_type == svm_parameter.NU_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.ONE_CLASS) { 826 sb.append(" Nu : " + dform.format(param.nu) + "\n"); 827 } 828 sb.append(" Cache Size : " + dform.format(param.cache_size) + " MB\n"); 829 if (param.svm_type == svm_parameter.C_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.EPSILON_SVR) { 830 sb.append(" C : " + dform.format(param.C) + "\n"); 831 } 832 sb.append(" Eps : " + dform.format(param.eps) + "\n"); 833 if (param.svm_type == svm_parameter.EPSILON_SVR) { 834 sb.append(" P : " + dform.format(param.p) + "\n"); 835 } 836 sb.append(" Shrinking : " + param.shrinking + "\n"); 837 sb.append(" Probability : " + param.probability + "\n"); 838 if (param.svm_type == svm_parameter.C_SVC) { 839 sb.append(" #Weight : " + param.nr_weight + "\n"); 840 if (param.nr_weight > 0) { 841 sb.append(" Weight labels : "); 842 for (int i = 0; i < param.nr_weight; i++) { 843 sb.append(param.weight_label[i]); 844 if (i != param.nr_weight-1) { 845 sb.append(", "); 846 } 847 } 848 sb.append("\n"); 849 for (int i = 0; i < param.nr_weight; i++) { 850 sb.append(dform.format(param.weight)); 851 if (i != param.nr_weight-1) { 852 sb.append(", "); 853 } 854 } 855 sb.append("\n"); 856 } 857 } 858 return sb.toString(); 859 } 860 861 public String[] getSVMParamStringArray(svm_parameter param) { 862 final ArrayList<String> params = new ArrayList<String>(); 863 864 if (param.svm_type != 0) { 865 params.add("-s"); params.add(new Integer(param.svm_type).toString()); 866 } 867 if (param.kernel_type != 2) { 868 params.add("-t"); params.add(new Integer(param.kernel_type).toString()); 869 } 870 if (param.degree != 3) { 871 params.add("-d"); params.add(new Integer(param.degree).toString()); 872 } 873 params.add("-g"); params.add(new Double(param.gamma).toString()); 874 if (param.coef0 != 0) { 875 params.add("-r"); params.add(new Double(param.coef0).toString()); 876 } 877 if (param.nu != 0.5) { 878 params.add("-n"); params.add(new Double(param.nu).toString()); 879 } 880 if (param.cache_size != 100) { 881 params.add("-m"); params.add(new Double(param.cache_size).toString()); 882 } 883 if (param.C != 1) { 884 params.add("-c"); params.add(new Double(param.C).toString()); 885 } 886 if (param.eps != 0.001) { 887 params.add("-e"); params.add(new Double(param.eps).toString()); 888 } 889 if (param.p != 0.1) { 890 params.add("-p"); params.add(new Double(param.p).toString()); 891 } 892 if (param.shrinking != 1) { 893 params.add("-h"); params.add(new Integer(param.shrinking).toString()); 894 } 895 if (param.probability != 0) { 896 params.add("-b"); params.add(new Integer(param.probability).toString()); 897 } 898 899 return params.toArray(new String[params.size()]); 900 } 901 902 /** 903 * Parses the parameter string. The parameter string must contain parameter and value pairs, which are separated by a blank 904 * or a underscore. The parameter begins with a character '-' followed by a one-character flag and the value must comply with 905 * the parameters data type. Some examples: 906 * 907 * -s 0 -t 1 -d 2 -g 0.4 -e 0.1 908 * -s_0_-t_1_-d_2_-g_0.4_-e_0.1 909 * 910 * @param paramstring the parameter string 911 * @param param a svm_parameter object 912 * @throws LibsvmException 913 */ 914 public void parseParameters(String paramstring, svm_parameter param) throws MaltChainedException { 915 if (param == null) { 916 throw new LibsvmException("Svm-parameters cannot be found. "); 917 } 918 if (paramstring == null) { 919 return; 920 } 921 final String[] argv; 922 try { 923 argv = paramstring.split("[_\\p{Blank}]"); 924 } catch (PatternSyntaxException e) { 925 throw new LibsvmException("Could not split the svm-parameter string '"+paramstring+"'. ", e); 926 } 927 for (int i=0; i < argv.length-1; i++) { 928 if(argv[i].charAt(0) != '-') { 929 throw new LibsvmException("The argument flag should start with the following character '-', not with "+argv[i].charAt(0)); 930 } 931 if(++i>=argv.length) { 932 throw new LibsvmException("The last argument does not have any value. "); 933 } 934 try { 935 switch(argv[i-1].charAt(1)) { 936 case 's': 937 param.svm_type = Integer.parseInt(argv[i]); 938 break; 939 case 't': 940 param.kernel_type = Integer.parseInt(argv[i]); 941 break; 942 case 'd': 943 param.degree = Integer.parseInt(argv[i]); 944 break; 945 case 'g': 946 param.gamma = Double.valueOf(argv[i]).doubleValue(); 947 break; 948 case 'r': 949 param.coef0 = Double.valueOf(argv[i]).doubleValue(); 950 break; 951 case 'n': 952 param.nu = Double.valueOf(argv[i]).doubleValue(); 953 break; 954 case 'm': 955 param.cache_size = Double.valueOf(argv[i]).doubleValue(); 956 break; 957 case 'c': 958 param.C = Double.valueOf(argv[i]).doubleValue(); 959 break; 960 case 'e': 961 param.eps = Double.valueOf(argv[i]).doubleValue(); 962 break; 963 case 'p': 964 param.p = Double.valueOf(argv[i]).doubleValue(); 965 break; 966 case 'h': 967 param.shrinking = Integer.parseInt(argv[i]); 968 break; 969 case 'b': 970 param.probability = Integer.parseInt(argv[i]); 971 break; 972 case 'w': 973 ++param.nr_weight; 974 { 975 int[] old = param.weight_label; 976 param.weight_label = new int[param.nr_weight]; 977 System.arraycopy(old,0,param.weight_label,0,param.nr_weight-1); 978 } 979 980 { 981 double[] old = param.weight; 982 param.weight = new double[param.nr_weight]; 983 System.arraycopy(old,0,param.weight,0,param.nr_weight-1); 984 } 985 986 param.weight_label[param.nr_weight-1] = Integer.parseInt(argv[i].substring(2)); 987 param.weight[param.nr_weight-1] = Double.valueOf(argv[i]).doubleValue(); 988 break; 989 case 'Y': 990 case 'V': 991 case 'S': 992 case 'F': 993 case 'T': 994 case 'M': 995 case 'N': 996 break; 997 default: 998 throw new LibsvmException("Unknown svm parameter: '"+argv[i-1]+"' with value '"+argv[i]+"'. "); 999 } 1000 } catch (ArrayIndexOutOfBoundsException e) { 1001 throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 1002 } catch (NumberFormatException e) { 1003 throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 1004 } catch (NullPointerException e) { 1005 throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 1006 } 1007 } 1008 } 1009 1010 public void svm_predict_with_kbestlist(svm_model model, svm_node[] x, KBestList kBestList) throws MaltChainedException { 1011 int i; 1012 final int nr_class = svm.svm_get_nr_class(model); 1013 final double[] dec_values = new double[nr_class*(nr_class-1)/2]; 1014 svm.svm_predict_values(model, x, dec_values); 1015 1016 final int[] vote = new int[nr_class]; 1017 final double[] score = new double[nr_class]; 1018 final int[] voteindex = new int[nr_class]; 1019 for(i=0;i<nr_class;i++) { 1020 vote[i] = 0; 1021 score[i] = 0.0; 1022 voteindex[i] = i; 1023 } 1024 int pos=0; 1025 for(i=0;i<nr_class;i++) { 1026 for(int j=i+1;j<nr_class;j++) { 1027 if(dec_values[pos] > 0) { 1028 vote[i]++; 1029 } else { 1030 vote[j]++; 1031 } 1032 score[i] += dec_values[pos]; 1033 score[j] += dec_values[pos]; 1034 pos++; 1035 } 1036 } 1037 for(i=0;i<nr_class;i++) { 1038 score[i] = score[i]/nr_class; 1039 } 1040 int lagest, tmpint; 1041 double tmpdouble; 1042 for (i=0;i<nr_class-1;i++) { 1043 lagest = i; 1044 for (int j=i;j<nr_class;j++) { 1045 if (vote[j] > vote[lagest]) { 1046 lagest = j; 1047 } 1048 } 1049 tmpint = vote[lagest]; 1050 vote[lagest] = vote[i]; 1051 vote[i] = tmpint; 1052 tmpdouble = score[lagest]; 1053 score[lagest] = score[i]; 1054 score[i] = tmpdouble; 1055 tmpint = voteindex[lagest]; 1056 voteindex[lagest] = voteindex[i]; 1057 voteindex[i] = tmpint; 1058 } 1059 final int[] labels = new int[nr_class]; 1060 svm.svm_get_labels(model, labels); 1061 int k = nr_class-1; 1062 if (kBestList.getK() != -1) { 1063 k = kBestList.getK() - 1; 1064 } 1065 1066 for (i=0; i<nr_class && k >= 0; i++, k--) { 1067 if (vote[i] > 0 || i == 0) { 1068 if (kBestList instanceof ScoredKBestList) { 1069 ((ScoredKBestList)kBestList).add(labels[voteindex[i]], (float)vote[i]/(float)(nr_class*(nr_class-1)/2)); 1070 } else { 1071 kBestList.add(labels[voteindex[i]]); 1072 } 1073 } 1074 } 1075 } 1076 /** 1077 * Converts the instance file (Malt's own SVM format) into the LIBSVM (SVMLight) format. The input instance file is removed (replaced) 1078 * by the instance file in the LIBSVM (SVMLight) format. If a column contains -1, the value will be removed in destination file. 1079 * 1080 * @param isr the input stream reader for the source instance file 1081 * @param osw the output stream writer for the destination instance file 1082 * @param cardinalities a vector containing the number of distinct values for a particular column 1083 * @throws LibsvmException 1084 */ 1085 public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException { 1086 try { 1087 final BufferedReader in = new BufferedReader(isr); 1088 final BufferedWriter out = new BufferedWriter(osw); 1089 1090 int c; 1091 int j = 0; 1092 int offset = 0; 1093 int code = 0; 1094 while(true) { 1095 c = in.read(); 1096 if (c == -1) { 1097 break; 1098 } 1099 1100 if (c == '\t' || c == '|') { 1101 if (j == 0) { 1102 out.write(Integer.toString(code)); 1103 j++; 1104 } else { 1105 if (code != -1) { 1106 out.write(' '); 1107 out.write(Integer.toString(code+offset)); 1108 out.write(":1"); 1109 } 1110 if (c == '\t') { 1111 offset += cardinalities[j-1]; 1112 j++; 1113 } 1114 } 1115 code = 0; 1116 } else if (c == '\n') { 1117 j = 0; 1118 offset = 0; 1119 out.write('\n'); 1120 code = 0; 1121 } else if (c == '-') { 1122 code = -1; 1123 } else if (code != -1) { 1124 if (c > 47 && c < 58) { 1125 code = code * 10 + (c-48); 1126 } else { 1127 throw new LibsvmException("The instance file contain a non-integer value, when converting the Malt SVM format into LIBSVM format."); 1128 } 1129 } 1130 } 1131 in.close(); 1132 out.close(); 1133 } catch (IOException e) { 1134 throw new LibsvmException("Cannot read from the instance file, when converting the Malt SVM format into LIBSVM format. ", e); 1135 } 1136 } 1137 1138 protected void finalize() throws Throwable { 1139 try { 1140 closeInstanceWriter(); 1141 } finally { 1142 super.finalize(); 1143 } 1144 } 1145 1146 /* (non-Javadoc) 1147 * @see java.lang.Object#toString() 1148 */ 1149 public String toString() { 1150 final StringBuffer sb = new StringBuffer(); 1151 sb.append("\nLIBSVM INTERFACE\n"); 1152 sb.append(" LIBSVM version: "+LIBSVM_VERSION+"\n"); 1153 sb.append(" SVM-param string: "+paramString+"\n"); 1154 1155 sb.append(toStringParameters(svmParam)); 1156 return sb.toString(); 1157 } 1158 1159 1160 @Override 1161 public void divideByFeatureSet( 1162 Set<Integer> featureIdsToCreateSeparateBranchesForSet, ArrayList<Integer> divideFeatureIndexVector, String otherId) throws MaltChainedException { 1163 1164 1165 //Create a hash map that maps every feature id to a writer 1166 HashMap<Integer, BufferedWriter> featureIdToWriterMap = new HashMap<Integer, BufferedWriter>(); 1167 1168 for(int element:featureIdsToCreateSeparateBranchesForSet){ 1169 1170 1171 BufferedWriter outputWriter = new BufferedWriter(getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName().replace('.','_') + element + "." + getLearningMethodName()+".ins")); 1172 featureIdToWriterMap.put(element, outputWriter); 1173 1174 } 1175 1176 BufferedWriter otherOutputWriter = new BufferedWriter(getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName().replace('.','_') + otherId + "." + getLearningMethodName()+".ins")); 1177 1178 1179 try { 1180 final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins")); 1181 //every line will be written to a separate file 1182 String line = in.readLine(); 1183 final Pattern tabPattern = Pattern.compile("\t"); 1184 while(line!=null){ 1185 1186 //Find out which pot the line shall be put in 1187 String[] lineArray = tabPattern.split(line); 1188 1189 int id = new Integer(lineArray[divideFeatureIndexVector.get(0)+1]); 1190 1191 if(!featureIdToWriterMap.containsKey(id)){ 1192 otherOutputWriter.write(line + "\n"); 1193 }else 1194 featureIdToWriterMap.get(id).write(getLineToWrite(lineArray,divideFeatureIndexVector.get(0)+1)); 1195 1196 line = in.readLine(); 1197 } 1198 1199 otherOutputWriter.close(); 1200 1201 in.close(); 1202 1203 for(BufferedWriter writer: featureIdToWriterMap.values()) 1204 writer.close(); 1205 1206 } catch (SecurityException e) { 1207 throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e); 1208 } catch (NullPointerException e) { 1209 throw new LiblinearException("The instance file cannot be found. ", e); 1210 } catch (FileNotFoundException e) { 1211 throw new LiblinearException("The instance file cannot be found. ", e); 1212 } catch (IOException e) { 1213 throw new LiblinearException("The Liblinear learner read from the instance file. ", e); 1214 } 1215 1216 1217 1218 } 1219 1220 1221 private String getLineToWrite(String[] lineArray, int excludeIndex) { 1222 StringBuffer buf = new StringBuffer(); 1223 1224 for(int n = 0; n < lineArray.length; n++) 1225 if(n != excludeIndex) 1226 buf.append(lineArray[n] + "\t"); 1227 1228 1229 buf.append("\n"); 1230 1231 1232 return buf.toString(); 1233 } 1234 1235 1236 @Override 1237 public Map<Integer, Integer> createFeatureIdToCountMap( 1238 ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException{ 1239 1240 HashMap<Integer, Integer> featureIdToCountMap = new HashMap<Integer, Integer>(); 1241 1242 //Go trough the file and count all feature ids in the given column(s) 1243 1244 try { 1245 final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins")); 1246 //every line will be written to a separate file 1247 String line = in.readLine(); 1248 final Pattern tabPattern = Pattern.compile("\t"); 1249 while(line!=null){ 1250 1251 //Find out which pot the line shall be put in 1252 String[] lineArray = tabPattern.split(line); 1253 1254 for(int n = 0; n < divideFeatureIndexVector.size(); n++){ 1255 int id = new Integer(lineArray[divideFeatureIndexVector.get(n)+1]); 1256 1257 1258 if (!featureIdToCountMap.containsKey(id)) { 1259 1260 featureIdToCountMap.put(id, 0); 1261 1262 } 1263 1264 int previousCount = featureIdToCountMap.get(id); 1265 1266 featureIdToCountMap.put(id, previousCount + 1); 1267 1268 } 1269 1270 line = in.readLine(); 1271 } 1272 1273 1274 1275 in.close(); 1276 1277 1278 } catch (SecurityException e) { 1279 throw new LiblinearException("The Libsvm learner cannot remove the instance file. ", e); 1280 } catch (NullPointerException e) { 1281 throw new LiblinearException("The instance file cannot be found. ", e); 1282 } catch (FileNotFoundException e) { 1283 throw new LiblinearException("The instance file cannot be found. ", e); 1284 } catch (IOException e) { 1285 throw new LiblinearException("The Liblinear learner read from the instance file. ", e); 1286 } 1287 1288 1289 1290 return featureIdToCountMap; 1291 } 1292 1293 1294 1295 }