001 package org.maltparser.core.syntaxgraph.headrules; 002 003 import java.io.BufferedReader; 004 import java.io.IOException; 005 import java.io.InputStreamReader; 006 import java.net.URL; 007 import java.util.HashMap; 008 009 import org.apache.log4j.Logger; 010 import org.maltparser.core.exception.MaltChainedException; 011 import org.maltparser.core.helper.Util; 012 import org.maltparser.core.io.dataformat.DataFormatInstance; 013 import org.maltparser.core.symbol.SymbolTable; 014 import org.maltparser.core.syntaxgraph.node.NonTerminalNode; 015 import org.maltparser.core.syntaxgraph.node.PhraseStructureNode; 016 017 /** 018 * 019 * 020 * @author Johan Hall 021 */ 022 public class HeadRules extends HashMap<String,HeadRule> { 023 public static final long serialVersionUID = 8045568022124826323L; 024 protected Logger logger; 025 protected String name; 026 protected DataFormatInstance dataFormatInstance; 027 protected SymbolTable nonTerminalSymbolTable; // TODO more complex 028 protected SymbolTable edgelabelSymbolTable; // TODO more complex 029 030 public HeadRules(Logger logger, DataFormatInstance dataFormatInstance) throws MaltChainedException { 031 setLogger(logger); 032 setDataFormatInstance(dataFormatInstance); 033 nonTerminalSymbolTable = dataFormatInstance.getSymbolTables().addSymbolTable("CAT"); 034 edgelabelSymbolTable = dataFormatInstance.getSymbolTables().addSymbolTable("LABEL"); 035 } 036 037 public void parseHeadRules(String fileName) throws MaltChainedException { 038 parseHeadRules(Util.findURL(fileName)); 039 } 040 041 public void parseHeadRules(URL url) throws MaltChainedException { 042 BufferedReader br = null; 043 try { 044 br = new BufferedReader(new InputStreamReader(url.openStream())); 045 } catch (IOException e) { 046 throw new HeadRuleException("Could not read the head rules from file '"+url.toString()+"'. ", e); 047 } 048 if (logger.isInfoEnabled()) { 049 logger.debug("Loading the head rule specification '"+url.toString()+"' ...\n"); 050 } 051 String fileLine; 052 while (true) { 053 try { 054 fileLine = br.readLine(); 055 } catch (IOException e) { 056 throw new HeadRuleException("Could not read the head rules from file '"+url.toString()+"'. ", e); 057 } 058 if (fileLine == null) { 059 break; 060 } 061 if (fileLine.length() <= 1 && fileLine.trim().substring(0, 2).trim().equals("--")) { 062 continue; 063 } 064 int index = fileLine.indexOf('\t'); 065 if (index == -1) { 066 throw new HeadRuleException("The specification of the head rule is not correct '"+fileLine+"'. "); 067 } 068 069 HeadRule rule = new HeadRule(this, fileLine); 070 put(fileLine.substring(0,index), rule); 071 } 072 } 073 074 public PhraseStructureNode getHeadChild(NonTerminalNode nt) throws MaltChainedException { 075 HeadRule rule = null; 076 if (nt.hasLabel(nonTerminalSymbolTable)) { 077 rule = this.get(nonTerminalSymbolTable.getName()+":"+nt.getLabelSymbol(nonTerminalSymbolTable)); 078 } 079 if (rule == null && nt.hasParentEdgeLabel(edgelabelSymbolTable)) { 080 rule = this.get(edgelabelSymbolTable.getName()+":"+nt.getParentEdgeLabelSymbol(edgelabelSymbolTable)); 081 } 082 083 if (rule != null) { 084 return rule.getHeadChild(nt); 085 } 086 return null; 087 } 088 089 public Direction getDefaultDirection(NonTerminalNode nt) throws MaltChainedException { 090 HeadRule rule = null; 091 if (nt.hasLabel(nonTerminalSymbolTable)) { 092 rule = this.get(nonTerminalSymbolTable.getName()+":"+nt.getLabelSymbol(nonTerminalSymbolTable)); 093 } 094 if (rule == null && nt.hasParentEdgeLabel(edgelabelSymbolTable)) { 095 rule = this.get(edgelabelSymbolTable.getName()+":"+nt.getParentEdgeLabelSymbol(edgelabelSymbolTable)); 096 } 097 098 if (rule != null) { 099 return rule.getDefaultDirection(); 100 } 101 return Direction.LEFT; 102 } 103 104 public Logger getLogger() { 105 return logger; 106 } 107 108 public void setLogger(Logger logger) { 109 this.logger = logger; 110 } 111 112 public DataFormatInstance getDataFormatInstance() { 113 return dataFormatInstance; 114 } 115 116 public void setDataFormatInstance(DataFormatInstance dataFormatInstance) { 117 this.dataFormatInstance = dataFormatInstance; 118 } 119 120 public String toString() { 121 final StringBuilder sb = new StringBuilder(); 122 for (HeadRule rule : this.values()) { 123 sb.append(rule); 124 sb.append('\n'); 125 } 126 return sb.toString(); 127 } 128 }