package org.maltparser.core.syntaxgraph.ds2ps;


import java.util.SortedMap;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.helper.SystemLogger;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.syntaxgraph.MappablePhraseStructureGraph;
import org.maltparser.core.syntaxgraph.edge.Edge;
import org.maltparser.core.syntaxgraph.headrules.HeadRules;
import org.maltparser.core.syntaxgraph.node.DependencyNode;
import org.maltparser.core.syntaxgraph.node.NonTerminalNode;
import org.maltparser.core.syntaxgraph.node.PhraseStructureNode;

/**
 * Maps between a phrase structure and a dependency structure held in the same
 * {@link MappablePhraseStructureGraph}.
 *
 * <p>In the phrase-structure-to-dependency direction every dependency edge is
 * given four labels (DEPREL, HEADREL, PHRASE and ATTACH) that encode the spine
 * of non-terminals above the dependent and the level at which that spine is
 * attached to the head's spine. In the opposite direction those four labels are
 * decoded to rebuild the non-terminal nodes and phrase structure edges.
 *
 * <p>The class keeps mutable scratch state (string buffers, a re-entrancy flag
 * and a non-terminal counter) in instance fields and performs no
 * synchronization.
 *
 * @author Johan Hall
 */
public class LosslessMapping implements Dependency2PhraseStructure {
    /** Name of the symbol table holding the encoded edge label of the spine's top node. */
    private static final String DEPREL = "DEPREL";
    /** Name of the symbol table holding the encoded node labels of the spine. */
    private static final String PHRASE = "PHRASE";
    /** Name of the symbol table holding the encoded edge labels inside the spine. */
    private static final String HEADREL = "HEADREL";
    /** Name of the symbol table holding the attachment level. */
    private static final String ATTACH = "ATTACH";
    /** Name of the symbol table used for labels of restored non-terminal nodes. */
    private static final String CAT = "CAT";
    /** Marks a dependent that has no spine of its own. */
    private static final char EMPTY_SPINE = '*';
    /** Written where a node or edge carries no label in the given symbol table. */
    private static final String EMPTY_LABEL = "??";
    /** Separates the levels of a spine inside a PHRASE or HEADREL label. */
    private static final char SPINE_ELEMENT_SEPARATOR = '|';
    /** Separates the values of several symbol tables inside one spine level. */
    private static final char LABEL_ELEMENT_SEPARATOR = '~';
    private static final char QUESTIONMARK = '?';

    /**
     * Name of the phrase structure edge label column. Stays {@code null} unless
     * the phrase structure data format declares exactly one such column.
     */
    private String edgeLabelName;
    private String optionString;
    private HeadRules headRules;
    private DataFormatInstance dependencyDataFormatInstance;
    private DataFormatInstance phraseStructuretDataFormatInstance;
    /** True while this mapper itself modifies the graph, so that {@link #update} ignores the resulting callbacks. */
    private boolean lockUpdate = false;
    /** Running index handed to {@code addNonTerminalNode} for restored non-terminals. */
    private int nonTerminalCounter;
    private final StringBuilder deprel = new StringBuilder();
    private final StringBuilder headrel = new StringBuilder();
    private final StringBuilder phrase = new StringBuilder();

    /**
     * Creates a mapper for the given pair of data formats.
     *
     * @param dependencyDataFormatInstance the data format of the dependency structure
     * @param phraseStructuretDataFormatInstance the data format of the phrase structure
     */
    public LosslessMapping(DataFormatInstance dependencyDataFormatInstance, DataFormatInstance phraseStructuretDataFormatInstance) {
        setDependencyDataFormatInstance(dependencyDataFormatInstance);
        setPhraseStructuretDataFormatInstance(phraseStructuretDataFormatInstance);

        if (phraseStructuretDataFormatInstance.getPhraseStructureEdgeLabelColumnDescriptionSet().size() == 1) {
            for (ColumnDescription column : phraseStructuretDataFormatInstance.getPhraseStructureEdgeLabelColumnDescriptionSet()) {
                edgeLabelName = column.getName();
            }
        }

        clear();
    }

    /** Resets the counter used to number restored non-terminal nodes. */
    public void clear() {
        nonTerminalCounter = 0;
    }

    public String getOptionString() {
        return optionString;
    }

    public void setOptionString(String optionString) {
        this.optionString = optionString;
    }

    public DataFormatInstance getDependencyDataFormatInstance() {
        return dependencyDataFormatInstance;
    }

    public void setDependencyDataFormatInstance(
            DataFormatInstance dependencyDataFormatInstance) {
        this.dependencyDataFormatInstance = dependencyDataFormatInstance;
    }

    public DataFormatInstance getPhraseStructuretDataFormatInstance() {
        return phraseStructuretDataFormatInstance;
    }

    public void setPhraseStructuretDataFormatInstance(
            DataFormatInstance phraseStructuretDataFormatInstance) {
        this.phraseStructuretDataFormatInstance = phraseStructuretDataFormatInstance;
    }

    /**
     * Reacts to a changed edge. Only dependency edges between two dependency
     * nodes that carry exactly four labels trigger a phrase structure update;
     * everything else, and every call made while this mapper is itself
     * modifying the graph, is ignored.
     *
     * @param graph the graph that owns the edge
     * @param e the edge that changed
     * @param arg not used by this implementation
     * @throws MaltChainedException if the phrase structure update fails
     */
    public void update(MappablePhraseStructureGraph graph, Edge e, Object arg) throws MaltChainedException {
        if (lockUpdate) {
            return;
        }
        if (e.getType() == Edge.DEPENDENCY_EDGE
                && e.getSource() instanceof DependencyNode
                && e.getTarget() instanceof DependencyNode
                && e.isLabeled()
                && e.getLabelSet().size() == 4) {
            updatePhraseStructureGraph(graph, e, false);
        }
    }

    /**
     * Derives the dependency edges and their four encoding labels from the
     * phrase structure below {@code top}.
     *
     * @param graph the graph to update
     * @param top the phrase structure node to start from
     * @throws MaltChainedException if the graph cannot be updated
     */
    public void updateDependenyGraph(MappablePhraseStructureGraph graph, PhraseStructureNode top) throws MaltChainedException {
        if (graph.nTokenNode() == 1 && graph.nNonTerminals() == 0) {
            // Special case: the root directly dominates a single terminal node.
            Edge e = graph.addDependencyEdge(graph.getDependencyRoot(), graph.getDependencyNode(1));
            addDefaultRootLabel(graph, e, DEPREL);
            addDefaultRootLabel(graph, e, HEADREL);
            e.addLabel(graph.getSymbolTables().getSymbolTable(PHRASE), "*");
            addDefaultRootLabel(graph, e, ATTACH);
        } else {
            updateDependencyEdges(graph, top);
            updateDependenyLabels(graph);
        }
    }

    /** Labels {@code e} in the named symbol table with the graph's default root edge label for that table. */
    private void addDefaultRootLabel(MappablePhraseStructureGraph graph, Edge e, String tableName) throws MaltChainedException {
        SymbolTable table = graph.getSymbolTables().getSymbolTable(tableName);
        e.addLabel(table, graph.getDefaultRootEdgeLabelSymbol(table));
    }

    /**
     * Walks the phrase structure below {@code top} and makes every terminal a
     * dependent of the lexical head of its parent; afterwards the lexical head
     * of {@code top} is made a dependent of the lexical head of top's parent.
     */
    private void updateDependencyEdges(MappablePhraseStructureGraph graph, PhraseStructureNode top) throws MaltChainedException {
        if (top == null) {
            return;
        }
        DependencyNode head = null;
        DependencyNode dependent = null;
        if (top instanceof NonTerminalNode) {
            for (PhraseStructureNode node : ((NonTerminalNode)top).getChildren()) {
                if (node instanceof NonTerminalNode) {
                    updateDependencyEdges(graph, node);
                } else {
                    head = ((NonTerminalNode)top).getLexicalHead(headRules);
                    dependent = (DependencyNode)node;
                    attachDependent(graph, head, dependent);
                }
            }
        }

        head = null;
        if (top.getParent() != null) {
            head = ((NonTerminalNode)top.getParent()).getLexicalHead(headRules);
        } else if (top.isRoot()) {
            head = (DependencyNode)top;
        }

        if (top instanceof NonTerminalNode) {
            dependent = ((NonTerminalNode)top).getLexicalHead(headRules);
        } else if (!top.isRoot()) {
            dependent = (DependencyNode)top;
        }
        attachDependent(graph, head, dependent);
    }

    /**
     * Adds or moves the dependency edge so that {@code dependent} hangs under
     * {@code head}. Does nothing when either node is missing or both are the
     * same node.
     */
    private void attachDependent(MappablePhraseStructureGraph graph, DependencyNode head, DependencyNode dependent) throws MaltChainedException {
        if (head == null || dependent == null || head == dependent) {
            return;
        }
        lockUpdate = true;
        try {
            if (!dependent.hasHead()) {
                graph.addDependencyEdge(head, dependent);
            } else if (head != dependent.getHead()) {
                graph.moveDependencyEdge(head, dependent);
            }
        } finally {
            // Always release the flag; otherwise a failure here would make update() ignore all later edges.
            lockUpdate = false;
        }
    }

    /**
     * For every token, climbs to the highest non-terminal whose lexical head is
     * that token and labels the token's head edge with the encoding of that spine.
     */
    private void updateDependenyLabels(MappablePhraseStructureGraph graph) throws MaltChainedException {
        for (int index : graph.getTokenIndices()) {
            PhraseStructureNode top = (PhraseStructureNode)graph.getTokenNode(index);

            while (top != null && top.getParent() != null
                    && graph.getTokenNode(index) == ((NonTerminalNode)top.getParent()).getLexicalHead(headRules)) {
                top = top.getParent();
            }
            lockUpdate = true;
            try {
                labelDependencyEdge(graph, graph.getTokenNode(index).getHeadEdge(), top);
            } finally {
                lockUpdate = false;
            }
        }
    }

    /**
     * Replaces the DEPREL, HEADREL, PHRASE and ATTACH labels of {@code e} with
     * the encoding of the spine that runs from the edge's target up to {@code top}.
     *
     * @param graph the graph that owns the edge
     * @param e the dependency edge to label; nothing happens when it is {@code null}
     * @param top the topmost node of the dependent's spine
     */
    private void labelDependencyEdge(MappablePhraseStructureGraph graph, Edge e, PhraseStructureNode top) throws MaltChainedException {
        if (e == null) {
            return;
        }
        SymbolTableHandler symbolTables = graph.getSymbolTables();
        deprel.setLength(0);
        phrase.setLength(0);
        headrel.setLength(0);

        e.removeLabel(symbolTables.getSymbolTable(DEPREL));
        e.removeLabel(symbolTables.getSymbolTable(HEADREL));
        e.removeLabel(symbolTables.getSymbolTable(PHRASE));
        e.removeLabel(symbolTables.getSymbolTable(ATTACH));

        SortedMap<String, SymbolTable> edgeLabelSymbolTables = phraseStructuretDataFormatInstance.getPhraseStructureEdgeLabelSymbolTables();
        SortedMap<String, SymbolTable> nodeLabelSymbolTables = phraseStructuretDataFormatInstance.getPhraseStructureNodeLabelSymbolTables();

        // DEPREL: the parent edge labels of the spine's top node, or the default root label.
        if (!top.isRoot()) {
            appendParentEdgeLabels(deprel, top, symbolTables, edgeLabelSymbolTables);
            if (deprel.length() != 0) {
                e.addLabel(symbolTables.getSymbolTable(DEPREL), deprel.toString());
            }
        } else {
            String deprelDefaultRootLabel = graph.getDefaultRootEdgeLabelSymbol(symbolTables.getSymbolTable(DEPREL));
            if (deprelDefaultRootLabel != null) {
                e.addLabel(symbolTables.getSymbolTable(DEPREL), deprelDefaultRootLabel);
            } else {
                e.addLabel(symbolTables.getSymbolTable(DEPREL), EMPTY_LABEL);
            }
        }

        // HEADREL / PHRASE: one element per level between the target and the top of its spine.
        PhraseStructureNode tmp = (PhraseStructureNode)e.getTarget();
        while (tmp != top && tmp.getParent() != null) {
            appendParentEdgeLabels(headrel, tmp, symbolTables, edgeLabelSymbolTables);
            headrel.append(SPINE_ELEMENT_SEPARATOR);

            int i = 0;
            for (String name : nodeLabelSymbolTables.keySet()) {
                SymbolTable table = symbolTables.getSymbolTable(name);
                if (tmp.getParent().hasLabel(table)) {
                    phrase.append(tmp.getParent().getLabelSymbol(table));
                } else if (tmp.getParent().isRoot()) {
                    String phraseDefaultRootLabel = graph.getDefaultRootEdgeLabelSymbol(symbolTables.getSymbolTable(PHRASE));
                    if (phraseDefaultRootLabel != null) {
                        phrase.append(phraseDefaultRootLabel);
                    } else {
                        phrase.append(EMPTY_LABEL);
                    }
                } else {
                    phrase.append(EMPTY_LABEL);
                }
                i++;
                if (i < nodeLabelSymbolTables.size()) {
                    phrase.append(LABEL_ELEMENT_SEPARATOR);
                }
            }
            phrase.append(SPINE_ELEMENT_SEPARATOR);
            tmp = tmp.getParent();
        }
        if (phrase.length() == 0) {
            headrel.append(EMPTY_SPINE);
            phrase.append(EMPTY_SPINE);
        } else {
            // Drop the trailing spine separator written by the last loop iteration.
            headrel.setLength(headrel.length() - 1);
            phrase.setLength(phrase.length() - 1);
        }
        e.addLabel(symbolTables.getSymbolTable(HEADREL), headrel.toString());
        e.addLabel(symbolTables.getSymbolTable(PHRASE), phrase.toString());

        // ATTACH: number of steps from the head token up to the node that shares top's parent.
        int a = 0;
        tmp = (PhraseStructureNode)e.getSource();
        while (top.getParent() != null && tmp.getParent() != null && tmp.getParent() != top.getParent()) {
            a++;
            tmp = tmp.getParent();
        }
        e.addLabel(symbolTables.getSymbolTable(ATTACH), Integer.toString(a));
    }

    /**
     * Appends the parent edge label of {@code node} for every phrase structure
     * edge label symbol table, separated by the label element separator; a
     * missing label is written as the empty label.
     */
    private void appendParentEdgeLabels(StringBuilder out, PhraseStructureNode node, SymbolTableHandler symbolTables,
            SortedMap<String, SymbolTable> edgeLabelSymbolTables) throws MaltChainedException {
        int i = 0;
        for (String name : edgeLabelSymbolTables.keySet()) {
            SymbolTable table = symbolTables.getSymbolTable(name);
            if (node.hasParentEdgeLabel(table)) {
                out.append(node.getParentEdgeLabelSymbol(table));
            } else {
                out.append(EMPTY_LABEL);
            }
            i++;
            if (i < edgeLabelSymbolTables.size()) {
                out.append(LABEL_ELEMENT_SEPARATOR);
            }
        }
    }

    /**
     * Attaches every spine that is not yet connected to the phrase structure
     * root, and labels the root with the default root label when it has no label.
     *
     * @param graph the graph to complete
     * @throws MaltChainedException if the graph cannot be updated
     */
    public void connectUnattachedSpines(MappablePhraseStructureGraph graph) throws MaltChainedException {
        connectUnattachedSpines(graph, graph.getDependencyRoot());

        if (!graph.getPhraseStructureRoot().isLabeled()) {
            graph.getPhraseStructureRoot().addLabel(graph.getSymbolTables().addSymbolTable(CAT),
                    graph.getDefaultRootEdgeLabelSymbol(graph.getSymbolTables().getSymbolTable(PHRASE)));
        }
    }

    /** Depth-first helper: handles {@code depNode}, then its left dependents in order and its right dependents in reverse order. */
    private void connectUnattachedSpines(MappablePhraseStructureGraph graph, DependencyNode depNode) throws MaltChainedException {
        if (!depNode.isRoot()) {
            PhraseStructureNode dependentSpine = (PhraseStructureNode)depNode;
            while (dependentSpine.getParent() != null) {
                dependentSpine = dependentSpine.getParent();
            }
            if (!dependentSpine.isRoot()) {
                updatePhraseStructureGraph(graph, depNode.getHeadEdge(), true);
            }
        }
        for (int i = 0; i < depNode.getLeftDependentCount(); i++) {
            connectUnattachedSpines(graph, depNode.getLeftDependent(i));
        }
        for (int i = depNode.getRightDependentCount() - 1; i >= 0; i--) {
            connectUnattachedSpines(graph, depNode.getRightDependent(i));
        }
    }

    /**
     * Rebuilds the part of the phrase structure that is encoded in the labels
     * of {@code depEdge}: restores the dependent's spine if the dependent has
     * no parent yet, then attaches that spine to the head's spine at the level
     * given by the ATTACH label.
     *
     * @param graph the graph to update
     * @param depEdge the labeled dependency edge to decode
     * @param attachHeadSpineToRoot whether to fall back to the phrase structure
     *        root when the attachment level cannot be reached or would attach
     *        the spine to itself
     * @throws MaltChainedException if the ATTACH label is not an integer or the graph cannot be updated
     */
    public void updatePhraseStructureGraph(MappablePhraseStructureGraph graph, Edge depEdge, boolean attachHeadSpineToRoot) throws MaltChainedException {
        final SymbolTableHandler symbolTables = graph.getSymbolTables();
        PhraseStructureNode dependentSpine = (PhraseStructureNode)depEdge.getTarget();

        if (dependentSpine.getParent() == null) {
            // Restore dependent spine
            String phraseSpineLabel = null;
            String edgeSpineLabel = null;
            int emptyLabel = 0;

            if (depEdge.hasLabel(symbolTables.getSymbolTable(PHRASE))) {
                phraseSpineLabel = depEdge.getLabelSymbol(symbolTables.getSymbolTable(PHRASE));
            }
            if (depEdge.hasLabel(symbolTables.getSymbolTable(HEADREL))) {
                edgeSpineLabel = depEdge.getLabelSymbol(symbolTables.getSymbolTable(HEADREL));
            }
            if (phraseSpineLabel != null && phraseSpineLabel.length() > 0 && phraseSpineLabel.charAt(0) != EMPTY_SPINE) {
                int ps = 0, es = 0, i = 0, j = 0;
                final int n = phraseSpineLabel.length() - 1;
                // The HEADREL label may be absent; treat it as an empty string instead of dereferencing null.
                final int m = (edgeSpineLabel == null) ? -1 : edgeSpineLabel.length() - 1;
                PhraseStructureNode child = (PhraseStructureNode)depEdge.getTarget();
                while (true) {
                    // Scan one spine element of the PHRASE label.
                    while (i <= n && phraseSpineLabel.charAt(i) != SPINE_ELEMENT_SEPARATOR) {
                        if (phraseSpineLabel.charAt(i) == QUESTIONMARK) {
                            emptyLabel++;
                        } else {
                            emptyLabel = 0;
                        }
                        i++;
                    }
                    if (depEdge.getSource().isRoot() && i >= n) {
                        dependentSpine = graph.getPhraseStructureRoot();
                    } else {
                        dependentSpine = graph.addNonTerminalNode(++nonTerminalCounter);
                    }

                    if (emptyLabel != 2 && ps != i) {
                        dependentSpine.addLabel(symbolTables.addSymbolTable(CAT), phraseSpineLabel.substring(ps, i));
                    }

                    // Scan the matching spine element of the HEADREL label.
                    emptyLabel = 0;
                    if (edgeSpineLabel != null) {
                        while (j <= m && edgeSpineLabel.charAt(j) != SPINE_ELEMENT_SEPARATOR) {
                            if (edgeSpineLabel.charAt(j) == QUESTIONMARK) {
                                emptyLabel++;
                            } else {
                                emptyLabel = 0;
                            }
                            j++;
                        }
                    }
                    lockUpdate = true;
                    try {
                        Edge e = graph.addPhraseStructureEdge(dependentSpine, child);
                        if (e != null) {
                            if (emptyLabel != 2 && es != j && edgeSpineLabel != null) {
                                e.addLabel(symbolTables.addSymbolTable(edgeLabelName), edgeSpineLabel.substring(es, j));
                            } else if (es == j) {
                                e.addLabel(symbolTables.addSymbolTable(edgeLabelName), EMPTY_LABEL);
                            }
                        }
                    } finally {
                        lockUpdate = false;
                    }
                    child = dependentSpine;
                    if (i >= n) {
                        break;
                    }
                    emptyLabel = 0;
                    ps = i = i + 1;
                    es = j = j + 1;
                }
            }

            // Recursively attach the dependent spines to target node.
            attachDependentSpines(graph, (DependencyNode)depEdge.getTarget(), attachHeadSpineToRoot);
        } else {
            // If dependent spine already exist, then set dependentSpine to the highest nonterminal
            // of the dependent spine.
            while (dependentSpine.getParent() != null && !dependentSpine.getParent().isRoot()) {
                dependentSpine = dependentSpine.getParent();
            }
        }

        PhraseStructureNode headSpine = null;
        if (((PhraseStructureNode)depEdge.getSource()).getParent() != null) {
            // If head spine exist, then attach dependent spine to the head spine at the attachment level a.
            int a = 0;
            headSpine = ((PhraseStructureNode)depEdge.getSource()).getParent();
            if (depEdge.hasLabel(symbolTables.getSymbolTable(ATTACH))) {
                try {
                    a = Integer.parseInt(depEdge.getLabelSymbol(symbolTables.getSymbolTable(ATTACH)));
                } catch (NumberFormatException e) {
                    // Keep the original exception as the cause so the malformed value can be traced.
                    throw new MaltChainedException(e.getMessage(), e);
                }
            }
            for (int i = 0; i < a && headSpine != null; i++) {
                headSpine = headSpine.getParent();
            }

            if ((headSpine == null || headSpine == dependentSpine) && attachHeadSpineToRoot) {
                headSpine = graph.getPhraseStructureRoot();
            }
            if (headSpine != null) {
                lockUpdate = true;
                try {
                    Edge e = graph.addPhraseStructureEdge(headSpine, dependentSpine);
                    if (e != null
                            && depEdge.hasLabel(symbolTables.getSymbolTable(DEPREL))
                            && !depEdge.getLabelSymbol(symbolTables.getSymbolTable(DEPREL)).equals(EMPTY_LABEL)) {
                        e.addLabel(symbolTables.addSymbolTable(edgeLabelName), depEdge.getLabelSymbol(symbolTables.getSymbolTable(DEPREL)));
                    }
                } finally {
                    lockUpdate = false;
                }
            }
        } else if (depEdge.getSource().isRoot() && !depEdge.isLabeled()) {
            headSpine = graph.getPhraseStructureRoot();
            lockUpdate = true;
            try {
                Edge e = graph.addPhraseStructureEdge(headSpine, dependentSpine);
                if (e != null) {
                    if (depEdge.hasLabel(symbolTables.getSymbolTable(DEPREL))
                            && !depEdge.getLabelSymbol(symbolTables.getSymbolTable(DEPREL)).equals(EMPTY_LABEL)) {
                        e.addLabel(symbolTables.addSymbolTable(edgeLabelName), depEdge.getLabelSymbol(symbolTables.getSymbolTable(DEPREL)));
                    } else {
                        e.addLabel(symbolTables.addSymbolTable(edgeLabelName), graph.getDefaultRootEdgeLabelSymbol(symbolTables.getSymbolTable(DEPREL)));
                    }
                }
            } finally {
                lockUpdate = false;
            }
            // Recursively attach the dependent spines to target node.
            attachDependentSpines(graph, (DependencyNode)depEdge.getTarget(), attachHeadSpineToRoot);
        }
    }

    /**
     * Runs {@link #updatePhraseStructureGraph} on the head edge of every
     * dependent of {@code target}: left dependents in index order, then right
     * dependents in reverse index order.
     */
    private void attachDependentSpines(MappablePhraseStructureGraph graph, DependencyNode target, boolean attachHeadSpineToRoot) throws MaltChainedException {
        for (int i = 0; i < target.getLeftDependentCount(); i++) {
            updatePhraseStructureGraph(graph, target.getLeftDependent(i).getHeadEdge(), attachHeadSpineToRoot);
        }
        for (int i = target.getRightDependentCount() - 1; i >= 0; i--) {
            updatePhraseStructureGraph(graph, target.getRightDependent(i).getHeadEdge(), attachHeadSpineToRoot);
        }
    }

    public HeadRules getHeadRules() {
        return headRules;
    }

    public void setHeadRules(HeadRules headRules) {
        this.headRules = headRules;
    }

    /**
     * Loads head rules from the given location. The current head rules are left
     * untouched when the argument is {@code null}, empty or {@code "*"}.
     *
     * @param headRulesURL location of the head rules specification
     * @throws MaltChainedException if the head rules cannot be parsed
     */
    public void setHeadRules(String headRulesURL) throws MaltChainedException {
        if (headRulesURL != null && headRulesURL.length() > 0 && !headRulesURL.equals("*")) {
            headRules = new HeadRules(SystemLogger.logger(), phraseStructuretDataFormatInstance);
            headRules.parseHeadRules(headRulesURL);
        }
    }
}