/*
 * Copyright (c) 2003 by SAP AG. All Rights Reserved.
 *
 * SAP, mySAP, mySAP.com and other SAP products and
 * services mentioned herein as well as their respective
 * logos are trademarks or registered trademarks of
 * SAP AG in Germany and in several other countries all
 * over the world. MarketSet and Enterprise Buyer are
 * jointly owned trademarks of SAP AG and Commerce One.
 * All other product and service names mentioned are
 * trademarks of their respective companies.
 *
 * @version $Id$
 */

package com.sapportals.wcm.util.regex;
import com.sap.tc.logging.Location;
import com.sapportals.wcm.util.logging.LoggingFormatter;

import com.sapportals.wcm.util.regex.re.RE;
import com.sapportals.wcm.util.regex.re.RESyntaxException;

/**
 * PathPatternMatcher matches single line strings against a given pattern. The
 * pattern syntax resembles the path pattern syntax used by the open source make
 * tool <cite>ant</cite> , at least to some degree. A pattern consists of 5
 * syntactical elements: <p>
 *
 *
 * <table border width="100%" cellpadding="5" cellspacing="1">
 *
 *   <tr>
 *
 *     <td>
 *       1. path separators '/' and '\'
 *       <td>
 *         either slash or backslash
 *         <tr>
 *
 *           <td>
 *             2. single character wildcard '?'
 *             <td>
 *               question mark; matches any single non-whitespace char except
 *               for path separators
 *               <tr>
 *
 *                 <td>
 *                   3. multiple character wildcard '*'
 *                   <td>
 *                     asterisk, matches sequences of non-whitespace characters
 *                     of any length greater or equal zero
 *                     <tr>
 *
 *                       <td>
 *                         4. multiple dir wildcard '**'
 *                         <td>
 *                           matches any sequences of groups of nodes and path
 *                           separators
 *                           <tr>
 *
 *                             <td>
 *                               5. node
 *                               <td>
 *                                 any sequence of non-whitespace characters not
 *                                 including separators
 *                               </table>
 *                               <p>
 *
 *                               In addition, a path separator at the end of a
 *                               string serves as a convenience shortcut so
 *                               that "string/" behaves like "string/**". <p>
 *
 *                               <strong>NOTE</strong> that PathPatternMatcher
 *                               rejects some strings that the open source
 *                               tool accepts. E.g., the <cite>ant</cite>
 *                               manual pages state that the following two pairs
 *                               of string and pattern do match. <!-- [The
 *                               spaces in the pattern are not supposed to be
 *                               there but if left out then the java compiler
 *                               takes the pattern as an end-of-comment symbol
 *                               and severely complains about what's following;
 *                               so please read as if the spaces were not there]
 *                               "CVS/Repository", "** /CVS/*"
 *                               "org/apache/CVS/Entries", "org/apache/**
 *                               /CVS/*" --> <p>
 *
 *
 *                               <table border width="100%" cellpadding="5"
 *                                 cellspacing="1">
 *
 *                                 <tr>
 *
 *                                   <th>
 *                                     String
 *                                     <th>
 *                                       Pattern
 *                                       <tr>
 *
 *                                         <td>
 *                                           "CVS/Repository"
 *                                           <td>
 *                                             "&#42;&#42;/CVS/*"
 *                                             <tr>
 *
 *                                               <td>
 *                                                 "org/apache/CVS/Entries"
 *                                                 <td>
 *                                                   "org/apache/&#42;&#42;/CVS/*"
 *
 *                                                 </table>
 *                                                 <p>
 *
 *                                                 PathPatternMatcher originally
 *                                                 rejected those examples; the
 *                                                 first one because the pattern
 *                                                 contains 2 terminal symbols of
 *                                                 type path separator while the
 *                                                 string contains only 1; the
 *                                                 second example for almost the
 *                                                 same reason: the pattern
 *                                                 contains 2 path separators
 *                                                 between the "apache" and the
 *                                                 "CVS" part of the pattern
 *                                                 while the string has only 1.
 *                                                 Since the change of 2003/01/10
 *                                                 a multiple dir wildcard that
 *                                                 is directly followed by a path
 *                                                 separator may also match
 *                                                 nothing, so both examples are
 *                                                 accepted now. <p>
 *
 *                                                 Copyright (c) SAP AG
 *                                                 2001-2002
 *
 * @author Helmut Cossmann
 * @version $Id: PathPatternMatcher.java,v 1.1 2002/07/22 13:48:31 sei Exp $
 */
public class PathPatternMatcher {
  private static final com.sap.tc.logging.Location log =
    com.sap.tc.logging.Location.getLocation(com.sapportals.wcm.util.regex.PathPatternMatcher.class);

  /** Regex for '?': exactly one character that is neither a tab nor '/'. */
  private final static String questionMarkReplacement = "[^\\t/]";

  /** Regex for '*': zero or more characters that are neither a tab nor '/'. */
  private final static String singleAsteriskReplacement = "([^\\t/])*";

  /**
   * Regex for a double asterisk that is not directly followed by '/': a
   * (possibly empty) node followed by any number of groups made of one '/' and
   * a (possibly empty) node.
   */
  private final static String doubleAsteriskReplacement =
    "([^\\t/])*([/]([^\\t/])*)*";

  /*
   * Regex for a double asterisk that is directly followed by '/'.
   *
   * 2003/01/10: requirements extended; so far, the grammar '/a/** /*.txt'
   * rejects '/a/b.txt' because the grammar requires three terminal
   * symbols '/' which are not present in '/a/b.txt'; nevertheless, the
   * usage of the grammar in e.g. ant DO accept '/a/b.txt' although
   * implicitly by the underlying filesystem behavior. The story reads
   * like this for LR top-down parsing: the non-terminal '**' is reduced
   * to lambda, leaving '/a//*.txt'; next, the non-terminal '*' is reduced
   * to the terminal 'b', ending the parsing process with '/a//b.txt'. This
   * string is handed over to the filesystem which on Unix and DOS similarly
   * extend '/a//b.txt' to '/a/./b.txt' which finds the file '/a/b.txt'.
   * Therefore, local matching semantics have been changed to behave the
   * same way.
   */
  private final static String doubleAsteriskSeparatorReplacement =
    "(([^\\t/])*[/])*";

  /**
   * Characters that have a special meaning inside a regular expression and
   * therefore must be escaped when they occur as literal pattern text. The
   * wildcards '*' and '?' are not listed because they are always translated.
   */
  private final static String regexMetaCharacters = "\\.[]{}()+|^$";

  /** The patterns exactly as handed to the constructor. */
  private final String[] originalPattern;
  /** The patterns with a trailing '/' expanded to its long form. */
  private final String[] localPattern;
  /** True if at least one pattern was translated into the regular expression. */
  private boolean hasWildcards;
  /** True if one of the (expanded) patterns is the catch-all for absolute paths. */
  private boolean matchesAnyAbsPath;
  /** Source text of the generated regular expression; null if none was needed. */
  private String rePattern;
  /** Compiled form of <code>rePattern</code>; null if none was needed. */
  private RE re;

  /** Per pattern: true if its only wildcard is the last element of the pattern. */
  private final boolean[] prefixMatchMode;
  /** True if every pattern is handled by the prefix fast path. */
  private boolean prefixOnlyMatchMode;
  /** Per pattern in prefix mode: length of the literal text before the wildcard. */
  private final int[] prefixLen;
  /** Per pattern in prefix mode: the literal text before the wildcard. */
  private final String[] prefix;

  /**
   * Creates a pattern matcher instance for <code>pattern</code> .
   *
   * @param pattern the path pattern; a trailing '/' is treated as if it were
   *      followed by a multiple dir wildcard
   * @exception PatternSyntaxException if the regular expression generated from
   *      the pattern cannot be compiled
   * @exception NullPointerException if <code>pattern</code> is null
   */
  public PathPatternMatcher(String pattern)
    throws PatternSyntaxException {
    if (pattern == null) {
      throw new NullPointerException("no pattern given");
    }
    if (log.beInfo()) {
      log.infoT("PathPatternMatcher(187)", "pattern: " + pattern);
    }
    this.originalPattern = new String[]{pattern};
    this.localPattern = new String[]{expandTrailingSeparator(pattern)};
    this.prefixMatchMode = new boolean[1];
    this.prefixLen = new int[1];
    this.prefix = new String[1];

    final String local = this.localPattern[0];
    this.matchesAnyAbsPath = local.equals("/**");
    final int wildcards = countWildcards(local);
    /*
     * a single wildcard at the very end of the pattern can be checked with
     * plain string operations; the catch-all pattern for absolute paths has
     * its own shortcut in matches() and is therefore excluded here
     */
    if (!this.matchesAnyAbsPath && (wildcards == 1)) {
      detectTrailingWildcard(0);
    }
    this.prefixOnlyMatchMode = this.prefixMatchMode[0];
    /*
     * without any wildcard the pattern accepts exact matches only, so
     * string equality is sufficient and no regexp has to be generated
     */
    if (!this.prefixMatchMode[0] && (wildcards > 0)) {
      this.hasWildcards = true;
      StringBuffer sb = new StringBuffer(local.length()
         + (wildcards * doubleAsteriskReplacement.length()) + 4);
      /*
       * anchor the expression so that the regexp engine itself has to find
       * a match that covers the complete needle
       */
      sb.append("^(");
      appendRegex(sb, local);
      this.rePattern = sb.append(")$").toString();
      this.re = compile(this.rePattern);
    }
  }


  /**
   * Creates a pattern matcher instance that works on an array of <code>pattern
   * </code>. A needle matches if it matches at least one of the patterns.
   *
   * @param pattern the path patterns; must contain at least one element and no
   *      null elements
   * @exception PatternSyntaxException if the regular expression generated from
   *      the patterns cannot be compiled
   * @exception NullPointerException if <code>pattern</code> or one of its
   *      elements is null
   * @exception IllegalArgumentException if <code>pattern</code> is empty
   */
  public PathPatternMatcher(String[] pattern)
    throws PatternSyntaxException {
    if (pattern == null) {
      throw new NullPointerException("no pattern given");
    }
    final int count = pattern.length;
    if (count == 0) {
      throw new IllegalArgumentException("length of pattern array is zero");
    }
    if (log.beInfo()) {
      StringBuffer b = new StringBuffer("pattern[" + count + "]: ");
      b.append(" [0]: " + pattern[0]);
      for (int n = 1; n < count; n++) {
        b.append(", [" + n + "]: " + pattern[n]);
      }
      log.infoT("PathPatternMatcher(329)", b.toString());
    }
    this.originalPattern = new String[count];
    this.localPattern = new String[count];
    this.prefixMatchMode = new boolean[count];
    this.prefixLen = new int[count];
    this.prefix = new String[count];

    StringBuffer sb = new StringBuffer(128 * (count + 1));
    int prefixMatchCount = 0;
    for (int i = 0; i < count; i++) {
      if (pattern[i] == null) {
        throw new NullPointerException("no pattern given");
      }
      this.originalPattern[i] = pattern[i];
      this.localPattern[i] = expandTrailingSeparator(pattern[i]);
      final String local = this.localPattern[i];
      this.matchesAnyAbsPath = this.matchesAnyAbsPath || local.equals("/**");

      final int wildcards = countWildcards(local);
      if (wildcards == 0) {
        /*
         * this pattern hits exact matches only; string equality is
         * sufficient, so it contributes nothing to the regexp
         */
        continue;
      }
      this.hasWildcards = true;
      /*
       * every pattern with wildcards becomes one alternative of a single,
       * anchored regexp
       */
      sb.append(sb.length() == 0 ? "^((" : "|(");
      appendRegex(sb, local);
      sb.append(')');
      if ((wildcards == 1) && detectTrailingWildcard(i)) {
        prefixMatchCount++;
      }
    }
    /*
     * only if wildcards have been found the string buffer does contain
     * a regular expression; slight improvement here: if all patterns
     * are prefix matching patterns then we don't need to generate an
     * RE object at all
     */
    this.prefixOnlyMatchMode = (prefixMatchCount == count);
    if ((sb.length() > 0) && !this.prefixOnlyMatchMode) {
      this.rePattern = sb.append(")$").toString();
      this.re = compile(this.rePattern);
    }
  }


  /**
   * Matches <code>needle</code> against the pattern(s) of this pattern matcher.
   * Calls into the shared regular expression object are synchronized on that
   * object.
   *
   * @param needle the string to test; null never matches
   * @return true if <code>needle</code> is equal to one of the original
   *      patterns or is completely matched by at least one of them
   */
  public boolean matches(String needle) {
    if (log.beInfo()) {
      log.infoT("matches(481)", "needle: " + needle + "; " + this);
    }
    if (needle == null) {
      return false;
    }
    final boolean singlePattern = (this.originalPattern.length == 1);
    for (int i = 0; i < this.originalPattern.length; i++) {
      if (this.originalPattern[i].equals(needle)) {
        return true;
      }
      if (this.prefixMatchMode[i]) {
        if (matchesTrailingWildcard(i, needle)) {
          return true;
        }
        if (singlePattern) {
          /* the one and only pattern has been decided without a regexp */
          return false;
        }
      }
    }
    if (!this.hasWildcards) {
      /*
       * if none of the patterns has any wildcards and
       * none has matched on equality then we're doomed
       */
      return false;
    }
    if (this.matchesAnyAbsPath && (needle.length() > 0)
       && (needle.charAt(0) == '/')) {
      return true;
    }
    if (this.prefixOnlyMatchMode) {
      /*
       * if all patterns are prefix pattern and if none has
       * matched then there is no chance left to match; bail
       */
      return false;
    }
    /*
     * the regexp is anchored at both ends; the additional check on start
     * and length of the overall match is kept as a safety net
     */
    try {
      synchronized (this.re) {
        return this.re.match(needle) && (this.re.getParenStart(0) == 0)
           && (this.re.getParenLength(0) == needle.length());
      }
    }
    catch (RuntimeException e) {
      log.errorT("matches(566)", "needle: " + needle + "; " + this + " - "
         + com.sapportals.wcm.util.logging.LoggingFormatter.extractCallstack(e));
      throw e;
    }
  }


  /**
   * Returns a printable representation of this pattern matcher.
   *
   * @return the number of patterns, the comma separated original patterns and
   *      the generated regular expression (the text "null" if there is none)
   */
  public String toString() {
    StringBuffer sb = new StringBuffer("input pattern[");
    sb.append(this.originalPattern.length).append("]: ");
    sb.append(this.originalPattern[0]);
    for (int i = 1; i < this.originalPattern.length; i++) {
      sb.append(',').append(this.originalPattern[i]);
    }
    return sb.append(", regexp: ").append(this.rePattern).toString();
  }


  /**
   * Turns the special semantics of a terminating path separator into its full
   * blown notation to make life easy for subsequent code.
   *
   * @param pattern the pattern as given by the caller
   * @return <code>pattern</code>, with a double asterisk appended if it ends
   *      with '/'
   */
  private static String expandTrailingSeparator(String pattern) {
    return pattern.endsWith("/") ? pattern + "**" : pattern;
  }


  /**
   * Counts the wildcard elements of <code>pattern</code>. A double asterisk,
   * with or without a directly following '/', counts as one element.
   *
   * @param pattern the expanded pattern
   * @return the number of wildcard elements
   */
  private static int countWildcards(String pattern) {
    final int end = pattern.length();
    int count = 0;
    int pos = 0;
    while (pos < end) {
      if (pattern.startsWith("**/", pos)) {
        pos += 3;
        count++;
      }
      else if (pattern.startsWith("**", pos)) {
        pos += 2;
        count++;
      }
      else {
        char c = pattern.charAt(pos++);
        if ((c == '*') || (c == '?')) {
          count++;
        }
      }
    }
    return count;
  }


  /**
   * Appends the regular expression equivalent of <code>pattern</code> to
   * <code>sb</code>. Wildcards are replaced by their regexp fragments; all
   * other characters are taken literally, i.e. regexp meta characters are
   * escaped with a backslash.
   *
   * @param sb the buffer to append to
   * @param pattern the expanded pattern
   */
  private static void appendRegex(StringBuffer sb, String pattern) {
    final int end = pattern.length();
    int pos = 0;
    while (pos < end) {
      if (pattern.startsWith("**/", pos)) {
        pos += 3;
        sb.append(doubleAsteriskSeparatorReplacement);
      }
      else if (pattern.startsWith("**", pos)) {
        pos += 2;
        sb.append(doubleAsteriskReplacement);
      }
      else {
        char c = pattern.charAt(pos++);
        if (c == '*') {
          sb.append(singleAsteriskReplacement);
        }
        else if (c == '?') {
          sb.append(questionMarkReplacement);
        }
        else {
          if (regexMetaCharacters.indexOf(c) >= 0) {
            sb.append('\\');
          }
          sb.append(c);
        }
      }
    }
  }


  /**
   * Compiles <code>regex</code>.
   *
   * @param regex the regular expression source
   * @return the compiled regular expression
   * @exception PatternSyntaxException if <code>regex</code> is rejected by the
   *      regexp compiler
   */
  private static RE compile(String regex)
    throws PatternSyntaxException {
    try {
      return new RE(regex, RE.MATCH_SINGLELINE);
    }
    catch (RESyntaxException e) {
      //$JL-EXC$
      throw new PatternSyntaxException(
        "failed to generate regex for \"" + regex + "\": " + e.getMessage());
    }
  }


  /**
   * Checks whether the pattern at <code>index</code> ends with a wildcard and,
   * if so, records the literal text in front of it and switches the pattern to
   * prefix match mode. Must only be called for patterns that contain exactly
   * one wildcard element.
   *
   * @param index index of the pattern
   * @return true if the pattern has been switched to prefix match mode
   */
  private boolean detectTrailingWildcard(int index) {
    final String local = this.localPattern[index];
    int wildcardLen;
    if (local.endsWith("**")) {
      wildcardLen = 2;
    }
    else if (local.endsWith("*") || local.endsWith("?")) {
      wildcardLen = 1;
    }
    else {
      return false;
    }
    final int literalLen = local.length() - wildcardLen;
    this.prefixLen[index] = literalLen;
    this.prefix[index] = local.substring(0, literalLen);
    this.prefixMatchMode[index] = true;
    return true;
  }


  /**
   * Matches <code>needle</code> against the pattern at <code>index</code>,
   * which must be in prefix match mode, using string operations only.
   *
   * @param index index of the pattern
   * @param needle the string to test; must not be null
   * @return true if <code>needle</code> starts with the literal prefix and the
   *      remainder is accepted by the trailing wildcard
   */
  private boolean matchesTrailingWildcard(int index, String needle) {
    if (!needle.startsWith(this.prefix[index])) {
      return false;
    }
    final String local = this.localPattern[index];
    final int literalLen = this.prefixLen[index];
    if ((local.length() - literalLen) == 2) {
      /* double asterisk: anything may follow the prefix */
      return true;
    }
    if (local.charAt(local.length() - 1) == '*') {
      /*
       * single asterisk: the remainder must stay within the current node.
       * NOTE: unlike the regexp fragments, this check and the one above do
       * not reject a tab in the remainder; kept as is for compatibility.
       */
      return needle.indexOf('/', literalLen) == -1;
    }
    /*
     * at this point we know that the prefixes match and that the wildcard
     * is ?; so exactly one more character must follow, and like in the
     * regexp fragment for ? it must be neither a path separator nor a tab
     */
    if (needle.length() != local.length()) {
      return false;
    }
    final char last = needle.charAt(literalLen);
    return (last != '/') && (last != '\t');
  }
}
