/*
 * Copyright (c) 2003 by SAP AG. All Rights Reserved.
 *
 * SAP, mySAP, mySAP.com and other SAP products and
 * services mentioned herein as well as their respective
 * logos are trademarks or registered trademarks of
 * SAP AG in Germany and in several other countries all
 * over the world. MarketSet and Enterprise Buyer are
 * jointly owned trademarks of SAP AG and Commerce One.
 * All other product and service names mentioned are
 * trademarks of their respective companies.
 *
 * @version $Id$
 */

package com.sapportals.wcm.util.html;

import com.sapportals.wcm.util.http.ContentType;

import java.io.*;

/**
 * A {@link java.io.InputStream} on top of a {@link IHTMLReader}. <p>
 *
 * Allows a reader to be treated as an InputStream. The reader can be a {@link
 * IHTMLFilter}, allowing an HTML document to be processed in a streaming manner.
 * <p>
 *
 * Characters from the reader are converted to bytes either by using the
 * encoding of the reader (e.g. the encoding of the original document) or by
 * specifying an encoding explicitly. <p>
 *
 * Copyright (c) SAP AG 2001-2002
 *
 * @author stefan.eissing@greenbytes.de
 * @version $Id: HTMLInputStream.java,v 1.5 2002/06/18 11:52:15 sei Exp $
 */
public class HTMLInputStream extends InputStream {

  /** Source of the HTML events; its content handler is set in {@link #init()}. */
  private final IHTMLReader m_reader;
  /** Encoding used to turn characters into bytes; <code>null</code> selects the system default. */
  private final String m_encoding;

  /** Collects the encoded bytes that have not yet been handed out to the caller. */
  private ByteBufferOutputStream m_bos;
  /** Buffered, encoding writer on top of {@link #m_bos}; target of the content handler. */
  private Writer m_writer;
  /** Set once this stream has reported end of stream or has been closed. */
  private boolean m_eof;
  /**
   * Set once <code>parseNextEvent()</code> returned <code>false</code> or
   * failed, or once the reader has been discarded by {@link #close()}.
   */
  private boolean m_inputExhausted;

  /**
   * An InputStream with reader as HTML source. The characters are encoded using
   * the encoding reported by <code>reader.getEncoding()</code>.
   *
   * @param reader to read the HTML document from
   * @exception UnsupportedEncodingException if the reader's encoding is not
   *      supported by the platform
   * @exception HTMLException Exception raised in failure situation
   * @exception IOException Exception raised in failure situation
   */
  public HTMLInputStream(IHTMLReader reader)
    throws UnsupportedEncodingException, HTMLException, IOException {
    m_reader = reader;
    m_encoding = reader.getEncoding();
    init();
  }

  /**
   * An InputStream with reader as HTML source. The characters are encoded in the
   * given encoding after it has been passed through
   * <code>ContentType.mapEncoding</code>. If the resulting encoding is
   * <code>null</code>, the default system encoding is used.
   *
   * @param reader to read the HTML document from
   * @param encoding to use (null for default system encoding)
   * @exception UnsupportedEncodingException if the encoding is not supported
   *      by the platform
   */
  public HTMLInputStream(IHTMLReader reader, String encoding)
    throws UnsupportedEncodingException {
    m_reader = reader;
    m_encoding = ContentType.mapEncoding(encoding);
    init();
  }

  /**
   * Returns the character encoding used by this input stream. When encoding is
   * <code>null</code> , the default system encoding is used.
   *
   * @return encoding used by this input stream for the HTML characters.
   */
  public String getEncoding() {
    return m_encoding;
  }

  // ----------------- java.io.InputStream --------------------------

  /**
   * Closes this stream. Subsequent reads report end of stream. If the reader
   * has not been read to its end, it is discarded. Calling this method more
   * than once has no further effect on the reader.
   *
   * @exception IOException Exception raised in failure situation
   */
  public void close()
    throws IOException {
    m_bos.close();
    m_eof = true;
    if (!m_inputExhausted) {
      // Mark the input as finished before discarding, so that a repeated
      // close() does not discard the reader a second time.
      m_inputExhausted = true;
      m_reader.discard();
    }
  }

  /**
   * Reads a single byte.
   *
   * @return the byte as a value in the range 0..255, or -1 at end of stream
   * @exception IOException if parsing the next HTML event failed
   */
  public int read()
    throws IOException {
    if (m_eof) {
      return -1;
    }

    byte[] single = new byte[1];
    int count;
    do {
      count = read(single, 0, 1);
    } while (count == 0);

    if (count == -1) {
      return -1;
    }
    // mask to avoid sign extension of bytes >= 0x80
    return single[0] & 0xff;
  }

  /**
   * Reads up to <code>len</code> bytes into <code>buffer</code>. Events are
   * parsed from the reader until at least <code>len</code> bytes are buffered
   * or the reader has no more events.
   *
   * @param buffer to copy bytes into
   * @param offset where in the buffer to copy bytes to
   * @param len maximum amount of bytes to copy
   * @return number of bytes copied, 0 if <code>len</code> is 0, or -1 if end
   *      of stream is reached
   * @exception IOException if parsing the next HTML event failed; the
   *      original {@link HTMLException} is attached as cause
   * @exception NullPointerException if <code>buffer</code> is null
   * @exception IndexOutOfBoundsException if <code>offset</code> or
   *      <code>len</code> is negative or exceeds the buffer
   */
  public int read(byte[] buffer, int offset, int len)
    throws IOException {
    if (m_eof) {
      return -1;
    }
    if (buffer == null) {
      throw new NullPointerException("buffer");
    }
    if (offset < 0 || len < 0 || len > buffer.length - offset) {
      throw new IndexOutOfBoundsException(
        "offset=" + offset + ", len=" + len + ", buffer.length=" + buffer.length);
    }
    if (len == 0) {
      // A zero-length request must neither parse events nor be mistaken for
      // end of stream while bytes are still buffered.
      return 0;
    }

    try {
      while (!m_inputExhausted && len > m_bos.available()) {
        m_inputExhausted = !m_reader.parseNextEvent();
      }
      // NOTE(review): on the regular end of input no flush is issued here;
      // this presumably relies on the content handler flushing the writer at
      // the end of the document - confirm against HTMLStreamWriter.
    }
    catch (HTMLException ex) {
      //$JL-EXC$
      m_inputExhausted = true;
      // make the bytes produced so far available to subsequent reads
      m_writer.flush();
      IOException ioe = new IOException("reading next event: " + ex.getMessage());
      ioe.initCause(ex);
      throw ioe;
    }

    int count = m_bos.read(buffer, offset, len);
    if (count == -1 || (m_inputExhausted && count == 0)) {
      m_eof = true;
      return -1;
    }
    return count;
  }

  // ----------------- private ---------------------------------------

  /**
   * Creates the byte buffer and the encoding writer on top of it, resets the
   * state flags and registers a {@link HTMLStreamWriter} on that writer as
   * content handler of the reader.
   *
   * @exception UnsupportedEncodingException if the encoding is not supported
   */
  private void init()
    throws UnsupportedEncodingException {
    m_bos = new ByteBufferOutputStream(HTMLInput.INPUT_BUFFER_SIZE);

    Writer encoder;
    if (m_encoding != null) {
      encoder = new OutputStreamWriter(m_bos, m_encoding);
    }
    else {
      // no encoding given: fall back to the platform default
      encoder = new OutputStreamWriter(m_bos);
    }
    m_writer = new BufferedWriter(encoder, HTMLInput.INPUT_BUFFER_SIZE);

    m_eof = false;
    m_inputExhausted = false;

    m_reader.setContentHandler(new HTMLStreamWriter(m_writer));
  }

  /**
   * A {@link ByteArrayOutputStream} whose collected bytes can be consumed
   * from the front: bytes are appended by the inherited write methods and
   * removed again by {@link #read(byte[], int, int)}.
   */
  private static class ByteBufferOutputStream extends ByteArrayOutputStream {

    public ByteBufferOutputStream() { }

    public ByteBufferOutputStream(int size) {
      super(size);
    }

    /**
     * Read bytes already collected in the output stream into an array. Read
     * bytes are consumed, e.g. the count of bytes is reduced by the amount of
     * bytes read. Will return 0, if no bytes are available. This method
     * never returns -1; detecting the end of the data is left to the caller.
     *
     * @param buffer to copy bytes into
     * @param offset where in the buffer to copy bytes to
     * @param len maximum amount of bytes to copy
     * @return number of bytes copied, 0 if none are available
     * @exception IOException declared for callers; not raised by this
     *      implementation
     */
    public int read(byte[] buffer, int offset, int len)
      throws IOException {
      if (super.count <= 0) {
        return 0;
      }

      if (len > super.count) {
        len = super.count;
      }

      System.arraycopy(super.buf, 0, buffer, offset, len);
      if (len >= super.count) {
        super.count = 0;
      }
      else {
        // move the unread remainder to the front of the internal array, so
        // that the inherited write methods keep appending behind it
        super.count -= len;
        System.arraycopy(super.buf, len, super.buf, 0, super.count);
      }
      return len;
    }

    /**
     * @return number of collected bytes that have not been read yet
     */
    public int available() {
      return super.count;
    }
  }

}
