nekohtml-1.9.14

ring ename = this$0.scanName();
    if ((this$0.fReportErrors) && (ename == null)) {
      this$0.fErrorReporter.reportError("HTML1012", null);
    }
    this$0.skipMarkup(false);
    if (ename != null)
    {
      ename = HTMLScanner.modifyName(ename, this$0.fNamesElems);
      if ((this$0.fDocumentHandler != null) && (this$0.fElementCount >= this$0.fElementDepth))
      {
        fQName.setValues(null, ename, ename, null);
        
        this$0.fEndLineNumber = this$0.fCurrentEntity.getLineNumber();
        this$0.fEndColumnNumber = HTMLScanner.CurrentEntity.access$100(this$0.fCurrentEntity);
        this$0.fEndCharacterOffset = HTMLScanner.CurrentEntity.access$200(this$0.fCurrentEntity);
        this$0.fDocumentHandler.endElement(fQName, this$0.locationAugs());
      }
    }
  }
  
  /**
   * Tells whether the unread part of the current entity's buffer contains a
   * closing tag for the given element.
   * <p>
   * Only the characters already held in the buffer (from the current offset
   * up to the buffered length) are inspected; nothing is read from the
   * underlying stream. The comparison ignores case.
   *
   * @param ename the element name to look for; must not be null
   * @return true if "&lt;/ename&gt;" occurs in the buffered remainder
   */
  private boolean isEnded(String ename)
  {
    String content = new String(this$0.fCurrentEntity.buffer, this$0.fCurrentEntity.offset, this$0.fCurrentEntity.length - this$0.fCurrentEntity.offset);
    
    // Lower-case with a fixed locale: the default-locale overload maps 'I' to a
    // dotless i under e.g. a Turkish default locale, so "</TITLE>" in the
    // content would never match the element name "title".
    String closingTag = "</" + ename.toLowerCase(java.util.Locale.ENGLISH) + ">";
    return content.toLowerCase(java.util.Locale.ENGLISH).indexOf(closingTag) != -1;
  }
}

/* Location:
 * Qualified Name:     org.cyberneko.html.HTMLScanner.ContentScanner
 * Java Class Version: 1.3 (47.0)
 * JD-Core Version:    0.7.1
 */
package org.cyberneko.html;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import org.apache.xerces.util.XMLStringBuffer;

/**
 * State of the entity (character stream) currently being scanned: the
 * reader, its identifiers, a character buffer with read position, and the
 * line / column / character-offset counters.
 */
public class HTMLScanner$CurrentEntity
{
  /** Reader the buffer is filled from. */
  private Reader stream_;
  /** Encoding name handed to the constructor. */
  public final String encoding;
  /** Public identifier handed to the constructor. */
  public final String publicId;
  /** Base system identifier handed to the constructor. */
  public final String baseSystemId;
  /** Literal system identifier handed to the constructor. */
  public final String literalSystemId;
  /** Expanded system identifier handed to the constructor. */
  public final String expandedSystemId;
  /** Version string, always "1.0". */
  public final String version = "1.0";
  /** Current line number, starting at 1. */
  private int lineNumber_ = 1;
  /** Current column number, starting at 1. */
  private int columnNumber_ = 1;
  /** Number of characters consumed so far, starting at 0. */
  public int characterOffset_ = 0;
  /**
   * Character buffer. The decompiler emitted the size as the char literal
   * '?' (i.e. 63); 2048 is the value of HTMLScanner.DEFAULT_BUFFER_SIZE.
   * The buffer is enlarged on demand by {@link #load(int)}.
   */
  public char[] buffer = new char[2048];
  /** Index in {@link #buffer} of the next character to read. */
  public int offset = 0;
  /** Number of valid characters in {@link #buffer}. */
  public int length = 0;
  /** Set once the reader has reported end of stream. */
  private boolean endReached_ = false;
  
  /**
   * Creates an entity reading from the given stream.
   *
   * @param stream           reader to load characters from
   * @param encoding         value stored in {@link #encoding}
   * @param publicId         value stored in {@link #publicId}
   * @param baseSystemId     value stored in {@link #baseSystemId}
   * @param literalSystemId  value stored in {@link #literalSystemId}
   * @param expandedSystemId value stored in {@link #expandedSystemId}
   */
  public HTMLScanner$CurrentEntity(Reader stream, String encoding, String publicId, String baseSystemId, String literalSystemId, String expandedSystemId)
  {
    stream_ = stream;
    this.encoding = encoding;
    this.publicId = publicId;
    this.baseSystemId = baseSystemId;
    this.literalSystemId = literalSystemId;
    this.expandedSystemId = expandedSystemId;
  }
  
  /** Returns the character at the read position without consuming it (no bounds check). */
  private char getCurrentChar()
  {
    return buffer[offset];
  }
  
  /**
   * Consumes and returns the character at the read position, advancing the
   * column and character-offset counters. Does not load more data and does
   * not check bounds.
   */
  private char getNextChar()
  {
    characterOffset_ += 1;
    columnNumber_ += 1;
    return buffer[(offset++)];
  }
  
  /** Closes the reader; a failure to close is deliberately ignored. */
  private void closeQuietly()
  {
    try
    {
      stream_.close();
    }
    catch (IOException ignored)
    {
      // best effort: nothing useful can be done if closing fails
    }
  }
  
  /** Returns true while unread characters remain in the buffer. */
  boolean hasNext()
  {
    return offset < length;
  }
  
  /**
   * Reads characters from the stream into the buffer starting at the given
   * index, growing the buffer by a quarter of its size when that index is at
   * the end of the array. Afterwards the read position is {@code offset} and
   * {@link #length} marks the end of the newly loaded data.
   *
   * @param offset index in the buffer at which loading starts
   * @return the number of characters read, or -1 at end of stream
   * @throws IOException if the reader fails
   */
  protected int load(int offset)
    throws IOException
  {
    debugBufferIfNeeded("(load: ");
    if (offset == buffer.length)
    {
      // buffer is full up to the load position: enlarge it, keeping the content
      int adjust = buffer.length / 4;
      char[] array = new char[buffer.length + adjust];
      System.arraycopy(buffer, 0, array, 0, length);
      buffer = array;
    }
    int count = stream_.read(buffer, offset, buffer.length - offset);
    if (count == -1) {
      endReached_ = true;
    }
    length = (count != -1 ? count + offset : offset);
    this.offset = offset;
    debugBufferIfNeeded(")load: ", " -> " + count);
    return count;
  }
  
  /**
   * Returns the next character, refilling the buffer from its start when it
   * has been fully consumed.
   *
   * @return the character read, or -1 at end of stream
   * @throws IOException if the reader fails
   */
  protected int read()
    throws IOException
  {
    debugBufferIfNeeded("(read: ");
    if (offset == length)
    {
      if (endReached_) {
        return -1;
      }
      if (load(0) == -1) {
        return -1;
      }
    }
    char c = buffer[(offset++)];
    characterOffset_ += 1;
    columnNumber_ += 1;
    
    debugBufferIfNeeded(")read: ", " -> " + c);
    return c;
  }
  
  /** Debug hook; forwards to the two-argument variant with an empty suffix. */
  private void debugBufferIfNeeded(String prefix)
  {
    debugBufferIfNeeded(prefix, "");
  }
  
  /** Debug hook; the body is empty in this build. */
  private void debugBufferIfNeeded(String prefix, String suffix) {}
  
  /**
   * Replaces the reader and resets buffer position, buffered length,
   * character offset (to 0) and line / column counters (to 1).
   */
  private void setStream(InputStreamReader inputStreamReader)
  {
    stream_ = inputStreamReader;
    offset = (length = characterOffset_ = 0);
    lineNumber_ = (columnNumber_ = 1);
  }
  
  /** Steps back one character (read position, character offset and column). */
  private void rewind()
  {
    offset -= 1;
    characterOffset_ -= 1;
    columnNumber_ -= 1;
  }
  
  /** Steps back {@code i} characters (read position, character offset and column). */
  private void rewind(int i)
  {
    offset -= i;
    characterOffset_ -= i;
    columnNumber_ -= i;
  }
  
  /** Advances to the next line and resets the column to 1. */
  private void incLine()
  {
    lineNumber_ += 1;
    columnNumber_ = 1;
  }
  
  /** Advances by {@code nbLines} lines and resets the column to 1. */
  private void incLine(int nbLines)
  {
    lineNumber_ += nbLines;
    columnNumber_ = 1;
  }
  
  /** Returns the current line number. */
  public int getLineNumber()
  {
    return lineNumber_;
  }
  
  /**
   * Points this entity at the content of the given string buffer and sets
   * the position counters to the supplied values.
   * <p>
   * The decompiled code had lost the {@code buffer.} qualifier (it read
   * {@code this.buffer = ch; offset = offset; length = length;}), which does
   * not compile and would otherwise be a no-op; the array, offset and length
   * are taken from the buffer argument.
   *
   * @param buffer          source of the character array, offset and length
   * @param lineNumber      new line number
   * @param columnNumber    new column number
   * @param characterOffset new character offset
   */
  private void resetBuffer(XMLStringBuffer buffer, int lineNumber, int columnNumber, int characterOffset)
  {
    lineNumber_ = lineNumber;
    columnNumber_ = columnNumber;
    characterOffset_ = characterOffset;
    this.buffer = buffer.ch;
    this.offset = buffer.offset;
    this.length = buffer.length;
  }
  
  /** Returns the current column number. */
  private int getColumnNumber()
  {
    return columnNumber_;
  }
  
  /** Restores a previously saved read position, column and character offset. */
  private void restorePosition(int originalOffset, int originalColumnNumber, int originalCharacterOffset)
  {
    offset = originalOffset;
    columnNumber_ = originalColumnNumber;
    characterOffset_ = originalCharacterOffset;
  }
  
  /** Returns the current character offset. */
  private int getCharacterOffset()
  {
    return characterOffset_;
  }
}

/* Location:
 * Qualified Name:     org.cyberneko.html.HTMLScanner.CurrentEntity
 * Java Class Version: 1.3 (47.0)
 * JD-Core Version:    0.7.1
 */
package org.cyberneko.html;

/**
 * Begin / end position (line, column, character offset) of a scanned event,
 * exposed through {@link HTMLEventInfo}. Items of this class always report
 * themselves as not synthesized.
 */
public class HTMLScanner$LocationItem
  implements HTMLEventInfo, Cloneable
{
  protected int fBeginLineNumber;
  protected int fBeginColumnNumber;
  protected int fBeginCharacterOffset;
  protected int fEndLineNumber;
  protected int fEndColumnNumber;
  protected int fEndCharacterOffset;
  
  /** Creates an item with all positions set to 0. */
  public HTMLScanner$LocationItem() {}
  
  /**
   * Copy constructor.
   * <p>
   * The values are read from {@code other}; the decompiled code passed this
   * object's own (still zero) fields, so nothing was actually copied.
   *
   * @param other the item whose positions are copied; must not be null
   */
  HTMLScanner$LocationItem(LocationItem other)
  {
    setValues(other.fBeginLineNumber, other.fBeginColumnNumber, other.fBeginCharacterOffset,
              other.fEndLineNumber, other.fEndColumnNumber, other.fEndCharacterOffset);
  }
  
  /**
   * Sets all six position values at once.
   *
   * @param beginLine   begin line number
   * @param beginColumn begin column number
   * @param beginOffset begin character offset
   * @param endLine     end line number
   * @param endColumn   end column number
   * @param endOffset   end character offset
   */
  public void setValues(int beginLine, int beginColumn, int beginOffset, int endLine, int endColumn, int endOffset)
  {
    fBeginLineNumber = beginLine;
    fBeginColumnNumber = beginColumn;
    fBeginCharacterOffset = beginOffset;
    fEndLineNumber = endLine;
    fEndColumnNumber = endColumn;
    fEndCharacterOffset = endOffset;
  }
  
  /** Returns the begin line number. */
  public int getBeginLineNumber()
  {
    return fBeginLineNumber;
  }
  
  /** Returns the begin column number. */
  public int getBeginColumnNumber()
  {
    return fBeginColumnNumber;
  }
  
  /** Returns the begin character offset. */
  public int getBeginCharacterOffset()
  {
    return fBeginCharacterOffset;
  }
  
  /** Returns the end line number. */
  public int getEndLineNumber()
  {
    return fEndLineNumber;
  }
  
  /** Returns the end column number. */
  public int getEndColumnNumber()
  {
    return fEndColumnNumber;
  }
  
  /** Returns the end character offset. */
  public int getEndCharacterOffset()
  {
    return fEndCharacterOffset;
  }
  
  /** Always returns false. */
  public boolean isSynthesized()
  {
    return false;
  }
  
  /**
   * Returns the six values separated by colons, in the order begin line,
   * begin column, begin offset, end line, end column, end offset.
   */
  public String toString()
  {
    StringBuffer str = new StringBuffer();
    str.append(fBeginLineNumber).append(':');
    str.append(fBeginColumnNumber).append(':');
    str.append(fBeginCharacterOffset).append(':');
    str.append(fEndLineNumber).append(':');
    str.append(fEndColumnNumber).append(':');
    str.append(fEndCharacterOffset);
    return str.toString();
  }
}

/* Location:
 * Qualified Name:     org.cyberneko.html.HTMLScanner.LocationItem
 * Java Class Version: 1.3 (47.0)
 * JD-Core Version:    0.7.1
 */
package org.cyberneko.html;

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * Input stream filter that records every byte it reads from the wrapped
 * stream so that the recorded bytes can later be served again
 * ({@link #playback()}), and that can sniff a byte order mark at the start
 * of the stream ({@link #detectEncoding(String[])}), pushing back the bytes
 * it consumed when no mark is found. Once {@link #clear()} has taken effect,
 * or a playback has run to its end, reads go straight to the wrapped stream.
 */
public class HTMLScanner$PlaybackInputStream
  extends FilterInputStream
{
  private static final boolean DEBUG_PLAYBACK = false;
  /** True while recorded bytes are being replayed. */
  protected boolean fPlayback = false;
  /** True once recording has been dropped; reads then bypass the buffer. */
  protected boolean fCleared = false;
  /** True once {@link #detectEncoding(String[])} has been called. */
  protected boolean fDetected = false;
  /**
   * Recorded bytes. Initial capacity 1024, the same step the single-byte
   * read grows it by (the decompiler had emitted the char literal '?');
   * the array is enlarged on demand.
   */
  protected byte[] fByteBuffer = new byte[1024];
  /** Next index to serve during playback. */
  protected int fByteOffset = 0;
  /** Number of recorded bytes. */
  protected int fByteLength = 0;
  /** Next index to serve from the pushback region. */
  public int fPushbackOffset = 0;
  /** End (exclusive) of the pushback region inside the recorded bytes. */
  public int fPushbackLength = 0;
  
  /**
   * Wraps the given stream.
   *
   * @param in the stream to read from
   */
  public HTMLScanner$PlaybackInputStream(InputStream in)
  {
    super(in);
  }
  
  /**
   * Looks for a UTF-8 or UTF-16 byte order mark at the current position.
   * When one is found, its bytes stay consumed and the two slots of
   * {@code encodings} are filled in. Otherwise {@code encodings} is left
   * untouched and every byte consumed here is pushed back so that it is
   * returned again by the next reads.
   *
   * @param encodings array of at least two elements receiving the detected
   *                  encoding names
   * @throws IOException if called more than once, or if reading fails
   */
  public void detectEncoding(String[] encodings)
    throws IOException
  {
    if (fDetected) {
      throw new IOException("Should not detect encoding twice.");
    }
    fDetected = true;
    int b1 = read();
    if (b1 == -1) {
      return;
    }
    int b2 = read();
    if (b2 == -1)
    {
      fPushbackLength = 1;
      return;
    }
    // UTF-8 BOM: EF BB BF
    if ((b1 == 0xEF) && (b2 == 0xBB))
    {
      int b3 = read();
      if (b3 == 0xBF)
      {
        fPushbackOffset = 3;
        encodings[0] = "UTF-8";
        encodings[1] = "UTF8";
        return;
      }
      // Not a BOM after all: push back everything consumed. The third byte
      // was only recorded if it was not end-of-stream. (Previously the code
      // fell through and reset the length to 2, dropping the third byte.)
      fPushbackLength = (b3 == -1) ? 2 : 3;
      return;
    }
    // UTF-16 little-endian BOM: FF FE
    if ((b1 == 0xFF) && (b2 == 0xFE))
    {
      encodings[0] = "UTF-16";
      encodings[1] = "UnicodeLittleUnmarked";
      return;
    }
    // UTF-16 big-endian BOM: FE FF
    if ((b1 == 0xFE) && (b2 == 0xFF))
    {
      encodings[0] = "UTF-16";
      encodings[1] = "UnicodeBigUnmarked";
      return;
    }
    // unknown: push both bytes back
    fPushbackLength = 2;
  }
  
  /** Switches to playback: subsequent reads serve the recorded bytes from the start. */
  public void playback()
  {
    fPlayback = true;
  }
  
  /**
   * Drops the recorded bytes and stops recording, unless a playback is in
   * progress (in which case the call has no effect).
   */
  public void clear()
  {
    if (!fPlayback)
    {
      fCleared = true;
      fByteBuffer = null;
    }
  }
  
  /**
   * Reads one byte: pushed-back bytes first, then (in playback mode) the
   * recorded bytes, otherwise from the wrapped stream while recording.
   *
   * @return the byte as a value in 0..255, or -1 at end of stream
   * @throws IOException if the wrapped stream fails
   */
  public int read()
    throws IOException
  {
    if (fPushbackOffset < fPushbackLength) {
      // mask: a raw byte >= 0x80 would be negative, and 0xFF would look like EOF
      return fByteBuffer[(fPushbackOffset++)] & 0xFF;
    }
    if (fCleared) {
      return in.read();
    }
    if (fPlayback)
    {
      if (fByteOffset >= fByteLength)
      {
        // nothing recorded (or nothing left) to replay: go to the stream
        fCleared = true;
        fByteBuffer = null;
        return in.read();
      }
      int c = fByteBuffer[(fByteOffset++)] & 0xFF;
      if (fByteOffset == fByteLength)
      {
        fCleared = true;
        fByteBuffer = null;
      }
      return c;
    }
    int c = in.read();
    if (c != -1)
    {
      if (fByteLength == fByteBuffer.length)
      {
        byte[] newarray = new byte[fByteLength + 1024];
        System.arraycopy(fByteBuffer, 0, newarray, 0, fByteLength);
        fByteBuffer = newarray;
      }
      fByteBuffer[(fByteLength++)] = ((byte)c);
    }
    return c;
  }
  
  /**
   * Reads into the whole array; same as {@code read(array, 0, array.length)}.
   *
   * @return the number of bytes read, or -1 at end of stream
   * @throws IOException if the wrapped stream fails
   */
  public int read(byte[] array)
    throws IOException
  {
    return read(array, 0, array.length);
  }
  
  /**
   * Bulk variant of {@link #read()}, with the same source priority
   * (pushback, playback, wrapped stream with recording).
   *
   * @param array  destination
   * @param offset first index to write
   * @param length maximum number of bytes to read
   * @return the number of bytes read, or -1 at end of stream
   * @throws IOException if the wrapped stream fails
   */
  public int read(byte[] array, int offset, int length)
    throws IOException
  {
    if (fPushbackOffset < fPushbackLength)
    {
      int count = fPushbackLength - fPushbackOffset;
      if (count > length) {
        count = length;
      }
      System.arraycopy(fByteBuffer, fPushbackOffset, array, offset, count);
      fPushbackOffset += count;
      return count;
    }
    if (fCleared) {
      return in.read(array, offset, length);
    }
    if (fPlayback)
    {
      if (fByteOffset >= fByteLength)
      {
        // nothing to replay: delegate instead of reporting a zero-byte read
        fCleared = true;
        fByteBuffer = null;
        return in.read(array, offset, length);
      }
      if (fByteOffset + length > fByteLength) {
        length = fByteLength - fByteOffset;
      }
      System.arraycopy(fByteBuffer, fByteOffset, array, offset, length);
      fByteOffset += length;
      if (fByteOffset == fByteLength)
      {
        fCleared = true;
        fByteBuffer = null;
      }
      return length;
    }
    int count = in.read(array, offset, length);
    if (count != -1)
    {
      if (fByteLength + count > fByteBuffer.length)
      {
        byte[] newarray = new byte[fByteLength + count + 512];
        System.arraycopy(fByteBuffer, 0, newarray, 0, fByteLength);
        fByteBuffer = newarray;
      }
      System.arraycopy(array, offset, fByteBuffer, fByteLength, count);
      fByteLength += count;
    }
    return count;
  }
}

/* Location:
 * Qualified Name:     org.cyberneko.html.HTMLScanner.PlaybackInputStream
 * Java Class Version: 1.3 (47.0)
 * JD-Core Version:    0.7.1
 */
package org.cyberneko.html;

import java.io.IOException;

/**
 * A scanning strategy that can be driven step by step.
 */
public interface HTMLScanner$Scanner
{
  /**
   * Performs scanning work.
   *
   * @param paramBoolean flag passed on by the caller; the implementation
   *                     visible in this file names it "complete" and keeps
   *                     looping while it is true
   * @return implementation-defined status flag
   * @throws IOException if reading the input fails
   */
  boolean scan(boolean paramBoolean)
    throws IOException;
}

/* Location:
 * Qualified Name:     org.cyberneko.html.HTMLScanner.Scanner
 * Java Class Version: 1.3 (47.0)
 * JD-Core Version:    0.7.1
 */
package org.cyberneko.html;

import java.io.EOFException;
import java.io.IOException;
import java.util.Stack;
import org.apache.xerces.util.XMLStringBuffer;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLDocumentHandler;

/**
 * Scanner used while inside an element whose content is delivered as
 * character data: everything up to the matching end tag
 * ({@code </fElementName>}) is reported through
 * {@code XMLDocumentHandler.characters}, after which scanning is handed back
 * to the enclosing scanner's content scanner.
 * <p>
 * NOTE(review): the {@code HTMLScanner.CurrentEntity.access$NNN} calls are
 * compiler-generated accessors for private members of CurrentEntity. From
 * how they are used here, access$100 / access$200 appear to return the
 * column number / character offset, access$300 appears to un-read the last
 * character, access$400 looks like a debug hook and access$1500 a line
 * increment. Confirm against the CurrentEntity class.
 */
public class HTMLScanner$SpecialScanner
  implements HTMLScanner.Scanner
{
  /** Name of the element being scanned, as given to {@link #setElementName(String)}. */
  protected String fElementName;
  /** True if the element name is "STYLE" (ignoring case). */
  protected boolean fStyle;
  /** True if the element name is "TEXTAREA" (ignoring case). */
  protected boolean fTextarea;
  /** True if the element name is "TITLE" (ignoring case). */
  protected boolean fTitle;
  /** Reused name object for the end-element callback. */
  private final QName fQName;
  /** Reused buffer collecting character content. */
  private final XMLStringBuffer fStringBuffer;
  /** Enclosing scanner instance. */
  private final HTMLScanner this$0;
  
  /**
   * Creates the scanner for the given enclosing HTMLScanner.
   *
   * @param paramHTMLScanner the enclosing scanner
   */
  public HTMLScanner$SpecialScanner(HTMLScanner paramHTMLScanner)
  {
    this$0 = paramHTMLScanner;
    
    fQName = new QName();
    
    fStringBuffer = new XMLStringBuffer();
  }
  
  /**
   * Sets the element whose content is about to be scanned and derives the
   * STYLE / TEXTAREA / TITLE flags from it.
   *
   * @param ename element name; must not be null
   * @return this scanner
   */
  public HTMLScanner.Scanner setElementName(String ename)
  {
    fElementName = ename;
    fStyle = fElementName.equalsIgnoreCase("STYLE");
    fTextarea = fElementName.equalsIgnoreCase("TEXTAREA");
    fTitle = fElementName.equalsIgnoreCase("TITLE");
    return this;
  }
  
  /**
   * Scans element content according to the enclosing scanner's state
   * (0 = content, 1 = just after a '&lt;').
   *
   * @param complete when true, keep scanning until the end tag or the end
   *                 of input is reached; when false, perform one step
   * @return always true
   * @throws IOException if reading fails
   */
  public boolean scan(boolean complete)
    throws IOException
  {
    boolean next;
    do
    {
      try
      {
        next = false;
        // Each case is braced so that both can declare their own local "c".
        switch (this$0.fScannerState)
        {
        case 0: // HTMLScanner.STATE_CONTENT
        {
          this$0.fBeginLineNumber = this$0.fCurrentEntity.getLineNumber();
          this$0.fBeginColumnNumber = HTMLScanner.CurrentEntity.access$100(this$0.fCurrentEntity);
          this$0.fBeginCharacterOffset = HTMLScanner.CurrentEntity.access$200(this$0.fCurrentEntity);
          int c = this$0.fCurrentEntity.read();
          if (c == '<')
          {
            // possible end tag: examine it in the next step
            this$0.setScannerState((short)1);
          }
          else
          {
            if (c == '&')
            {
              if ((fTextarea) || (fTitle))
              {
                // entity references are resolved only in TEXTAREA and TITLE
                this$0.scanEntityRef(fStringBuffer, true);
                continue;
              }
              fStringBuffer.clear();
              fStringBuffer.append('&');
            }
            else
            {
              if (c == -1)
              {
                if (this$0.fReportErrors) {
                  this$0.fErrorReporter.reportError("HTML1007", null);
                }
                throw new EOFException();
              }
              // ordinary character: hand it back so scanCharacters reads it
              HTMLScanner.CurrentEntity.access$300(this$0.fCurrentEntity);
              fStringBuffer.clear();
            }
            scanCharacters(fStringBuffer, -1);
          }
          break;
        }
        case 1: // HTMLScanner.STATE_MARKUP_BRACKET
        {
          int delimiter = -1;
          int c = this$0.fCurrentEntity.read();
          if (c == '/')
          {
            String ename = this$0.scanName();
            if (ename != null)
            {
              if (ename.equalsIgnoreCase(fElementName))
              {
                if (this$0.fCurrentEntity.read() == '>')
                {
                  // matching end tag: report it and return to content scanning
                  ename = HTMLScanner.modifyName(ename, this$0.fNamesElems);
                  if ((this$0.fDocumentHandler != null) && (this$0.fElementCount >= this$0.fElementDepth))
                  {
                    fQName.setValues(null, ename, ename, null);
                    
                    this$0.fEndLineNumber = this$0.fCurrentEntity.getLineNumber();
                    this$0.fEndColumnNumber = HTMLScanner.CurrentEntity.access$100(this$0.fCurrentEntity);
                    this$0.fEndCharacterOffset = HTMLScanner.CurrentEntity.access$200(this$0.fCurrentEntity);
                    this$0.fDocumentHandler.endElement(fQName, this$0.locationAugs());
                  }
                  this$0.setScanner(this$0.fContentScanner);
                  this$0.setScannerState((short)0);
                  return true;
                }
                HTMLScanner.CurrentEntity.access$300(this$0.fCurrentEntity);
              }
              // some other (or unterminated) end tag: treat it as text
              fStringBuffer.clear();
              fStringBuffer.append("</");
              fStringBuffer.append(ename);
            }
            else
            {
              fStringBuffer.clear();
              fStringBuffer.append("</");
            }
          }
          else
          {
            // '<' not followed by '/': plain text
            // NOTE(review): if c is -1 here, (char)c appends U+FFFF.
            fStringBuffer.clear();
            fStringBuffer.append('<');
            fStringBuffer.append((char)c);
          }
          scanCharacters(fStringBuffer, delimiter);
          this$0.setScannerState((short)0);
          break;
        }
        }
      }
      catch (EOFException e)
      {
        // end of the current input: go back to the content scanner and either
        // finish the document (state 11) or resume the entity pushed earlier
        this$0.setScanner(this$0.fContentScanner);
        if (this$0.fCurrentEntityStack.empty())
        {
          this$0.setScannerState((short)11);
        }
        else
        {
          this$0.fCurrentEntity = ((HTMLScanner.CurrentEntity)this$0.fCurrentEntityStack.pop());
          this$0.setScannerState((short)0);
        }
        return true;
      }
    } while ((next) || (complete));
    return true;
  }
  
  /**
   * Appends characters to {@code buffer} until '&lt;', '&amp;' or the end of
   * input, converting each run of line breaks into '\n' characters, then
   * reports the buffer to the document handler if it is not empty. For STYLE
   * elements, comment and CDATA delimiters are stripped first when the
   * corresponding features are enabled.
   *
   * @param buffer    buffer to append to; may already hold a prefix
   * @param delimiter only used in the debug message
   * @throws IOException if reading fails
   */
  protected void scanCharacters(XMLStringBuffer buffer, int delimiter)
    throws IOException
  {
    HTMLScanner.CurrentEntity.access$400(this$0.fCurrentEntity, "(scanCharacters, delimiter=" + delimiter + ": ");
    for (;;)
    {
      int c = this$0.fCurrentEntity.read();
      if ((c == -1) || (c == '<') || (c == '&'))
      {
        if (c == -1) {
          break;
        }
        // leave the '<' or '&' for the caller
        HTMLScanner.CurrentEntity.access$300(this$0.fCurrentEntity); break;
      }
      if ((c == '\r') || (c == '\n'))
      {
        HTMLScanner.CurrentEntity.access$300(this$0.fCurrentEntity);
        int newlines = this$0.skipNewlines();
        for (int i = 0; i < newlines; i++) {
          buffer.append('\n');
        }
      }
      else
      {
        buffer.append((char)c);
        // never true here: '\n' is handled by the branch above
        if (c == '\n') {
          HTMLScanner.CurrentEntity.access$1500(this$0.fCurrentEntity);
        }
      }
    }
    if (fStyle)
    {
      if (this$0.fStyleStripCommentDelims) {
        HTMLScanner.reduceToContent(buffer, "<!--", "-->");
      }
      if (this$0.fStyleStripCDATADelims) {
        HTMLScanner.reduceToContent(buffer, "<![CDATA[", "]]>");
      }
    }
    // The decompiled code tested a bare, undefined "length"; the check is on
    // the number of characters collected in the buffer.
    if ((buffer.length > 0) && (this$0.fDocumentHandler != null) && (this$0.fElementCount >= this$0.fElementDepth))
    {
      this$0.fEndLineNumber = this$0.fCurrentEntity.getLineNumber();
      this$0.fEndColumnNumber = HTMLScanner.CurrentEntity.access$100(this$0.fCurrentEntity);
      this$0.fEndCharacterOffset = HTMLScanner.CurrentEntity.access$200(this$0.fCurrentEntity);
      this$0.fDocumentHandler.characters(buffer, this$0.locationAugs());
    }
    HTMLScanner.CurrentEntity.access$400(this$0.fCurrentEntity, ")scanCharacters: ");
  }
}

/* Location:
 * Qualified Name:     org.cyberneko.html.HTMLScanner.SpecialScanner
 * Java Class Version: 1.3 (47.0)
 * JD-Core Version:    0.7.1
 */
package org.cyberneko.html;

import java.io.EOFException;
import java.io.File;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.util.BitSet;
import java.util.Stack;
import org.apache.xerces.util.EncodingMap;
import org.apache.xerces.util.NamespaceSupport;
import org.apache.xerces.util.URI;
import org.apache.xerces.util.URI.MalformedURIException;
import org.apache.xerces.util.XMLAttributesImpl;
import org.apache.xerces.util.XMLResourceIdentifierImpl;
import org.apache.xerces.util.XMLStringBuffer;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.NamespaceContext;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLDocumentHandler;
import org.apache.xerces.xni.XMLLocator;
import org.apache.xerces.xni.XMLResourceIdentifier;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLComponentManager;
import org.apache.xerces.xni.parser.XMLConfigurationException;
import org.apache.xerces.xni.parser.XMLDocumentScanner;
import org.apache.xerces.xni.parser.XMLInputSource;
import org.cyberneko.html.xercesbridge.XercesBridge;

public class HTMLScanner
  implements XMLDocumentScanner, XMLLocator, HTMLComponent
{
  public static final String HTML_4_01_STRICT_PUBID = "-//W3C//DTD HTML 4.01//EN";
  public static final String HTML_4_01_STRICT_SYSID = "http://www.w3.org/TR/html4/strict.dtd";
  public static final String HTML_4_01_TRANSITIONAL_PUBID = "-//W3C//DTD HTML 4.01 Transitional//EN";
  public static final String HTML_4_01_TRANSITIONAL_SYSID = "http://www.w3.org/TR/html4/loose.dtd";
  public static final String HTML_4_01_FRAMESET_PUBID = "-//W3C//DTD HTML 4.01 Frameset//EN";
  public static final String HTML_4_01_FRAMESET_SYSID = "http://www.w3.org/TR/html4/frameset.dtd";
  protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
  protected static final String REPORT_ERRORS = "http://cyberneko.org/html/features/report-errors";
  public static final String NOTIFY_CHAR_REFS = "http://apache.org/xml/features/scanner/notify-char-refs";
  public static final String NOTIFY_XML_BUILTIN_REFS = "http://apache.org/xml/features/scanner/notify-builtin-refs";
  public static final String NOTIFY_HTML_BUILTIN_REFS = "http://cyberneko.org/html/features/scanner/notify-builtin-refs";
  public static final String FIX_MSWINDOWS_REFS = "http://cyberneko.org/html/features/scanner/fix-mswindows-refs";
  public static final String SCRIPT_STRIP_COMMENT_DELIMS = "http://cyberneko.org/html/features/scanner/script/strip-comment-delims";
  public static final String SCRIPT_STRIP_CDATA_DELIMS = "http://cyberneko.org/html/features/scanner/script/strip-cdata-delims";
  public static final String STYLE_STRIP_COMMENT_DELIMS = "http://cyberneko.org/html/features/scanner/style/strip-comment-delims";
  public static final String STYLE_STRIP_CDATA_DELIMS = "http://cyberneko.org/html/features/scanner/style/strip-cdata-delims";
  public static final String IGNORE_SPECIFIED_CHARSET = "http://cyberneko.org/html/features/scanner/ignore-specified-charset";
  public static final String CDATA_SECTIONS = "http://cyberneko.org/html/features/scanner/cdata-sections";
  public static final String OVERRIDE_DOCTYPE = "http://cyberneko.org/html/features/override-doctype";
  public static final String INSERT_DOCTYPE = "http://cyberneko.org/html/features/insert-doctype";
  public static final String PARSE_NOSCRIPT_CONTENT = "http://cyberneko.org/html/features/parse-noscript-content";
  protected static final String NORMALIZE_ATTRIBUTES = "http://cyberneko.org/html/features/scanner/normalize-attrs";
  private static final String[] RECOGNIZED_FEATURES = { "http://cyberneko.org/html/features/augmentations", "http://cyberneko.org/html/features/report-errors", "http://apache.org/xml/features/scanner/notify-char-refs", "http://apache.org/xml/features/scanner/notify-builtin-refs", "http://cyberneko.org/html/features/scanner/notify-builtin-refs", "http://cyberneko.org/html/features/scanner/fix-mswindows-refs", "http://cyberneko.org/html/features/scanner/script/strip-cdata-delims", "http://cyberneko.org/html/features/scanner/script/strip-comment-delims", "http://cyberneko.org/html/features/scanner/style/strip-cdata-delims", "http://cyberneko.org/html/features/scanner/style/strip-comment-delims", "http://cyberneko.org/html/features/scanner/ignore-specified-charset", "http://cyberneko.org/html/features/scanner/cdata-sections", "http://cyberneko.org/html/features/override-doctype", "http://cyberneko.org/html/features/insert-doctype", "http://cyberneko.org/html/features/scanner/normalize-attrs", "http://cyberneko.org/html/features/parse-noscript-content" };
  private static final Boolean[] RECOGNIZED_FEATURES_DEFAULTS = { null, null, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, Boolean.TRUE };
  protected static final String NAMES_ELEMS = "http://cyberneko.org/html/properties/names/elems";
  protected static final String NAMES_ATTRS = "http://cyberneko.org/html/properties/names/attrs";
  protected static final String DEFAULT_ENCODING = "http://cyberneko.org/html/properties/default-encoding";
  protected static final String ERROR_REPORTER = "http://cyberneko.org/html/properties/error-reporter";
  protected static final String DOCTYPE_PUBID = "http://cyberneko.org/html/properties/doctype/pubid";
  protected static final String DOCTYPE_SYSID = "http://cyberneko.org/html/properties/doctype/sysid";
  private static final String[] RECOGNIZED_PROPERTIES = { "http://cyberneko.org/html/properties/names/elems", "http://cyberneko.org/html/properties/names/attrs", "http://cyberneko.org/html/properties/default-encoding", "http://cyberneko.org/html/properties/error-reporter", "http://cyberneko.org/html/properties/doctype/pubid", "http://cyberneko.org/html/properties/doctype/sysid" };
  private static final Object[] RECOGNIZED_PROPERTIES_DEFAULTS = { null, null, "Windows-1252", null, "-//W3C//DTD HTML 4.01 Transitional//EN", "http://www.w3.org/TR/html4/loose.dtd" };
  protected static final short STATE_CONTENT = 0;
  protected static final short STATE_MARKUP_BRACKET = 1;
  protected static final short STATE_START_DOCUMENT = 10;
  protected static final short STATE_END_DOCUMENT = 11;
  protected static final short NAMES_NO_CHANGE = 0;
  protected static final short NAMES_UPPERCASE = 1;
  protected static final short NAMES_LOWERCASE = 2;
  protected static final int DEFAULT_BUFFER_SIZE = 2048;
  private static final boolean DEBUG_SCANNER = false;
  private static final boolean DEBUG_SCANNER_STATE = false;
  private static final boolean DEBUG_BUFFER = false;
  private static final boolean DEBUG_CHARSET = false;
  protected static final boolean DEBUG_CALLBACKS = false;
  protected static final HTMLEventInfo SYNTHESIZED_ITEM = new HTMLEventInfo.SynthesizedItem();
  private static final BitSet ENTITY_CHARS = new BitSet();
  protected boolean fAugmentations;
  protected boolean fReportErrors;
  protected boolean fNotifyCharRefs;
  protected boolean fNotifyXmlBuiltinRefs;
  protected boolean fNotifyHtmlBuiltinRefs;
  protected boolean fFixWindowsCharRefs;
  protected boolean fScriptStripCDATADelims;
  protected boolean fScriptStripCommentDelims;
  protected boolean fStyleStripCDATADelims;
  protected boolean fStyleStripCommentDelims;
  protected boolean fIgnoreSpecifiedCharset;
  protected boolean fCDATASections;
  protected boolean fOverrideDoctype;
  protected boolean fInsertDoctype;
  protected boolean fNormalizeAttributes;
  protected boolean fParseNoScriptContent;
  protected boolean fParseNoFramesContent;
  protected short fNamesElems;
  protected short fNamesAttrs;
  protected String fDefaultIANAEncoding;
  protected HTMLErrorReporter fErrorReporter;
  protected String fDoctypePubid;
  protected String fDoctypeSysid;
  protected int fBeginLineNumber;
  protected int fBeginColumnNumber;
  protected int fBeginCharacterOffset;
  protected int fEndLineNumber;
  protected int fEndColumnNumber;
  protected int fEndCharacterOffset;
  protected PlaybackInputStream fByteStream;
  protected CurrentEntity fCurrentEntity;
  protected final Stack fCurrentEntityStack;
  protected Scanner fScanner;
  protected short fScannerState;
  protected XMLDocumentHandler fDocumentHandler;
  protected String fIANAEncoding;
  protected String fJavaEncoding;
  protected boolean fIso8859Encoding;
  protected int fElementCount;
  protected int fElementDepth;
  protected Scanner fContentScanner;
  protected SpecialScanner fSpecialScanner;
  protected final XMLStringBuffer fStringBuffer;
  private final XMLStringBuffer fStringBuffer2;
  private final XMLStringBuffer fNonNormAttr;
  private final HTMLAugmentations fInfosetAugs;
  private final LocationItem fLocationItem;
  private final boolean[] fSingleBoolean;
  private final XMLResourceIdentifierImpl fResourceId;
  
  // Marks '-', '.', '_', the digits and the ASCII letters in ENTITY_CHARS.
  static
  {
    char[] allowed = "-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz".toCharArray();
    for (int idx = 0; idx < allowed.length; idx++) {
      ENTITY_CHARS.set(allowed[idx]);
    }
  }
  
  /**
   * Creates a scanner with an empty entity stack, its content and special
   * scanners, and the reusable buffers and helper objects.
   */
  public HTMLScanner()
  {
    // entity stack and the two scanning strategies
    this.fCurrentEntityStack = new Stack();
    this.fContentScanner = new ContentScanner();
    this.fSpecialScanner = new SpecialScanner();
    // reusable string buffers
    this.fStringBuffer = new XMLStringBuffer(1024);
    this.fStringBuffer2 = new XMLStringBuffer(1024);
    this.fNonNormAttr = new XMLStringBuffer(128);
    // reusable helper objects
    this.fInfosetAugs = new HTMLAugmentations();
    this.fLocationItem = new LocationItem();
    this.fSingleBoolean = new boolean[] { false };
    this.fResourceId = new XMLResourceIdentifierImpl();
  }
  
  /**
   * Makes the given input source the current entity, saving the previous
   * current entity on the entity stack.
   *
   * @param inputSource the source to scan next
   */
  public void pushInputSource(XMLInputSource inputSource)
  {
    final Reader source = getReader(inputSource);
    
    // remember what was being scanned so far
    fCurrentEntityStack.push(fCurrentEntity);
    
    final String enc = inputSource.getEncoding();
    final String pubId = inputSource.getPublicId();
    final String baseId = inputSource.getBaseSystemId();
    final String sysId = inputSource.getSystemId();
    fCurrentEntity = new CurrentEntity(source, enc, pubId, baseId, sysId, expandSystemId(sysId, baseId));
  }
  
  /**
   * Returns a reader for the input source: its character stream if it has
   * one, otherwise its byte stream decoded with fJavaEncoding.
   *
   * @param inputSource the source to read
   * @return the reader, or null if there is no character stream and
   *         fJavaEncoding is not a supported encoding
   */
  private Reader getReader(XMLInputSource inputSource)
  {
    Reader characterStream = inputSource.getCharacterStream();
    if (characterStream != null) {
      return characterStream;
    }
    try
    {
      return new InputStreamReader(inputSource.getByteStream(), fJavaEncoding);
    }
    catch (UnsupportedEncodingException e)
    {
      // deliberately not propagated; the caller receives null
      return null;
    }
  }
  
  /**
   * Scans the given input source to its end with the content scanner, then
   * restores the scanner, scanner state and current entity that were active
   * before the call. An IOException raised while scanning ends the
   * evaluation silently.
   *
   * @param inputSource the source to evaluate
   */
  public void evaluateInputSource(XMLInputSource inputSource)
  {
    // snapshot of what has to be restored afterwards
    final Scanner savedScanner = fScanner;
    final short savedState = fScannerState;
    final CurrentEntity savedEntity = fCurrentEntity;
    
    final Reader source = getReader(inputSource);
    final String enc = inputSource.getEncoding();
    final String pubId = inputSource.getPublicId();
    final String baseId = inputSource.getBaseSystemId();
    final String sysId = inputSource.getSystemId();
    fCurrentEntity = new CurrentEntity(source, enc, pubId, baseId, sysId, expandSystemId(sysId, baseId));
    
    setScanner(fContentScanner);
    setScannerState(STATE_CONTENT);
    try
    {
      for (;;)
      {
        fScanner.scan(false);
        if (fScannerState == STATE_END_DOCUMENT) {
          break;
        }
      }
    }
    catch (IOException ignored)
    {
      // deliberately ignored; fall through to restoring the previous state
    }
    
    setScanner(savedScanner);
    setScannerState(savedState);
    fCurrentEntity = savedEntity;
  }
  
  /**
   * Closes entity streams.
   * <p>
   * With an empty entity stack, the current entity is closed only when
   * {@code closeall} is true. Otherwise the current entity is closed and
   * entities are popped off the stack and closed one by one: all of them
   * when {@code closeall} is true, all but the bottom one when it is false.
   * The last entity popped remains the current entity.
   *
   * @param closeall whether every entity is to be closed
   */
  public void cleanup(boolean closeall)
  {
    final int depth = fCurrentEntityStack.size();
    if (depth <= 0)
    {
      if (closeall && fCurrentEntity != null) {
        fCurrentEntity.closeQuietly();
      }
      return;
    }
    
    if (fCurrentEntity != null) {
      fCurrentEntity.closeQuietly();
    }
    int toPop = closeall ? depth : depth - 1;
    while (toPop-- > 0)
    {
      fCurrentEntity = (CurrentEntity)fCurrentEntityStack.pop();
      fCurrentEntity.closeQuietly();
    }
  }
  
  /** Returns the current entity's encoding, or null when there is no current entity. */
  public String getEncoding()
  {
    if (fCurrentEntity == null) {
      return null;
    }
    return fCurrentEntity.encoding;
  }
  
  /** Returns the current entity's public identifier, or null when there is no current entity. */
  public String getPublicId()
  {
    if (fCurrentEntity == null) {
      return null;
    }
    return fCurrentEntity.publicId;
  }
  
  /** Returns the current entity's base system identifier, or null when there is no current entity. */
  public String getBaseSystemId()
  {
    if (fCurrentEntity == null) {
      return null;
    }
    return fCurrentEntity.baseSystemId;
  }
  
  /** Returns the current entity's literal system identifier, or null when there is no current entity. */
  public String getLiteralSystemId()
  {
    if (fCurrentEntity == null) {
      return null;
    }
    return fCurrentEntity.literalSystemId;
  }
  
  /** Returns the current entity's expanded system identifier, or null when there is no current entity. */
  public String getExpandedSystemId()
  {
    if (fCurrentEntity == null) {
      return null;
    }
    return fCurrentEntity.expandedSystemId;
  }
  
  /** Returns the current entity's line number, or -1 when there is no current entity. */
  public int getLineNumber()
  {
    if (fCurrentEntity == null) {
      return -1;
    }
    return fCurrentEntity.getLineNumber();
  }
  
  /** Returns the current entity's column number, or -1 when there is no current entity. */
  public int getColumnNumber()
  {
    if (fCurrentEntity == null) {
      return -1;
    }
    return fCurrentEntity.getColumnNumber();
  }
  
  /**
   * Returns the current entity's version string, or null when there is no
   * current entity.
   * <p>
   * The decompiled code called {@code fCurrentEntity.getClass()} before the
   * null test, which threw a NullPointerException in exactly the case the
   * test is meant to handle; the field is now read only after the check.
   */
  public String getXMLVersion()
  {
    return fCurrentEntity != null ? fCurrentEntity.version : null;
  }
  
  /** Returns the current entity's character offset, or -1 when there is no current entity. */
  public int getCharacterOffset()
  {
    if (fCurrentEntity == null) {
      return -1;
    }
    return fCurrentEntity.getCharacterOffset();
  }
  
  /**
   * Looks up the default value of a recognized feature.
   *
   * @param featureId the feature identifier
   * @return the entry of RECOGNIZED_FEATURES_DEFAULTS at the position where
   *         RECOGNIZED_FEATURES holds {@code featureId} (may itself be
   *         null), or null if the identifier is not recognized
   */
  public Boolean getFeatureDefault(String featureId)
  {
    if (RECOGNIZED_FEATURES == null) {
      return null;
    }
    int idx = 0;
    while (idx < RECOGNIZED_FEATURES.length)
    {
      if (RECOGNIZED_FEATURES[idx].equals(featureId)) {
        return RECOGNIZED_FEATURES_DEFAULTS[idx];
      }
      idx++;
    }
    return null;
  }
  
  /**
   * Looks up the default value of a recognized property.
   *
   * @param propertyId the property identifier
   * @return the entry of RECOGNIZED_PROPERTIES_DEFAULTS at the position
   *         where RECOGNIZED_PROPERTIES holds {@code propertyId} (may itself
   *         be null), or null if the identifier is not recognized
   */
  public Object getPropertyDefault(String propertyId)
  {
    if (RECOGNIZED_PROPERTIES == null) {
      return null;
    }
    int idx = 0;
    while (idx < RECOGNIZED_PROPERTIES.length)
    {
      if (RECOGNIZED_PROPERTIES[idx].equals(propertyId)) {
        return RECOGNIZED_PROPERTIES_DEFAULTS[idx];
      }
      idx++;
    }
    return null;
  }
  
  /**
   * Returns the feature identifiers recognized by this component.
   * <p>
   * A copy of {@code RECOGNIZED_FEATURES} is returned so that callers
   * cannot modify the array that {@link #getFeatureDefault(String)}
   * indexes into.
   *
   * @return a fresh array of feature identifiers, or {@code null} when
   *         {@code RECOGNIZED_FEATURES} is {@code null}
   */
  public String[] getRecognizedFeatures()
  {
    if (RECOGNIZED_FEATURES == null) {
      return null;
    }
    return (String[])RECOGNIZED_FEATURES.clone();
  }
  
  /**
   * Returns the property identifiers recognized by this component.
   * <p>
   * A copy of {@code RECOGNIZED_PROPERTIES} is returned so that callers
   * cannot modify the array that {@link #getPropertyDefault(String)}
   * indexes into.
   *
   * @return a fresh array of property identifiers, or {@code null} when
   *         {@code RECOGNIZED_PROPERTIES} is {@code null}
   */
  public String[] getRecognizedProperties()
  {
    if (RECOGNIZED_PROPERTIES == null) {
      return null;
    }
    return (String[])RECOGNIZED_PROPERTIES.clone();
  }
  
  /**
   * Re-reads every feature and property this scanner uses from the given
   * component manager and stores the values in the scanner's fields.
   * <p>
   * Values are fetched in a fixed order; if the manager throws part-way
   * through, the fields assigned so far keep their new values.
   * Property values read through {@code String.valueOf} become the text
   * {@code "null"} when the manager returns {@code null}.
   *
   * @param manager the component manager to query
   * @throws XMLConfigurationException propagated from the manager's
   *         {@code getFeature} / {@code getProperty} calls
   */
  public void reset(XMLComponentManager manager)
    throws XMLConfigurationException
  {
    // General parser features.
    this.fAugmentations = manager.getFeature("http://cyberneko.org/html/features/augmentations");
    this.fReportErrors = manager.getFeature("http://cyberneko.org/html/features/report-errors");

    // Reference-notification and reference-fixing features.
    this.fNotifyCharRefs = manager.getFeature("http://apache.org/xml/features/scanner/notify-char-refs");
    this.fNotifyXmlBuiltinRefs = manager.getFeature("http://apache.org/xml/features/scanner/notify-builtin-refs");
    this.fNotifyHtmlBuiltinRefs = manager.getFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs");
    this.fFixWindowsCharRefs = manager.getFeature("http://cyberneko.org/html/features/scanner/fix-mswindows-refs");

    // Delimiter stripping inside script and style content.
    this.fScriptStripCDATADelims = manager.getFeature("http://cyberneko.org/html/features/scanner/script/strip-cdata-delims");
    this.fScriptStripCommentDelims = manager.getFeature("http://cyberneko.org/html/features/scanner/script/strip-comment-delims");
    this.fStyleStripCDATADelims = manager.getFeature("http://cyberneko.org/html/features/scanner/style/strip-cdata-delims");
    this.fStyleStripCommentDelims = manager.getFeature("http://cyberneko.org/html/features/scanner/style/strip-comment-delims");

    // Remaining scanner features.
    this.fIgnoreSpecifiedCharset = manager.getFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset");
    this.fCDATASections = manager.getFeature("http://cyberneko.org/html/features/scanner/cdata-sections");
    this.fOverrideDoctype = manager.getFeature("http://cyberneko.org/html/features/override-doctype");
    this.fInsertDoctype = manager.getFeature("http://cyberneko.org/html/features/insert-doctype");
    this.fNormalizeAttributes = manager.getFeature("http://cyberneko.org/html/features/scanner/normalize-attrs");
    this.fParseNoScriptContent = manager.getFeature("http://cyberneko.org/html/features/parse-noscript-content");

    // Name-handling properties, converted through getNamesValue.
    final Object elemNames = manager.getProperty("http://cyberneko.org/html/properties/names/elems");
    this.fNamesElems = getNamesValue(String.valueOf(elemNames));
    final Object attrNames = manager.getProperty("http://cyberneko.org/html/properties/names/attrs");
    this.fNamesAttrs = getNamesValue(String.valueOf(attrNames));

    // Default encoding and error reporter.
    final Object defaultEncoding = manager.getProperty("http://cyberneko.org/html/properties/default-encoding");
    this.fDefaultIANAEncoding = String.valueOf(defaultEncoding);
    final Object reporter = manager.getProperty("http://cyberneko.org/html/properties/error-reporter");
    this.fErrorReporter = ((HTMLErrorReporter)reporter);

    // Doctype identifiers.
    final Object doctypePubid = manager.getProperty("http://cyberneko.org/html/properties/doctype/pubid");
    this.fDoctypePubid = String.valueOf(doctypePubid);
    final Object doctypeSysid = manager.getProperty("http://cyberneko.org/html/properties/doctype/sysid");
    this.fDoctypeSysid = String.valueOf(doctypeSysid);
  }
  
  /**
   * Updates the scanner field that corresponds to the given feature.
   * <p>
   * Feature identifiers that this method does not list are ignored
   * silently. Each identifier is tested exactly once; an earlier revision
   * repeated the {@code ignore-specified-charset} test in a second branch
   * that could never be reached.
   *
   * @param featureId the feature identifier; must not be {@code null}
   * @param state     the new state of the feature
   * @throws XMLConfigurationException declared by the signature; this
   *         body does not throw it itself
   */
  public void setFeature(String featureId, boolean state)
    throws XMLConfigurationException
  {
    if (featureId.equals("http://cyberneko.org/html/features/augmentations")) {
      fAugmentations = state;
    } else if (featureId.equals("http://cyberneko.org/html/features/scanner/ignore-specified-charset")) {
      fIgnoreSpecifiedCharset = state;
    } else if (featureId.equals("http://apache.org/xml/features/scanner/notify-char-refs")) {
      fNotifyCharRefs = state;
    } else if (featureId.equals("http://apache.org/xml/features/scanner/notify-builtin-refs")) {
      fNotifyXmlBuiltinRefs = state;
    } else if (featureId.equals("http://cyberneko.org/html/features/scanner/notify-builtin-refs")) {
      fNotifyHtmlBuiltinRefs = state;
    } else if (featureId.equals("http://cyberneko.org/html/features/scanner/fix-mswindows-refs")) {
      fFixWindowsCharRefs = state;
    } else if (featureId.equals("http://cyberneko.org/html/features/scanner/script/strip-cdata-delims")) {
      fScriptStripCDATADelims = state;
    } else if (featureId.equals("http://cyberneko.org/html/features/scanner/script/strip-comment-delims")) {
      fScriptStripCommentDelims = state;
    } else if (featureId.equals("http://cyberneko.org/html/features/scanner/style/strip-cdata-delims")) {
      fStyleStripCDATADelims = state;
    } else if (featureId.equals("http://cyberneko.org/html/features/scanner/style/strip-comment-delims")) {
      fStyleStripCommentDelims = state;
    } else if (featureId.equals("http://cyberneko.org/html/features/parse-noscript-content")) {
      fParseNoScriptContent = state;
    }
  }
  
  /**
   * Updates the scanner field that corresponds to the given property.
   * <p>
   * Only the element-names, attribute-names and default-encoding
   * properties are handled; any other identifier is ignored. The value is
   * converted with {@code String.valueOf} only when the identifier
   * matches.
   *
   * @param propertyId the property identifier; must not be {@code null}
   * @param value      the new property value
   * @throws XMLConfigurationException declared by the signature; this
   *         body does not throw it itself
   */
  public void setProperty(String propertyId, Object value)
    throws XMLConfigurationException
  {
    if (propertyId.equals("http://cyberneko.org/html/properties/names/elems")) {
      fNamesElems = getNamesValue(String.valueOf(value));
    } else if (propertyId.equals("http://cyberneko.org/html/properties/names/attrs")) {
      fNamesAttrs = getNamesValue(String.valueOf(value));
    } else if (propertyId.equals("http://cyberneko.org/html/properties/default-encoding")) {
      fDefaultIANAEncoding = String.valueOf(value);
    }
  }
  
  /**
   * Prepares the scanner to read a new document from the given source.
   * <p>
   * Element counters, location tracking and the entity stack are reset.
   * If the source supplies a character stream it is used directly.
   * Otherwise a byte stream is taken from the source, or opened from the
   * expanded system identifier, and wrapped in a
   * {@code PlaybackInputStream}. The encoding is then resolved as a pair:
   * {@code encodings[0]} is the IANA name and {@code encodings[1]} the
   * Java name. The source's declared encoding is used when present,
   * otherwise the stream is asked to detect one, and the default encoding
   * is the fallback. Finally a new current entity is created and the
   * content scanner is installed.
   * <p>
   * Encoding names are upper-cased with a fixed locale so that the
   * mapping lookup and the {@code "ISO-8859"} prefix test do not depend
   * on the JVM's default locale (for example the Turkish dotted capital
   * I).
   *
   * @param source the input source to scan
   * @throws IOException if the URL cannot be opened, encoding detection
   *         fails, or the resolved Java encoding is not supported
   */
  public void setInputSource(XMLInputSource source)
    throws IOException
  {
    // Reset per-document state.
    fElementCount = 0;
    fElementDepth = -1;
    fByteStream = null;
    fCurrentEntityStack.removeAllElements();
    
    fBeginLineNumber = 1;
    fBeginColumnNumber = 1;
    fBeginCharacterOffset = 0;
    fEndLineNumber = fBeginLineNumber;
    fEndColumnNumber = fBeginColumnNumber;
    fEndCharacterOffset = fBeginCharacterOffset;
    
    fIANAEncoding = fDefaultIANAEncoding;
    fJavaEncoding = fIANAEncoding;
    
    String encoding = source.getEncoding();
    String publicId = source.getPublicId();
    String baseSystemId = source.getBaseSystemId();
    String literalSystemId = source.getSystemId();
    String expandedSystemId = expandSystemId(literalSystemId, baseSystemId);
    
    Reader reader = source.getCharacterStream();
    if (reader == null)
    {
      // No character stream: fall back to bytes, opening the URL if needed.
      InputStream inputStream = source.getByteStream();
      if (inputStream == null)
      {
        URL url = new URL(expandedSystemId);
        inputStream = url.openStream();
      }
      fByteStream = new PlaybackInputStream(inputStream);
      // encodings[0] = IANA name, encodings[1] = Java name.
      String[] encodings = new String[2];
      if (encoding == null) {
        fByteStream.detectEncoding(encodings);
      } else {
        encodings[0] = encoding;
      }
      if (encodings[0] == null)
      {
        // Nothing declared or detected: use the configured default.
        encodings[0] = fDefaultIANAEncoding;
        if (fReportErrors) {
          fErrorReporter.reportWarning("HTML1000", null);
        }
      }
      if (encodings[1] == null)
      {
        // Locale-independent upper-casing keeps the lookup key stable.
        encodings[1] = EncodingMap.getIANA2JavaMapping(encodings[0].toUpperCase(java.util.Locale.ENGLISH));
        if (encodings[1] == null)
        {
          // No mapping known: try the IANA name as the Java name.
          encodings[1] = encodings[0];
          if (fReportErrors) {
            fErrorReporter.reportWarning("HTML1001", new Object[] { encodings[0] });
          }
        }
      }
      fIANAEncoding = encodings[0];
      fJavaEncoding = encodings[1];
      
      fIso8859Encoding = ((fIANAEncoding == null) || (fIANAEncoding.toUpperCase(java.util.Locale.ENGLISH).startsWith("ISO-8859")) || (fIANAEncoding.equalsIgnoreCase(fDefaultIANAEncoding)));
      
      encoding = fIANAEncoding;
      reader = new InputStreamReader(fByteStream, fJavaEncoding);
    }
    fCurrentEntity = new CurrentEntity(reader, encoding, publicId, baseSystemId, literalSystemId, expandedSystemId);
    
    setScanner(fContentScanner);
    setScannerState((short)10);
  }
  
  /**
   * Drives the current scanner over the document.
   * <p>
   * The scanner is invoked at least once. When {@code complete} is true
   * it is invoked repeatedly until it returns {@code false}; when
   * {@code complete} is false a single invocation is made.
   *
   * @param complete whether to keep scanning until the scanner reports
   *                 that it has nothing more to do
   * @return {@code false} as soon as the scanner's {@code scan} call
   *         returns {@code false}; {@code true} after a single successful
   *         step when {@code complete} is false
   * @throws XNIException propagated from the scanner
   * @throws IOException propagated from the scanner
   */
  public boolean scanDocument(boolean complete)
    throws XNIException, IOException
  {
    for (;;)
    {
      boolean more = fScanner.scan(complete);
      if (!more) {
        return false;
      }
      if (!complete) {
        return true;
      }
    }
  }
  
  public void setDocumentHandler(XMLDocumentHandler handler)
1 2 3 4 5 6 7 8 9

Further reading...

For more information on Java 1.5 Tiger, you may find "Java 1.5 Tiger: A Developer's Notebook" by D. Flanagan and B. McLaughlin (O'Reilly) of interest.

New! JAR listings


Copyright 2006-2017. Infinite Loop Ltd