nekohtml-1.9.14

  {
    fDocumentHandler = handler;
  }
  
  /**
   * Returns the document handler currently held by this scanner.
   *
   * @return the current value of the {@code fDocumentHandler} field
   */
  public XMLDocumentHandler getDocumentHandler()
  {
    final XMLDocumentHandler handler = this.fDocumentHandler;
    return handler;
  }
  
  /**
   * Finds the value of the first attribute whose qualified name matches
   * {@code aname}, ignoring case.
   *
   * @param attrs the attributes to search; {@code null} is treated as empty
   * @param aname the qualified attribute name to look for
   * @return the value of the first matching attribute, or {@code null} when
   *         there is no match
   */
  protected static String getValue(XMLAttributes attrs, String aname)
  {
    if (attrs == null) {
      return null;
    }
    final int count = attrs.getLength();
    int index = 0;
    while (index < count) {
      final String qname = attrs.getQName(index);
      if (qname.equalsIgnoreCase(aname)) {
        return attrs.getValue(index);
      }
      index++;
    }
    return null;
  }
  
  /**
   * Expands a system identifier against a base system identifier.
   * <p>
   * A {@code null} or empty {@code systemId}, and one that already parses as
   * a {@code URI}, is returned unchanged. Otherwise the identifier is passed
   * through {@link #fixURI(String)} and resolved against a base URI. The base
   * is built from {@code baseSystemId}, or from the {@code user.dir} system
   * property when {@code baseSystemId} is null, empty or equal to
   * {@code systemId}. If resolution fails with a malformed-URI error the
   * original {@code systemId} is returned.
   *
   * @param systemId the identifier to expand
   * @param baseSystemId the identifier to resolve against; may be {@code null}
   * @return the expanded identifier, or {@code systemId} itself when no
   *         expansion was needed or possible
   */
  public static String expandSystemId(String systemId, String baseSystemId)
  {
    // Nothing to expand.
    if (systemId == null || systemId.length() == 0) {
      return systemId;
    }

    // An identifier that already parses as a URI is used as is.
    try
    {
      new URI(systemId);
      return systemId;
    }
    catch (URI.MalformedURIException notAUri) {}

    final String fixedId = fixURI(systemId);
    String expanded = systemId;
    try
    {
      final boolean baseUnusable = baseSystemId == null
        || baseSystemId.length() == 0
        || baseSystemId.equals(systemId);
      URI baseUri;
      if (baseUnusable)
      {
        // Fall back to the working directory as the base.
        String workingDir;
        try
        {
          workingDir = fixURI(System.getProperty("user.dir"));
        }
        catch (SecurityException denied)
        {
          workingDir = "";
        }
        if (!workingDir.endsWith("/")) {
          workingDir = workingDir + "/";
        }
        baseUri = new URI("file", "", workingDir, null, null);
      }
      else
      {
        try
        {
          baseUri = new URI(fixURI(baseSystemId));
        }
        catch (URI.MalformedURIException badBase)
        {
          // The base is not a URI on its own; treat it as a file path.
          String workingDir;
          try
          {
            workingDir = fixURI(System.getProperty("user.dir"));
          }
          catch (SecurityException denied)
          {
            workingDir = "";
          }
          if (baseSystemId.indexOf(':') == -1)
          {
            // No colon: interpret the base relative to the working directory.
            if (!workingDir.endsWith("/")) {
              workingDir = workingDir + "/";
            }
            workingDir = workingDir + fixURI(baseSystemId);
            baseUri = new URI("file", "", workingDir, null, null);
          }
          else
          {
            baseUri = new URI("file", "", fixURI(baseSystemId), null, null);
          }
        }
      }
      expanded = new URI(baseUri, fixedId).toString();
    }
    catch (URI.MalformedURIException unresolvable) {}
    return expanded;
  }
  
  /**
   * Normalises a platform file path so it can be used as a URI path.
   * <p>
   * The platform separator character is replaced by {@code '/'}. A string
   * starting with an ASCII letter followed by {@code ':'} gets a leading
   * {@code '/'}; a string starting with {@code "//"} gets a {@code "file:"}
   * prefix. Strings shorter than two characters are only separator-replaced.
   *
   * @param str the path to fix; must not be {@code null}
   * @return the fixed string
   */
  protected static String fixURI(String str)
  {
    final String fixed = str.replace(File.separatorChar, '/');
    if (fixed.length() < 2) {
      return fixed;
    }
    final char first = fixed.charAt(0);
    final char second = fixed.charAt(1);
    if (second == ':')
    {
      // Drive-letter form such as "C:/dir".
      final char drive = Character.toUpperCase(first);
      final boolean isDriveLetter = (drive >= 'A') && (drive <= 'Z');
      return isDriveLetter ? "/" + fixed : fixed;
    }
    if ((second == '/') && (first == '/')) {
      return "file:" + fixed;
    }
    return fixed;
  }
  
  /**
   * Changes the case of a name according to {@code mode}.
   * <p>
   * Case conversion uses {@code Locale.ENGLISH} so the result does not depend
   * on the JVM default locale (with a Turkish default, {@code "title"} would
   * otherwise upper-case to {@code "T\u0130TLE"}).
   *
   * @param name the name to modify; must not be {@code null} for modes 1 and 2
   * @param mode {@code 1} to upper-case, {@code 2} to lower-case; any other
   *             value leaves the name unchanged
   * @return the modified name
   */
  protected static final String modifyName(String name, short mode)
  {
    switch (mode)
    {
    case 1: 
      return name.toUpperCase(java.util.Locale.ENGLISH);
    case 2: 
      return name.toLowerCase(java.util.Locale.ENGLISH);
    }
    return name;
  }
  
  /**
   * Converts a names setting given as text into its numeric code.
   *
   * @param value the textual setting; must not be {@code null}
   * @return {@code 2} for {@code "lower"}, {@code 1} for {@code "upper"},
   *         and {@code 0} for anything else
   */
  protected static final short getNamesValue(String value)
  {
    final short code;
    if (value.equals("lower")) {
      code = 2;
    } else if (value.equals("upper")) {
      code = 1;
    } else {
      code = 0;
    }
    return code;
  }
  
  /**
   * Maps a Windows-1252 code in the range 128-159 to the Unicode code point
   * of the character it represents.
   * <p>
   * Codes 128 (euro sign), 142 and 158 (Z/z with caron) are included so that
   * every code Windows-1252 assigns in this range is translated. Codes that
   * Windows-1252 leaves undefined (129, 141, 143, 144, 157) and every value
   * outside the range are returned unchanged.
   *
   * @param origChar the code taken from a numeric character reference
   * @return the corresponding Unicode code point, or {@code origChar} when
   *         there is no mapping
   */
  protected int fixWindowsCharacter(int origChar)
  {
    switch (origChar)
    {
    case 128: 
      return 8364; // U+20AC euro sign
    case 130: 
      return 8218;
    case 131: 
      return 402;
    case 132: 
      return 8222;
    case 133: 
      return 8230;
    case 134: 
      return 8224;
    case 135: 
      return 8225;
    case 136: 
      return 710;
    case 137: 
      return 8240;
    case 138: 
      return 352;
    case 139: 
      return 8249;
    case 140: 
      return 338;
    case 142: 
      return 381; // U+017D Z with caron
    case 145: 
      return 8216;
    case 146: 
      return 8217;
    case 147: 
      return 8220;
    case 148: 
      return 8221;
    case 149: 
      return 8226;
    case 150: 
      return 8211;
    case 151: 
      return 8212;
    case 152: 
      return 732;
    case 153: 
      return 8482;
    case 154: 
      return 353;
    case 155: 
      return 8250;
    case 156: 
      return 339;
    case 158: 
      return 382; // U+017E z with caron
    case 159: 
      return 376;
    }
    return origChar;
  }
  
  /**
   * Reads the next character from the current entity.
   *
   * @return whatever {@code fCurrentEntity.read()} returns
   * @throws IOException if the underlying read fails
   */
  protected int read()
    throws IOException
  {
    final int next = fCurrentEntity.read();
    return next;
  }
  
  /**
   * Replaces the active scanner.
   *
   * @param scanner the scanner to store in {@code fScanner}
   */
  protected void setScanner(Scanner scanner)
  {
    this.fScanner = scanner;
  }
  
  /**
   * Replaces the scanner state.
   *
   * @param state the state code to store in {@code fScannerState}
   */
  protected void setScannerState(short state)
  {
    this.fScannerState = state;
  }
  
  /**
   * Scans a DOCTYPE declaration and reports it to the document handler.
   * <p>
   * Reads the optional root name and the optional {@code PUBLIC} /
   * {@code SYSTEM} identifiers, then skips the rest of the declaration: it
   * stops after the closing {@code '>'}, or just before a {@code '<'} (which
   * is pushed back), or at end of input. An internal subset introduced by
   * {@code '['} is skipped with {@link #skipMarkup(boolean)}.
   *
   * @throws IOException if reading from the current entity fails
   */
  protected void scanDoctype()
    throws IOException
  {
    String root = null;
    String pubid = null;
    String sysid = null;
    if (skipSpaces())
    {
      root = scanName();
      if (root == null)
      {
        if (fReportErrors) {
          fErrorReporter.reportError("HTML1014", null);
        }
      }
      else {
        root = modifyName(root, fNamesElems);
      }
      if (skipSpaces()) {
        if (skip("PUBLIC", false))
        {
          skipSpaces();
          pubid = scanLiteral();
          if (skipSpaces()) {
            sysid = scanLiteral();
          }
        }
        else if (skip("SYSTEM", false))
        {
          skipSpaces();
          sysid = scanLiteral();
        }
      }
    }
    // Skip to the end of the declaration. Each terminating case must leave
    // the loop: without the break, a rewound '<' would be read again forever
    // and a '>' would not stop the scan.
    int c;
    while ((c = fCurrentEntity.read()) != -1) {
      if (c == '<') {
        fCurrentEntity.rewind();
        break;
      }
      if (c == '>') {
        break;
      }
      if (c == '[') {
        skipMarkup(true);
      }
    }
    if (fDocumentHandler != null)
    {
      if (fOverrideDoctype)
      {
        pubid = fDoctypePubid;
        sysid = fDoctypeSysid;
      }
      fEndLineNumber = fCurrentEntity.getLineNumber();
      fEndColumnNumber = fCurrentEntity.getColumnNumber();
      fEndCharacterOffset = fCurrentEntity.getCharacterOffset();
      fDocumentHandler.doctypeDecl(root, pubid, sysid, locationAugs());
    }
  }
  
  /**
   * Scans a quoted literal.
   * <p>
   * If the next character is not a single or double quote it is pushed back
   * and {@code null} is returned. Otherwise characters are collected up to
   * the matching quote; each run of newlines is replaced by one space, and an
   * unexpected {@code '<'} is pushed back and ends the literal.
   *
   * @return the literal text without quotes, or {@code null} if no opening
   *         quote was found
   * @throws IOException if reading fails
   * @throws EOFException if the input ends inside the literal
   */
  protected String scanLiteral()
    throws IOException
  {
    final int delimiter = fCurrentEntity.read();
    if ((delimiter != '\'') && (delimiter != '"'))
    {
      fCurrentEntity.rewind();
      return null;
    }

    final StringBuffer literal = new StringBuffer();
    int ch;
    for (;;)
    {
      ch = fCurrentEntity.read();
      if ((ch == -1) || (ch == delimiter)) {
        break;
      }
      if ((ch == '\r') || (ch == '\n'))
      {
        // Collapse the whole newline run into a single space.
        fCurrentEntity.rewind();
        skipNewlines();
        literal.append(' ');
        continue;
      }
      if (ch == '<')
      {
        fCurrentEntity.rewind();
        break;
      }
      literal.append((char)ch);
    }
    if (ch == -1)
    {
      if (fReportErrors) {
        fErrorReporter.reportError("HTML1007", null);
      }
      throw new EOFException();
    }
    return literal.toString();
  }
  
  /**
   * Scans a name made of letters, digits, {@code '-'}, {@code '.'},
   * {@code ':'} and {@code '_'}.
   * <p>
   * When the buffer runs out in the middle of a name, the part scanned so far
   * is moved to the front of the buffer and more input is loaded behind it.
   *
   * @return the scanned name, or {@code null} if no name character was found
   *         or no input was available
   * @throws IOException if loading more input fails
   */
  protected String scanName()
    throws IOException
  {
    fCurrentEntity.debugBufferIfNeeded("(scanName: ");
    if ((fCurrentEntity.offset == fCurrentEntity.length) && 
      (fCurrentEntity.load(0) == -1))
    {
      fCurrentEntity.debugBufferIfNeeded(")scanName: ");
      return null;
    }
    int start = fCurrentEntity.offset;
    while (true)
    {
      // Consume name characters until a non-name character or the buffer end.
      while (fCurrentEntity.hasNext())
      {
        final char ch = fCurrentEntity.getNextChar();
        final boolean nameChar = Character.isLetterOrDigit(ch)
          || (ch == '-') || (ch == '.') || (ch == ':') || (ch == '_');
        if (!nameChar)
        {
          fCurrentEntity.rewind();
          break;
        }
      }
      if (fCurrentEntity.offset != fCurrentEntity.length) {
        break;
      }
      // Buffer exhausted: keep the partial name and load more behind it.
      final int pending = fCurrentEntity.length - start;
      System.arraycopy(fCurrentEntity.buffer, start, fCurrentEntity.buffer, 0, pending);
      final int loaded = fCurrentEntity.load(pending);
      start = 0;
      if (loaded == -1) {
        break;
      }
    }
    final int nameLength = fCurrentEntity.offset - start;
    String name = null;
    if (nameLength > 0) {
      name = new String(fCurrentEntity.buffer, start, nameLength);
    }
    fCurrentEntity.debugBufferIfNeeded(")scanName: ", " -> \"" + name + '"');
    return name;
  }
  
  /**
   * Scans an entity or character reference; the leading {@code '&'} has
   * already been consumed by the caller.
   * <p>
   * The raw reference text (including {@code '&'} and any {@code ';'}) is
   * accumulated in {@code str}. When {@code content} is true, a document
   * handler is set and {@code fElementCount >= fElementDepth}, the result is
   * reported through {@code characters}: the resolved character for a
   * recognised reference, otherwise the raw text.
   *
   * @param str buffer that receives the reference text; it is cleared first
   * @param content whether the reference occurs in content, which enables
   *                reporting to the document handler
   * @return the resolved character value, or {@code -1} if the reference is
   *         a bare {@code '&'}, is unknown, or is malformed
   * @throws IOException if reading fails
   */
  protected int scanEntityRef(XMLStringBuffer str, boolean content)
    throws IOException
  {
    str.clear();
    str.append('&');
    boolean endsWithSemicolon = false;
    for (;;)
    {
      int c = fCurrentEntity.read();
      if (c == ';')
      {
        str.append(';');
        endsWithSemicolon = true;
        break;
      }
      if (c == -1) {
        break;
      }
      if ((!ENTITY_CHARS.get(c)) && (c != '#'))
      {
        fCurrentEntity.rewind();
        break;
      }
      str.append((char)c);
    }
    if ((!endsWithSemicolon) && 
      (fReportErrors)) {
      fErrorReporter.reportWarning("HTML1004", null);
    }
    // Only the '&' was collected: there is no reference to resolve.
    if (str.length == 1)
    {
      if ((content) && (fDocumentHandler != null) && (fElementCount >= fElementDepth))
      {
        fEndLineNumber = fCurrentEntity.getLineNumber();
        fEndColumnNumber = fCurrentEntity.getColumnNumber();
        fEndCharacterOffset = fCurrentEntity.getCharacterOffset();
        fDocumentHandler.characters(str, locationAugs());
      }
      return -1;
    }
    // Strip the leading '&' and, when present, the trailing ';'.
    String name;
    if (endsWithSemicolon) {
      name = str.toString().substring(1, str.length - 1);
    } else {
      name = str.toString().substring(1);
    }
    if (name.startsWith("#"))
    {
      // Numeric character reference, decimal or hexadecimal.
      int value = -1;
      try
      {
        if ((name.startsWith("#x")) || (name.startsWith("#X"))) {
          value = Integer.parseInt(name.substring(2), 16);
        } else {
          value = Integer.parseInt(name.substring(1));
        }
        if ((fFixWindowsCharRefs) && (fIso8859Encoding)) {
          value = fixWindowsCharacter(value);
        }
        if ((content) && (fDocumentHandler != null) && (fElementCount >= fElementDepth))
        {
          fEndLineNumber = fCurrentEntity.getLineNumber();
          fEndColumnNumber = fCurrentEntity.getColumnNumber();
          fEndCharacterOffset = fCurrentEntity.getCharacterOffset();
          if (fNotifyCharRefs)
          {
            XMLResourceIdentifier id = resourceId();
            String encoding = null;
            fDocumentHandler.startGeneralEntity(name, id, encoding, locationAugs());
          }
          str.clear();
          str.append((char)value);
          fDocumentHandler.characters(str, locationAugs());
          if (fNotifyCharRefs) {
            fDocumentHandler.endGeneralEntity(name, locationAugs());
          }
        }
      }
      catch (NumberFormatException e)
      {
        // Not a number: report the error and pass the raw text through.
        if (fReportErrors) {
          fErrorReporter.reportError("HTML1005", new Object[] { name });
        }
        if ((content) && (fDocumentHandler != null) && (fElementCount >= fElementDepth))
        {
          fEndLineNumber = fCurrentEntity.getLineNumber();
          fEndColumnNumber = fCurrentEntity.getColumnNumber();
          fEndCharacterOffset = fCurrentEntity.getCharacterOffset();
          fDocumentHandler.characters(str, locationAugs());
        }
      }
      return value;
    }
    int c = HTMLEntities.get(name);
    
    // Outside content, a reference without ';' that resolves above 256 is
    // treated as unknown.
    boolean invalidEntityInAttribute = (!content) && (!endsWithSemicolon) && (c > 256);
    if ((c == -1) || (invalidEntityInAttribute))
    {
      if (fReportErrors) {
        fErrorReporter.reportWarning("HTML1006", new Object[] { name });
      }
      if ((content) && (fDocumentHandler != null) && (fElementCount >= fElementDepth))
      {
        fEndLineNumber = fCurrentEntity.getLineNumber();
        fEndColumnNumber = fCurrentEntity.getColumnNumber();
        fEndCharacterOffset = fCurrentEntity.getCharacterOffset();
        fDocumentHandler.characters(str, locationAugs());
      }
      return -1;
    }
    if ((content) && (fDocumentHandler != null) && (fElementCount >= fElementDepth))
    {
      fEndLineNumber = fCurrentEntity.getLineNumber();
      fEndColumnNumber = fCurrentEntity.getColumnNumber();
      fEndCharacterOffset = fCurrentEntity.getCharacterOffset();
      boolean notify = (fNotifyHtmlBuiltinRefs) || ((fNotifyXmlBuiltinRefs) && (builtinXmlRef(name)));
      if (notify)
      {
        XMLResourceIdentifier id = resourceId();
        String encoding = null;
        fDocumentHandler.startGeneralEntity(name, id, encoding, locationAugs());
      }
      str.clear();
      str.append((char)c);
      fDocumentHandler.characters(str, locationAugs());
      if (notify) {
        fDocumentHandler.endGeneralEntity(name, locationAugs());
      }
    }
    return c;
  }
  
  /**
   * Tries to consume the exact string {@code s} from the current entity.
   * <p>
   * On a mismatch every character consumed by this call is pushed back. If
   * the input ends while matching, the characters matched so far have been
   * moved to the front of the buffer and the buffer offset is reset to 0.
   *
   * @param s the string to match; {@code null} matches trivially
   * @param caseSensitive when false, both sides are upper-cased before
   *                      comparison
   * @return true if the whole string was matched and consumed
   * @throws IOException if loading more input fails
   */
  protected boolean skip(String s, boolean caseSensitive)
    throws IOException
  {
    final int expected = (s == null) ? 0 : s.length();
    int matched = 0;
    while (matched < expected)
    {
      if (fCurrentEntity.offset == fCurrentEntity.length)
      {
        // Keep what has been matched so far and load more input behind it.
        System.arraycopy(fCurrentEntity.buffer, fCurrentEntity.offset - matched, fCurrentEntity.buffer, 0, matched);
        if (fCurrentEntity.load(matched) == -1)
        {
          fCurrentEntity.offset = 0;
          return false;
        }
      }
      char wanted = s.charAt(matched);
      char actual = fCurrentEntity.getNextChar();
      if (!caseSensitive)
      {
        wanted = Character.toUpperCase(wanted);
        actual = Character.toUpperCase(actual);
      }
      if (wanted != actual)
      {
        fCurrentEntity.rewind(matched + 1);
        return false;
      }
      matched++;
    }
    return true;
  }
  
  /**
   * Skips markup up to the {@code '>'} that closes the current construct.
   * <p>
   * Starts at nesting depth 1. Each {@code '>'} (including the one in
   * {@code "/>"}) decreases the depth, and when {@code balance} is true each
   * {@code '<'} increases it. Scanning stops when the depth reaches 0 or the
   * input ends. Newline runs are consumed through {@link #skipNewlines()}.
   *
   * @param balance whether nested {@code '<'} characters must be balanced by
   *                matching {@code '>'} characters
   * @return true if a {@code "/>"} sequence was encountered while skipping
   * @throws IOException if loading more input fails
   */
  protected boolean skipMarkup(boolean balance)
    throws IOException
  {
    fCurrentEntity.debugBufferIfNeeded("(skipMarkup: ");
    int depth = 1;
    boolean slashgt = false;
    // The label sits on the outer loop so the inner scan can leave both
    // loops at once.
    scan:
    while ((fCurrentEntity.offset != fCurrentEntity.length) || 
      (fCurrentEntity.load(0) != -1)) {
      while (fCurrentEntity.hasNext())
      {
        char c = fCurrentEntity.getNextChar();
        if ((balance) && (c == '<'))
        {
          depth++;
        }
        else if (c == '>')
        {
          depth--;
          if (depth == 0) {
            break scan;
          }
        }
        else if (c == '/')
        {
          // Look one character ahead for "/>", refilling the buffer if needed.
          if ((fCurrentEntity.offset == fCurrentEntity.length) && 
            (fCurrentEntity.load(0) == -1)) {
            break scan;
          }
          c = fCurrentEntity.getNextChar();
          if (c == '>')
          {
            slashgt = true;
            depth--;
            if (depth == 0) {
              break scan;
            }
          }
          else
          {
            fCurrentEntity.rewind();
          }
        }
        else if ((c == '\r') || (c == '\n'))
        {
          fCurrentEntity.rewind();
          skipNewlines();
        }
      }
    }
    fCurrentEntity.debugBufferIfNeeded(")skipMarkup: ", " -> " + slashgt);
    return slashgt;
  }
  
  /**
   * Skips whitespace, handing newline runs to {@link #skipNewlines()}.
   * The first non-whitespace character is pushed back.
   *
   * @return true if at least one whitespace character was skipped
   * @throws IOException if loading more input fails
   */
  protected boolean skipSpaces()
    throws IOException
  {
    fCurrentEntity.debugBufferIfNeeded("(skipSpaces: ");
    boolean sawSpace = false;
    for (;;)
    {
      // Refill when the buffer is exhausted; stop at end of input.
      if ((fCurrentEntity.offset == fCurrentEntity.length) && 
        (fCurrentEntity.load(0) == -1)) {
        break;
      }
      final char ch = fCurrentEntity.getNextChar();
      if (Character.isWhitespace(ch))
      {
        sawSpace = true;
        if ((ch == '\n') || (ch == '\r'))
        {
          fCurrentEntity.rewind();
          skipNewlines();
        }
        continue;
      }
      fCurrentEntity.rewind();
      break;
    }
    fCurrentEntity.debugBufferIfNeeded(")skipSpaces: ", " -> " + sawSpace);
    return sawSpace;
  }
  
  /**
   * Skips a run of newline characters at the current position and advances
   * the entity's line counter by the number of newlines skipped.
   * <p>
   * A {@code '\r'} immediately followed by {@code '\n'} counts as a single
   * newline. Nothing is consumed if the current character is neither
   * {@code '\r'} nor {@code '\n'}.
   *
   * @return the number of newlines skipped; 0 if there were none or no input
   *         was available
   * @throws IOException if loading more input fails
   */
  protected int skipNewlines()
    throws IOException
  {
    fCurrentEntity.debugBufferIfNeeded("(skipNewlines: ");
    // Make sure there is at least one character to look at.
    if ((!fCurrentEntity.hasNext()) && 
      (fCurrentEntity.load(0) == -1))
    {
      fCurrentEntity.debugBufferIfNeeded(")skipNewlines: ");
      return 0;
    }
    char c = fCurrentEntity.getCurrentChar();
    int newlines = 0;
    // NOTE(review): this local is written below but never read in this method.
    int offset = fCurrentEntity.offset;
    if ((c == '\n') || (c == '\r'))
    {
      do
      {
        c = fCurrentEntity.getNextChar();
        if (c == '\r')
        {
          newlines++;
          if (fCurrentEntity.offset == fCurrentEntity.length)
          {
            // Buffer exhausted: leave one slot per newline counted so far at
            // the front of the buffer and load new input after those slots.
            offset = 0;
            fCurrentEntity.offset = newlines;
            if (fCurrentEntity.load(newlines) == -1) {
              break;
            }
          }
          // Treat "\r\n" as one newline: step over the '\n' without counting.
          if (fCurrentEntity.getCurrentChar() == '\n')
          {
            fCurrentEntity.offset += 1;
            fCurrentEntity.characterOffset_ += 1;
            offset++;
          }
        }
        else if (c == '\n')
        {
          newlines++;
          if (fCurrentEntity.offset == fCurrentEntity.length)
          {
            // Same refill handling as for '\r' above.
            offset = 0;
            fCurrentEntity.offset = newlines;
            if (fCurrentEntity.load(newlines) == -1) {
              break;
            }
          }
        }
        else
        {
          // First non-newline character: push it back and stop.
          fCurrentEntity.rewind();
          break;
        }
      } while (fCurrentEntity.offset < fCurrentEntity.length - 1);
      fCurrentEntity.incLine(newlines);
    }
    fCurrentEntity.debugBufferIfNeeded(")skipNewlines: ", " -> " + newlines);
    return newlines;
  }
  
  /**
   * Builds the augmentations that carry the current begin/end location.
   * <p>
   * The shared {@code fInfosetAugs} instance is cleared and reused on every
   * call, so the returned object is only valid until the next call.
   *
   * @return the shared augmentations holding {@code fLocationItem}, or
   *         {@code null} when {@code fAugmentations} is false
   */
  protected final Augmentations locationAugs()
  {
    if (!fAugmentations) {
      return null;
    }
    fLocationItem.setValues(fBeginLineNumber, fBeginColumnNumber, fBeginCharacterOffset, fEndLineNumber, fEndColumnNumber, fEndCharacterOffset);

    final HTMLAugmentations shared = fInfosetAugs;
    shared.removeAllItems();
    shared.putItem("http://cyberneko.org/html/features/augmentations", fLocationItem);
    return shared;
  }
  
  /**
   * Builds the augmentations used for synthesized events.
   * <p>
   * The shared {@code fInfosetAugs} instance is cleared and reused on every
   * call, so the returned object is only valid until the next call.
   *
   * @return the shared augmentations holding {@code SYNTHESIZED_ITEM}, or
   *         {@code null} when {@code fAugmentations} is false
   */
  protected final Augmentations synthesizedAugs()
  {
    if (!fAugmentations) {
      return null;
    }
    final HTMLAugmentations shared = fInfosetAugs;
    shared.removeAllItems();
    shared.putItem("http://cyberneko.org/html/features/augmentations", SYNTHESIZED_ITEM);
    return shared;
  }
  
  /**
   * Returns the shared resource identifier after clearing it.
   * <p>
   * The same {@code fResourceId} instance is handed out on every call, so its
   * contents are only valid until the next call.
   *
   * @return the cleared {@code fResourceId} instance
   */
  protected final XMLResourceIdentifier resourceId()
  {
    fResourceId.clear();
    return fResourceId;
  }
  
  /**
   * Tells whether a name is one of the five entity names {@code amp},
   * {@code lt}, {@code gt}, {@code quot} and {@code apos}. The comparison is
   * case-sensitive.
   *
   * @param name the entity name without {@code '&'} and {@code ';'}; must not
   *             be {@code null}
   * @return true if the name is one of the five names listed above
   */
  protected static boolean builtinXmlRef(String name)
  {
    if (name.equals("amp")) {
      return true;
    }
    if (name.equals("lt")) {
      return true;
    }
    if (name.equals("gt")) {
      return true;
    }
    if (name.equals("quot")) {
      return true;
    }
    return name.equals("apos");
  }
  
  /**
   * Contract for the scanners that can be installed with
   * {@code setScanner(Scanner)}.
   */
  public static abstract interface Scanner
  {
    /**
     * Performs scanning work.
     *
     * @param paramBoolean flag passed by the caller; the visible implementation
     *                     ({@code ContentScanner}) names it {@code complete}
     *                     and keeps scanning while it is true
     * @return a boolean result; the visible implementation returns false once
     *         it has reached its end-of-document state and true otherwise
     * @throws IOException if reading input fails
     */
    public abstract boolean scan(boolean paramBoolean)
      throws IOException;
  }
  
  /**
   * Holds the reader, identifiers, character buffer and position counters
   * (line, column, character offset) of the entity being scanned.
   * <p>
   * {@code buffer[offset .. length)} is the unread part of the data loaded so
   * far. Line and column numbers start at 1, the character offset at 0.
   */
  public static class CurrentEntity
  {
    /** Source of characters. */
    private Reader stream_;
    public final String encoding;
    public final String publicId;
    public final String baseSystemId;
    public final String literalSystemId;
    public final String expandedSystemId;
    public final String version = "1.0";
    private int lineNumber_ = 1;
    private int columnNumber_ = 1;
    public int characterOffset_ = 0;
    // The decompiled literal here was an encoding-mangled char ('?', i.e. 63).
    // NOTE(review): 2048 is believed to be the upstream default buffer size;
    // confirm against the original NekoHTML source.
    public char[] buffer = new char[2048];
    /** Index of the next unread character in {@link #buffer}. */
    public int offset = 0;
    /** Index one past the last valid character in {@link #buffer}. */
    public int length = 0;
    /** Set once the reader has reported end of stream. */
    private boolean endReached_ = false;
    
    /**
     * Creates an entity that reads from {@code stream}; the remaining
     * arguments are stored unchanged in the fields of the same name.
     */
    public CurrentEntity(Reader stream, String encoding, String publicId, String baseSystemId, String literalSystemId, String expandedSystemId)
    {
      stream_ = stream;
      this.encoding = encoding;
      this.publicId = publicId;
      this.baseSystemId = baseSystemId;
      this.literalSystemId = literalSystemId;
      this.expandedSystemId = expandedSystemId;
    }
    
    /** Returns the character at the current offset without consuming it. */
    private char getCurrentChar()
    {
      return buffer[offset];
    }
    
    /**
     * Consumes and returns the character at the current offset, advancing
     * the column and character-offset counters. Does not refill the buffer.
     */
    private char getNextChar()
    {
      characterOffset_ += 1;
      columnNumber_ += 1;
      return buffer[(offset++)];
    }
    
    /** Closes the reader; a failure to close is deliberately ignored. */
    private void closeQuietly()
    {
      try
      {
        stream_.close();
      }
      catch (IOException ignored) {}
    }
    
    /** Tells whether unread characters remain in the buffer. */
    boolean hasNext()
    {
      return offset < length;
    }
    
    /**
     * Reads from the stream into the buffer starting at index {@code offset},
     * leaving the characters before that index untouched. If {@code offset}
     * equals the buffer size, the buffer is first enlarged by a quarter.
     * Afterwards the current position is {@code offset} and {@code length}
     * marks the end of the loaded data.
     *
     * @param offset buffer index at which new data is stored
     * @return the number of characters read, or -1 at end of stream
     * @throws IOException if the reader fails
     */
    protected int load(int offset)
      throws IOException
    {
      debugBufferIfNeeded("(load: ");
      if (offset == buffer.length)
      {
        int adjust = buffer.length / 4;
        char[] array = new char[buffer.length + adjust];
        System.arraycopy(buffer, 0, array, 0, length);
        buffer = array;
      }
      int count = stream_.read(buffer, offset, buffer.length - offset);
      if (count == -1) {
        endReached_ = true;
      }
      length = (count != -1 ? count + offset : offset);
      this.offset = offset;
      debugBufferIfNeeded(")load: ", " -> " + count);
      return count;
    }
    
    /**
     * Consumes and returns the next character, refilling the buffer from the
     * stream when it is exhausted.
     *
     * @return the character read, or -1 at end of stream
     * @throws IOException if the reader fails
     */
    protected int read()
      throws IOException
    {
      debugBufferIfNeeded("(read: ");
      if (offset == length)
      {
        if (endReached_) {
          return -1;
        }
        if (load(0) == -1) {
          return -1;
        }
      }
      char c = buffer[(offset++)];
      characterOffset_ += 1;
      columnNumber_ += 1;
      
      debugBufferIfNeeded(")read: ", " -> " + c);
      return c;
    }
    
    /** Debug hook; delegates to the two-argument form with an empty suffix. */
    private void debugBufferIfNeeded(String prefix)
    {
      debugBufferIfNeeded(prefix, "");
    }
    
    /** Debug hook; intentionally does nothing in this build. */
    private void debugBufferIfNeeded(String prefix, String suffix) {}
    
    /**
     * Switches to a new reader and resets the buffer position, character
     * offset, line number and column number to their initial values.
     */
    private void setStream(InputStreamReader inputStreamReader)
    {
      stream_ = inputStreamReader;
      offset = (length = characterOffset_ = 0);
      lineNumber_ = (columnNumber_ = 1);
    }
    
    /** Pushes back one character. */
    private void rewind()
    {
      offset -= 1;
      characterOffset_ -= 1;
      columnNumber_ -= 1;
    }
    
    /** Pushes back {@code i} characters. */
    private void rewind(int i)
    {
      offset -= i;
      characterOffset_ -= i;
      columnNumber_ -= i;
    }
    
    /** Advances to the start of the next line. */
    private void incLine()
    {
      lineNumber_ += 1;
      columnNumber_ = 1;
    }
    
    /** Advances {@code nbLines} lines and resets the column to 1. */
    private void incLine(int nbLines)
    {
      lineNumber_ += nbLines;
      columnNumber_ = 1;
    }
    
    /** Returns the current line number. */
    public int getLineNumber()
    {
      return lineNumber_;
    }
    
    /**
     * Replaces the buffer contents and position with those of {@code buffer}
     * and sets the position counters to the given values.
     *
     * @param buffer source of the character array, offset and length to adopt
     * @param lineNumber new line number
     * @param columnNumber new column number
     * @param characterOffset new character offset
     */
    private void resetBuffer(XMLStringBuffer buffer, int lineNumber, int columnNumber, int characterOffset)
    {
      lineNumber_ = lineNumber;
      columnNumber_ = columnNumber;
      characterOffset_ = characterOffset;
      // The parameter shadows the field, so the field side must be qualified.
      this.buffer = buffer.ch;
      this.offset = buffer.offset;
      this.length = buffer.length;
    }
    
    /** Returns the current column number. */
    private int getColumnNumber()
    {
      return columnNumber_;
    }
    
    /**
     * Restores a previously saved buffer offset, column number and character
     * offset. The line number is not changed.
     */
    private void restorePosition(int originalOffset, int originalColumnNumber, int originalCharacterOffset)
    {
      offset = originalOffset;
      columnNumber_ = originalColumnNumber;
      characterOffset_ = originalCharacterOffset;
    }
    
    /** Returns the current character offset. */
    private int getCharacterOffset()
    {
      return characterOffset_;
    }
  }
  
  public class ContentScanner
    implements HTMLScanner.Scanner
  {
    private final QName fQName = new QName();
    private final XMLAttributesImpl fAttributes = new XMLAttributesImpl();
    
    /** Creates a content scanner; no initialisation beyond the field initialisers is performed. */
    public ContentScanner() {}
    
    /**
     * Runs the content state machine driven by {@code fScannerState}.
     * <ul>
     * <li>state 0: reads content; {@code '<'} switches to state 1,
     *     {@code '&'} scans an entity reference, anything else is scanned as
     *     character data;</li>
     * <li>state 1: handles what follows {@code '<'} (comment, CDATA, DOCTYPE,
     *     processing instruction, end tag or start tag) and returns to
     *     state 0;</li>
     * <li>state 10: emits the start-of-document event and, if requested, a
     *     synthesized DOCTYPE, then moves to state 0;</li>
     * <li>state 11: emits the end-of-document event and stops.</li>
     * </ul>
     * On end of input the previous entity is popped from the entity stack, or
     * state 11 is entered when the stack is empty.
     *
     * @param complete when true, scanning continues until the document ends
     *                 or the scanner is replaced
     * @return false once state 11 has been processed, true otherwise
     * @throws IOException if reading input fails
     */
    public boolean scan(boolean complete)
      throws IOException
    {
      boolean next;
      do
      {
        try
        {
          next = false;
          switch (fScannerState)
          {
          case 0: 
          {
            fBeginLineNumber = fCurrentEntity.getLineNumber();
            fBeginColumnNumber = fCurrentEntity.getColumnNumber();
            fBeginCharacterOffset = fCurrentEntity.getCharacterOffset();
            int c = fCurrentEntity.read();
            if (c == '<')
            {
              setScannerState((short)1);
              next = true;
            }
            else if (c == '&')
            {
              scanEntityRef(fStringBuffer, true);
            }
            else
            {
              if (c == -1) {
                throw new EOFException();
              }
              // Plain text: push the character back and scan the whole run.
              fCurrentEntity.rewind();
              scanCharacters();
            }
            break;
          }
          case 1: 
          {
            int c = fCurrentEntity.read();
            if (c == '!')
            {
              if (skip("--", false))
              {
                scanComment();
              }
              else if (skip("[CDATA[", false))
              {
                scanCDATA();
              }
              else if (skip("DOCTYPE", false))
              {
                scanDoctype();
              }
              else
              {
                if (fReportErrors) {
                  fErrorReporter.reportError("HTML1002", null);
                }
                skipMarkup(true);
              }
            }
            else if (c == '?')
            {
              scanPI();
            }
            else if (c == '/')
            {
              scanEndElement();
            }
            else
            {
              if (c == -1)
              {
                // Input ended right after '<': report it as text.
                if (fReportErrors) {
                  fErrorReporter.reportError("HTML1003", null);
                }
                if ((fDocumentHandler != null) && (fElementCount >= fElementDepth))
                {
                  fStringBuffer.clear();
                  fStringBuffer.append('<');
                  fDocumentHandler.characters(fStringBuffer, null);
                }
                throw new EOFException();
              }
              fCurrentEntity.rewind();
              fElementCount += 1;
              // NOTE(review): fSingleBoolean is assumed to be a boolean[];
              // the decompiler rendered false as 0 — confirm the declaration.
              fSingleBoolean[0] = false;
              String ename = scanStartElement(fSingleBoolean);
              fBeginLineNumber = fCurrentEntity.getLineNumber();
              fBeginColumnNumber = fCurrentEntity.getColumnNumber();
              fBeginCharacterOffset = fCurrentEntity.getCharacterOffset();
              if ("script".equalsIgnoreCase(ename))
              {
                scanScriptContent();
              }
              else if ((!fParseNoScriptContent) && ("noscript".equalsIgnoreCase(ename)))
              {
                scanNoXxxContent("noscript");
              }
              else if ((!fParseNoFramesContent) && ("noframes".equalsIgnoreCase(ename)))
              {
                scanNoXxxContent("noframes");
              }
              else if ((ename != null) && (!fSingleBoolean[0]) && (HTMLElements.getElement(ename).isSpecial()) && ((!ename.equalsIgnoreCase("TITLE")) || (isEnded(ename))))
              {
                // Hand over to the special scanner for this element's content.
                setScanner(fSpecialScanner.setElementName(ename));
                setScannerState((short)0);
                return true;
              }
            }
            setScannerState((short)0);
            break;
          }
          case 10: 
          {
            if ((fDocumentHandler != null) && (fElementCount >= fElementDepth))
            {
              XMLLocator locator = HTMLScanner.this;
              String encoding = fIANAEncoding;
              Augmentations augs = locationAugs();
              NamespaceContext nscontext = new NamespaceSupport();
              XercesBridge.getInstance().XMLDocumentHandler_startDocument(fDocumentHandler, locator, encoding, nscontext, augs);
            }
            if ((fInsertDoctype) && (fDocumentHandler != null))
            {
              // NOTE(review): the decompiled text was "getElement46name";
              // element code 46 is taken to be HTMLElements.HTML — confirm.
              String root = HTMLElements.getElement(HTMLElements.HTML).name;
              root = HTMLScanner.modifyName(root, fNamesElems);
              String pubid = fDoctypePubid;
              String sysid = fDoctypeSysid;
              fDocumentHandler.doctypeDecl(root, pubid, sysid, synthesizedAugs());
            }
            setScannerState((short)0);
            break;
          }
          case 11: 
          {
            if ((fDocumentHandler != null) && (fElementCount >= fElementDepth) && (complete))
            {
              fEndLineNumber = fCurrentEntity.getLineNumber();
              fEndColumnNumber = fCurrentEntity.getColumnNumber();
              fEndCharacterOffset = fCurrentEntity.getCharacterOffset();
              fDocumentHandler.endDocument(locationAugs());
            }
            return false;
          }
          default: 
            throw new RuntimeException("unknown scanner state: " + fScannerState);
          }
        }
        catch (EOFException e)
        {
          // End of the current entity: resume the enclosing one, or finish.
          if (fCurrentEntityStack.empty()) {
            setScannerState((short)11);
          } else {
            fCurrentEntity = ((HTMLScanner.CurrentEntity)fCurrentEntityStack.pop());
          }
          next = true;
        }
      } while ((next) || (complete));
      return true;
    }
    
    /**
     * Collects the raw content of an element up to (but not including) its
     * end tag and reports it as one block of character data.
     * <p>
     * The end tag is recognised case-insensitively as {@code "</" + tagName}
     * followed by {@code '>'} or whitespace. Each newline run is replaced by
     * one {@code '\n'} per newline skipped.
     *
     * @param tagName the element name whose end tag terminates the content
     * @throws IOException if reading input fails
     */
    private void scanNoXxxContent(String tagName)
      throws IOException
    {
      XMLStringBuffer buffer = new XMLStringBuffer();
      String end = "/" + tagName;
      // Look-ahead covers the end tag name plus the character that follows.
      int endLength = end.length();
      for (;;)
      {
        int c = fCurrentEntity.read();
        if (c == -1) {
          break;
        }
        if (c == '<')
        {
          // The appended space stands in for a missing terminator at EOF.
          String next = nextContent(endLength + 1) + " ";
          if ((next.length() >= endLength + 1) && (end.equalsIgnoreCase(next.substring(0, endLength))) && (('>' == next.charAt(endLength)) || (Character.isWhitespace(next.charAt(endLength)))))
          {
            // Leave the '<' of the end tag for the caller.
            fCurrentEntity.rewind();
            break;
          }
        }
        if ((c == '\r') || (c == '\n'))
        {
          fCurrentEntity.rewind();
          int newlines = skipNewlines();
          for (int i = 0; i < newlines; i++) {
            buffer.append('\n');
          }
        }
        else
        {
          buffer.append((char)c);
        }
      }
      if ((buffer.length > 0) && (fDocumentHandler != null))
      {
        fEndLineNumber = fCurrentEntity.getLineNumber();
        fEndColumnNumber = fCurrentEntity.getColumnNumber();
        fEndCharacterOffset = fCurrentEntity.getCharacterOffset();
        fDocumentHandler.characters(buffer, locationAugs());
      }
    }
    
    /**
     * Collects the raw content of a script element up to (but not including)
     * its {@code </script} end tag and reports it as character data.
     * <p>
     * While inside an HTML comment for which {@code endCommentAvailable()}
     * returned true, a {@code '<'} is not checked as a possible end tag. Each
     * newline run is replaced by one {@code '\n'} per newline skipped.
     * Comment and CDATA delimiters are stripped from the collected text when
     * the corresponding feature flags are set.
     *
     * @throws IOException if reading input fails
     */
    private void scanScriptContent()
      throws IOException
    {
      XMLStringBuffer buffer = new XMLStringBuffer();
      boolean waitForEndComment = false;
      for (;;)
      {
        int c = fCurrentEntity.read();
        if (c == -1) {
          break;
        }
        if ((c == '-') && (HTMLScanner.this.endsWith(buffer, "<!-")))
        {
          // This '-' completes "<!--".
          waitForEndComment = HTMLScanner.this.endCommentAvailable();
        }
        else if ((!waitForEndComment) && (c == '<'))
        {
          // The appended space stands in for a missing terminator at EOF.
          String next = nextContent(8) + " ";
          if ((next.length() >= 8) && ("/script".equalsIgnoreCase(next.substring(0, 7))) && (('>' == next.charAt(7)) || (Character.isWhitespace(next.charAt(7)))))
          {
            // Leave the '<' of the end tag for the caller.
            fCurrentEntity.rewind();
            break;
          }
        }
        else if ((c == '>') && (HTMLScanner.this.endsWith(buffer, "--")))
        {
          // This '>' completes "-->".
          waitForEndComment = false;
        }
        if ((c == '\r') || (c == '\n'))
        {
          fCurrentEntity.rewind();
          int newlines = skipNewlines();
          for (int i = 0; i < newlines; i++) {
            buffer.append('\n');
          }
        }
        else
        {
          buffer.append((char)c);
        }
      }
      if (fScriptStripCommentDelims) {
        HTMLScanner.reduceToContent(buffer, "<!--", "-->");
      }
      if (fScriptStripCDATADelims) {
        HTMLScanner.reduceToContent(buffer, "<![CDATA[", "]]>");
      }
      if ((buffer.length > 0) && (fDocumentHandler != null) && (fElementCount >= fElementDepth))
      {
        fEndLineNumber = fCurrentEntity.getLineNumber();
        fEndColumnNumber = fCurrentEntity.getColumnNumber();
        fEndCharacterOffset = fCurrentEntity.getCharacterOffset();
        fDocumentHandler.characters(buffer, locationAugs());
      }
    }
    
    /**
     * Peeks at up to {@code len} upcoming characters without consuming them.
     * <p>
     * The buffer offset, column number and character offset are saved before
     * reading and restored afterwards. If the buffer is exhausted while it is
     * completely full, more input is loaded behind the existing data;
     * otherwise the peek stops at the end of the loaded data.
     *
     * @param len the maximum number of characters to peek at
     * @return the characters that could be read; possibly fewer than
     *         {@code len}
     * @throws IOException if reading input fails
     */
    protected String nextContent(int len)
      throws IOException
    {
      int originalOffset = fCurrentEntity.offset;
      int originalColumnNumber = fCurrentEntity.getColumnNumber();
      int originalCharacterOffset = fCurrentEntity.getCharacterOffset();
      
      char[] buff = new char[len];
      int nbRead = 0;
      for (nbRead = 0; nbRead < len; nbRead++)
      {
        if (fCurrentEntity.offset == fCurrentEntity.length)
        {
          if (fCurrentEntity.length != fCurrentEntity.buffer.length) {
            break;
          }
          // Buffer is full: load at its end, which makes load() enlarge it
          // and keep the existing data in place.
          fCurrentEntity.load(fCurrentEntity.buffer.length);
        }
        int c = fCurrentEntity.read();
        if (c == -1) {
          break;
        }
        buff[nbRead] = ((char)c);
      }
      fCurrentEntity.restorePosition(originalOffset, originalColumnNumber, originalCharacterOffset);
      return new String(buff, 0, nbRead);
    }
    
    /**
     * Scans a run of character data and reports it to the document handler.
     * <p>
     * Each pass skips a newline run (rewritten in the buffer as one
     * {@code '\n'} per newline) and then consumes characters up to the next
     * {@code '<'}, {@code '&'}, newline or end of buffer. The text is appended
     * to {@code fStringBuffer} when a document handler is set and
     * {@code fElementCount >= fElementDepth}. Scanning ends when no input
     * remains or the next character is {@code '<'} or {@code '&'}.
     *
     * @throws IOException if reading input fails
     */
    protected void scanCharacters()
      throws IOException
    {
      fCurrentEntity.debugBufferIfNeeded("(scanCharacters: ");
      fStringBuffer.clear();
      for (;;)
      {
        int newlines = skipNewlines();
        if ((newlines == 0) && (fCurrentEntity.offset == fCurrentEntity.length))
        {
          // Nothing left to scan; leave the loop so collected text is flushed.
          fCurrentEntity.debugBufferIfNeeded(")scanCharacters: ");
          break;
        }
        // Rewrite the slots just before the current offset as '\n', one per
        // newline skipped.
        int offset = fCurrentEntity.offset - newlines;
        for (int i = offset; i < fCurrentEntity.offset; i++) {
          fCurrentEntity.buffer[i] = '\n';
        }
        while (fCurrentEntity.hasNext())
        {
          char c = fCurrentEntity.getNextChar();
          if ((c == '<') || (c == '&') || (c == '\n') || (c == '\r')) {
            // Push the delimiter back and stop; without the break the same
            // character would be read again forever.
            fCurrentEntity.rewind();
            break;
          }
        }
        if ((fCurrentEntity.offset > offset) && (fDocumentHandler != null) && (fElementCount >= fElementDepth))
        {
          fEndLineNumber = fCurrentEntity.getLineNumber();
          fEndColumnNumber = fCurrentEntity.getColumnNumber();
          fEndCharacterOffset = fCurrentEntity.getCharacterOffset();
          fStringBuffer.append(fCurrentEntity.buffer, offset, fCurrentEntity.offset - offset);
        }
        fCurrentEntity.debugBufferIfNeeded(")scanCharacters: ");
        
        // NOTE(review): this compares against buffer.length, not the loaded
        // length, so a stale character may be inspected — kept as found.
        boolean hasNext = fCurrentEntity.offset < fCurrentEntity.buffer.length;
        int next = hasNext ? fCurrentEntity.getCurrentChar() : -1;
        if ((next == '&') || (next == '<') || (next == -1)) {
          break;
        }
      }
      if (fStringBuffer.length != 0) {
        fDocumentHandler.characters(fStringBuffer, locationAugs());
      }
    }
    
    /**
     * Scans a CDATA section and reports it to the document handler.
     *
     * When {@code fCDATASections} is set, the content is reported as
     * {@code startCDATA} / {@code characters} / {@code endCDATA}. Otherwise
     * the content is wrapped as {@code "[CDATA[" ... "]]"} and reported as a
     * comment. Events are only sent when a document handler is present and
     * {@code fElementCount >= fElementDepth}.
     *
     * @throws IOException  if reading from the current entity fails
     * @throws EOFException if the input ended while scanning the content;
     *                      thrown after whatever was scanned has been reported
     */
    protected void scanCDATA()
      throws IOException
    {
      HTMLScanner.CurrentEntity.access$400(fCurrentEntity, "(scanCDATA: ");
      fStringBuffer.clear();

      if (!fCDATASections)
      {
        // Not reporting CDATA sections: keep the opening delimiter as text.
        fStringBuffer.append("[CDATA[");
      }
      else if ((fDocumentHandler != null) && (fElementCount >= fElementDepth))
      {
        fEndLineNumber = fCurrentEntity.getLineNumber();
        fEndColumnNumber = HTMLScanner.CurrentEntity.access$100(fCurrentEntity);
        fEndCharacterOffset = HTMLScanner.CurrentEntity.access$200(fCurrentEntity);

        fDocumentHandler.startCDATA(locationAugs());
      }

      final boolean reachedEof = scanMarkupContent(fStringBuffer, ']');
      if (!fCDATASections) {
        fStringBuffer.append("]]");
      }

      if ((fDocumentHandler != null) && (fElementCount >= fElementDepth))
      {
        // Refresh the end location before building the augmentations.
        fEndLineNumber = fCurrentEntity.getLineNumber();
        fEndColumnNumber = HTMLScanner.CurrentEntity.access$100(fCurrentEntity);
        fEndCharacterOffset = HTMLScanner.CurrentEntity.access$200(fCurrentEntity);
        if (!fCDATASections)
        {
          fDocumentHandler.comment(fStringBuffer, locationAugs());
        }
        else
        {
          fDocumentHandler.characters(fStringBuffer, locationAugs());

          fDocumentHandler.endCDATA(locationAugs());
        }
      }

      HTMLScanner.CurrentEntity.access$400(fCurrentEntity, ")scanCDATA: ");
      if (reachedEof) {
        throw new EOFException();
      }
    }
    
    /**
     * Scans a comment and reports it to the document handler.
     *
     * The content is first scanned with {@code scanMarkupContent(buffer, '-')}.
     * If that reaches the end of the input, the scanned text is handed back
     * to the current entity and re-read, this time ending the comment at the
     * first {@code '>'}.
     *
     * @throws IOException  if reading from the current entity fails
     * @throws EOFException if no terminating {@code '>'} is found either;
     *                      thrown after the comment text has been reported
     */
    protected void scanComment()
      throws IOException
    {
      HTMLScanner.CurrentEntity.access$400(fCurrentEntity, "(scanComment: ");
      fEndLineNumber = fCurrentEntity.getLineNumber();
      fEndColumnNumber = HTMLScanner.CurrentEntity.access$100(fCurrentEntity);
      fEndCharacterOffset = HTMLScanner.CurrentEntity.access$200(fCurrentEntity);
      XMLStringBuffer buffer = new XMLStringBuffer();
      boolean eof = scanMarkupContent(buffer, '-');
      if (eof)
      {
        // NOTE(review): access$1400 receives the scanned text plus the
        // location saved above; presumably it makes the entity re-read that
        // text from the saved position - confirm.
        HTMLScanner.CurrentEntity.access$1400(fCurrentEntity, buffer, fEndLineNumber, fEndColumnNumber, fEndCharacterOffset);
        // Use a fresh buffer; the old one was just handed to the entity.
        buffer = new XMLStringBuffer();
        for (;;)
        {
          int c = fCurrentEntity.read();
          if (c == -1)
          {
            if (fReportErrors) {
              fErrorReporter.reportError("HTML1007", null);
            }
            eof = true;
            break;
          }
          if (c == '>')
          {
            // A plain '>' closes the comment in this fallback pass.
            eof = false;
            break;
          }
          // Everything else, CR and LF included, is appended unchanged.
          // The former newline branch sat behind the "c is '>'" test and
          // could never execute, so it has been removed.
          buffer.append((char)c);
        }
      }
      if ((fDocumentHandler != null) && (fElementCount >= fElementDepth))
      {
        fEndLineNumber = fCurrentEntity.getLineNumber();
        fEndColumnNumber = HTMLScanner.CurrentEntity.access$100(fCurrentEntity);
        fEndCharacterOffset = HTMLScanner.CurrentEntity.access$200(fCurrentEntity);
        fDocumentHandler.comment(buffer, locationAugs());
      }
      HTMLScanner.CurrentEntity.access$400(fCurrentEntity, ")scanComment: ");
      if (eof) {
        throw new EOFException();
      }
    }
    
    /**
     * Scans markup content into {@code buffer} until the terminator made of
     * at least two {@code cend} characters followed by {@code '>'} is read.
     *
     * Newline runs are replaced by one {@code '\n'} per newline counted by
     * {@code skipNewlines()}. For a terminating run of {@code cend}
     * characters, all but the last two are kept as content. If the input
     * ends directly after a run of {@code cend} characters, that run is not
     * appended.
     *
     * @param buffer receives the scanned content
     * @param cend   the character that, doubled and followed by {@code '>'},
     *               ends the content
     * @return {@code true} if the end of the input was reached before the
     *         terminator; {@code false} otherwise
     * @throws IOException if reading from the current entity fails
     */
    protected boolean scanMarkupContent(XMLStringBuffer buffer, char cend)
      throws IOException
    {
      int ch;
      while (true)
      {
        ch = fCurrentEntity.read();
        if (ch == cend)
        {
          // Measure the run of consecutive cend characters; ch ends up
          // holding the first character after the run.
          int run = 0;
          do
          {
            run++;
            ch = fCurrentEntity.read();
          } while (ch == cend);

          if (ch == -1)
          {
            if (fReportErrors) {
              fErrorReporter.reportError("HTML1007", null);
            }
            break;
          }

          // Only two or more cend characters followed by '>' terminate.
          final boolean terminated = (run >= 2) && (ch == '>');
          final int keep = terminated ? run - 2 : run;
          for (int k = 0; k < keep; k++) {
            buffer.append(cend);
          }
          if (terminated) {
            break;
          }
          // Not a terminator: make the character after the run available to
          // the next iteration.
          HTMLScanner.CurrentEntity.access$300(fCurrentEntity);
        }
        else if ((ch == '\n') || (ch == '\r'))
        {
          HTMLScanner.CurrentEntity.access$300(fCurrentEntity);
          final int newlineCount = skipNewlines();
          for (int k = 0; k < newlineCount; k++) {
            buffer.append('\n');
          }
        }
        else if (ch == -1)
        {
          if (fReportErrors) {
            fErrorReporter.reportError("HTML1007", null);
          }
          break;
        }
        else
        {
          buffer.append((char)ch);
        }
      }
      return ch == -1;
    }
    
    protected void scanPI()
      throws IOException
    {
      HTMLScanner.CurrentEntity.access$400(fCurrentEntity, "(scanPI: ");
      if (fReportErrors) {
        fErrorReporter.reportWarning("HTML1008", null);
      }
      String target = scanName();
      if ((target != null) && (!target.equalsIgnoreCase("xml")))
      {
        for (;;)
        {
          int c = fCurrentEntity.read();
          if ((c == 13) || (c == 10))
          {
            if (c == 13)
            {
              c = fCurrentEntity.read();
              if (c != 10)
              {
                fCurrentEntity.offset -= 1;
                fCurrentEntity.characterOffset_ -= 1;
              }
            }
            HTMLScanner.CurrentEntity.access$1500(fCurrentEntity);
          }
          else
          {
            if (c == -1) {
              break;
            }
            if ((c != 32) && (c != 9))
            {
              HTMLScanner.CurrentEntity.access$300(fCurrentEntity);
              break;
            }
          }
        }
        fStringBuffer.clear();
        for (;;)
        {
          int c = fCurrentEntity.read();
          if ((c == 63) || (c == 47))
          {
            char c0 = (char)c;
            c = fCurrentEntity.read();
            if (c == 62) {
              break;
            }
            fStringBuffer.append(c0);
            HTMLScanner.Cur
1 2 3 4 5 6 7 8 9

Further reading...

For more information on Java 1.5 Tiger, you may find "Java 1.5 Tiger: A Developer's Notebook" by D. Flanagan and B. McLaughlin (O'Reilly) of interest.

New! JAR listings


Copyright 2006-2017. Infinite Loop Ltd