nekohtml

return 338;
    case 145: 
      return 8216;
    case 146: 
      return 8217;
    case 147: 
      return 8220;
    case 148: 
      return 8221;
    case 149: 
      return 8226;
    case 150: 
      return 8211;
    case 151: 
      return 8212;
    case 152: 
      return 732;
    case 153: 
      return 8482;
    case 154: 
      return 353;
    case 155: 
      return 8250;
    case 156: 
      return 339;
    case 159: 
      return 376;
    }
    return origChar;
  }
  
  /**
   * Returns the next character of the current entity and advances the
   * buffer offset, character offset and column number by one.
   *
   * @return the character read, or -1 when the buffer is exhausted and
   *         {@code load(0)} reports end of input
   * @throws IOException if refilling the buffer fails
   */
  protected int read()
    throws IOException
  {
    if (fCurrentEntity.offset == fCurrentEntity.length)
    {
      // Everything buffered has been consumed: refill from index 0.
      if (load(0) == -1) {
        return -1;
      }
    }
    final char next = fCurrentEntity.buffer[fCurrentEntity.offset++];
    fCurrentEntity.characterOffset++;
    fCurrentEntity.columnNumber++;
    return next;
  }
  
  /**
   * Reads more characters from the entity's stream into its buffer, starting
   * at {@code offset}. When {@code offset} equals the buffer capacity the
   * buffer is first enlarged by a quarter of its size, keeping the first
   * {@code length} characters.
   *
   * @param offset index in the buffer at which the new characters are stored;
   *        it also becomes the entity's current offset
   * @return the count returned by the stream's read call (-1 at end of stream)
   * @throws IOException if the underlying stream fails
   */
  protected int load(int offset)
    throws IOException
  {
    char[] target = fCurrentEntity.buffer;
    if (offset == target.length)
    {
      // No free room after offset: grow by 25% and carry over the valid prefix.
      target = new char[target.length + target.length / 4];
      System.arraycopy(fCurrentEntity.buffer, 0, target, 0, fCurrentEntity.length);
      fCurrentEntity.buffer = target;
    }
    final int count = fCurrentEntity.stream.read(target, offset, target.length - offset);
    // At end of stream only the characters in front of offset remain valid.
    fCurrentEntity.length = (count == -1) ? offset : (count + offset);
    fCurrentEntity.offset = offset;
    return count;
  }
  
  /**
   * Installs the scanner stored in {@code fScanner}.
   *
   * @param scanner the scanner to use from now on
   */
  protected void setScanner(Scanner scanner)
  {
    this.fScanner = scanner;
  }
  
  /**
   * Records the scanner state stored in {@code fScannerState}.
   *
   * @param state the new state value
   */
  protected void setScannerState(short state)
  {
    this.fScannerState = state;
  }
  
  /**
   * Scans the remainder of a DOCTYPE declaration (the caller has already
   * consumed the {@code DOCTYPE} keyword) and reports it to the document
   * handler, if one is set.
   * <p>
   * The root name and the optional PUBLIC / SYSTEM identifiers are read first;
   * the rest of the declaration is then skipped up to its end.
   *
   * @throws IOException if reading fails, or an {@code EOFException} raised
   *         while scanning a quoted literal
   */
  protected void scanDoctype()
    throws IOException
  {
    String root = null;
    String pubid = null;
    String sysid = null;
    if (skipSpaces())
    {
      root = scanName();
      if (root == null)
      {
        if (fReportErrors) {
          fErrorReporter.reportError("HTML1014", null);
        }
      }
      else {
        root = modifyName(root, fNamesElems);
      }
      if (skipSpaces()) {
        if (skip("PUBLIC", false))
        {
          skipSpaces();
          pubid = scanLiteral();
          if (skipSpaces()) {
            sysid = scanLiteral();
          }
        }
        else if (skip("SYSTEM", false))
        {
          skipSpaces();
          sysid = scanLiteral();
        }
      }
    }
    // Skip whatever is left of the declaration. Every terminating condition
    // must leave the loop: without the breaks a pushed-back '<' would be
    // re-read forever and a closing '>' would not stop the scan.
    int c;
    while ((c = read()) != -1) {
      if (c == '<')
      {
        // Start of the next markup: leave it unread for the caller.
        fCurrentEntity.offset -= 1;
        fCurrentEntity.characterOffset -= 1;
        fCurrentEntity.columnNumber -= 1;
        break;
      }
      if (c == '>') {
        break;
      }
      if (c == '[')
      {
        // Internal subset: skipMarkup consumes through the balancing '>'.
        skipMarkup(true);
        break;
      }
    }
    if (fDocumentHandler != null)
    {
      if (fOverrideDoctype)
      {
        pubid = fDoctypePubid;
        sysid = fDoctypeSysid;
      }
      fEndLineNumber = fCurrentEntity.lineNumber;
      fEndColumnNumber = fCurrentEntity.columnNumber;
      fEndCharacterOffset = fCurrentEntity.characterOffset;
      fDocumentHandler.doctypeDecl(root, pubid, sysid, locationAugs());
    }
  }
  
  /**
   * Scans a literal delimited by single or double quotes.
   * <p>
   * Line breaks inside the literal are collapsed to a single space. A
   * {@code '<'} ends the literal early and is left unread.
   *
   * @return the literal's content without the quotes, or {@code null} when the
   *         next character is not a quote (that character is left unread)
   * @throws EOFException if the input ends inside the literal
   * @throws IOException if reading fails
   */
  protected String scanLiteral()
    throws IOException
  {
    int quote = read();
    if ((quote == '\'') || (quote == '"'))
    {
      StringBuffer str = new StringBuffer();
      int c;
      while (((c = read()) != -1) && 
        (c != quote)) {
        if ((c == '\r') || (c == '\n'))
        {
          // Un-read the line break so skipNewlines() can account for it.
          fCurrentEntity.offset -= 1;
          fCurrentEntity.characterOffset -= 1;
          fCurrentEntity.columnNumber -= 1;
          
          skipNewlines();
          str.append(' ');
        }
        else
        {
          if (c == '<')
          {
            fCurrentEntity.offset -= 1;
            fCurrentEntity.characterOffset -= 1;
            fCurrentEntity.columnNumber -= 1;
            break;
          }
          str.append((char)c);
        }
      }
      if (c == -1)
      {
        if (fReportErrors) {
          fErrorReporter.reportError("HTML1007", null);
        }
        throw new EOFException();
      }
      return str.toString();
    }
    // Not a quote: un-read it. At end of input read() consumed nothing, so
    // pushing back would move the offset in front of the buffer start.
    if (quote != -1)
    {
      fCurrentEntity.offset -= 1;
      fCurrentEntity.characterOffset -= 1;
      fCurrentEntity.columnNumber -= 1;
    }
    
    return null;
  }
  
  /**
   * Scans a name made of letters, digits, {@code '-'}, {@code '.'},
   * {@code ':'} and {@code '_'} starting at the current position.
   * <p>
   * When the name reaches the end of the buffered data, the part read so far
   * is moved to the front of the buffer and more input is loaded behind it.
   *
   * @return the name, or {@code null} if no name character was found or the
   *         input is already exhausted
   * @throws IOException if reading fails
   */
  protected String scanName()
    throws IOException
  {
    if (fCurrentEntity.offset == fCurrentEntity.length)
    {
      if (load(0) == -1) {
        return null;
      }
    }
    int start = fCurrentEntity.offset;
    while (true)
    {
      // Consume name characters available in the buffer.
      while (fCurrentEntity.offset < fCurrentEntity.length)
      {
        final char ch = fCurrentEntity.buffer[fCurrentEntity.offset];
        final boolean nameChar = Character.isLetterOrDigit(ch)
          || (ch == '-') || (ch == '.') || (ch == ':') || (ch == '_');
        if (!nameChar) {
          break;
        }
        fCurrentEntity.offset++;
        fCurrentEntity.characterOffset++;
        fCurrentEntity.columnNumber++;
      }
      if (fCurrentEntity.offset != fCurrentEntity.length) {
        // Stopped on a non-name character: the name is complete.
        break;
      }
      // Ran out of buffered data: keep the partial name and load more.
      final int kept = fCurrentEntity.length - start;
      System.arraycopy(fCurrentEntity.buffer, start, fCurrentEntity.buffer, 0, kept);
      final int count = load(kept);
      start = 0;
      if (count == -1) {
        break;
      }
    }
    final int size = fCurrentEntity.offset - start;
    if (size > 0) {
      return new String(fCurrentEntity.buffer, start, size);
    }
    return null;
  }
  
  /**
   * Scans an entity or character reference; the leading {@code '&'} has
   * already been consumed by the caller.
   * <p>
   * The raw reference text is collected in {@code str}. When {@code content}
   * is true and a document handler is active, the resolved character (or the
   * raw text, if the reference cannot be resolved) is reported through
   * {@code characters}.
   *
   * @param str scratch buffer; cleared on entry and overwritten
   * @param content true when scanning element content, false otherwise
   *        (nothing is sent to the document handler in that case)
   * @return the referenced character value; -1 for a bare {@code '&'}, an
   *         unknown entity name or a malformed numeric reference
   * @throws IOException if reading fails
   */
  protected int scanEntityRef(XMLStringBuffer str, boolean content)
    throws IOException
  {
    str.clear();
    str.append('&');
    boolean endsWithSemicolon = false;
    for (;;)
    {
      int c = read();
      if (c == ';')
      {
        str.append(';');
        endsWithSemicolon = true;
        break;
      }
      if (c == -1) {
        break;
      }
      if ((!ENTITY_CHARS.get(c)) && (c != '#'))
      {
        // Not part of the reference: leave it for the caller.
        fCurrentEntity.offset -= 1;
        fCurrentEntity.characterOffset -= 1;
        fCurrentEntity.columnNumber -= 1;
        break;
      }
      str.append((char)c);
    }
    if ((!endsWithSemicolon) && 
      (fReportErrors)) {
      fErrorReporter.reportWarning("HTML1004", null);
    }
    // Only the '&' was collected: report it as plain text.
    if (str.length == 1)
    {
      if ((content) && (fDocumentHandler != null) && (fElementCount >= fElementDepth))
      {
        fEndLineNumber = fCurrentEntity.lineNumber;
        fEndColumnNumber = fCurrentEntity.columnNumber;
        fEndCharacterOffset = fCurrentEntity.characterOffset;
        fDocumentHandler.characters(str, locationAugs());
      }
      return -1;
    }
    // Strip the leading '&' and, if present, the trailing ';'.
    String name;
    if (endsWithSemicolon) {
      name = str.toString().substring(1, str.length - 1);
    } else {
      name = str.toString().substring(1);
    }
    if (name.startsWith("#"))
    {
      // Numeric character reference, decimal or hexadecimal.
      int value = -1;
      try
      {
        if ((name.startsWith("#x")) || (name.startsWith("#X"))) {
          value = Integer.parseInt(name.substring(2), 16);
        } else {
          value = Integer.parseInt(name.substring(1));
        }
        if ((fFixWindowsCharRefs) && (fIso8859Encoding)) {
          value = fixWindowsCharacter(value);
        }
        if ((content) && (fDocumentHandler != null) && (fElementCount >= fElementDepth))
        {
          fEndLineNumber = fCurrentEntity.lineNumber;
          fEndColumnNumber = fCurrentEntity.columnNumber;
          fEndCharacterOffset = fCurrentEntity.characterOffset;
          if (fNotifyCharRefs)
          {
            XMLResourceIdentifier id = resourceId();
            String encoding = null;
            fDocumentHandler.startGeneralEntity(name, id, encoding, locationAugs());
          }
          str.clear();
          if (Character.isSupplementaryCodePoint(value))
          {
            // Code points above U+FFFF need a surrogate pair; a plain char
            // cast would silently truncate them to an unrelated character.
            char[] pair = Character.toChars(value);
            str.append(pair, 0, pair.length);
          }
          else
          {
            str.append((char)value);
          }
          fDocumentHandler.characters(str, locationAugs());
          if (fNotifyCharRefs) {
            fDocumentHandler.endGeneralEntity(name, locationAugs());
          }
        }
      }
      catch (NumberFormatException e)
      {
        // Malformed number: report it and pass the raw text through.
        if (fReportErrors) {
          fErrorReporter.reportError("HTML1005", new Object[] { name });
        }
        if ((content) && (fDocumentHandler != null) && (fElementCount >= fElementDepth))
        {
          fEndLineNumber = fCurrentEntity.lineNumber;
          fEndColumnNumber = fCurrentEntity.columnNumber;
          fEndCharacterOffset = fCurrentEntity.characterOffset;
          fDocumentHandler.characters(str, locationAugs());
        }
      }
      return value;
    }
    int c = HTMLEntities.get(name);
    
    // Outside content, a reference without ';' is rejected when it resolves
    // to a value above 256.
    boolean invalidEntityInAttribute = (!content) && (!endsWithSemicolon) && (c > 256);
    if ((c == -1) || (invalidEntityInAttribute))
    {
      if (fReportErrors) {
        fErrorReporter.reportWarning("HTML1006", new Object[] { name });
      }
      if ((content) && (fDocumentHandler != null) && (fElementCount >= fElementDepth))
      {
        fEndLineNumber = fCurrentEntity.lineNumber;
        fEndColumnNumber = fCurrentEntity.columnNumber;
        fEndCharacterOffset = fCurrentEntity.characterOffset;
        fDocumentHandler.characters(str, locationAugs());
      }
      return -1;
    }
    if ((content) && (fDocumentHandler != null) && (fElementCount >= fElementDepth))
    {
      fEndLineNumber = fCurrentEntity.lineNumber;
      fEndColumnNumber = fCurrentEntity.columnNumber;
      fEndCharacterOffset = fCurrentEntity.characterOffset;
      boolean notify = (fNotifyHtmlBuiltinRefs) || ((fNotifyXmlBuiltinRefs) && (builtinXmlRef(name)));
      if (notify)
      {
        XMLResourceIdentifier id = resourceId();
        String encoding = null;
        fDocumentHandler.startGeneralEntity(name, id, encoding, locationAugs());
      }
      str.clear();
      str.append((char)c);
      fDocumentHandler.characters(str, locationAugs());
      if (notify) {
        fDocumentHandler.endGeneralEntity(name, locationAugs());
      }
    }
    return c;
  }
  
  /**
   * Tries to consume the string {@code s} at the current position.
   * <p>
   * On a mismatch, or when the input ends before the whole string was seen,
   * every character consumed by this call is un-read again, including the
   * character-offset and column counters.
   *
   * @param s the text to match; {@code null} is treated as empty
   * @param caseSensitive when false, both sides are upper-cased before
   *        comparing
   * @return true if the whole string was matched and consumed
   * @throws IOException if reading fails
   */
  protected boolean skip(String s, boolean caseSensitive)
    throws IOException
  {
    int length = s != null ? s.length() : 0;
    for (int i = 0; i < length; i++)
    {
      if (fCurrentEntity.offset == fCurrentEntity.length)
      {
        // Keep the i characters matched so far at the front of the buffer
        // and load more input behind them.
        System.arraycopy(fCurrentEntity.buffer, fCurrentEntity.offset - i, fCurrentEntity.buffer, 0, i);
        if (load(i) == -1)
        {
          // End of input: the matched prefix becomes readable again from
          // index 0, so the position counters must be rewound with it or
          // those characters would be counted twice.
          fCurrentEntity.offset = 0;
          fCurrentEntity.characterOffset -= i;
          fCurrentEntity.columnNumber -= i;
          return false;
        }
      }
      char c0 = s.charAt(i);
      char c1 = fCurrentEntity.buffer[(fCurrentEntity.offset++)];
      fCurrentEntity.characterOffset += 1;
      fCurrentEntity.columnNumber += 1;
      if (!caseSensitive)
      {
        c0 = Character.toUpperCase(c0);
        c1 = Character.toUpperCase(c1);
      }
      if (c0 != c1)
      {
        // Mismatch: un-read the i matched characters plus this one.
        fCurrentEntity.offset -= i + 1;
        fCurrentEntity.characterOffset -= i + 1;
        fCurrentEntity.columnNumber -= i + 1;
        return false;
      }
    }
    return true;
  }
  
  /**
   * Skips input up to and including the {@code '>'} that closes the current
   * markup, or to the end of input.
   *
   * @param balance when true, every {@code '<'} increases the nesting depth
   *        so that an equal number of {@code '>'} is required to finish
   * @return true if a {@code "/>"} sequence was seen while skipping
   * @throws IOException if reading fails
   */
  protected boolean skipMarkup(boolean balance)
    throws IOException
  {
    int nesting = 1;
    boolean sawSlashGt = false;
    for (;;)
    {
      if ((fCurrentEntity.offset == fCurrentEntity.length) && (load(0) == -1)) {
        return sawSlashGt;
      }
      while (fCurrentEntity.offset < fCurrentEntity.length)
      {
        char ch = fCurrentEntity.buffer[fCurrentEntity.offset++];
        fCurrentEntity.characterOffset++;
        fCurrentEntity.columnNumber++;
        if (balance && (ch == '<'))
        {
          nesting++;
        }
        else
        {
          switch (ch)
          {
          case '>':
            nesting--;
            if (nesting == 0) {
              return sawSlashGt;
            }
            break;
          case '/':
            // Look one character ahead for "/>".
            if ((fCurrentEntity.offset == fCurrentEntity.length) && (load(0) == -1)) {
              return sawSlashGt;
            }
            ch = fCurrentEntity.buffer[fCurrentEntity.offset++];
            fCurrentEntity.characterOffset++;
            fCurrentEntity.columnNumber++;
            if (ch != '>')
            {
              // Not "/>": un-read the look-ahead character.
              fCurrentEntity.offset--;
              fCurrentEntity.characterOffset--;
              fCurrentEntity.columnNumber--;
            }
            else
            {
              sawSlashGt = true;
              nesting--;
              if (nesting == 0) {
                return sawSlashGt;
              }
            }
            break;
          case '\r':
          case '\n':
            skipNewlines();
            break;
          default:
            break;
          }
        }
      }
    }
  }
  
  /**
   * Skips consecutive space characters (as defined by
   * {@code Character.isSpace}) at the current position. Line breaks are
   * consumed through {@code skipNewlines()}.
   *
   * @return true if at least one space character was seen
   * @throws IOException if reading fails
   */
  protected boolean skipSpaces()
    throws IOException
  {
    boolean sawSpace = false;
    for (;;)
    {
      if ((fCurrentEntity.offset == fCurrentEntity.length) && (load(0) == -1)) {
        break;
      }
      final char ch = fCurrentEntity.buffer[fCurrentEntity.offset];
      if (!Character.isSpace(ch)) {
        break;
      }
      sawSpace = true;
      if ((ch != '\r') && (ch != '\n'))
      {
        fCurrentEntity.offset++;
        fCurrentEntity.characterOffset++;
        fCurrentEntity.columnNumber++;
      }
      else
      {
        skipNewlines();
      }
    }
    return sawSpace;
  }
  
  /**
   * Skips line breaks at the current position without an upper limit.
   *
   * @return the number of line breaks skipped
   * @throws IOException if reading fails
   */
  protected int skipNewlines()
    throws IOException
  {
    final int unlimited = Integer.MAX_VALUE;
    return skipNewlines(unlimited);
  }
  
  /**
   * Skips consecutive line breaks at the current position.
   * <p>
   * A {@code '\r'}, a {@code '\n'} and a {@code '\r'} immediately followed by
   * {@code '\n'} each count as one line break. When at least one line break
   * is found, the entity's line number is increased by the number counted and
   * its column number is reset to 1.
   *
   * @param maxlines the loop stops once this many line breaks were counted
   * @return the number of line breaks skipped; 0 if the current character is
   *         not a line break or the input is exhausted
   * @throws IOException if reading fails
   */
  protected int skipNewlines(int maxlines)
    throws IOException
  {
    if ((fCurrentEntity.offset == fCurrentEntity.length) && 
      (load(0) == -1)) {
      return 0;
    }
    char c = fCurrentEntity.buffer[fCurrentEntity.offset];
    int newlines = 0;
    // NOTE(review): this local is updated below but never read in this method.
    int offset = fCurrentEntity.offset;
    if ((c == '\n') || (c == '\r'))
    {
      do
      {
        c = fCurrentEntity.buffer[(fCurrentEntity.offset++)];
        fCurrentEntity.characterOffset += 1;
        if (c == '\r')
        {
          newlines++;
          if (fCurrentEntity.offset == fCurrentEntity.length)
          {
            // Buffer exhausted: continue at index `newlines` and load more
            // input behind the first `newlines` slots of the buffer.
            offset = 0;
            fCurrentEntity.offset = newlines;
            if (load(newlines) == -1) {
              break;
            }
          }
          // A '\n' directly after '\r' belongs to the same line break.
          if (fCurrentEntity.buffer[fCurrentEntity.offset] == '\n')
          {
            fCurrentEntity.offset += 1;
            fCurrentEntity.characterOffset += 1;
            offset++;
          }
        }
        else if (c == '\n')
        {
          newlines++;
          if (fCurrentEntity.offset == fCurrentEntity.length)
          {
            // Same reload handling as in the '\r' branch.
            offset = 0;
            fCurrentEntity.offset = newlines;
            if (load(newlines) == -1) {
              break;
            }
          }
        }
        else
        {
          // Not a line break: un-read the character and stop.
          fCurrentEntity.offset -= 1;
          fCurrentEntity.characterOffset -= 1;
          break;
        }
        // The loop also stops one character before the end of the buffered data.
      } while ((newlines < maxlines) && (fCurrentEntity.offset < fCurrentEntity.length - 1));
      fCurrentEntity.lineNumber += newlines;
      fCurrentEntity.columnNumber = 1;
    }
    return newlines;
  }
  
  /**
   * Builds the augmentations carrying the current begin/end location.
   * The shared {@code fInfosetAugs} and {@code fLocationItem} instances are
   * reused on every call.
   *
   * @return the location augmentations, or {@code null} when
   *         {@code fAugmentations} is false
   */
  protected final Augmentations locationAugs()
  {
    if (!fAugmentations) {
      return null;
    }
    fLocationItem.setValues(fBeginLineNumber, fBeginColumnNumber, fBeginCharacterOffset, fEndLineNumber, fEndColumnNumber, fEndCharacterOffset);

    final HTMLAugmentations augs = fInfosetAugs;
    augs.removeAllItems();
    augs.putItem("http://cyberneko.org/html/features/augmentations", fLocationItem);
    return augs;
  }
  
  /**
   * Builds the augmentations that carry {@code SYNTHESIZED_ITEM} instead of
   * a location. The shared {@code fInfosetAugs} instance is reused.
   *
   * @return the augmentations, or {@code null} when {@code fAugmentations}
   *         is false
   */
  protected final Augmentations synthesizedAugs()
  {
    if (!fAugmentations) {
      return null;
    }
    final HTMLAugmentations augs = fInfosetAugs;
    augs.removeAllItems();
    augs.putItem("http://cyberneko.org/html/features/augmentations", SYNTHESIZED_ITEM);
    return augs;
  }
  
  /**
   * Clears and returns the shared resource identifier {@code fResourceId}.
   *
   * @return the cleared, reused identifier instance
   */
  protected final XMLResourceIdentifier resourceId()
  {
    this.fResourceId.clear();
    return this.fResourceId;
  }
  
  /**
   * Tells whether {@code name} is one of the five entity names
   * {@code amp}, {@code lt}, {@code gt}, {@code quot} or {@code apos}.
   *
   * @param name the entity name without {@code '&'} and {@code ';'}; must not
   *        be {@code null}
   * @return true if the name is one of the five names above
   */
  protected static boolean builtinXmlRef(String name)
  {
    if (name.equals("amp")) {
      return true;
    }
    if (name.equals("lt")) {
      return true;
    }
    if (name.equals("gt")) {
      return true;
    }
    if (name.equals("quot")) {
      return true;
    }
    return name.equals("apos");
  }
  
  /**
   * Contract for the scanners that can be installed with
   * {@code setScanner}.
   */
  public interface Scanner
  {
    /**
     * Performs scanning work.
     *
     * @param complete whether to keep scanning instead of returning after
     *        the next step (this is how the visible {@code ContentScanner}
     *        uses it)
     * @return in the visible {@code ContentScanner}, false once the
     *         end-of-document state has been handled and true otherwise
     * @throws IOException if reading the input fails
     */
    boolean scan(boolean complete)
      throws IOException;
  }
  
  /**
   * State of the entity currently being scanned: the character stream, its
   * identifiers, the read buffer and the current position within it.
   */
  public static class CurrentEntity
  {
    /** Character stream the buffer is filled from. */
    public Reader stream;
    /** Encoding name passed to the constructor. */
    public String encoding;
    /** Public identifier passed to the constructor. */
    public String publicId;
    /** Base system identifier passed to the constructor. */
    public String baseSystemId;
    /** Literal system identifier passed to the constructor. */
    public String literalSystemId;
    /** Expanded system identifier passed to the constructor. */
    public String expandedSystemId;
    /** Version string; starts out as "1.0". */
    public String version = "1.0";
    /** Current line number, starting at 1. */
    public int lineNumber = 1;
    /** Current column number, starting at 1. */
    public int columnNumber = 1;
    /** Number of characters consumed so far. */
    public int characterOffset = 0;
    /**
     * Read buffer. It starts with room for 2048 characters; the array length
     * used to be written as a garbled character literal that evaluated to
     * only 63. The scanner's {@code load} grows the buffer when it is full.
     */
    public char[] buffer = new char[2048];
    /** Index of the next character to read in {@link #buffer}. */
    public int offset = 0;
    /** Number of valid characters in {@link #buffer}. */
    public int length = 0;
    
    /**
     * Creates the state for a new entity positioned at line 1, column 1.
     *
     * @param stream the character stream to read from
     * @param encoding the encoding name to record
     * @param publicId the public identifier
     * @param baseSystemId the base system identifier
     * @param literalSystemId the literal system identifier
     * @param expandedSystemId the expanded system identifier
     */
    public CurrentEntity(Reader stream, String encoding, String publicId, String baseSystemId, String literalSystemId, String expandedSystemId)
    {
      this.stream = stream;
      this.encoding = encoding;
      this.publicId = publicId;
      this.baseSystemId = baseSystemId;
      this.literalSystemId = literalSystemId;
      this.expandedSystemId = expandedSystemId;
    }
  }
  
  public class ContentScanner
    implements HTMLScanner.Scanner
  {
    private final QName fQName = new QName();
    private final XMLAttributesImpl fAttributes = new XMLAttributesImpl();
    
    /** Creates a content scanner; there is no state to set up beyond the field initializers. */
    public ContentScanner() {}
    
    /**
     * Runs the content scanner's state machine, dispatching on
     * {@code fScannerState}.
     * <p>
     * The loop repeats while a step asks for an immediate next step or
     * {@code complete} is true. An {@code EOFException} raised by a step
     * either pops the previous entity from {@code fCurrentEntityStack} or,
     * when that stack is empty, switches to state 11.
     *
     * @param complete when true, keep scanning until a state returns
     * @return false after state 11 (end of document) was handled, true
     *         otherwise
     * @throws IOException if reading fails
     */
    public boolean scan(boolean complete)
      throws IOException
    {
      boolean next;
      do
      {
        try
        {
          next = false;
          switch (fScannerState)
          {
          // State 0: character content. '<' switches to state 1, '&' starts
          // an entity reference, anything else is scanned as text.
          case 0: 
            fBeginLineNumber = fCurrentEntity.lineNumber;
            fBeginColumnNumber = fCurrentEntity.columnNumber;
            fBeginCharacterOffset = fCurrentEntity.characterOffset;
            int c = read();
            if (c == 60)
            {
              setScannerState((short)1);
              next = true;
            }
            else if (c == 38)
            {
              scanEntityRef(fStringBuffer, true);
            }
            else
            {
              if (c == -1) {
                throw new EOFException();
              }
              // Un-read the character so scanCharacters() sees it.
              fCurrentEntity.offset -= 1;
              fCurrentEntity.characterOffset -= 1;
              fCurrentEntity.columnNumber -= 1;
              scanCharacters();
            }
            break;
          // State 1: the character after '<' decides the kind of markup.
          // NOTE(review): `int c` is declared a second time in the same switch
          // scope here, which looks like a decompiler artifact - confirm
          // against the original source.
          case 1: 
            int c = read();
            if (c == 33)
            {
              // "<!": comment, CDATA section, DOCTYPE or unknown declaration.
              if (skip("--", false))
              {
                scanComment();
              }
              else if (skip("[CDATA[", false))
              {
                scanCDATA();
              }
              else if (skip("DOCTYPE", false))
              {
                scanDoctype();
              }
              else
              {
                if (fReportErrors) {
                  fErrorReporter.reportError("HTML1002", null);
                }
                skipMarkup(true);
              }
            }
            else if (c == 63)
            {
              // "<?": processing instruction.
              scanPI();
            }
            else if (c == 47)
            {
              // "</": end tag.
              scanEndElement();
            }
            else
            {
              if (c == -1)
              {
                // Input ended right after '<': report it as text.
                if (fReportErrors) {
                  fErrorReporter.reportError("HTML1003", null);
                }
                if ((fDocumentHandler != null) && (fElementCount >= fElementDepth))
                {
                  fStringBuffer.clear();
                  fStringBuffer.append('<');
                  fDocumentHandler.characters(fStringBuffer, null);
                }
                throw new EOFException();
              }
              // Start tag: un-read the first name character and scan the element.
              fCurrentEntity.offset -= 1;
              fCurrentEntity.characterOffset -= 1;
              fCurrentEntity.columnNumber -= 1;
              fElementCount += 1;
              // NOTE(review): 0 is assigned and compared below as if this were
              // an int array; presumably a boolean flag in the original - confirm.
              fSingleBoolean[0] = 0;
              String ename = scanStartElement(fSingleBoolean);
              fBeginLineNumber = fCurrentEntity.lineNumber;
              fBeginColumnNumber = fCurrentEntity.columnNumber;
              fBeginCharacterOffset = fCurrentEntity.characterOffset;
              if ("script".equalsIgnoreCase(ename))
              {
                scanScriptContent();
              }
              else if ((!fParseNoScriptContent) && ("noscript".equalsIgnoreCase(ename)))
              {
                scanNoScriptContent();
              }
              else if ((ename != null) && (fSingleBoolean[0] == 0) && (HTMLElements.getElement(ename).isSpecial()) && ((!ename.equalsIgnoreCase("TITLE")) || (isEnded(ename))))
              {
                // Hand over to the special scanner for this element.
                setScanner(fSpecialScanner.setElementName(ename));
                setScannerState((short)0);
                return true;
              }
            }
            setScannerState((short)0);
            break;
          // State 10: start of document; optionally emits a synthesized DOCTYPE.
          case 10: 
            if ((fDocumentHandler != null) && (fElementCount >= fElementDepth))
            {
              XMLLocator locator = HTMLScanner.this;
              String encoding = fIANAEncoding;
              Augmentations augs = locationAugs();
              NamespaceContext nscontext = new NamespaceSupport();
              XercesBridge.getInstance().XMLDocumentHandler_startDocument(fDocumentHandler, locator, encoding, nscontext, augs);
            }
            if ((fInsertDoctype) && (fDocumentHandler != null))
            {
              // NOTE(review): `getElement46name` is not defined anywhere in
              // view; it looks like a mangled element-name lookup - confirm
              // against the original source.
              String root = getElement46name;
              root = HTMLScanner.modifyName(root, fNamesElems);
              String pubid = fDoctypePubid;
              String sysid = fDoctypeSysid;
              fDocumentHandler.doctypeDecl(root, pubid, sysid, synthesizedAugs());
            }
            setScannerState((short)0);
            break;
          // State 11: end of document; endDocument is only sent when
          // `complete` is true.
          case 11: 
            if ((fDocumentHandler != null) && (fElementCount >= fElementDepth) && (complete))
            {
              fEndLineNumber = fCurrentEntity.lineNumber;
              fEndColumnNumber = fCurrentEntity.columnNumber;
              fEndCharacterOffset = fCurrentEntity.characterOffset;
              fDocumentHandler.endDocument(locationAugs());
            }
            return false;
          default: 
            throw new RuntimeException("unknown scanner state: " + fScannerState);
          }
        }
        catch (EOFException e)
        {
          // End of the current entity: finish the document or resume the
          // entity that was pushed before this one.
          if (fCurrentEntityStack.empty()) {
            setScannerState((short)11);
          } else {
            fCurrentEntity = ((HTMLScanner.CurrentEntity)fCurrentEntityStack.pop());
          }
          next = true;
        }
      } while ((next) || (complete));
      return true;
    }
    
    /**
     * Collects the raw text that follows a {@code <noscript>} start tag up to
     * (but not including) the matching {@code </noscript} end tag or the end
     * of input, and reports it as one {@code characters} event.
     * <p>
     * Every line break is stored as a single {@code '\n'}.
     *
     * @throws IOException if reading fails
     */
    private void scanNoScriptContent()
      throws IOException
    {
      XMLStringBuffer buffer = new XMLStringBuffer();
      for (;;)
      {
        int c = read();
        if (c == -1) {
          break;
        }
        if (c == '<')
        {
          // Peek ahead; the padding blank lets an end tag that sits at the
          // very end of the input satisfy the length check.
          String next = nextContent(10) + " ";
          if ((next.length() >= 10) && ("/noscript".equalsIgnoreCase(next.substring(0, 9))) && (('>' == next.charAt(9)) || (Character.isWhitespace(next.charAt(9)))))
          {
            // Leave the '<' of the end tag unread.
            fCurrentEntity.offset -= 1;
            fCurrentEntity.characterOffset -= 1;
            fCurrentEntity.columnNumber -= 1;
            break;
          }
        }
        if ((c == '\r') || (c == '\n'))
        {
          fCurrentEntity.offset -= 1;
          fCurrentEntity.characterOffset -= 1;
          fCurrentEntity.columnNumber -= 1;
          int newlines = skipNewlines();
          for (int i = 0; i < newlines; i++) {
            buffer.append('\n');
          }
        }
        else
        {
          buffer.append((char)c);
        }
      }
      // The collected text length lives on the buffer, not in a local.
      if ((buffer.length > 0) && (fDocumentHandler != null))
      {
        fEndLineNumber = fCurrentEntity.lineNumber;
        fEndColumnNumber = fCurrentEntity.columnNumber;
        fEndCharacterOffset = fCurrentEntity.characterOffset;
        fDocumentHandler.characters(buffer, locationAugs());
      }
    }
    
    /**
     * Collects the raw text that follows a {@code <script>} start tag up to
     * (but not including) the matching {@code </script} end tag or the end of
     * input, and reports it as one {@code characters} event.
     * <p>
     * While inside a {@code <!--} comment for which a closing delimiter is
     * available, a {@code </script} is not treated as the end tag. Comment
     * and CDATA delimiters are stripped afterwards when the corresponding
     * options are set. Every line break is stored as a single {@code '\n'}.
     *
     * @throws IOException if reading fails
     */
    private void scanScriptContent()
      throws IOException
    {
      XMLStringBuffer buffer = new XMLStringBuffer();
      boolean waitForEndComment = false;
      for (;;)
      {
        int c = read();
        if (c == -1) {
          break;
        }
        if ((c == '-') && (HTMLScanner.this.endsWith(buffer, "<!-")))
        {
          // "<!--" just completed: only wait for "-->" if one is available.
          waitForEndComment = HTMLScanner.this.endCommentAvailable();
        }
        else if ((!waitForEndComment) && (c == '<'))
        {
          // Peek ahead; the padding blank lets an end tag that sits at the
          // very end of the input satisfy the length check.
          String next = nextContent(8) + " ";
          if ((next.length() >= 8) && ("/script".equalsIgnoreCase(next.substring(0, 7))) && (('>' == next.charAt(7)) || (Character.isWhitespace(next.charAt(7)))))
          {
            // Leave the '<' of the end tag unread.
            fCurrentEntity.offset -= 1;
            fCurrentEntity.characterOffset -= 1;
            fCurrentEntity.columnNumber -= 1;
            break;
          }
        }
        else if ((c == '>') && (HTMLScanner.this.endsWith(buffer, "--")))
        {
          // "-->" closes the comment.
          waitForEndComment = false;
        }
        if ((c == '\r') || (c == '\n'))
        {
          fCurrentEntity.offset -= 1;
          fCurrentEntity.characterOffset -= 1;
          fCurrentEntity.columnNumber -= 1;
          int newlines = skipNewlines();
          for (int i = 0; i < newlines; i++) {
            buffer.append('\n');
          }
        }
        else
        {
          buffer.append((char)c);
        }
      }
      if (fScriptStripCommentDelims) {
        HTMLScanner.reduceToContent(buffer, "<!--", "-->");
      }
      if (fScriptStripCDATADelims) {
        HTMLScanner.reduceToContent(buffer, "<![CDATA[", "]]>");
      }
      // The collected text length lives on the buffer, not in a local.
      if ((buffer.length > 0) && (fDocumentHandler != null) && (fElementCount >= fElementDepth))
      {
        fEndLineNumber = fCurrentEntity.lineNumber;
        fEndColumnNumber = fCurrentEntity.columnNumber;
        fEndCharacterOffset = fCurrentEntity.characterOffset;
        fDocumentHandler.characters(buffer, locationAugs());
      }
    }
    
    /**
     * Peeks at up to {@code len} upcoming characters; afterwards the buffer
     * offset, column number and character offset are restored to their
     * values on entry.
     *
     * @param len the maximum number of characters to look at
     * @return the characters found; shorter than {@code len} when the input
     *         ends first
     * @throws IOException if reading fails
     */
    protected String nextContent(int len)
      throws IOException
    {
      final int savedOffset = fCurrentEntity.offset;
      final int savedColumn = fCurrentEntity.columnNumber;
      final int savedCharacterOffset = fCurrentEntity.characterOffset;

      final char[] peeked = new char[len];
      int filled = 0;
      while (filled < len)
      {
        if (fCurrentEntity.offset == fCurrentEntity.length)
        {
          if (fCurrentEntity.length != fCurrentEntity.buffer.length) {
            // The buffer is not full, so no further load is attempted.
            break;
          }
          // Append behind the existing data instead of overwriting it.
          load(fCurrentEntity.buffer.length);
        }
        final int ch = read();
        if (ch == -1) {
          break;
        }
        peeked[filled] = (char)ch;
        filled++;
      }
      fCurrentEntity.offset = savedOffset;
      fCurrentEntity.columnNumber = savedColumn;
      fCurrentEntity.characterOffset = savedCharacterOffset;
      return new String(peeked, 0, filled);
    }
    
    /**
     * Scans character content up to the next {@code '<'}, {@code '&'} or the
     * end of the buffered data and reports it as one {@code characters}
     * event. Skipped line breaks are rewritten in the buffer as
     * {@code '\n'} before the text is copied out.
     *
     * @throws IOException if reading fails
     */
    protected void scanCharacters()
      throws IOException
    {
      fStringBuffer.clear();
      while (true)
      {
        final int newlines = skipNewlines();
        if ((newlines == 0) && (fCurrentEntity.offset == fCurrentEntity.length)) {
          break;
        }
        // Overwrite the slots of the skipped line breaks with '\n' so they
        // become part of the run that is copied below.
        final int start = fCurrentEntity.offset - newlines;
        for (int pos = start; pos < fCurrentEntity.offset; pos++) {
          fCurrentEntity.buffer[pos] = '\n';
        }
        // Advance over ordinary text.
        while (fCurrentEntity.offset < fCurrentEntity.length)
        {
          final char ch = fCurrentEntity.buffer[fCurrentEntity.offset];
          final boolean stop = (ch == '<') || (ch == '&') || (ch == '\n') || (ch == '\r');
          if (stop) {
            break;
          }
          fCurrentEntity.offset++;
          fCurrentEntity.characterOffset++;
          fCurrentEntity.columnNumber++;
        }
        if ((fCurrentEntity.offset > start) && (fDocumentHandler != null) && (fElementCount >= fElementDepth))
        {
          fEndLineNumber = fCurrentEntity.lineNumber;
          fEndColumnNumber = fCurrentEntity.columnNumber;
          fEndCharacterOffset = fCurrentEntity.characterOffset;
          fStringBuffer.append(fCurrentEntity.buffer, start, fCurrentEntity.offset - start);
        }
        // Stop at the end of the buffer array or in front of markup / an
        // entity reference; otherwise go round again for more line breaks.
        if (fCurrentEntity.offset >= fCurrentEntity.buffer.length) {
          break;
        }
        final char peek = fCurrentEntity.buffer[fCurrentEntity.offset];
        if ((peek == '&') || (peek == '<')) {
          break;
        }
      }
      if (fStringBuffer.length != 0) {
        fDocumentHandler.characters(fStringBuffer, locationAugs());
      }
    }
    
    /**
     * Scans a CDATA section; the opening {@code <![CDATA[} has already been
     * consumed by the caller.
     * <p>
     * With {@code fCDATASections} set, the content is reported between
     * {@code startCDATA} and {@code endCDATA}. Otherwise it is reported as a
     * comment whose text is wrapped in {@code [CDATA[} and {@code ]]}.
     *
     * @throws EOFException if the input ended before the section was closed
     *         (thrown after the content has been reported)
     * @throws IOException if reading fails
     */
    protected void scanCDATA()
      throws IOException
    {
      fStringBuffer.clear();
      if (!fCDATASections)
      {
        fStringBuffer.append("[CDATA[");
      }
      else if ((fDocumentHandler != null) && (fElementCount >= fElementDepth))
      {
        fEndLineNumber = fCurrentEntity.lineNumber;
        fEndColumnNumber = fCurrentEntity.columnNumber;
        fEndCharacterOffset = fCurrentEntity.characterOffset;

        fDocumentHandler.startCDATA(locationAugs());
      }
      final boolean hitEof = scanMarkupContent(fStringBuffer, ']');
      if (!fCDATASections) {
        fStringBuffer.append("]]");
      }
      if ((fDocumentHandler != null) && (fElementCount >= fElementDepth))
      {
        fEndLineNumber = fCurrentEntity.lineNumber;
        fEndColumnNumber = fCurrentEntity.columnNumber;
        fEndCharacterOffset = fCurrentEntity.characterOffset;
        if (!fCDATASections)
        {
          fDocumentHandler.comment(fStringBuffer, locationAugs());
        }
        else
        {
          fDocumentHandler.characters(fStringBuffer, locationAugs());

          fDocumentHandler.endCDATA(locationAugs());
        }
      }
      if (hitEof) {
        throw new EOFException();
      }
    }
    
    /**
     * Scans a comment; the opening {@code <!--} has already been consumed by
     * the caller.
     * <p>
     * If the input ends without a closing {@code -->}, the text collected so
     * far is replayed and the comment is ended at the first {@code '>'}
     * instead.
     *
     * @throws EOFException if no terminator at all was found (thrown after
     *         the comment has been reported)
     * @throws IOException if reading fails
     */
    protected void scanComment()
      throws IOException
    {
      // Remember where the comment content starts, for a possible replay.
      fEndLineNumber = fCurrentEntity.lineNumber;
      fEndColumnNumber = fCurrentEntity.columnNumber;
      fEndCharacterOffset = fCurrentEntity.characterOffset;
      XMLStringBuffer buffer = new XMLStringBuffer();
      boolean eof = scanMarkupContent(buffer, '-');
      if (eof)
      {
        // No "-->" found: make the collected text the entity's buffer and
        // scan it again, this time stopping at a plain '>'.
        fCurrentEntity.lineNumber = fEndLineNumber;
        fCurrentEntity.columnNumber = fEndColumnNumber;
        fCurrentEntity.characterOffset = fEndCharacterOffset;
        fCurrentEntity.buffer = buffer.ch;
        fCurrentEntity.offset = buffer.offset;
        fCurrentEntity.length = buffer.length;
        // Use a fresh buffer: the old one now backs the entity.
        buffer = new XMLStringBuffer();
        for (;;)
        {
          int c = read();
          if (c == -1)
          {
            if (fReportErrors) {
              fErrorReporter.reportError("HTML1007", null);
            }
            eof = true;
            break;
          }
          if (c == '>')
          {
            eof = false;
            break;
          }
          buffer.append((char)c);
        }
      }
      if ((fDocumentHandler != null) && (fElementCount >= fElementDepth))
      {
        fEndLineNumber = fCurrentEntity.lineNumber;
        fEndColumnNumber = fCurrentEntity.columnNumber;
        fEndCharacterOffset = fCurrentEntity.characterOffset;
        fDocumentHandler.comment(buffer, locationAugs());
      }
      if (eof) {
        throw new EOFException();
      }
    }
    
    /**
     * Copies input into {@code buffer} until a terminator made of at least
     * two {@code cend} characters followed by {@code '>'} is found, or the
     * input ends.
     * <p>
     * The last two {@code cend} characters and the {@code '>'} of the
     * terminator are not copied. Every line break is stored as a single
     * {@code '\n'}.
     *
     * @param buffer receives the scanned content
     * @param cend the character that is doubled in front of {@code '>'} to
     *        form the terminator
     * @return true if the input ended before a terminator was found
     * @throws IOException if reading fails
     */
    protected boolean scanMarkupContent(XMLStringBuffer buffer, char cend)
      throws IOException
    {
      int c;
      while (true)
      {
        c = read();
        if (c == -1)
        {
          if (fReportErrors) {
            fErrorReporter.reportError("HTML1007", null);
          }
          break;
        }
        if (c == cend)
        {
          // Count the run of terminator characters.
          int run = 1;
          while ((c = read()) == cend) {
            run++;
          }
          if (c == -1)
          {
            if (fReportErrors) {
              fErrorReporter.reportError("HTML1007", null);
            }
            break;
          }
          if ((run >= 2) && (c == '>'))
          {
            // Terminator found: only the surplus run characters are content.
            for (int i = 0; i < run - 2; i++) {
              buffer.append(cend);
            }
            break;
          }
          // Not a terminator: the whole run is content and the character
          // that followed it is un-read.
          for (int i = 0; i < run; i++) {
            buffer.append(cend);
          }
          fCurrentEntity.offset--;
          fCurrentEntity.characterOffset--;
          fCurrentEntity.columnNumber--;
        }
        else if ((c == '\n') || (c == '\r'))
        {
          fCurrentEntity.offset--;
          fCurrentEntity.characterOffset--;
          fCurrentEntity.columnNumber--;
          final int newlines = skipNewlines();
          for (int i = 0; i < newlines; i++) {
            buffer.append('\n');
          }
        }
        else
        {
          buffer.append((char)c);
        }
      }
      return c == -1;
    }
    
    /**
     * Scans a processing instruction, or an XML declaration when the target
     * is missing or equals {@code "xml"} (case-insensitively).
     * <p>
     * A warning {@code HTML1008} is reported first when error reporting is
     * enabled. For an ordinary processing instruction, whitespace after the
     * target is skipped, the data up to {@code "?>"} or {@code "/>"} (or end
     * of input) is collected with line breaks stored as {@code '\n'}, and the
     * result is passed to the document handler. For the XML declaration
     * case, pseudo-attributes are scanned, empty-valued ones are dropped,
     * names are lower-cased, and {@code xmlDecl} is reported unless
     * {@code changeEncoding(encoding)} returned {@code true} while the
     * specified charset is not being ignored.
     *
     * @throws IOException if reading the input fails
     */
    protected void scanPI()
      throws IOException
    {
      if (fReportErrors) {
        fErrorReporter.reportWarning("HTML1008", null);
      }
      String target = scanName();
      if ((target != null) && (!target.equalsIgnoreCase("xml")))
      {
        // Skip spaces, tabs and line breaks between the target and the data.
        for (;;)
        {
          int c = read();
          if ((c == '\r') || (c == '\n'))
          {
            if (c == '\r')
            {
              c = read();
              // Push back the look-ahead unless it completed a CR LF pair.
              // At end of input read() returns -1 without consuming anything,
              // so stepping back would move the offset before the buffer.
              if ((c != '\n') && (c != -1))
              {
                fCurrentEntity.offset -= 1;
                fCurrentEntity.characterOffset -= 1;
              }
            }
            fCurrentEntity.lineNumber += 1;
            fCurrentEntity.columnNumber = 1;
          }
          else
          {
            if (c == -1) {
              break;
            }
            if ((c != ' ') && (c != '\t'))
            {
              // First data character: push it back for the data loop.
              fCurrentEntity.offset -= 1;
              fCurrentEntity.characterOffset -= 1;
              fCurrentEntity.columnNumber -= 1;
              break;
            }
          }
        }
        // Collect the data up to "?>" or "/>" (or end of input).
        fStringBuffer.clear();
        for (;;)
        {
          int c = read();
          if ((c == '?') || (c == '/'))
          {
            char c0 = (char)c;
            c = read();
            if (c == '>') {
              break;
            }
            // Not a terminator: the '?' or '/' is ordinary data.
            fStringBuffer.append(c0);
            if (c == -1) {
              // End of input: nothing was consumed, so nothing to push back.
              break;
            }
            fCurrentEntity.offset -= 1;
            fCurrentEntity.characterOffset -= 1;
            fCurrentEntity.columnNumber -= 1;
          }
          else if ((c == '\r') || (c == '\n'))
          {
            fStringBuffer.append('\n');
            if (c == '\r')
            {
              c = read();
              // Same end-of-input guard as in the whitespace loop.
              if ((c != '\n') && (c != -1))
              {
                fCurrentEntity.offset -= 1;
                fCurrentEntity.characterOffset -= 1;
              }
            }
            fCurrentEntity.lineNumber += 1;
            fCurrentEntity.columnNumber = 1;
          }
          else
          {
            if (c == -1) {
              break;
            }
            fStringBuffer.append((char)c);
          }
        }
        XMLString data = fStringBuffer;
        if (fDocumentHandler != null)
        {
          fEndLineNumber = fCurrentEntity.lineNumber;
          fEndColumnNumber = fCurrentEntity.columnNumber;
          fEndCharacterOffset = fCurrentEntity.characterOffset;
          fDocumentHandler.processingInstruction(target, data, locationAugs());
        }
      }
      else
      {
        // Remember where the declaration began; the begin location is
        // restored from these values before xmlDecl is reported.
        int beginLineNumber = fBeginLineNumber;
        int beginColumnNumber = fBeginColumnNumber;
        int beginCharacterOffset = fBeginCharacterOffset;
        fAttributes.removeAllAttributes();
        int aindex = 0;
        while (scanPseudoAttribute(fAttributes)) {
          if (fAttributes.getValue(aindex).length() == 0)
          {
            // Drop pseudo-attributes that carry no value.
            fAttributes.removeAttributeAt(aindex);
          }
          else
          {
            fAttributes.getName(aindex, fQName);
            // Lower-case with a fixed locale so the ASCII lookups below
            // ("version", "encoding", "standalone") still match when the
            // default locale has special casing rules (e.g. Turkish 'I').
            fQName.rawname = fQName.rawname.toLowerCase(java.util.Locale.ENGLISH);
            fAttributes.setName(aindex, fQName);
            aindex++;
          }
        }
        if (fDocumentHandler != null)
        {
          String version = fAttributes.getValue("version");
          String encoding = fAttributes.getValue("encoding");
          String standalone = fAttributes.getValue("standalone");
          
          // Report the declaration now unless changeEncoding() returned true.
          boolean xmlDeclNow = (fIgnoreSpecifiedCharset) || (!changeEncoding(encoding));
          if (xmlDeclNow)
          {
            fBeginLineNumber = beginLineNumber;
            fBeginColumnNumber = beginColumnNumber;
            fBeginCharacterOffset = beginCharacterOffset;
            fEndLineNumber = fCurrentEntity.lineNumber;
            fEndColumnNumber = fCurrentEntity.columnNumber;
            fEndCharacterOffset = fCurrentEntity.characterOffset;
            fDocumentHandler.xmlDecl(version, encoding, standalone, locationAugs());
          }
        }
      }
    }
    
    /**
     * Scans a start tag: the element name followed by its attributes, then
     * reports it to the document handler.
     * <p>
     * If no name could be scanned, or the name does not begin with an ASCII
     * letter, error {@code HTML1009} is reported (when error reporting is
     * enabled), the text {@code '<'} plus any scanned name is emitted as
     * character data, and {@code null} is returned.
     * <p>
     * While the byte stream is still held and {@code fElementDepth} is -1, a
     * {@code META http-equiv="content-type"} element carrying a
     * {@code charset=} value triggers {@code changeEncoding}; a {@code BODY}
     * element, or an element whose first listed parent has code 14, releases
     * the byte stream.
     *
     * @param empty flag holder; {@code empty[0]} is read after attribute
     *              scanning to choose between {@code emptyElement} and
     *              {@code startElement}
     * @return the element name after {@code modifyName}, or {@code null} if
     *         no valid element name was found
     * @throws IOException if reading the input fails
     */
    protected String scanStartElement(boolean[] empty)
      throws IOException
    {
      String ename = scanName();
      int length = ename != null ? ename.length() : 0;
      int c = length > 0 ? ename.charAt(0) : -1;
      // A valid element name must start with a-z or A-Z.
      if ((length == 0) || (((c < 'a') || (c > 'z')) && ((c < 'A') || (c > 'Z'))))
      {
        if (fReportErrors) {
          fErrorReporter.reportError("HTML1009", null);
        }
        if ((fDocumentHandler != null) && (fElementCount >= fElementDepth))
        {
          // Pass the would-be tag through as plain text.
          fStringBuffer.clear();
          fStringBuffer.append('<');
          if (length > 0) {
            fStringBuffer.append(ename);
          }
          fDocumentHandler.characters(fStringBuffer, null);
        }
        return null;
      }
      ename = HTMLScanner.modifyName(ename, fNamesElems);
      fAttributes.removeAllAttributes();
      // Save the begin location and restore it once the attributes have
      // been scanned.
      int beginLineNumber = fBeginLineNumber;
      int beginColumnNumber = fBeginColumnNumber;
      int beginCharacterOffset = fBeginCharacterOffset;
      while (scanAttribute(fAttributes, empty)) {}
      fBeginLineNumber = beginLineNumber;
      fBeginColumnNumber = beginColumnNumber;
      fBeginCharacterOffset = beginCharacterOffset;
      if ((fByteStream != null) && (fElementDepth == -1)) {
        if (ename.equalsIgnoreCase("META"))
        {
          String httpEquiv = HTMLScanner.getValue(fAttributes, "http-equiv");
          if ((httpEquiv != null) && (httpEquiv.equalsIgnoreCase("content-type")))
          {
            String content = HTMLScanner.getValue(fAttributes, "content");
            int index1 = content != null ? content.toLowerCase().indexOf("charset=") : -1;
            if ((index1 != -1) && (!fIgnoreSpecifiedCharset))
            {
              // The charset value runs from just after "charset=" (8 chars)
              // to the next ';' or the end of the attribute value.
              int index2 = content.indexOf(';', index1);
              String charset = index2 != -1 ? content.substring(index1 + 8, index2) : content.substring(index1 + 8);
              changeEncoding(charset);
            }
          }
        }
        else if (ename.equalsIgnoreCase("BODY"))
        {
          fByteStream.clear();
          fByteStream = null;
        }
        else
        {
          HTMLElements.Element element = HTMLElements.getElement(ename);
          // NOTE(review): 14 is presumably the HTMLElements code for BODY;
          // confirm against HTMLElements.
          if ((element.parent != null) && (element.parent.length > 0) && 
            (element.parent[0].code == 14))
          {
            fByteStream.clear();
            fByteStream = null;
          }
        }
      }
      if ((fDocumentHandler != null) && (fElementCount >= fElementDepth))
      {
        fQName.setValues(null, ename, ename, null);
        
        fEndLineNumber = fCurrentEntity.lineNumber;
        fEndColumnNumber = fCurrentEntity.columnNumber;
        fEndCharacterOffset = fCurrentEntity.characterOffset;
        if (empty[0]) {
          fDocumentHandler.emptyElement(fQName, fAttributes, locationAugs());
        } else {
          fDocumentHandler.startElement(fQName, fAttributes, locationAugs());
        }
      }
      return ename;
    }
    
    private boolean changeEncoding(String charset)
    {
      if ((charset == null) || (fByteStream == null)) {
        return false;
      }
      boolean encodingChanged = false;
      try
      {
        String ianaEncoding = charset;
        String javaEncoding = EncodingMap.getIANA2JavaMapping(ianaEncoding.toUpperCase());
        if (javaEncoding == nu
1 2 3 4 5 6 7 8

Further reading...

For more information on Java 1.5 Tiger, you may find "Java 1.5 Tiger: A Developer's Notebook" by D. Flanagan and B. McLaughlin (O'Reilly) of interest.

New! JAR listings


Copyright 2006-2017. Infinite Loop Ltd