juniversalchardet-1.0.3

16:38:20.675 INFO  jd.cli.Main - Decompiling juniversalchardet-1.0.3.jar
package org.mozilla.universalchardet;

/**
 * Callback through which a charset name is handed to interested code.
 */
public interface CharsetListener
{
  /**
   * Receives a charset name.
   *
   * @param paramString the charset name being reported
   */
  void report(String paramString);
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.CharsetListener
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet;

/**
 * Names of the character sets used throughout the detector.
 *
 * Every value is an interned string, so it is reference-identical to any
 * equal string literal.
 */
public final class Constants
{
  // ISO-2022 escape-sequence encodings
  public static final String CHARSET_ISO_2022_JP = canonical("ISO-2022-JP");
  public static final String CHARSET_ISO_2022_CN = canonical("ISO-2022-CN");
  public static final String CHARSET_ISO_2022_KR = canonical("ISO-2022-KR");

  // ISO-8859 family
  public static final String CHARSET_ISO_8859_5 = canonical("ISO-8859-5");
  public static final String CHARSET_ISO_8859_7 = canonical("ISO-8859-7");
  public static final String CHARSET_ISO_8859_8 = canonical("ISO-8859-8");

  // Multi-byte encodings
  public static final String CHARSET_BIG5 = canonical("BIG5");
  public static final String CHARSET_GB18030 = canonical("GB18030");
  public static final String CHARSET_EUC_JP = canonical("EUC-JP");
  public static final String CHARSET_EUC_KR = canonical("EUC-KR");
  public static final String CHARSET_EUC_TW = canonical("EUC-TW");
  public static final String CHARSET_SHIFT_JIS = canonical("SHIFT_JIS");

  // Single-byte code pages
  public static final String CHARSET_IBM855 = canonical("IBM855");
  public static final String CHARSET_IBM866 = canonical("IBM866");
  public static final String CHARSET_KOI8_R = canonical("KOI8-R");
  public static final String CHARSET_MACCYRILLIC = canonical("MACCYRILLIC");
  public static final String CHARSET_WINDOWS_1251 = canonical("WINDOWS-1251");
  public static final String CHARSET_WINDOWS_1252 = canonical("WINDOWS-1252");
  public static final String CHARSET_WINDOWS_1253 = canonical("WINDOWS-1253");
  public static final String CHARSET_WINDOWS_1255 = canonical("WINDOWS-1255");

  // Unicode transformation formats
  public static final String CHARSET_UTF_8 = canonical("UTF-8");
  public static final String CHARSET_UTF_16BE = canonical("UTF-16BE");
  public static final String CHARSET_UTF_16LE = canonical("UTF-16LE");
  public static final String CHARSET_UTF_32BE = canonical("UTF-32BE");
  public static final String CHARSET_UTF_32LE = canonical("UTF-32LE");

  // Remaining names
  public static final String CHARSET_HZ_GB_2312 = canonical("HZ-GB-2312");
  public static final String CHARSET_X_ISO_10646_UCS_4_3412 = canonical("X-ISO-10646-UCS-4-3412");
  public static final String CHARSET_X_ISO_10646_UCS_4_2143 = canonical("X-ISO-10646-UCS-4-2143");

  /**
   * Returns the interned instance of the given charset name.
   *
   * @param name the charset name
   * @return the canonical (interned) string equal to {@code name}
   */
  private static String canonical(String name)
  {
    return name.intern();
  }
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.Constants
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet;

import java.io.PrintStream;

/**
 * Listener that writes every reported charset name to standard output.
 */
final class UniversalDetector$1
  implements CharsetListener
{
  /**
   * Prints the reported name, prefixed with a fixed label, on its own line.
   *
   * @param paramString the charset name being reported
   */
  public void report(String paramString)
  {
    final String line = "charset = " + paramString;
    System.out.println(line);
  }
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.UniversalDetector.1
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet;

/**
 * Classification of the input seen so far: plain ASCII only, ASCII with an
 * escape marker, or data containing high-bit bytes.
 */
public enum UniversalDetector$InputState
{
  PURE_ASCII,
  ESC_ASCII,
  HIGHBYTE
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.UniversalDetector.InputState
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet;

import java.io.FileInputStream;
import java.io.PrintStream;
import org.mozilla.universalchardet.prober.CharsetProber;
import org.mozilla.universalchardet.prober.CharsetProber.ProbingState;
import org.mozilla.universalchardet.prober.EscCharsetProber;
import org.mozilla.universalchardet.prober.Latin1Prober;
import org.mozilla.universalchardet.prober.MBCSGroupProber;
import org.mozilla.universalchardet.prober.SBCSGroupProber;

/**
 * Incremental charset detector.
 *
 * Feed data with {@link #handleData(byte[], int, int)} until {@link #isDone()}
 * returns true or the input is exhausted, then call {@link #dataEnd()}. The
 * result is available from {@link #getDetectedCharset()} and is also passed to
 * the listener, if one is set. Call {@link #reset()} before reusing an instance.
 */
public class UniversalDetector
{
  public static final float SHORTCUT_THRESHOLD = 0.95F;
  public static final float MINIMUM_THRESHOLD = 0.2F;
  private InputState inputState;
  private boolean done;
  private boolean start;
  private boolean gotData;
  private byte lastChar;
  private String detectedCharset;
  private CharsetProber[] probers;
  private CharsetProber escCharsetProber;
  private CharsetListener listener;
  
  /**
   * Creates a detector in its initial state.
   *
   * @param paramCharsetListener listener notified from {@link #dataEnd()};
   *        may be null, in which case nothing is reported
   */
  public UniversalDetector(CharsetListener paramCharsetListener)
  {
    listener = paramCharsetListener;
    escCharsetProber = null;
    // Slots start out null; the probers are created lazily in handleData.
    probers = new CharsetProber[3];
    reset();
  }
  
  /**
   * @return true once a charset has been determined and further data is ignored
   */
  public boolean isDone()
  {
    return done;
  }
  
  /**
   * @return the detected charset name, or null if none has been determined
   */
  public String getDetectedCharset()
  {
    return detectedCharset;
  }
  
  /**
   * @param paramCharsetListener the listener to notify, or null for none
   */
  public void setListener(CharsetListener paramCharsetListener)
  {
    listener = paramCharsetListener;
  }
  
  /**
   * @return the current listener, possibly null
   */
  public CharsetListener getListener()
  {
    return listener;
  }
  
  /**
   * Feeds one chunk of input to the detector.
   *
   * On the first call, a chunk longer than three bytes is checked for a byte
   * order mark. Otherwise the chunk is scanned to classify the input (pure
   * ASCII, escape-coded ASCII, or high-byte data) and then forwarded to the
   * matching prober(s). Calls made after detection has finished are ignored.
   *
   * @param paramArrayOfByte buffer holding the data
   * @param paramInt1 index of the first byte to examine
   * @param paramInt2 number of bytes to examine
   */
  public void handleData(byte[] paramArrayOfByte, int paramInt1, int paramInt2)
  {
    if (done) {
      return;
    }
    if (paramInt2 > 0) {
      gotData = true;
    }
    if (start)
    {
      start = false;
      if (paramInt2 > 3)
      {
        String bomCharset = detectBom(paramArrayOfByte, paramInt1);
        if (bomCharset != null)
        {
          detectedCharset = bomCharset;
          done = true;
          return;
        }
      }
    }
    int end = paramInt1 + paramInt2;
    for (int pos = paramInt1; pos < end; pos++)
    {
      int c = paramArrayOfByte[pos] & 0xFF;
      // High bit set, except 0xA0, switches the detector to high-byte mode.
      if (((c & 0x80) != 0) && (c != 0xA0))
      {
        if (inputState != InputState.HIGHBYTE)
        {
          inputState = InputState.HIGHBYTE;
          // The escape prober is no longer relevant once high bytes appear.
          escCharsetProber = null;
          if (probers[0] == null) {
            probers[0] = new MBCSGroupProber();
          }
          if (probers[1] == null) {
            probers[1] = new SBCSGroupProber();
          }
          if (probers[2] == null) {
            probers[2] = new Latin1Prober();
          }
        }
      }
      else
      {
        // ESC (0x1B), or '{' directly after '~', marks escape-coded input.
        if ((inputState == InputState.PURE_ASCII) && ((c == 0x1B) || ((c == '{') && (lastChar == '~')))) {
          inputState = InputState.ESC_ASCII;
        }
        lastChar = paramArrayOfByte[pos];
      }
    }
    CharsetProber.ProbingState probingState;
    if (inputState == InputState.ESC_ASCII)
    {
      if (escCharsetProber == null) {
        escCharsetProber = new EscCharsetProber();
      }
      probingState = escCharsetProber.handleData(paramArrayOfByte, paramInt1, paramInt2);
      if (probingState == CharsetProber.ProbingState.FOUND_IT)
      {
        done = true;
        detectedCharset = escCharsetProber.getCharSetName();
      }
    }
    else if (inputState == InputState.HIGHBYTE)
    {
      for (int idx = 0; idx < probers.length; idx++)
      {
        probingState = probers[idx].handleData(paramArrayOfByte, paramInt1, paramInt2);
        if (probingState == CharsetProber.ProbingState.FOUND_IT)
        {
          done = true;
          detectedCharset = probers[idx].getCharSetName();
          return;
        }
      }
    }
  }
  
  /**
   * Signals the end of input and finalizes detection.
   *
   * Does nothing if no data was ever supplied. If a charset is already known
   * it is reported. Otherwise, for high-byte input, the prober with the
   * highest confidence wins provided that confidence exceeds
   * {@link #MINIMUM_THRESHOLD}. In every other case nothing is reported and
   * the detected charset stays null.
   */
  public void dataEnd()
  {
    if (!gotData) {
      return;
    }
    if (detectedCharset != null)
    {
      done = true;
      notifyListener();
      return;
    }
    if (inputState != InputState.HIGHBYTE) {
      return;
    }
    float bestConfidence = 0.0F;
    int bestIndex = 0;
    for (int idx = 0; idx < probers.length; idx++)
    {
      float confidence = probers[idx].getConfidence();
      if (confidence > bestConfidence)
      {
        bestConfidence = confidence;
        bestIndex = idx;
      }
    }
    if (bestConfidence > MINIMUM_THRESHOLD)
    {
      detectedCharset = probers[bestIndex].getCharSetName();
      notifyListener();
    }
  }
  
  /**
   * Returns the detector to its initial state. Probers that already exist are
   * reset and kept rather than discarded.
   */
  public void reset()
  {
    done = false;
    start = true;
    detectedCharset = null;
    gotData = false;
    inputState = InputState.PURE_ASCII;
    lastChar = 0;
    if (escCharsetProber != null) {
      escCharsetProber.reset();
    }
    for (int i = 0; i < probers.length; i++) {
      if (probers[i] != null) {
        probers[i].reset();
      }
    }
  }
  
  /**
   * Command-line entry point: detects the charset of the file named by the
   * single argument and prints it to standard output.
   *
   * @param paramArrayOfString command-line arguments; exactly one file name
   * @throws Exception if the file cannot be opened or read
   */
  public static void main(String[] paramArrayOfString)
    throws Exception
  {
    if (paramArrayOfString.length != 1)
    {
      System.out.println("USAGE: java UniversalDetector filename");
      return;
    }
    UniversalDetector detector = new UniversalDetector(new CharsetListener()
    {
      public void report(String paramAnonymousString)
      {
        System.out.println("charset = " + paramAnonymousString);
      }
    });
    // NOTE(review): the decompiled listing sized this buffer with the char
    // literal '?' (63 bytes); 4096 is presumably the value the decompiler
    // garbled - confirm against the upstream source.
    byte[] buffer = new byte[4096];
    FileInputStream input = new FileInputStream(paramArrayOfString[0]);
    try
    {
      int count;
      while (((count = input.read(buffer)) > 0) && (!detector.isDone())) {
        detector.handleData(buffer, 0, count);
      }
    }
    finally
    {
      // Release the file handle even when reading or probing fails.
      input.close();
    }
    detector.dataEnd();
  }
  
  /**
   * Matches the four bytes starting at {@code offset} against the known byte
   * order marks. The caller must guarantee that four bytes are available.
   *
   * @return the charset implied by the mark, or null when none matches
   */
  private static String detectBom(byte[] buf, int offset)
  {
    int b1 = buf[offset] & 0xFF;
    int b2 = buf[offset + 1] & 0xFF;
    int b3 = buf[offset + 2] & 0xFF;
    int b4 = buf[offset + 3] & 0xFF;
    switch (b1)
    {
    case 0xEF: 
      if ((b2 == 0xBB) && (b3 == 0xBF)) {
        return Constants.CHARSET_UTF_8;
      }
      break;
    case 0xFE: 
      if ((b2 == 0xFF) && (b3 == 0x00) && (b4 == 0x00)) {
        return Constants.CHARSET_X_ISO_10646_UCS_4_3412;
      }
      if (b2 == 0xFF) {
        return Constants.CHARSET_UTF_16BE;
      }
      break;
    case 0x00: 
      if ((b2 == 0x00) && (b3 == 0xFE) && (b4 == 0xFF)) {
        return Constants.CHARSET_UTF_32BE;
      }
      if ((b2 == 0x00) && (b3 == 0xFF) && (b4 == 0xFE)) {
        return Constants.CHARSET_X_ISO_10646_UCS_4_2143;
      }
      break;
    case 0xFF: 
      if ((b2 == 0xFE) && (b3 == 0x00) && (b4 == 0x00)) {
        return Constants.CHARSET_UTF_32LE;
      }
      if (b2 == 0xFE) {
        return Constants.CHARSET_UTF_16LE;
      }
      break;
    default: 
      break;
    }
    return null;
  }
  
  /** Passes the detected charset to the listener, if one is set. */
  private void notifyListener()
  {
    if (listener != null) {
      listener.report(detectedCharset);
    }
  }
  
  /**
   * Classification of the input seen so far.
   */
  public static enum InputState
  {
    PURE_ASCII,  ESC_ASCII,  HIGHBYTE;
    
    private InputState() {}
  }
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.UniversalDetector
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet.prober;

import java.util.Arrays;
import org.mozilla.universalchardet.Constants;
import org.mozilla.universalchardet.prober.distributionanalysis.Big5DistributionAnalysis;
import org.mozilla.universalchardet.prober.statemachine.Big5SMModel;
import org.mozilla.universalchardet.prober.statemachine.CodingStateMachine;
import org.mozilla.universalchardet.prober.statemachine.SMModel;

/**
 * Prober for the Big5 encoding. Drives a coding state machine over the input
 * and feeds each completed character to a Big5 distribution analyzer.
 */
public class Big5Prober
  extends CharsetProber
{
  private CodingStateMachine codingSM = new CodingStateMachine(smModel);
  private CharsetProber.ProbingState state;
  private Big5DistributionAnalysis distributionAnalyzer = new Big5DistributionAnalysis();
  // lastChar[0]: final byte of the previous chunk; lastChar[1]: first byte of
  // the current chunk. Used for a character that straddles two chunks.
  private byte[] lastChar = new byte[2];
  private static final SMModel smModel = new Big5SMModel();
  
  /** Creates a prober in the DETECTING state. */
  public Big5Prober()
  {
    reset();
  }
  
  /**
   * @return {@link Constants#CHARSET_BIG5}
   */
  public String getCharSetName()
  {
    return Constants.CHARSET_BIG5;
  }
  
  /**
   * @return the confidence reported by the distribution analyzer
   */
  public float getConfidence()
  {
    return distributionAnalyzer.getConfidence();
  }
  
  /**
   * @return the current probing state
   */
  public CharsetProber.ProbingState getState()
  {
    return state;
  }
  
  /**
   * Processes one chunk of input.
   *
   * @param paramArrayOfByte buffer holding the data
   * @param paramInt1 index of the first byte to examine
   * @param paramInt2 number of bytes to examine; zero leaves the carried-over
   *        byte untouched
   * @return the probing state after the chunk has been processed
   */
  public CharsetProber.ProbingState handleData(byte[] paramArrayOfByte, int paramInt1, int paramInt2)
  {
    int end = paramInt1 + paramInt2;
    for (int pos = paramInt1; pos < end; pos++)
    {
      // NOTE(review): 1 / 2 / 0 are presumably the state machine's error,
      // "it's me" and start states - confirm against SMModel.
      int codingState = codingSM.nextState(paramArrayOfByte[pos]);
      if (codingState == 1)
      {
        state = CharsetProber.ProbingState.NOT_ME;
        break;
      }
      if (codingState == 2)
      {
        state = CharsetProber.ProbingState.FOUND_IT;
        break;
      }
      if (codingState == 0)
      {
        int charLen = codingSM.getCurrentCharLen();
        if (pos == paramInt1)
        {
          // The character may have begun in the previous chunk: pair the
          // saved byte with the first byte of this one.
          lastChar[1] = paramArrayOfByte[paramInt1];
          distributionAnalyzer.handleOneChar(lastChar, 0, charLen);
        }
        else
        {
          distributionAnalyzer.handleOneChar(paramArrayOfByte, pos - 1, charLen);
        }
      }
    }
    // Only remember a byte that belongs to this chunk. For an empty chunk the
    // index end - 1 lies before paramInt1 (or is -1).
    if (end > paramInt1) {
      lastChar[0] = paramArrayOfByte[end - 1];
    }
    if ((state == CharsetProber.ProbingState.DETECTING) && (distributionAnalyzer.gotEnoughData()) && (getConfidence() > SHORTCUT_THRESHOLD)) {
      state = CharsetProber.ProbingState.FOUND_IT;
    }
    return state;
  }
  
  /** Resets the state machine, the analyzer and the carried-over bytes. */
  public void reset()
  {
    codingSM.reset();
    state = CharsetProber.ProbingState.DETECTING;
    distributionAnalyzer.reset();
    Arrays.fill(lastChar, (byte)0);
  }
  
  /** No options are supported; this method does nothing. */
  public void setOption() {}
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.prober.Big5Prober
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet.prober;

/**
 * Outcome of a prober so far: still undecided, positively identified, or
 * ruled out.
 */
public enum CharsetProber$ProbingState
{
  DETECTING,
  FOUND_IT,
  NOT_ME
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.prober.CharsetProber.ProbingState
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet.prober;

import java.nio.ByteBuffer;

public abstract class CharsetProber
{
  public static final float SHORTCUT_THRESHOLD = 0.95F;
  public static final int ASCII_A = 97;
  public static final int ASCII_Z = 122;
  public static final int ASCII_A_CAPITAL = 65;
  public static final int ASCII_Z_CAPITAL = 90;
  public static final int ASCII_LT = 60;
  public static final int ASCII_GT = 62;
  public static final int ASCII_SP = 32;
  
  public abstract String getCharSetName();
  
  public abstract ProbingState handleData(byte[] paramArrayOfByte, int paramInt1, int paramInt2);
  
  public abstract ProbingState getState();
  
  public abstract void reset();
  
  public abstract float getConfidence();
  
  public abstract void setOption();
  
  public ByteBuffer filterWithoutEnglishLetters(byte[] paramArrayOfByte, int paramInt1, int paramInt2)
  {
    ByteBuffer localByteBuffer = ByteBuffer.allocate(paramInt2);
    int i = 0;
    int j = paramInt1;
    int k = paramInt1;
    int m = paramInt1 + paramInt2;
    while (k < m)
    {
      byte b = paramArrayOfByte[k];
      if (!isAscii(b)) {
        i = 1;
      } else if (isAsciiSymbol(b)) {
        if ((i != 0) && (k > j))
        {
          localByteBuffer.put(paramArrayOfByte, j, k - j);
          localByteBuffer.put((byte)32);
          j = k + 1;
          i = 0;
        }
        else
        {
          j = k + 1;
        }
      }
      k++;
    }
    if ((i != 0) && (k > j)) {
      localByteBuffer.put(paramArrayOfByte, j, k - j);
    }
    return localByteBuffer;
  }
  
  public ByteBuffer filterWithEnglishLetters(byte[] paramArrayOfByte, int paramInt1, int paramInt2)
  {
    ByteBuffer localByteBuffer = ByteBuffer.allocate(paramInt2);
    int i = 0;
    int j = paramInt1;
    int k = paramInt1;
    int m = paramInt1 + paramInt2;
    while (k < m)
    {
      byte b = paramArrayOfByte[k];
      if (b == 62) {
        i = 0;
      } else if (b == 60) {
        i = 1;
      }
      if ((isAscii(b)) && (isAsciiSymbol(b))) {
        if ((k > j) && (i == 0))
        {
          localByteBuffer.put(paramArrayOfByte, j, k - j);
          localByteBuffer.put((byte)32);
          j = k + 1;
        }
        else
        {
          j = k + 1;
        }
      }
      k++;
    }
    if ((i == 0) && (k > j)) {
      localByteBuffer.put(paramArrayOfByte, j, k - j);
    }
    return localByteBuffer;
  }
  
  private boolean isAscii(byte paramByte)
  {
    return (paramByte & 0x80) == 0;
  }
  
  private boolean isAsciiSymbol(byte paramByte)
  {
    int i = paramByte & 0xFF;
    return (i < 65) || ((i > 90) && (i < 97)) || (i > 122);
  }
  
  public static enum ProbingState
  {
    DETECTING,  FOUND_IT,  NOT_ME;
    
    private ProbingState() {}
  }
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.prober.CharsetProber
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet.prober;

import java.util.Arrays;
import org.mozilla.universalchardet.Constants;
import org.mozilla.universalchardet.prober.contextanalysis.EUCJPContextAnalysis;
import org.mozilla.universalchardet.prober.distributionanalysis.EUCJPDistributionAnalysis;
import org.mozilla.universalchardet.prober.statemachine.CodingStateMachine;
import org.mozilla.universalchardet.prober.statemachine.EUCJPSMModel;
import org.mozilla.universalchardet.prober.statemachine.SMModel;

/**
 * Prober for the EUC-JP encoding. Drives a coding state machine over the
 * input and feeds each completed character to both a context analyzer and a
 * distribution analyzer.
 */
public class EUCJPProber
  extends CharsetProber
{
  private CodingStateMachine codingSM = new CodingStateMachine(smModel);
  private CharsetProber.ProbingState state;
  private EUCJPContextAnalysis contextAnalyzer = new EUCJPContextAnalysis();
  private EUCJPDistributionAnalysis distributionAnalyzer = new EUCJPDistributionAnalysis();
  // lastChar[0]: final byte of the previous chunk; lastChar[1]: first byte of
  // the current chunk. Used for a character that straddles two chunks.
  private byte[] lastChar = new byte[2];
  private static final SMModel smModel = new EUCJPSMModel();
  
  /** Creates a prober in the DETECTING state. */
  public EUCJPProber()
  {
    reset();
  }
  
  /**
   * @return {@link Constants#CHARSET_EUC_JP}
   */
  public String getCharSetName()
  {
    return Constants.CHARSET_EUC_JP;
  }
  
  /**
   * @return the larger of the context and distribution analyzer confidences
   */
  public float getConfidence()
  {
    float contextConfidence = contextAnalyzer.getConfidence();
    float distributionConfidence = distributionAnalyzer.getConfidence();
    return Math.max(contextConfidence, distributionConfidence);
  }
  
  /**
   * @return the current probing state
   */
  public CharsetProber.ProbingState getState()
  {
    return state;
  }
  
  /**
   * Processes one chunk of input.
   *
   * @param paramArrayOfByte buffer holding the data
   * @param paramInt1 index of the first byte to examine
   * @param paramInt2 number of bytes to examine; zero leaves the carried-over
   *        byte untouched
   * @return the probing state after the chunk has been processed
   */
  public CharsetProber.ProbingState handleData(byte[] paramArrayOfByte, int paramInt1, int paramInt2)
  {
    int end = paramInt1 + paramInt2;
    for (int pos = paramInt1; pos < end; pos++)
    {
      // NOTE(review): 1 / 2 / 0 are presumably the state machine's error,
      // "it's me" and start states - confirm against SMModel.
      int codingState = codingSM.nextState(paramArrayOfByte[pos]);
      if (codingState == 1)
      {
        state = CharsetProber.ProbingState.NOT_ME;
        break;
      }
      if (codingState == 2)
      {
        state = CharsetProber.ProbingState.FOUND_IT;
        break;
      }
      if (codingState == 0)
      {
        int charLen = codingSM.getCurrentCharLen();
        if (pos == paramInt1)
        {
          // The character may have begun in the previous chunk: pair the
          // saved byte with the first byte of this one.
          lastChar[1] = paramArrayOfByte[paramInt1];
          contextAnalyzer.handleOneChar(lastChar, 0, charLen);
          distributionAnalyzer.handleOneChar(lastChar, 0, charLen);
        }
        else
        {
          contextAnalyzer.handleOneChar(paramArrayOfByte, pos - 1, charLen);
          distributionAnalyzer.handleOneChar(paramArrayOfByte, pos - 1, charLen);
        }
      }
    }
    // Only remember a byte that belongs to this chunk. For an empty chunk the
    // index end - 1 lies before paramInt1 (or is -1).
    if (end > paramInt1) {
      lastChar[0] = paramArrayOfByte[end - 1];
    }
    if ((state == CharsetProber.ProbingState.DETECTING) && (contextAnalyzer.gotEnoughData()) && (getConfidence() > SHORTCUT_THRESHOLD)) {
      state = CharsetProber.ProbingState.FOUND_IT;
    }
    return state;
  }
  
  /** Resets the state machine, both analyzers and the carried-over bytes. */
  public void reset()
  {
    codingSM.reset();
    state = CharsetProber.ProbingState.DETECTING;
    contextAnalyzer.reset();
    distributionAnalyzer.reset();
    Arrays.fill(lastChar, (byte)0);
  }
  
  /** No options are supported; this method does nothing. */
  public void setOption() {}
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.prober.EUCJPProber
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet.prober;

import java.util.Arrays;
import org.mozilla.universalchardet.Constants;
import org.mozilla.universalchardet.prober.distributionanalysis.EUCKRDistributionAnalysis;
import org.mozilla.universalchardet.prober.statemachine.CodingStateMachine;
import org.mozilla.universalchardet.prober.statemachine.EUCKRSMModel;
import org.mozilla.universalchardet.prober.statemachine.SMModel;

/**
 * Prober for the EUC-KR encoding. Drives a coding state machine over the
 * input and feeds each completed character to an EUC-KR distribution analyzer.
 */
public class EUCKRProber
  extends CharsetProber
{
  private CodingStateMachine codingSM = new CodingStateMachine(smModel);
  private CharsetProber.ProbingState state;
  private EUCKRDistributionAnalysis distributionAnalyzer = new EUCKRDistributionAnalysis();
  // lastChar[0]: final byte of the previous chunk; lastChar[1]: first byte of
  // the current chunk. Used for a character that straddles two chunks.
  private byte[] lastChar = new byte[2];
  private static final SMModel smModel = new EUCKRSMModel();
  
  /** Creates a prober in the DETECTING state. */
  public EUCKRProber()
  {
    reset();
  }
  
  /**
   * @return {@link Constants#CHARSET_EUC_KR}
   */
  public String getCharSetName()
  {
    return Constants.CHARSET_EUC_KR;
  }
  
  /**
   * @return the confidence reported by the distribution analyzer
   */
  public float getConfidence()
  {
    return distributionAnalyzer.getConfidence();
  }
  
  /**
   * @return the current probing state
   */
  public CharsetProber.ProbingState getState()
  {
    return state;
  }
  
  /**
   * Processes one chunk of input.
   *
   * @param paramArrayOfByte buffer holding the data
   * @param paramInt1 index of the first byte to examine
   * @param paramInt2 number of bytes to examine; zero leaves the carried-over
   *        byte untouched
   * @return the probing state after the chunk has been processed
   */
  public CharsetProber.ProbingState handleData(byte[] paramArrayOfByte, int paramInt1, int paramInt2)
  {
    int end = paramInt1 + paramInt2;
    for (int pos = paramInt1; pos < end; pos++)
    {
      // NOTE(review): 1 / 2 / 0 are presumably the state machine's error,
      // "it's me" and start states - confirm against SMModel.
      int codingState = codingSM.nextState(paramArrayOfByte[pos]);
      if (codingState == 1)
      {
        state = CharsetProber.ProbingState.NOT_ME;
        break;
      }
      if (codingState == 2)
      {
        state = CharsetProber.ProbingState.FOUND_IT;
        break;
      }
      if (codingState == 0)
      {
        int charLen = codingSM.getCurrentCharLen();
        if (pos == paramInt1)
        {
          // The character may have begun in the previous chunk: pair the
          // saved byte with the first byte of this one.
          lastChar[1] = paramArrayOfByte[paramInt1];
          distributionAnalyzer.handleOneChar(lastChar, 0, charLen);
        }
        else
        {
          distributionAnalyzer.handleOneChar(paramArrayOfByte, pos - 1, charLen);
        }
      }
    }
    // Only remember a byte that belongs to this chunk. For an empty chunk the
    // index end - 1 lies before paramInt1 (or is -1).
    if (end > paramInt1) {
      lastChar[0] = paramArrayOfByte[end - 1];
    }
    if ((state == CharsetProber.ProbingState.DETECTING) && (distributionAnalyzer.gotEnoughData()) && (getConfidence() > SHORTCUT_THRESHOLD)) {
      state = CharsetProber.ProbingState.FOUND_IT;
    }
    return state;
  }
  
  /** Resets the state machine, the analyzer and the carried-over bytes. */
  public void reset()
  {
    codingSM.reset();
    state = CharsetProber.ProbingState.DETECTING;
    distributionAnalyzer.reset();
    Arrays.fill(lastChar, (byte)0);
  }
  
  /** No options are supported; this method does nothing. */
  public void setOption() {}
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.prober.EUCKRProber
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet.prober;

import java.util.Arrays;
import org.mozilla.universalchardet.Constants;
import org.mozilla.universalchardet.prober.distributionanalysis.EUCTWDistributionAnalysis;
import org.mozilla.universalchardet.prober.statemachine.CodingStateMachine;
import org.mozilla.universalchardet.prober.statemachine.EUCTWSMModel;
import org.mozilla.universalchardet.prober.statemachine.SMModel;

/**
 * Prober for the EUC-TW encoding. Drives a coding state machine over the
 * input and feeds each completed character to an EUC-TW distribution analyzer.
 */
public class EUCTWProber
  extends CharsetProber
{
  private CodingStateMachine codingSM = new CodingStateMachine(smModel);
  private CharsetProber.ProbingState state;
  private EUCTWDistributionAnalysis distributionAnalyzer = new EUCTWDistributionAnalysis();
  // lastChar[0]: final byte of the previous chunk; lastChar[1]: first byte of
  // the current chunk. Used for a character that straddles two chunks.
  private byte[] lastChar = new byte[2];
  private static final SMModel smModel = new EUCTWSMModel();
  
  /** Creates a prober in the DETECTING state. */
  public EUCTWProber()
  {
    reset();
  }
  
  /**
   * @return {@link Constants#CHARSET_EUC_TW}
   */
  public String getCharSetName()
  {
    return Constants.CHARSET_EUC_TW;
  }
  
  /**
   * @return the confidence reported by the distribution analyzer
   */
  public float getConfidence()
  {
    return distributionAnalyzer.getConfidence();
  }
  
  /**
   * @return the current probing state
   */
  public CharsetProber.ProbingState getState()
  {
    return state;
  }
  
  /**
   * Processes one chunk of input.
   *
   * @param paramArrayOfByte buffer holding the data
   * @param paramInt1 index of the first byte to examine
   * @param paramInt2 number of bytes to examine; zero leaves the carried-over
   *        byte untouched
   * @return the probing state after the chunk has been processed
   */
  public CharsetProber.ProbingState handleData(byte[] paramArrayOfByte, int paramInt1, int paramInt2)
  {
    int end = paramInt1 + paramInt2;
    for (int pos = paramInt1; pos < end; pos++)
    {
      // NOTE(review): 1 / 2 / 0 are presumably the state machine's error,
      // "it's me" and start states - confirm against SMModel.
      int codingState = codingSM.nextState(paramArrayOfByte[pos]);
      if (codingState == 1)
      {
        state = CharsetProber.ProbingState.NOT_ME;
        break;
      }
      if (codingState == 2)
      {
        state = CharsetProber.ProbingState.FOUND_IT;
        break;
      }
      if (codingState == 0)
      {
        int charLen = codingSM.getCurrentCharLen();
        if (pos == paramInt1)
        {
          // The character may have begun in the previous chunk: pair the
          // saved byte with the first byte of this one.
          lastChar[1] = paramArrayOfByte[paramInt1];
          distributionAnalyzer.handleOneChar(lastChar, 0, charLen);
        }
        else
        {
          distributionAnalyzer.handleOneChar(paramArrayOfByte, pos - 1, charLen);
        }
      }
    }
    // Only remember a byte that belongs to this chunk. For an empty chunk the
    // index end - 1 lies before paramInt1 (or is -1).
    if (end > paramInt1) {
      lastChar[0] = paramArrayOfByte[end - 1];
    }
    if ((state == CharsetProber.ProbingState.DETECTING) && (distributionAnalyzer.gotEnoughData()) && (getConfidence() > SHORTCUT_THRESHOLD)) {
      state = CharsetProber.ProbingState.FOUND_IT;
    }
    return state;
  }
  
  /** Resets the state machine, the analyzer and the carried-over bytes. */
  public void reset()
  {
    codingSM.reset();
    state = CharsetProber.ProbingState.DETECTING;
    distributionAnalyzer.reset();
    Arrays.fill(lastChar, (byte)0);
  }
  
  /** No options are supported; this method does nothing. */
  public void setOption() {}
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.prober.EUCTWProber
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet.prober;

import org.mozilla.universalchardet.prober.statemachine.CodingStateMachine;
import org.mozilla.universalchardet.prober.statemachine.HZSMModel;
import org.mozilla.universalchardet.prober.statemachine.ISO2022CNSMModel;
import org.mozilla.universalchardet.prober.statemachine.ISO2022JPSMModel;
import org.mozilla.universalchardet.prober.statemachine.ISO2022KRSMModel;

/**
 * Prober for escape-sequence based encodings. Runs four coding state
 * machines (HZ, ISO-2022-CN, ISO-2022-JP, ISO-2022-KR) side by side and
 * reports the first one that recognises its encoding.
 */
public class EscCharsetProber
  extends CharsetProber
{
  private static final HZSMModel hzsModel = new HZSMModel();
  private static final ISO2022CNSMModel iso2022cnModel = new ISO2022CNSMModel();
  private static final ISO2022JPSMModel iso2022jpModel = new ISO2022JPSMModel();
  private static final ISO2022KRSMModel iso2022krModel = new ISO2022KRSMModel();
  
  // Machines at indices [0, activeSM) are still candidates; rejected ones
  // are swapped to the tail.
  private CodingStateMachine[] codingSM = new CodingStateMachine[4];
  private int activeSM;
  private CharsetProber.ProbingState state;
  private String detectedCharset;
  
  /** Builds the four state machines and puts the prober in DETECTING state. */
  public EscCharsetProber()
  {
    codingSM[0] = new CodingStateMachine(hzsModel);
    codingSM[1] = new CodingStateMachine(iso2022cnModel);
    codingSM[2] = new CodingStateMachine(iso2022jpModel);
    codingSM[3] = new CodingStateMachine(iso2022krModel);
    reset();
  }
  
  /**
   * @return the name supplied by the matching state machine, or null if
   *         nothing has matched since the last reset
   */
  public String getCharSetName()
  {
    return detectedCharset;
  }
  
  /**
   * @return a fixed confidence of 0.99
   */
  public float getConfidence()
  {
    return 0.99F;
  }
  
  /**
   * @return the current probing state
   */
  public CharsetProber.ProbingState getState()
  {
    return state;
  }
  
  /**
   * Feeds each byte to every state machine that is still a candidate.
   *
   * @param paramArrayOfByte buffer holding the data
   * @param paramInt1 index of the first byte to examine
   * @param paramInt2 number of bytes to examine
   * @return FOUND_IT as soon as one machine matches, NOT_ME once every
   *         machine has rejected the input, otherwise the unchanged state
   */
  public CharsetProber.ProbingState handleData(byte[] paramArrayOfByte, int paramInt1, int paramInt2)
  {
    int end = paramInt1 + paramInt2;
    int pos = paramInt1;
    while ((pos < end) && (state == CharsetProber.ProbingState.DETECTING))
    {
      byte current = paramArrayOfByte[pos];
      for (int idx = activeSM - 1; idx >= 0; idx--)
      {
        // NOTE(review): 1 and 2 are presumably the state machine's error and
        // "it's me" states - confirm against SMModel.
        int codingState = codingSM[idx].nextState(current);
        if (codingState == 2)
        {
          state = CharsetProber.ProbingState.FOUND_IT;
          detectedCharset = codingSM[idx].getCodingStateMachine();
          return state;
        }
        if (codingState != 1) {
          continue;
        }
        // This machine rejected the input: shrink the candidate range.
        activeSM--;
        if (activeSM <= 0)
        {
          state = CharsetProber.ProbingState.NOT_ME;
          return state;
        }
        if (idx != activeSM)
        {
          // Move the rejected machine to the slot just past the candidates.
          CodingStateMachine rejected = codingSM[idx];
          codingSM[idx] = codingSM[activeSM];
          codingSM[activeSM] = rejected;
        }
      }
      pos++;
    }
    return state;
  }
  
  /** Resets every state machine and makes all of them candidates again. */
  public void reset()
  {
    state = CharsetProber.ProbingState.DETECTING;
    for (CodingStateMachine machine : codingSM) {
      machine.reset();
    }
    activeSM = codingSM.length;
    detectedCharset = null;
  }
  
  /** No options are supported; this method does nothing. */
  public void setOption() {}
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.prober.EscCharsetProber
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet.prober;

import java.util.Arrays;
import org.mozilla.universalchardet.Constants;
import org.mozilla.universalchardet.prober.distributionanalysis.GB2312DistributionAnalysis;
import org.mozilla.universalchardet.prober.statemachine.CodingStateMachine;
import org.mozilla.universalchardet.prober.statemachine.GB18030SMModel;
import org.mozilla.universalchardet.prober.statemachine.SMModel;

/**
 * Prober for the GB18030 encoding. Drives a coding state machine over the
 * input and feeds each completed character to a GB2312 distribution analyzer.
 */
public class GB18030Prober
  extends CharsetProber
{
  private CodingStateMachine codingSM = new CodingStateMachine(smModel);
  private CharsetProber.ProbingState state;
  private GB2312DistributionAnalysis distributionAnalyzer = new GB2312DistributionAnalysis();
  // lastChar[0]: final byte of the previous chunk; lastChar[1]: first byte of
  // the current chunk. Used for a character that straddles two chunks.
  private byte[] lastChar = new byte[2];
  private static final SMModel smModel = new GB18030SMModel();
  
  /** Creates a prober in the DETECTING state. */
  public GB18030Prober()
  {
    reset();
  }
  
  /**
   * @return {@link Constants#CHARSET_GB18030}
   */
  public String getCharSetName()
  {
    return Constants.CHARSET_GB18030;
  }
  
  /**
   * @return the confidence reported by the distribution analyzer
   */
  public float getConfidence()
  {
    return distributionAnalyzer.getConfidence();
  }
  
  /**
   * @return the current probing state
   */
  public CharsetProber.ProbingState getState()
  {
    return state;
  }
  
  /**
   * Processes one chunk of input.
   *
   * @param paramArrayOfByte buffer holding the data
   * @param paramInt1 index of the first byte to examine
   * @param paramInt2 number of bytes to examine; zero leaves the carried-over
   *        byte untouched
   * @return the probing state after the chunk has been processed
   */
  public CharsetProber.ProbingState handleData(byte[] paramArrayOfByte, int paramInt1, int paramInt2)
  {
    int end = paramInt1 + paramInt2;
    for (int pos = paramInt1; pos < end; pos++)
    {
      // NOTE(review): 1 / 2 / 0 are presumably the state machine's error,
      // "it's me" and start states - confirm against SMModel.
      int codingState = codingSM.nextState(paramArrayOfByte[pos]);
      if (codingState == 1)
      {
        state = CharsetProber.ProbingState.NOT_ME;
        break;
      }
      if (codingState == 2)
      {
        state = CharsetProber.ProbingState.FOUND_IT;
        break;
      }
      if (codingState == 0)
      {
        int charLen = codingSM.getCurrentCharLen();
        if (pos == paramInt1)
        {
          // The character may have begun in the previous chunk: pair the
          // saved byte with the first byte of this one.
          lastChar[1] = paramArrayOfByte[paramInt1];
          distributionAnalyzer.handleOneChar(lastChar, 0, charLen);
        }
        else
        {
          distributionAnalyzer.handleOneChar(paramArrayOfByte, pos - 1, charLen);
        }
      }
    }
    // Only remember a byte that belongs to this chunk. For an empty chunk the
    // index end - 1 lies before paramInt1 (or is -1).
    if (end > paramInt1) {
      lastChar[0] = paramArrayOfByte[end - 1];
    }
    if ((state == CharsetProber.ProbingState.DETECTING) && (distributionAnalyzer.gotEnoughData()) && (getConfidence() > SHORTCUT_THRESHOLD)) {
      state = CharsetProber.ProbingState.FOUND_IT;
    }
    return state;
  }
  
  /** Resets the state machine, the analyzer and the carried-over bytes. */
  public void reset()
  {
    codingSM.reset();
    state = CharsetProber.ProbingState.DETECTING;
    distributionAnalyzer.reset();
    Arrays.fill(lastChar, (byte)0);
  }
  
  /** No options are supported; this method does nothing. */
  public void setOption() {}
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.prober.GB18030Prober
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet.prober;

import org.mozilla.universalchardet.Constants;

/**
 * Helper prober that chooses between WINDOWS-1255 and ISO-8859-8.
 *
 * It keeps two scores based on where "final" and "non-final" letter codes
 * occur relative to spaces, and falls back to comparing the confidences of
 * two model probers supplied through {@link #setModalProbers}.
 */
public class HebrewProber
  extends CharsetProber
{
  public static final int FINAL_KAF = 234;
  public static final int NORMAL_KAF = 235;
  public static final int FINAL_MEM = 237;
  public static final int NORMAL_MEM = 238;
  public static final int FINAL_NUN = 239;
  public static final int NORMAL_NUN = 240;
  public static final int FINAL_PE = 243;
  public static final int NORMAL_PE = 244;
  public static final int FINAL_TSADI = 245;
  public static final int NORMAL_TSADI = 246;
  public static final byte SPACE = 32;
  public static final int MIN_FINAL_CHAR_DISTANCE = 5;
  public static final float MIN_MODEL_DISTANCE = 0.01F;
  private int finalCharLogicalScore;
  private int finalCharVisualScore;
  private byte prev;
  private byte beforePrev;
  private CharsetProber logicalProber = null;
  private CharsetProber visualProber = null;
  
  /** Creates a prober with both scores at zero. */
  public HebrewProber()
  {
    reset();
  }
  
  /**
   * Supplies the two model probers consulted by {@link #getCharSetName()}
   * and {@link #getState()}.
   *
   * @param paramCharsetProber1 the logical-order prober
   * @param paramCharsetProber2 the visual-order prober
   */
  public void setModalProbers(CharsetProber paramCharsetProber1, CharsetProber paramCharsetProber2)
  {
    logicalProber = paramCharsetProber1;
    visualProber = paramCharsetProber2;
  }
  
  /**
   * Decides between the two charsets. The final-letter score difference
   * decides when it is at least {@link #MIN_FINAL_CHAR_DISTANCE} apart;
   * otherwise the model probers' confidence difference decides when it
   * exceeds {@link #MIN_MODEL_DISTANCE}; otherwise the sign of the score
   * difference decides, with WINDOWS-1255 winning a tie.
   *
   * @return {@link Constants#CHARSET_WINDOWS_1255} or
   *         {@link Constants#CHARSET_ISO_8859_8}
   */
  public String getCharSetName()
  {
    int scoreGap = finalCharLogicalScore - finalCharVisualScore;
    if (scoreGap >= MIN_FINAL_CHAR_DISTANCE) {
      return Constants.CHARSET_WINDOWS_1255;
    }
    if (scoreGap <= -MIN_FINAL_CHAR_DISTANCE) {
      return Constants.CHARSET_ISO_8859_8;
    }
    float modelGap = logicalProber.getConfidence() - visualProber.getConfidence();
    if (modelGap > MIN_MODEL_DISTANCE) {
      return Constants.CHARSET_WINDOWS_1255;
    }
    if (modelGap < -MIN_MODEL_DISTANCE) {
      return Constants.CHARSET_ISO_8859_8;
    }
    return (scoreGap < 0) ? Constants.CHARSET_ISO_8859_8 : Constants.CHARSET_WINDOWS_1255;
  }
  
  /**
   * @return always 0
   */
  public float getConfidence()
  {
    return 0.0F;
  }
  
  /**
   * @return NOT_ME only when both model probers report NOT_ME, otherwise
   *         DETECTING
   */
  public CharsetProber.ProbingState getState()
  {
    boolean logicalOut = logicalProber.getState() == CharsetProber.ProbingState.NOT_ME;
    if (logicalOut && (visualProber.getState() == CharsetProber.ProbingState.NOT_ME)) {
      return CharsetProber.ProbingState.NOT_ME;
    }
    return CharsetProber.ProbingState.DETECTING;
  }
  
  /**
   * Updates the two scores from one chunk of input.
   *
   * @param paramArrayOfByte buffer holding the data
   * @param paramInt1 index of the first byte to examine
   * @param paramInt2 number of bytes to examine
   * @return NOT_ME if both model probers have already given up, otherwise
   *         DETECTING
   */
  public CharsetProber.ProbingState handleData(byte[] paramArrayOfByte, int paramInt1, int paramInt2)
  {
    if (getState() == CharsetProber.ProbingState.NOT_ME) {
      return CharsetProber.ProbingState.NOT_ME;
    }
    int end = paramInt1 + paramInt2;
    for (int pos = paramInt1; pos < end; pos++)
    {
      byte current = paramArrayOfByte[pos];
      if (current == SPACE)
      {
        // A word of at least two bytes just ended; score its last byte.
        if (beforePrev != SPACE)
        {
          if (isFinal(prev)) {
            finalCharLogicalScore++;
          } else if (isNonFinal(prev)) {
            finalCharVisualScore++;
          }
        }
      }
      else if ((beforePrev == SPACE) && isFinal(prev))
      {
        // A final-form code directly after a space, followed by more text.
        finalCharVisualScore++;
      }
      beforePrev = prev;
      prev = current;
    }
    return CharsetProber.ProbingState.DETECTING;
  }
  
  /** Clears both scores and treats the preceding two bytes as spaces. */
  public void reset()
  {
    finalCharLogicalScore = 0;
    finalCharVisualScore = 0;
    prev = SPACE;
    beforePrev = SPACE;
  }
  
  /** No options are supported; this method does nothing. */
  public void setOption() {}
  
  /**
   * @param paramByte the byte to classify
   * @return true when the unsigned value is one of the FINAL_* codes
   */
  protected static boolean isFinal(byte paramByte)
  {
    switch (paramByte & 0xFF)
    {
    case FINAL_KAF: 
    case FINAL_MEM: 
    case FINAL_NUN: 
    case FINAL_PE: 
    case FINAL_TSADI: 
      return true;
    default: 
      return false;
    }
  }
  
  /**
   * @param paramByte the byte to classify
   * @return true when the unsigned value is NORMAL_KAF, NORMAL_MEM,
   *         NORMAL_NUN or NORMAL_PE (NORMAL_TSADI is not included)
   */
  protected static boolean isNonFinal(byte paramByte)
  {
    switch (paramByte & 0xFF)
    {
    case NORMAL_KAF: 
    case NORMAL_MEM: 
    case NORMAL_NUN: 
    case NORMAL_PE: 
      return true;
    default: 
      return false;
    }
  }
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.prober.HebrewProber
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet.prober;

import java.nio.ByteBuffer;
import org.mozilla.universalchardet.Constants;

/**
 * Prober that reports {@code WINDOWS-1252} and scores the input by how often
 * each pair of consecutive character classes occurs.
 *
 * <p>Every input byte is mapped to one of {@link #CLASS_NUM} character classes;
 * the pair (previous class, current class) is then looked up in a
 * {@code CLASS_NUM x CLASS_NUM} table that yields a frequency category in the
 * range {@code 0..FREQ_CAT_NUM-1}. Category 0 rules this charset out.
 */
public class Latin1Prober
  extends CharsetProber
{
  // Character class identifiers; they are the values stored in latin1CharToClass
  // and the row/column indices of latin1ClassModel.
  public static final byte UDF = 0;
  public static final byte OTH = 1;
  public static final byte ASC = 2;
  public static final byte ASS = 3;
  public static final byte ACV = 4;
  public static final byte ACO = 5;
  public static final byte ASV = 6;
  public static final byte ASO = 7;
  /** Number of character classes (row length of {@code latin1ClassModel}). */
  public static final int CLASS_NUM = 8;
  /** Number of frequency categories counted in {@code freqCounter}. */
  public static final int FREQ_CAT_NUM = 4;
  private CharsetProber.ProbingState state;
  private byte lastCharClass;
  private final int[] freqCounter = new int[FREQ_CAT_NUM];
  // Character class for each unsigned byte value.
  private static final byte[] latin1CharToClass = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 0, 1, 7, 1, 1, 1, 1, 1, 1, 5, 1, 5, 0, 5, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 7, 0, 7, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 6, 6, 6, 6, 6, 1, 6, 6, 6, 6, 6, 7, 7, 7 };
  // Frequency category for a (previous class, current class) pair, stored
  // row-major: index = previousClass * CLASS_NUM + currentClass.
  private static final byte[] latin1ClassModel = {
    /*          UDF OTH ASC ASS ACV ACO ASV ASO */
    /* UDF */   0,  0,  0,  0,  0,  0,  0,  0,
    /* OTH */   0,  3,  3,  3,  3,  3,  3,  3,
    /* ASC */   0,  3,  3,  3,  3,  3,  3,  3,
    /* ASS */   0,  3,  3,  3,  1,  1,  3,  3,
    /* ACV */   0,  3,  3,  3,  1,  2,  1,  2,
    /* ACO */   0,  3,  3,  3,  3,  3,  3,  3,
    /* ASV */   0,  3,  1,  3,  1,  1,  1,  3,
    /* ASO */   0,  3,  1,  3,  1,  1,  3,  3
  };
  
  /** Creates a prober in its initial (reset) state. */
  public Latin1Prober()
  {
    reset();
  }
  
  /**
   * @return the charset this prober stands for, {@code Constants.CHARSET_WINDOWS_1252}
   */
  public String getCharSetName()
  {
    return Constants.CHARSET_WINDOWS_1252;
  }
  
  /**
   * Computes the confidence from the frequency-category counters.
   *
   * @return {@code 0.01F} once the state is {@code NOT_ME}; otherwise half of
   *         (share of category 3 minus twenty times the share of category 1),
   *         clamped below at zero, and {@code 0.0F} when nothing was counted yet
   */
  public float getConfidence()
  {
    if (state == CharsetProber.ProbingState.NOT_ME) {
      return 0.01F;
    }
    int total = 0;
    for (int cat = 0; cat < freqCounter.length; cat++) {
      total += freqCounter[cat];
    }
    float confidence;
    if (total <= 0)
    {
      confidence = 0.0F;
    }
    else
    {
      confidence = freqCounter[3] * 1.0F / total;
      // Category-1 pairs are penalised twenty-fold.
      confidence -= freqCounter[1] * 20.0F / total;
    }
    if (confidence < 0.0F) {
      confidence = 0.0F;
    }
    // The final score is deliberately halved.
    confidence *= 0.5F;
    return confidence;
  }
  
  /**
   * @return the current probing state
   */
  public CharsetProber.ProbingState getState()
  {
    return state;
  }
  
  /**
   * Feeds a slice of input to the prober.
   *
   * <p>The slice is first passed through {@code filterWithEnglishLetters}
   * (inherited; its exact filtering rules are not visible here). The bytes of
   * the returned buffer, from index 0 up to its position, are then classified
   * pair by pair.
   *
   * @param paramArrayOfByte input buffer
   * @param paramInt1 offset of the first byte to examine
   * @param paramInt2 number of bytes to examine
   * @return the state after processing; {@code NOT_ME} as soon as a class pair
   *         maps to frequency category 0
   */
  public CharsetProber.ProbingState handleData(byte[] paramArrayOfByte, int paramInt1, int paramInt2)
  {
    ByteBuffer filtered = filterWithEnglishLetters(paramArrayOfByte, paramInt1, paramInt2);
    byte[] data = filtered.array();
    int limit = filtered.position();
    for (int pos = 0; pos < limit; pos++)
    {
      // Mask to an unsigned value so bytes >= 0x80 index the upper table half.
      byte charClass = latin1CharToClass[data[pos] & 0xFF];
      byte freq = latin1ClassModel[lastCharClass * CLASS_NUM + charClass];
      if (freq == 0)
      {
        state = CharsetProber.ProbingState.NOT_ME;
        break;
      }
      freqCounter[freq] += 1;
      lastCharClass = charClass;
    }
    return state;
  }
  
  /**
   * Returns the prober to its initial state: {@code DETECTING}, previous
   * character class {@code OTH}, all frequency counters zero.
   */
  public void reset()
  {
    state = CharsetProber.ProbingState.DETECTING;
    lastCharClass = OTH;
    for (int cat = 0; cat < freqCounter.length; cat++) {
      freqCounter[cat] = 0;
    }
  }
  
  /** Does nothing; this prober has no options to set. */
  public void setOption() {}
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.prober.Latin1Prober
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet.prober;

/**
 * Group prober that feeds the same input to a fixed set of seven charset
 * probers and answers on behalf of whichever one is most confident.
 *
 * <p>A child prober that reports {@code NOT_ME} is deactivated; the group
 * itself becomes {@code NOT_ME} once no child is left, and {@code FOUND_IT}
 * as soon as any child reports {@code FOUND_IT}.
 */
public class MBCSGroupProber
  extends CharsetProber
{
  private CharsetProber.ProbingState state;
  private final CharsetProber[] probers = new CharsetProber[7];
  private final boolean[] isActive = new boolean[7];
  /** Index into {@code probers} of the current best candidate, or -1 if none yet. */
  private int bestGuess;
  /** Number of {@code true} entries in {@code isActive}. */
  private int activeNum;
  
  /** Creates the seven child probers and resets the group. */
  public MBCSGroupProber()
  {
    probers[0] = new UTF8Prober();
    probers[1] = new SJISProber();
    probers[2] = new EUCJPProber();
    probers[3] = new GB18030Prober();
    probers[4] = new EUCKRProber();
    probers[5] = new Big5Prober();
    probers[6] = new EUCTWProber();
    reset();
  }
  
  /**
   * Returns the charset name of the best child prober.
   *
   * <p>If no best guess has been recorded yet, {@link #getConfidence()} is
   * called to try to establish one; failing that, the first prober
   * (index 0) is used.
   *
   * @return the charset name reported by the selected child prober
   */
  public String getCharSetName()
  {
    if (bestGuess == -1)
    {
      getConfidence();
      if (bestGuess == -1) {
        bestGuess = 0;
      }
    }
    return probers[bestGuess].getCharSetName();
  }
  
  /**
   * Computes the group's confidence and, while still detecting, records the
   * most confident active child as the best guess.
   *
   * @return {@code 0.99F} when the state is {@code FOUND_IT}, {@code 0.01F}
   *         when it is {@code NOT_ME}, otherwise the highest confidence among
   *         the active child probers ({@code 0.0F} if none exceeds zero)
   */
  public float getConfidence()
  {
    if (state == CharsetProber.ProbingState.FOUND_IT) {
      return 0.99F;
    }
    if (state == CharsetProber.ProbingState.NOT_ME) {
      return 0.01F;
    }
    float best = 0.0F;
    for (int i = 0; i < probers.length; i++) {
      if (!isActive[i]) {
        continue;
      }
      float candidate = probers[i].getConfidence();
      if (best < candidate)
      {
        best = candidate;
        bestGuess = i;
      }
    }
    return best;
  }
  
  /**
   * @return the current probing state of the group
   */
  public CharsetProber.ProbingState getState()
  {
    return state;
  }
  
  /**
   * Filters a slice of input and hands the filtered bytes to every active
   * child prober.
   *
   * <p>Filtering keeps every byte with the high bit set. A byte without the
   * high bit is kept only if it is the first byte of the slice or directly
   * follows a kept high-bit byte.
   *
   * @param paramArrayOfByte input buffer
   * @param paramInt1 offset of the first byte to examine
   * @param paramInt2 number of bytes to examine
   * @return the group's state after the children have processed the data
   */
  public CharsetProber.ProbingState handleData(byte[] paramArrayOfByte, int paramInt1, int paramInt2)
  {
    boolean keepNext = true;
    byte[] highByteBuf = new byte[paramInt2];
    int highPos = 0;
    int end = paramInt1 + paramInt2;
    for (int i = paramInt1; i < end; i++) {
      byte b = paramArrayOfByte[i];
      if ((b & 0x80) != 0)
      {
        highByteBuf[(highPos++)] = b;
        keepNext = true;
      }
      else if (keepNext)
      {
        // Keep one low byte after a high byte, then drop the rest of the run.
        highByteBuf[(highPos++)] = b;
        keepNext = false;
      }
    }
    for (int i = 0; i < probers.length; i++) {
      if (!isActive[i]) {
        continue;
      }
      CharsetProber.ProbingState childState = probers[i].handleData(highByteBuf, 0, highPos);
      if (childState == CharsetProber.ProbingState.FOUND_IT)
      {
        bestGuess = i;
        state = CharsetProber.ProbingState.FOUND_IT;
        break;
      }
      if (childState == CharsetProber.ProbingState.NOT_ME)
      {
        isActive[i] = false;
        activeNum -= 1;
        if (activeNum <= 0)
        {
          state = CharsetProber.ProbingState.NOT_ME;
          break;
        }
      }
    }
    return state;
  }
  
  /**
   * Resets every child prober, marks all of them active again, clears the
   * best guess and returns the group to {@code DETECTING}.
   */
  public void reset()
  {
    activeNum = 0;
    for (int i = 0; i < probers.length; i++)
    {
      probers[i].reset();
      isActive[i] = true;
      activeNum += 1;
    }
    bestGuess = -1;
    state = CharsetProber.ProbingState.DETECTING;
  }
  
  /** Does nothing; this prober has no options to set. */
  public void setOption() {}
}

/* Location:
 * Qualified Name:     org.mozilla.universalchardet.prober.MBCSGroupProber
 * Java Class Version: 5 (49.0)
 * JD-Core Version:    0.7.1
 */
package org.mozilla.universalchardet.prober;

import java.nio.ByteBuffer;
import org.mozilla.universalchardet.prober.sequence.HebrewModel;
import org.mozilla.universalchardet.prober.sequence.Ibm855Model;
import org.mozilla.universalchardet.prober.sequence.Ibm866Model;
import org.mozilla.universalchardet.prober.sequence.Koi8rModel;
import org.mozilla.universalchardet.prober.sequence.Latin5BulgarianModel;
import org.mozilla.universalchardet.prober.sequence.Latin5Model;
import org.mozilla.universalchardet.prober.sequence.Latin7Model;
import org.mozilla.universalchardet.prober.sequence.MacCyrillicModel;
import org.mozilla.universalchardet.prober.sequence.SequenceModel;
import org.mozilla.universalchardet.prob
1 2 3 4 5 6 7 8

Further reading...

For more information on Java 1.5 Tiger, you may find "Java 1.5 Tiger: A Developer's Notebook" by D. Flanagan and B. McLaughlin (O'Reilly) of interest.

New! JAR listings


Copyright 2006-2017. Infinite Loop Ltd