org.apache.lucene.queries_2.9.1.v20100421-0704

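    // Print the score, url, title and (when stored) summary of each hit.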
    {
      Document d = hits.doc(i);
      String summary = d.get("summary");
      o.println("score  : " + hits.score(i));
      o.println("url    : " + d.get("url"));
      o.println("\ttitle  : " + d.get("title"));
      if (summary != null) {
        o.println("\tsummary: " + d.get("summary"));
      }
      o.println();
    }
  }
  
  public PriorityQueue retrieveTerms(int docNum)
    throws IOException
  {
    Map termFreqMap = new HashMap();
    for (int i = 0; i < fieldNames.length; i++)
    {
      String fieldName = fieldNames[i];
      TermFreqVector vector = ir.getTermFreqVector(docNum, fieldName);
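      // Field was indexed without term vectors: fall back to re-analyzing the stored value.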
      if (vector == null)
      {
        Document d = ir.document(docNum);
        String[] text = d.getValues(fieldName);
        if (text != null) {
          for (int j = 0; j < text.length; j++) {
            addTermFrequencies(new StringReader(text[j]), termFreqMap, fieldName);
          }
        }
      }
      else
      {
        addTermFrequencies(termFreqMap, vector);
      }
    }
    return createQueue(termFreqMap);
  }
  
  private void addTermFrequencies(Map termFreqMap, TermFreqVector vector)
  {
    String[] terms = vector.getTerms();
    int[] freqs = vector.getTermFrequencies();
    for (int j = 0; j < terms.length; j++)
    {
      String term = terms[j];
      if (!isNoiseWord(term))
      {
        Int cnt = (Int)termFreqMap.get(term);
        if (cnt == null)
        {
          cnt = new Int();
          termFreqMap.put(term, cnt);
          cnt.x = freqs[j];
        }
        else
        {
          cnt.x += freqs[j];
        }
      }
    }
  }
  
  private void addTermFrequencies(Reader r, Map termFreqMap, String fieldName)
    throws IOException
  {
    TokenStream ts = analyzer.tokenStream(fieldName, r);
    int tokenCount = 0;
    
    TermAttribute termAtt = (TermAttribute)ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken())
    {
      String word = termAtt.term();
      tokenCount++;
      if (tokenCount > maxNumTokensParsed) {
        break;
      }
      if (!isNoiseWord(word))
      {
        Int cnt = (Int)termFreqMap.get(word);
        if (cnt == null) {
          termFreqMap.put(word, new Int());
        } else {
          cnt.x++;
        }
      }
    }
  }
  
  private boolean isNoiseWord(String term)
  {
    int len = term.length();
    if ((minWordLen > 0) && (len < minWordLen)) {
      return true;
    }
    if ((maxWordLen > 0) && (len > maxWordLen)) {
      return true;
    }
    if ((stopWords != null) && (stopWords.contains(term))) {
      return true;
    }
    return false;
  }
  
  public PriorityQueue retrieveTerms(Reader r)
    throws IOException
  {
    Map words = new HashMap();
    for (int i = 0; i < fieldNames.length; i++)
    {
      String fieldName = fieldNames[i];
      addTermFrequencies(r, words, fieldName);
    }
    return createQueue(words);
  }
  
  public String[] retrieveInterestingTerms(int docNum)
    throws IOException
  {
    ArrayList al = new ArrayList(maxQueryTerms);
    PriorityQueue pq = retrieveTerms(docNum);
    
    int lim = maxQueryTerms;
    Object cur;
    while (((cur = pq.pop()) != null) && (lim-- > 0))
    {
      Object[] ar = (Object[])cur;
      al.add(ar[0]);
    }
    String[] res = new String[al.size()];
    return (String[])al.toArray(res);
  }
  
  public String[] retrieveInterestingTerms(Reader r)
    throws IOException
  {
    ArrayList al = new ArrayList(maxQueryTerms);
    PriorityQueue pq = retrieveTerms(r);
    
    int lim = maxQueryTerms;
    Object cur;
    while (((cur = pq.pop()) != null) && (lim-- > 0))
    {
      Object[] ar = (Object[])cur;
      al.add(ar[0]);
    }
    String[] res = new String[al.size()];
    return (String[])al.toArray(res);
  }
  
  private static class FreqQ
    extends PriorityQueue
  {
    FreqQ(int s)
    {
      initialize(s);
    }
    
    protected boolean lessThan(Object a, Object b)
    {
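      // Inverted comparison so pop() returns the highest-scoring term first.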
      Object[] aa = (Object[])a;
      Object[] bb = (Object[])b;
      Float fa = (Float)aa[2];
      Float fb = (Float)bb[2];
      return fa.floatValue() > fb.floatValue();
    }
  }
  
  private static class Int
  {
    int x;
    
    Int()
    {
      x = 1;
    }
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.similar.MoreLikeThis
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
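
The following is not part of the decompiled JAR: a minimal usage sketch for MoreLikeThis, assuming an existing index and treating the index path, the "title"/"summary" field names and the seed document number as placeholders. It uses only the setters visible in this listing plus the standard Lucene 2.9 like(int) method.

import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class MoreLikeThisExample
{
  public static void main(String[] args) throws Exception
  {
    // Index path, field names and seed document number are hypothetical.
    IndexReader reader = IndexReader.open(FSDirectory.open(new File("/path/to/index")), true);
    MoreLikeThis mlt = new MoreLikeThis(reader);
    mlt.setFieldNames(new String[] { "title", "summary" });
    mlt.setAnalyzer(new StandardAnalyzer(Version.LUCENE_29));
    mlt.setMinTermFreq(1);
    mlt.setMaxQueryTerms(10);

    int docNum = 0;
    // Terms come back highest-scoring first, capped at maxQueryTerms.
    String[] interesting = mlt.retrieveInterestingTerms(docNum);
    for (int i = 0; i < interesting.length; i++) {
      System.out.println(interesting[i]);
    }

    // Build a "more like this" query for the same document and run it.
    Query like = mlt.like(docNum);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs hits = searcher.search(like, 10);
    System.out.println("matches: " + hits.totalHits);

    searcher.close();
    reader.close();
  }
}

As the retrieveTerms(int) code above shows, fields with term vectors are read directly, while other fields are re-analyzed from their stored values, so either indexing style works with this sketch.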
package org.apache.lucene.search.similar;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;

public class MoreLikeThisQuery
  extends Query
{
  private String likeText;
  private String[] moreLikeFields;
  private Analyzer analyzer;
  float percentTermsToMatch = 0.3F;
  int minTermFrequency = 1;
  int maxQueryTerms = 5;
  Set stopWords = null;
  int minDocFreq = -1;
  
  public MoreLikeThisQuery(String likeText, String[] moreLikeFields, Analyzer analyzer)
  {
    this.likeText = likeText;
    this.moreLikeFields = moreLikeFields;
    this.analyzer = analyzer;
  }
  
  public Query rewrite(IndexReader reader)
    throws IOException
  {
    MoreLikeThis mlt = new MoreLikeThis(reader);
    
    mlt.setFieldNames(moreLikeFields);
    mlt.setAnalyzer(analyzer);
    mlt.setMinTermFreq(minTermFrequency);
    if (minDocFreq >= 0) {
      mlt.setMinDocFreq(minDocFreq);
    }
    mlt.setMaxQueryTerms(maxQueryTerms);
    mlt.setStopWords(stopWords);
    BooleanQuery bq = (BooleanQuery)mlt.like(new ByteArrayInputStream(likeText.getBytes()));
    BooleanClause[] clauses = bq.getClauses();
    
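    // Require the configured fraction of the generated SHOULD clauses to match.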
    bq.setMinimumNumberShouldMatch((int)(clauses.length * percentTermsToMatch));
    return bq;
  }
  
  public String toString(String field)
  {
    return "like:" + likeText;
  }
  
  public float getPercentTermsToMatch()
  {
    return percentTermsToMatch;
  }
  
  public void setPercentTermsToMatch(float percentTermsToMatch)
  {
    this.percentTermsToMatch = percentTermsToMatch;
  }
  
  public Analyzer getAnalyzer()
  {
    return analyzer;
  }
  
  public void setAnalyzer(Analyzer analyzer)
  {
    this.analyzer = analyzer;
  }
  
  public String getLikeText()
  {
    return likeText;
  }
  
  public void setLikeText(String likeText)
  {
    this.likeText = likeText;
  }
  
  public int getMaxQueryTerms()
  {
    return maxQueryTerms;
  }
  
  public void setMaxQueryTerms(int maxQueryTerms)
  {
    this.maxQueryTerms = maxQueryTerms;
  }
  
  public int getMinTermFrequency()
  {
    return minTermFrequency;
  }
  
  public void setMinTermFrequency(int minTermFrequency)
  {
    this.minTermFrequency = minTermFrequency;
  }
  
  public String[] getMoreLikeFields()
  {
    return moreLikeFields;
  }
  
  public void setMoreLikeFields(String[] moreLikeFields)
  {
    this.moreLikeFields = moreLikeFields;
  }
  
  public Set getStopWords()
  {
    return stopWords;
  }
  
  public void setStopWords(Set stopWords)
  {
    this.stopWords = stopWords;
  }
  
  public int getMinDocFreq()
  {
    return minDocFreq;
  }
  
  public void setMinDocFreq(int minDocFreq)
  {
    this.minDocFreq = minDocFreq;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.similar.MoreLikeThisQuery
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
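
Again not part of the JAR: a hedged sketch of how MoreLikeThisQuery might be used. Because rewrite(IndexReader) performs the MoreLikeThis work, the query can simply be handed to an IndexSearcher; the index path, field name and like-text below are invented.

import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similar.MoreLikeThisQuery;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class MoreLikeThisQueryExample
{
  public static void main(String[] args) throws Exception
  {
    IndexReader reader = IndexReader.open(FSDirectory.open(new File("/path/to/index")), true);
    IndexSearcher searcher = new IndexSearcher(reader);

    // The like-text and the "summary" field name are invented for illustration.
    MoreLikeThisQuery mltq = new MoreLikeThisQuery(
        "full-text search engine library written in Java",
        new String[] { "summary" },
        new StandardAnalyzer(Version.LUCENE_29));
    mltq.setMinTermFrequency(1);
    mltq.setMaxQueryTerms(10);
    mltq.setPercentTermsToMatch(0.3F);

    // search() invokes rewrite(reader), which runs MoreLikeThis and builds the BooleanQuery.
    TopDocs hits = searcher.search(mltq, 10);
    System.out.println("matches: " + hits.totalHits);

    searcher.close();
    reader.close();
  }
}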
package org.apache.lucene.search.similar;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanQuery.TooManyClauses;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public final class SimilarityQueries
{
  public static Query formSimilarQuery(String body, Analyzer a, String field, Set stop)
    throws IOException
  {
    TokenStream ts = a.tokenStream(field, new StringReader(body));
    TermAttribute termAtt = (TermAttribute)ts.addAttribute(TermAttribute.class);
    
    BooleanQuery tmp = new BooleanQuery();
    Set already = new HashSet();
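    // Each distinct, non-stop term contributes at most one SHOULD clause.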
    while (ts.incrementToken())
    {
      String word = termAtt.term();
      if (((stop == null) || (!stop.contains(word))) && (already.add(word)))
      {
        TermQuery tq = new TermQuery(new Term(field, word));
        try
        {
          tmp.add(tq, BooleanClause.Occur.SHOULD);
        }
        catch (BooleanQuery.TooManyClauses too)
        {
          break;
        }
      }
    }
    return tmp;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.similar.SimilarityQueries
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
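
A last illustrative sketch (not from the JAR): SimilarityQueries.formSimilarQuery turns free text into a BooleanQuery of SHOULD-clause TermQuerys, so it can be exercised without an index. The sample text, field name and stop-word set are made up.

import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.similar.SimilarityQueries;
import org.apache.lucene.util.Version;

public class SimilarityQueriesExample
{
  public static void main(String[] args) throws Exception
  {
    Set stop = new HashSet();
    stop.add("the");
    stop.add("and");

    // The body text and the "contents" field name are hypothetical.
    Query q = SimilarityQueries.formSimilarQuery(
        "the quick brown fox jumps over the lazy dog",
        new StandardAnalyzer(Version.LUCENE_29),
        "contents",
        stop);
    System.out.println(q);
  }
}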