Package org.apache.lucene.analysis.en
Class KStemmer
- java.lang.Object
-
- org.apache.lucene.analysis.en.KStemmer
-
public class KStemmer extends java.lang.Object
This class implements the Kstem algorithm.
-
-
Nested Class Summary
Nested Classes
Modifier and Type | Class | Description
(package private) static class | KStemmer.DictEntry |
-
Field Summary
Fields
Modifier and Type | Field | Description
private static char[] | ation |
private static java.lang.String[][] | countryNationality |
private static CharArrayMap<KStemmer.DictEntry> | dict_ht |
private static java.lang.String[][] | directConflations |
private static java.lang.String[] | exceptionWords |
private static char[] | ication |
private static char[] | ition |
private static char[] | ization |
private int | j |
private int | k |
(package private) KStemmer.DictEntry | matchedEntry |
private static int | MaxWordLen |
private static java.lang.String[] | properNouns |
(package private) java.lang.String | result |
private static java.lang.String[] | supplementDict |
private OpenStringBuilder | word | caching off private int maxCacheSize; private CharArrayMap cache = null; private static final String SAME = "SAME"; // use if stemmed form is the same
-
Constructor Summary
Constructors Constructor Description KStemmer()
-
Method Summary
All Methods | Static Methods | Instance Methods | Concrete Methods
Modifier and Type | Method | Description
private void | alEndings() |
(package private) java.lang.CharSequence | asCharSequence() |
private void | aspect() |
(package private) java.lang.String | asString() | Returns the result of the stem (assuming the word was changed) as a String.
private void | bleEndings() |
private boolean | doubleC(int i) |
private boolean | endsIn(char[] s) |
private boolean | endsIn(char a, char b) |
private boolean | endsIn(char a, char b, char c) |
private boolean | endsIn(char a, char b, char c, char d) |
private void | erAndOrEndings() |
private char | finalChar() |
(package private) char[] | getChars() |
(package private) int | getLength() |
(package private) java.lang.String | getString() |
private void | icEndings() |
private static CharArrayMap<KStemmer.DictEntry> | initializeDictHash() |
private void | ionEndings() |
private boolean | isAlpha(char ch) |
private boolean | isCons(int index) |
private void | ismEndings() |
private boolean | isVowel(int index) |
private void | ityEndings() |
private void | iveEndings() |
private void | izeEndings() |
private boolean | lookup() |
private void | lyEndings() |
private boolean | matched() |
private void | mentEndings() |
private void | nceEndings() |
private void | ncyEndings() |
private void | nessEndings() |
private void | pastTense() |
private char | penultChar() |
private void | plural() |
private void | setSuff(java.lang.String s, int len) |
private void | setSuffix(java.lang.String s) |
(package private) boolean | stem(char[] term, int len) | Stems the text in the token.
(package private) java.lang.String | stem(java.lang.String term) |
private int | stemLength() |
private boolean | vowelInStem() |
private KStemmer.DictEntry | wordInDict() |
-
-
-
Field Detail
-
MaxWordLen
private static final int MaxWordLen
- See Also:
- Constant Field Values
-
exceptionWords
private static final java.lang.String[] exceptionWords
-
directConflations
private static final java.lang.String[][] directConflations
-
countryNationality
private static final java.lang.String[][] countryNationality
-
supplementDict
private static final java.lang.String[] supplementDict
-
properNouns
private static final java.lang.String[] properNouns
-
dict_ht
private static final CharArrayMap<KStemmer.DictEntry> dict_ht
-
word
private final OpenStringBuilder word
caching off private int maxCacheSize; private CharArrayMap cache = null; private static final String SAME = "SAME"; // use if stemmed form is the same
-
j
private int j
-
k
private int k
-
matchedEntry
KStemmer.DictEntry matchedEntry
-
ization
private static char[] ization
-
ition
private static char[] ition
-
ation
private static char[] ation
-
ication
private static char[] ication
-
result
java.lang.String result
-
-
Method Detail
-
finalChar
private char finalChar()
-
penultChar
private char penultChar()
-
isVowel
private boolean isVowel(int index)
-
isCons
private boolean isCons(int index)
-
initializeDictHash
private static CharArrayMap<KStemmer.DictEntry> initializeDictHash()
-
isAlpha
private boolean isAlpha(char ch)
-
stemLength
private int stemLength()
-
endsIn
private boolean endsIn(char[] s)
-
endsIn
private boolean endsIn(char a, char b)
-
endsIn
private boolean endsIn(char a, char b, char c)
-
endsIn
private boolean endsIn(char a, char b, char c, char d)
-
wordInDict
private KStemmer.DictEntry wordInDict()
-
plural
private void plural()
-
setSuffix
private void setSuffix(java.lang.String s)
-
setSuff
private void setSuff(java.lang.String s, int len)
-
lookup
private boolean lookup()
-
pastTense
private void pastTense()
-
doubleC
private boolean doubleC(int i)
-
vowelInStem
private boolean vowelInStem()
-
aspect
private void aspect()
-
ityEndings
private void ityEndings()
-
nceEndings
private void nceEndings()
-
nessEndings
private void nessEndings()
-
ismEndings
private void ismEndings()
-
mentEndings
private void mentEndings()
-
izeEndings
private void izeEndings()
-
ncyEndings
private void ncyEndings()
-
bleEndings
private void bleEndings()
-
icEndings
private void icEndings()
-
ionEndings
private void ionEndings()
-
erAndOrEndings
private void erAndOrEndings()
-
lyEndings
private void lyEndings()
-
alEndings
private void alEndings()
-
iveEndings
private void iveEndings()
-
stem
java.lang.String stem(java.lang.String term)
-
asString
java.lang.String asString()
Returns the result of the stem (assuming the word was changed) as a String.
-
asCharSequence
java.lang.CharSequence asCharSequence()
-
getString
java.lang.String getString()
-
getChars
char[] getChars()
-
getLength
int getLength()
-
matched
private boolean matched()
-
stem
boolean stem(char[] term, int len)
Stems the text in the token. Returns true if changed.
-
-