- java.lang.Object
-
- org.apache.lucene.analysis.en.KStemmer
-
public class KStemmer extends java.lang.Object
This class implements the Kstem algorithm.
-
-
Nested Class Summary
Nested Classes Modifier and Type Class Description (package private) static class
KStemmer.DictEntry
-
Field Summary
Fields Modifier and Type Field Description private static char[]
ation
private static java.lang.String[][]
countryNationality
private static CharArrayMap<KStemmer.DictEntry>
dict_ht
private static java.lang.String[][]
directConflations
private static java.lang.String[]
exceptionWords
private static char[]
ication
private static char[]
ition
private static char[]
ization
private int
j
private int
k
(package private) KStemmer.DictEntry
matchedEntry
private static int
MaxWordLen
private static java.lang.String[]
properNouns
(package private) java.lang.String
result
private static java.lang.String[]
supplementDict
private OpenStringBuilder
word
caching off private int maxCacheSize; private CharArrayMap<String> cache = null; private static final String SAME = "SAME"; // use if stemmed form is the same
-
Constructor Summary
Constructors Constructor Description KStemmer()
-
Method Summary
All Methods Static Methods Instance Methods Concrete Methods Modifier and Type Method Description private void
alEndings()
(package private) java.lang.CharSequence
asCharSequence()
private void
aspect()
(package private) java.lang.String
asString()
Returns the result of the stem (assuming the word was changed) as a String.
private void
bleEndings()
private boolean
doubleC(int i)
private boolean
endsIn(char[] s)
private boolean
endsIn(char a, char b)
private boolean
endsIn(char a, char b, char c)
private boolean
endsIn(char a, char b, char c, char d)
private void
erAndOrEndings()
(package private) char[]
getChars()
(package private) int
getLength()
(package private) java.lang.String
getString()
private void
icEndings()
private static CharArrayMap<KStemmer.DictEntry>
initializeDictHash()
private void
ionEndings()
private boolean
isAlpha(char ch)
private boolean
isCons(int index)
private void
ismEndings()
private boolean
isVowel(int index)
private void
ityEndings()
private void
iveEndings()
private void
izeEndings()
private boolean
lookup()
private void
lyEndings()
private boolean
matched()
private void
mentEndings()
private void
nceEndings()
private void
ncyEndings()
private void
nessEndings()
private void
pastTense()
private char
penultChar()
private void
plural()
private void
setSuff(java.lang.String s, int len)
private void
setSuffix(java.lang.String s)
(package private) boolean
stem(char[] term, int len)
Stems the text in the token.
(package private) java.lang.String
stem(java.lang.String term)
private int
stemLength()
private boolean
vowelInStem()
private KStemmer.DictEntry
wordInDict()
-
-
-
Field Detail
-
MaxWordLen
private static final int MaxWordLen
- See Also:
- Constant Field Values
-
exceptionWords
private static final java.lang.String[] exceptionWords
-
directConflations
private static final java.lang.String[][] directConflations
-
countryNationality
private static final java.lang.String[][] countryNationality
-
supplementDict
private static final java.lang.String[] supplementDict
-
properNouns
private static final java.lang.String[] properNouns
-
dict_ht
private static final CharArrayMap<KStemmer.DictEntry> dict_ht
-
word
private final OpenStringBuilder word
caching off private int maxCacheSize; private CharArrayMap<String> cache = null; private static final String SAME = "SAME"; // use if stemmed form is the same
-
j
private int j
-
k
private int k
-
matchedEntry
KStemmer.DictEntry matchedEntry
-
ization
private static char[] ization
-
ition
private static char[] ition
-
ation
private static char[] ation
-
ication
private static char[] ication
-
result
java.lang.String result
-
-
Method Detail
-
penultChar
private char penultChar()
-
isVowel
private boolean isVowel(int index)
-
isCons
private boolean isCons(int index)
-
initializeDictHash
private static CharArrayMap<KStemmer.DictEntry> initializeDictHash()
-
isAlpha
private boolean isAlpha(char ch)
-
stemLength
private int stemLength()
-
endsIn
private boolean endsIn(char[] s)
-
endsIn
private boolean endsIn(char a, char b)
-
endsIn
private boolean endsIn(char a, char b, char c)
-
endsIn
private boolean endsIn(char a, char b, char c, char d)
-
wordInDict
private KStemmer.DictEntry wordInDict()
-
plural
private void plural()
-
setSuffix
private void setSuffix(java.lang.String s)
-
setSuff
private void setSuff(java.lang.String s, int len)
-
lookup
private boolean lookup()
-
pastTense
private void pastTense()
-
doubleC
private boolean doubleC(int i)
-
vowelInStem
private boolean vowelInStem()
-
aspect
private void aspect()
-
ityEndings
private void ityEndings()
-
nceEndings
private void nceEndings()
-
nessEndings
private void nessEndings()
-
ismEndings
private void ismEndings()
-
mentEndings
private void mentEndings()
-
izeEndings
private void izeEndings()
-
ncyEndings
private void ncyEndings()
-
bleEndings
private void bleEndings()
-
icEndings
private void icEndings()
-
ionEndings
private void ionEndings()
-
erAndOrEndings
private void erAndOrEndings()
-
lyEndings
private void lyEndings()
-
alEndings
private void alEndings()
-
iveEndings
private void iveEndings()
-
stem
java.lang.String stem(java.lang.String term)
-
asString
java.lang.String asString()
Returns the result of the stem (assuming the word was changed) as a String.
-
asCharSequence
java.lang.CharSequence asCharSequence()
-
getString
java.lang.String getString()
-
getChars
char[] getChars()
-
getLength
int getLength()
-
matched
private boolean matched()
-
stem
boolean stem(char[] term, int len)
Stems the text in the token. Returns true if changed.
-
-