- java.lang.Object
-
- org.apache.lucene.analysis.hunspell.Stemmer
-
final class Stemmer extends java.lang.Object
Stemmer uses the affix rules declared in the Dictionary to generate one or more stems for a word. It conforms to the original hunspell algorithm, including recursive suffix stripping.
-
-
Nested Class Summary
Nested Classes Modifier and Type Class Description (package private) static interface
Stemmer.CaseVariationProcessor
(package private) static interface
Stemmer.RootProcessor
(package private) static class
Stemmer.StemCandidateProcessor
-
Field Summary
Fields Modifier and Type Field Description private Dictionary
dictionary
private int
formStep
-
Constructor Summary
Constructors Constructor Description Stemmer(Dictionary dictionary)
Constructs a new Stemmer which will use the provided Dictionary to create its stems.
-
Method Summary
All Methods Static Methods Instance Methods Concrete Methods Modifier and Type Method Description (package private) void
analyze(char[] word, int length, Stemmer.RootProcessor processor)
private boolean
applyAffix(char[] word, int offset, int length, int affix, boolean prefix, int outerPrefix, int innerPrefix, int outerSuffix, Stemmer.StemCandidateProcessor processor)
Applies the affix rule to the given word, producing a list of stems if any are found.
private static char[]
capitalizeAfterApostrophe(char[] word, int length)
private char[]
caseFoldLower(char[] word, int length)
folds lowercase variant of word (title cased) to lowerBuffer
private char[]
caseFoldTitle(char[] word, int length)
folds titlecase variant of word to titleBuffer
(package private) WordCase
caseOf(char[] word, int length)
returns EXACT_CASE, TITLE_CASE, or UPPER_CASE type for the word
(package private) boolean
doStem(char[] word, int offset, int length, WordContext context, Stemmer.RootProcessor processor)
private boolean
isAffixCompatible(int affix, boolean isPrefix, int outerPrefix, int outerSuffix, WordContext context)
private boolean
isRootCompatibleWithContext(WordContext context, int lastAffix, int entryId)
private int
morphDataId(IntsRef forms, int i)
private boolean
needsAnotherAffix(int affix, int previousAffix, boolean isSuffix, int prefixId)
private CharsRef
newStem(CharsRef stem, int morphDataId)
(package private) boolean
removeAffixes(char[] word, int offset, int length, boolean doPrefix, int outerPrefix, int innerPrefix, int outerSuffix, Stemmer.StemCandidateProcessor processor)
Generates a list of stems for the provided word.
java.util.List<CharsRef>
stem(char[] word, int length)
Find the stem(s) of the provided word.
java.util.List<CharsRef>
stem(java.lang.String word)
Find the stem(s) of the provided word.
private java.lang.String
stemException(int morphDataId)
private char[]
stripAffix(char[] word, int offset, int length, int affixLen, int affix, boolean isPrefix)
java.util.List<CharsRef>
uniqueStems(char[] word, int length)
Find the unique stem(s) of the provided word.
(package private) boolean
varyCase(char[] word, int length, WordCase wordCase, Stemmer.CaseVariationProcessor processor)
private boolean
varySharpS(char[] word, int length, Stemmer.CaseVariationProcessor processor)
-
-
-
Field Detail
-
dictionary
private final Dictionary dictionary
-
formStep
private final int formStep
-
-
Constructor Detail
-
Stemmer
public Stemmer(Dictionary dictionary)
Constructs a new Stemmer which will use the provided Dictionary to create its stems.
- Parameters:
dictionary
- Dictionary that will be used to create the stems
-
-
Method Detail
-
stem
public java.util.List<CharsRef> stem(java.lang.String word)
Find the stem(s) of the provided word.
- Parameters:
word
- Word to find the stems for
- Returns:
- List of stems for the word
-
stem
public java.util.List<CharsRef> stem(char[] word, int length)
Find the stem(s) of the provided word.
- Parameters:
word
- Word to find the stems for
- Returns:
- List of stems for the word
-
analyze
void analyze(char[] word, int length, Stemmer.RootProcessor processor)
-
varyCase
boolean varyCase(char[] word, int length, WordCase wordCase, Stemmer.CaseVariationProcessor processor)
-
caseOf
WordCase caseOf(char[] word, int length)
returns EXACT_CASE, TITLE_CASE, or UPPER_CASE type for the word
-
caseFoldTitle
private char[] caseFoldTitle(char[] word, int length)
folds titlecase variant of word to titleBuffer
-
caseFoldLower
private char[] caseFoldLower(char[] word, int length)
folds lowercase variant of word (title cased) to lowerBuffer
-
capitalizeAfterApostrophe
private static char[] capitalizeAfterApostrophe(char[] word, int length)
-
varySharpS
private boolean varySharpS(char[] word, int length, Stemmer.CaseVariationProcessor processor)
-
doStem
boolean doStem(char[] word, int offset, int length, WordContext context, Stemmer.RootProcessor processor)
-
uniqueStems
public java.util.List<CharsRef> uniqueStems(char[] word, int length)
Find the unique stem(s) of the provided word.
- Parameters:
word
- Word to find the stems for
- Returns:
- List of stems for the word
-
stemException
private java.lang.String stemException(int morphDataId)
-
removeAffixes
boolean removeAffixes(char[] word, int offset, int length, boolean doPrefix, int outerPrefix, int innerPrefix, int outerSuffix, Stemmer.StemCandidateProcessor processor)
Generates a list of stems for the provided word. It's called recursively when applying affixes one by one, setting (inner/outer)(Suffix/Prefix)
parameters to non-negative values as that happens.
- Parameters:
word
- Word to generate the stems for
doPrefix
- true if we should remove prefixes
- Returns:
- whether the processing should be continued
-
stripAffix
private char[] stripAffix(char[] word, int offset, int length, int affixLen, int affix, boolean isPrefix)
- Returns:
- null if the affix conditions aren't met; a reference to the same char[] if the affix has no strip data and can thus be simply removed; or a new char[] containing the word after affix removal
-
isAffixCompatible
private boolean isAffixCompatible(int affix, boolean isPrefix, int outerPrefix, int outerSuffix, WordContext context)
-
applyAffix
private boolean applyAffix(char[] word, int offset, int length, int affix, boolean prefix, int outerPrefix, int innerPrefix, int outerSuffix, Stemmer.StemCandidateProcessor processor)
Applies the affix rule to the given word, producing a list of stems if any are found. Non-negative (inner/outer)(Suffix/Prefix)
parameters indicate the already applied affixes.
- Parameters:
word
- Char array containing the word with the affix removed and the strip added
offset
- where the word actually starts in the array
length
- the length of the stripped word
affix
- the id of the affix in Dictionary.affixData
prefix
- true if we are removing a prefix (false if it's a suffix)
- Returns:
- whether the processing should be continued
-
isRootCompatibleWithContext
private boolean isRootCompatibleWithContext(WordContext context, int lastAffix, int entryId)
-
morphDataId
private int morphDataId(IntsRef forms, int i)
-
needsAnotherAffix
private boolean needsAnotherAffix(int affix, int previousAffix, boolean isSuffix, int prefixId)
-
-