package org.apache.lucene.analysis.kr;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.kr.morph.AnalysisOutput;
import org.apache.lucene.analysis.kr.morph.CompoundEntry;
import org.apache.lucene.analysis.kr.morph.CompoundNounAnalyzer;
import org.apache.lucene.analysis.kr.morph.MorphAnalyzer;
import org.apache.lucene.analysis.kr.morph.MorphException;
import org.apache.lucene.analysis.kr.morph.WordEntry;
import org.apache.lucene.analysis.kr.morph.WordSpaceAnalyzer;
import org.apache.lucene.analysis.kr.utils.DictionaryUtil;
import org.apache.lucene.analysis.kr.utils.HanjaUtils;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

/* loaded from: input_file:WEB-INF/lib/koreananalyzer.4x-20130409.jar:org/apache/lucene/analysis/kr/KoreanFilter.class */
/**
 * Token filter that expands each incoming token into zero or more index terms.
 *
 * <p>For every token pulled from the wrapped stream the filter dispatches on the token type:
 * tokens typed {@code KoreanTokenizer.TOKEN_TYPES[9]} go through morphological analysis
 * ({@link #analysisKorean}), tokens typed {@code KoreanTokenizer.TOKEN_TYPES[10]} are converted
 * from Hanja to Hangul ({@link #analysisChinese}), and everything else is handled like
 * Lucene's classic filter ({@link #analysisETC}). The resulting terms are buffered in a queue
 * and emitted one per {@link #incrementToken()} call.
 *
 * <p>NOTE(review): Lucene's {@code TokenStream} contract expects either the class or
 * {@code incrementToken()} to be {@code final}. Both are left non-final here so existing
 * subclasses keep compiling — confirm whether any exist and tighten if not.
 */
public class KoreanFilter extends TokenFilter {
    private static final String APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[1];
    private static final String ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[2];

    /** Token type routed to {@link #analysisKorean}. */
    private static final String KOREAN_TYPE = KoreanTokenizer.TOKEN_TYPES[9];

    /** Token type routed to {@link #analysisChinese}. */
    private static final String CHINESE_TYPE = KoreanTokenizer.TOKEN_TYPES[10];

    /**
     * Analysis score at or above which the analyzer's compound-noun decomposition is used;
     * below it the filter falls back to word spacing / bigrams.
     * NOTE(review): presumably mirrors a score constant on AnalysisOutput — confirm.
     */
    private static final int MIN_COMPOUND_SCORE = 70;

    /** Part-of-speech tag whose stems are not indexed (presumably "verb" — confirm). */
    private static final char EXCLUDED_POS = 'V';

    /** Terms produced from the current input token that have not been emitted yet. */
    private final LinkedList<IndexWord> morphQueue = new LinkedList<>();
    private final MorphAnalyzer morph = new MorphAnalyzer();
    private final WordSpaceAnalyzer wsAnal = new WordSpaceAnalyzer();
    private final CompoundNounAnalyzer cnAnalyzer = new CompoundNounAnalyzer();

    private boolean bigrammable = true;
    private boolean hasOrigin = true;
    private boolean originCNoun = true;
    private boolean exactMatch = false;

    /** Start offset of the current input token; queued term offsets are relative to it. */
    private int tokStart;

    private final CharTermAttribute termAtt;
    private final PositionIncrementAttribute posIncrAtt;
    // Never read here, but registering it keeps the attribute available on the stream.
    private final PositionLengthAttribute posLenAtt;
    private final TypeAttribute typeAtt;
    private final OffsetAttribute offsetAtt;

    /**
     * Creates a filter with the defaults: bigrams on, original term kept, compound-noun stems
     * kept, exact match off.
     *
     * @param tokenStream the stream to wrap
     */
    public KoreanFilter(TokenStream tokenStream) {
        super(tokenStream);
        this.termAtt = addAttribute(CharTermAttribute.class);
        this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
        this.posLenAtt = addAttribute(PositionLengthAttribute.class);
        this.typeAtt = addAttribute(TypeAttribute.class);
        this.offsetAtt = addAttribute(OffsetAttribute.class);
        this.cnAnalyzer.setExactMach(false);
    }

    /**
     * @param tokenStream the stream to wrap
     * @param bigrammable whether low-scoring stems are additionally split into bigrams
     */
    public KoreanFilter(TokenStream tokenStream, boolean bigrammable) {
        this(tokenStream);
        this.bigrammable = bigrammable;
    }

    /**
     * @param tokenStream the stream to wrap
     * @param bigrammable whether low-scoring stems are additionally split into bigrams
     * @param hasOrigin whether the unanalyzed input term is emitted as well
     */
    public KoreanFilter(TokenStream tokenStream, boolean bigrammable, boolean hasOrigin) {
        this(tokenStream, bigrammable);
        this.hasOrigin = hasOrigin;
    }

    /**
     * @param tokenStream the stream to wrap
     * @param bigrammable whether low-scoring stems are additionally split into bigrams
     * @param hasOrigin whether the unanalyzed input term is emitted as well
     * @param exactMatch when {@code true}, only stems are emitted (no compound parts, no bigrams)
     */
    public KoreanFilter(TokenStream tokenStream, boolean bigrammable, boolean hasOrigin,
            boolean exactMatch) {
        this(tokenStream, bigrammable, hasOrigin);
        this.exactMatch = exactMatch;
    }

    /**
     * @param tokenStream the stream to wrap
     * @param bigrammable whether low-scoring stems are additionally split into bigrams
     * @param hasOrigin whether the unanalyzed input term is emitted as well
     * @param exactMatch when {@code true}, only stems are emitted (no compound parts, no bigrams)
     * @param originCNoun when {@code false}, stems that have a compound-noun decomposition are
     *     not emitted themselves
     */
    public KoreanFilter(TokenStream tokenStream, boolean bigrammable, boolean hasOrigin,
            boolean exactMatch, boolean originCNoun) {
        this(tokenStream, bigrammable, hasOrigin, exactMatch);
        this.originCNoun = originCNoun;
    }

    /**
     * Emits the next queued term, pulling and analyzing further input tokens as needed.
     *
     * @return {@code true} if a term was written to the attributes, {@code false} at end of input
     * @throws IOException if the wrapped stream fails or morphological analysis throws
     */
    @Override // org.apache.lucene.analysis.TokenStream
    public boolean incrementToken() throws IOException {
        if (!this.morphQueue.isEmpty()) {
            setTermBufferByQueue(false);
            return true;
        }
        // Loop instead of recursing: a long run of tokens that yield no terms must not
        // grow the call stack.
        while (this.input.incrementToken()) {
            String term = new String(this.termAtt.buffer(), 0, this.termAtt.length());
            String type = this.typeAtt.type();
            this.tokStart = this.offsetAtt.startOffset();
            try {
                if (KOREAN_TYPE.equals(type)) {
                    analysisKorean(term);
                } else if (CHINESE_TYPE.equals(type)) {
                    analysisChinese(term);
                } else {
                    analysisETC(term, type);
                }
            } catch (MorphException e) {
                // Keep the original exception as the cause so the stack trace is not lost.
                throw new IOException("Korean Filter MorphException\n" + e.getMessage(), e);
            }
            if (!this.morphQueue.isEmpty()) {
                setTermBufferByQueue(true);
                return true;
            }
        }
        return false;
    }

    /**
     * Resets the wrapped stream and discards terms still queued from a previous, possibly
     * abandoned, use of this filter so they cannot leak into the next stream.
     *
     * @throws IOException if the wrapped stream fails to reset
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        this.morphQueue.clear();
        this.tokStart = 0;
    }

    /**
     * Pops the head of the queue and writes it to the term/offset/position attributes.
     *
     * @param isFirst {@code true} when this is the first term emitted for the current input token
     */
    private void setTermBufferByQueue(boolean isFirst) {
        clearAttributes();
        IndexWord next = this.morphQueue.removeFirst();
        String word = next.getWord();
        int offset = next.getOffset();
        this.termAtt.copyBuffer(word.toCharArray(), 0, word.length());
        int start = this.tokStart + offset;
        this.offsetAtt.setOffset(start, start + word.length());
        // Follow-up terms that start where the input token starts are stacked on the same
        // position; everything else keeps the default increment set by clearAttributes().
        if (!isFirst && offset == 0) {
            this.posIncrAtt.setPositionIncrement(0);
        }
    }

    /**
     * Runs morphological analysis on a Korean term and queues every resulting keyword longer
     * than one character, in insertion order and without duplicates.
     */
    private void analysisKorean(String input) throws MorphException {
        List<AnalysisOutput> outputs = this.morph.analyze(input);
        if (outputs.isEmpty()) {
            return;
        }
        Map<String, IndexWord> keywords = new LinkedHashMap<>();
        if (this.hasOrigin) {
            keywords.put(input, new IndexWord(input, 0));
        }
        if (outputs.get(0).getScore() >= MIN_COMPOUND_SCORE) {
            extractKeyword(outputs, keywords);
        } else {
            try {
                // Low score: try to split the input into separate words and analyze each one.
                List<AnalysisOutput> segments = this.wsAnal.analyze(input);
                List<AnalysisOutput> segmentOutputs = new ArrayList<>();
                if (segments.size() > 1) {
                    for (AnalysisOutput segment : segments) {
                        if (this.hasOrigin) {
                            keywords.put(segment.getSource(), new IndexWord(segment.getSource(), 0));
                        }
                        segmentOutputs.addAll(this.morph.analyze(segment.getSource()));
                    }
                } else {
                    segmentOutputs.addAll(segments);
                }
                extractKeyword(segmentOutputs, keywords);
            } catch (Exception e) {
                // Deliberate best effort: if word spacing fails for any reason, fall back to
                // the plain morphological result instead of failing the whole token.
                extractKeyword(outputs, keywords);
            }
        }
        for (Map.Entry<String, IndexWord> entry : keywords.entrySet()) {
            if (entry.getKey().length() > 1) {
                this.morphQueue.add(entry.getValue());
            }
        }
    }

    /**
     * Adds the stems of {@code outputs}, and unless exact matching is on their compound-noun
     * parts or bigrams, to {@code map} keyed by term text.
     */
    private void extractKeyword(List<AnalysisOutput> outputs, Map<String, IndexWord> map)
            throws MorphException {
        for (AnalysisOutput output : outputs) {
            if (output.getPos() != EXCLUDED_POS
                    && (this.originCNoun || output.getCNounList().isEmpty())) {
                map.put(output.getStem(), new IndexWord(output.getStem(), 0));
            }
            if (this.exactMatch) {
                continue;
            }
            if (output.getScore() >= MIN_COMPOUND_SCORE) {
                addCompoundNouns(output.getCNounList(), map);
            } else if (this.bigrammable) {
                addBiagramToMap(output.getStem(), map);
            }
        }
    }

    /**
     * Adds the parts of a compound noun to {@code map}. Parts longer than one character are
     * added as-is; single-character parts are joined with a neighbour.
     */
    private void addCompoundNouns(List<CompoundEntry> parts, Map<String, IndexWord> map) {
        int start = 0;
        for (int i = 0; i < parts.size(); i++) {
            String word = parts.get(i).getWord();
            if (word.length() > 1) {
                map.put(word, new IndexWord(word, start));
            } else if (word.length() == 1) {
                if (i == 0) {
                    // Guard: a one-element list has no following part to join with
                    // (previously an IndexOutOfBoundsException).
                    if (parts.size() > 1) {
                        // NOTE(review): the key is the joined term but the stored IndexWord
                        // carries only the single character; the i > 1 branch below stores the
                        // joined term. Kept as-is to avoid changing indexed terms — confirm.
                        map.put(word + parts.get(1).getWord(), new IndexWord(word, start));
                    }
                } else if (i > 1) {
                    String previous = parts.get(i - 1).getWord();
                    String joined = previous + word;
                    map.put(joined, new IndexWord(joined, start - previous.length()));
                }
            }
            start += word.length();
        }
    }

    /**
     * Splits {@code input} into overlapping two-character terms; a run of characters accepted
     * by {@link #isAlphaNumChar(int)} is kept together as one term instead.
     */
    private void addBiagramToMap(String input, Map<String, IndexWord> map) {
        int length = input.length();
        int pos = 0;
        while (pos < length - 1) {
            if (isAlphaNumChar(input.charAt(pos))) {
                String run = findAlphaNumeric(input.substring(pos));
                map.put(run, new IndexWord(run, pos));
                pos += run.length();
            } else {
                // pos < length - 1 guarantees pos + 2 <= length.
                String bigram = input.substring(pos, pos + 2);
                map.put(bigram, new IndexWord(bigram, pos));
                pos++;
            }
        }
    }

    /** Returns the longest prefix of {@code text} made of {@link #isAlphaNumChar(int)} characters. */
    private String findAlphaNumeric(String text) {
        int end = 0;
        while (end < text.length() && isAlphaNumChar(text.charAt(end))) {
            end++;
        }
        return text.substring(0, end);
    }

    /**
     * Queues a Hanja term, its Hangul transliteration, and the compound-noun parts of that
     * transliteration together with the matching Hanja slices.
     *
     * <p>Only the first reading returned by {@code HanjaUtils.convertToHangul} is used for each
     * character, so there is exactly one transliteration per term.
     */
    private void analysisChinese(String term) throws MorphException {
        this.morphQueue.add(new IndexWord(term, 0));
        if (term.length() < 2) {
            return;
        }
        StringBuilder hangul = new StringBuilder(term.length());
        for (int i = 0; i < term.length(); i++) {
            char[] readings = HanjaUtils.convertToHangul(term.charAt(i));
            if (readings != null) {
                hangul.append(readings[0]);
            }
        }
        String reading = hangul.toString();
        this.morphQueue.add(new IndexWord(reading, 0));

        Set<String> seen = new HashSet<>();
        int start = 0;
        int end = 0;
        for (CompoundEntry entry : confirmCNoun(reading)) {
            String word = entry.getWord();
            end += word.length();
            if (!seen.add(word)) {
                continue;
            }
            // Hanja slice that corresponds to this Hangul part.
            this.morphQueue.add(new IndexWord(term.substring(start, end), start));
            if (word.length() >= 2) {
                this.morphQueue.add(new IndexWord(word, start));
                // NOTE(review): start only advances for unseen parts of two or more
                // characters, so after a repeated or one-character part the next slice begins
                // too early. Kept as-is to avoid changing indexed terms — confirm intent.
                start = end;
            }
        }
    }

    /**
     * Returns the compound decomposition of {@code input}: the dictionary's stored compounds
     * when the entry's feature 0 is {@code '2'}, otherwise the compound-noun analyzer's result.
     */
    private List<CompoundEntry> confirmCNoun(String input) throws MorphException {
        WordEntry entry = DictionaryUtil.getCNoun(input);
        if (entry != null && entry.getFeature(0) == '2') {
            return entry.getCompounds();
        }
        return this.cnAnalyzer.analyze(input);
    }

    /**
     * Queues a non-Korean, non-Hanja term: a trailing {@code 's}/{@code 'S} is stripped from
     * apostrophe-typed tokens, dots are removed from acronym-typed tokens, and any other term
     * is queued unchanged.
     */
    private void analysisETC(String term, String type) throws MorphException {
        int length = term.length();
        if (APOSTROPHE_TYPE.equals(type)
                && length >= 2
                && term.charAt(length - 2) == '\''
                && (term.charAt(length - 1) == 's' || term.charAt(length - 1) == 'S')) {
            this.morphQueue.add(new IndexWord(term.substring(0, length - 2), 0));
        } else if (ACRONYM_TYPE.equals(type)) {
            // Build the dot-less form from the term itself. The previous code compacted the
            // attribute buffer but then took a prefix of the untouched string, turning
            // "U.S.A." into "U.S" instead of "USA".
            this.morphQueue.add(new IndexWord(term.replace(".", ""), 0));
        } else {
            this.morphQueue.add(new IndexWord(term, 0));
        }
    }

    /**
     * Returns whether {@code c} is in {@code '0'..'9'} or in the code range 65..122.
     *
     * <p>NOTE(review): 65..122 spans {@code 'A'..'z'} and therefore also accepts the six
     * punctuation characters between {@code 'Z'} and {@code 'a'}. Kept as-is because it
     * affects how bigrams are cut — confirm whether that is intended.
     */
    private boolean isAlphaNumChar(int c) {
        return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'z');
    }

    /** Sets whether the unanalyzed input term is emitted alongside the analyzed keywords. */
    public void setHasOrigin(boolean hasOrigin) {
        this.hasOrigin = hasOrigin;
    }

    /** Sets whether only stems are emitted (no compound parts, no bigrams). */
    public void setExactMatch(boolean exactMatch) {
        this.exactMatch = exactMatch;
    }
}
