package org.lionsoul.jcseg.tokenizer;

import java.io.IOException;
import java.io.Reader;
import java.util.LinkedList;
import org.lionsoul.jcseg.tokenizer.core.ADictionary;
import org.lionsoul.jcseg.tokenizer.core.Entity;
import org.lionsoul.jcseg.tokenizer.core.IChunk;
import org.lionsoul.jcseg.tokenizer.core.IWord;
import org.lionsoul.jcseg.tokenizer.core.JcsegTaskConfig;
import org.lionsoul.jcseg.util.ArrayUtil;
import org.lionsoul.jcseg.util.IStringBuffer;
import org.lionsoul.jcseg.util.NumericUtil;
import org.lionsoul.jcseg.util.StringUtil;
import org.lionsoul.jcseg.util.TimeUtil;

/* loaded from: input_file:org/lionsoul/jcseg/tokenizer/NLPSeg.class */
public class NLPSeg extends ComplexSeg {
    private final LinkedList<IWord> eWordPool;
    private final IStringBuffer buffer;

    /**
     * Creates an NLP-oriented segmenter on top of {@link ComplexSeg}.
     *
     * <p>Note that the supplied configuration object is modified in place:
     * {@code APPEND_CJK_PINYIN} and {@code APPEND_CJK_SYN} are switched off and
     * {@code MAX_LATIN_LENGTH} is set to 128.
     *
     * @param reader          input source handed to the super constructor
     * @param jcsegTaskConfig task configuration; mutated as described above
     * @param aDictionary     dictionary handed to the super constructor
     * @throws IOException if the super constructor fails
     */
    public NLPSeg(Reader reader, JcsegTaskConfig jcsegTaskConfig, ADictionary aDictionary) throws IOException {
        super(reader, jcsegTaskConfig, aDictionary);

        // Force the configuration this segmenter relies on.
        jcsegTaskConfig.MAX_LATIN_LENGTH = 128;
        jcsegTaskConfig.APPEND_CJK_SYN = false;
        jcsegTaskConfig.APPEND_CJK_PINYIN = false;

        // Scratch buffer reused when composing merged words, and the queue of
        // look-ahead words that still have to be handed out by next().
        this.buffer = new IStringBuffer(64);
        this.eWordPool = new LinkedList<>();
    }

    /**
     * Creates a segmenter without an input reader by delegating to
     * {@link #NLPSeg(Reader, JcsegTaskConfig, ADictionary)} with a {@code null} reader.
     *
     * @param jcsegTaskConfig task configuration (modified in place by the delegate constructor)
     * @param aDictionary     dictionary used for lookups
     * @throws IOException if the delegate constructor fails
     */
    public NLPSeg(JcsegTaskConfig jcsegTaskConfig, ADictionary aDictionary) throws IOException {
        this(null, jcsegTaskConfig, aDictionary);
    }

    /**
     * Returns the next word, applying the NLP merge rules on top of the parent segmenter.
     *
     * <p>This body was reconstructed from the bytecode instruction dump that the
     * decompiler left behind (the generated stub unconditionally threw
     * {@link UnsupportedOperationException}). The steps are:
     * <ol>
     *   <li>If look-ahead words are queued in {@code eWordPool}, hand out the first one.</li>
     *   <li>Otherwise fetch a word from {@code super.next()}; {@code null} ends the stream.</li>
     *   <li>A word without entities whose first character is U+7B2C is offered to
     *       {@link #getNextTheWord(IWord)}; a non-null result is tagged with
     *       {@code IWord.QUANTIFIER} and returned.</li>
     *   <li>A word with a {@code "time.a"} or {@code "datetime."} entity is offered to
     *       {@link #getNextTimeMergedWord(IWord, int)}.</li>
     *   <li>A word with a {@code "datetime.ymd"} entity is offered to
     *       {@link #getNextDatetimeWord(IWord, int)}.</li>
     *   <li>If no rule produced a word, the fetched word is returned unchanged.</li>
     * </ol>
     *
     * @return the next (possibly merged) word, or {@code null} when the input is exhausted
     * @throws IOException if reading from the underlying segmenter fails
     */
    @Override // org.lionsoul.jcseg.tokenizer.ASegment, org.lionsoul.jcseg.tokenizer.core.ISegment
    public IWord next() throws IOException {
        // Words read ahead by the merge helpers but not consumed are served first.
        if (this.eWordPool.size() > 0) {
            return this.eWordPool.removeFirst();
        }

        IWord word = super.next();
        if (word == null) {
            return null;
        }

        String[] entity = word.getEntity();

        // 31532 in the bytecode is U+7B2C; such a word may start an ordinal expression.
        if (entity == null && word.getValue().charAt(0) == '\u7b2c') {
            IWord theWord = getNextTheWord(word);
            if (theWord != null) {
                theWord.setPartSpeech(IWord.QUANTIFIER);
                return theWord;
            }
        }

        if (entity == null) {
            return word;
        }

        // The string literals below are exactly the constants present in the bytecode.
        int entityIndex;
        if ((entityIndex = ArrayUtil.startsWith("time.a", entity)) > -1
                || (entityIndex = ArrayUtil.startsWith("datetime.", entity)) > -1) {
            IWord merged = getNextTimeMergedWord(word, entityIndex);
            if (merged != null) {
                return merged;
            }
        }

        if ((entityIndex = ArrayUtil.startsWith("datetime.ymd", entity)) > -1) {
            IWord merged = getNextDatetimeWord(word, entityIndex);
            if (merged != null) {
                return merged;
            }
        }

        return word;
    }

    /**
     * Tries to complete an ordinal-style expression that starts with {@code iWord}.
     *
     * <p>The handling depends on the length of the word's value:
     * <ul>
     *   <li>length 1: the following words are inspected by {@code _nextNumberWord};</li>
     *   <li>length 2: the second character must be a Chinese or English numeric character,
     *       then {@code Entity.E_THE_NUMBER} is added and the following words are inspected;</li>
     *   <li>longer: if the last character is found in lexicon 10 of the dictionary and the
     *       characters between the first and the last are numeric, the word takes over that
     *       entry's entities and is returned as is; otherwise everything after the first
     *       character must be numeric, {@code Entity.E_THE_NUMBER} is added and the following
     *       words are inspected.</li>
     * </ul>
     *
     * @param iWord the word to start from; its entities may be modified
     * @return the resulting word, or {@code null} when no expression could be formed
     * @throws IOException if reading further words fails
     */
    protected IWord getNextTheWord(IWord iWord) throws IOException {
        final String text = iWord.getValue();
        final int size = text.length();

        if (size == 1) {
            return _nextNumberWord(iWord);
        }

        if (size == 2) {
            final char second = text.charAt(1);
            final boolean numeric = NumericUtil.isCNNumeric(second) != -1 || StringUtil.isEnNumeric(second);
            if (!numeric) {
                return null;
            }
            iWord.addEntity(Entity.E_THE_NUMBER);
            return _nextNumberWord(iWord);
        }

        // With a trailing lexicon-10 entry the numeric part stops one character earlier.
        final IWord unit = this.dic.get(10, String.valueOf(text.charAt(size - 1)));
        final int numericEnd = (unit != null) ? size - 1 : size;
        if (!NumericUtil.isCNNumericString(text, 1, numericEnd) && !StringUtil.isDigit(text, 1, numericEnd)) {
            return null;
        }

        if (unit != null) {
            iWord.setEntity(unit.getEntity());
            return iWord;
        }

        iWord.addEntity(Entity.E_THE_NUMBER);
        return _nextNumberWord(iWord);
    }

    /**
     * Reads ahead from the parent segmenter and tries to merge the following word(s)
     * onto {@code iWord}.
     *
     * <p>Two shapes are recognised:
     * <ul>
     *   <li>The next word is not tagged {@code Entity.E_NUMERIC_ARABIC}: it is merged only if
     *       its last character is found in lexicon 10 of the dictionary and the characters
     *       before it are numeric (or the word is a single character). Otherwise the word is
     *       queued in {@code eWordPool} and {@code null} is returned.</li>
     *   <li>The next word is tagged {@code Entity.E_NUMERIC_ARABIC}: one more word is read and
     *       appended when it passes the lexicon-10 check; otherwise that word is queued and
     *       the prefix plus number is returned with {@code Entity.E_THE_NUMBER_A}.</li>
     * </ul>
     *
     * <p>Every word created here carries the position of {@code iWord}. Previously the two
     * unit-merged results were returned without a position, unlike the other results of
     * this method.
     *
     * @param iWord the prefix word the merge starts from
     * @return the merged word, or {@code null} if nothing could be merged
     * @throws IOException if reading from the parent segmenter fails
     */
    private IWord _nextNumberWord(IWord iWord) throws IOException {
        IWord next = super.next();
        if (next == null) {
            return null;
        }

        String[] entity = next.getEntity();
        String nextValue = next.getValue();
        int nextLength = nextValue.length();

        if (ArrayUtil.indexOf(Entity.E_NUMERIC_ARABIC, entity) <= -1) {
            IWord unit = this.dic.get(10, "" + nextValue.charAt(nextLength - 1));
            if (unit != null
                    && (nextLength == 1
                        || NumericUtil.isCNNumericString(nextValue, 0, nextLength - 1)
                        || StringUtil.isDigit(nextValue, 0, nextLength - 1))) {
                Word merged = new Word(iWord.getValue() + nextValue, 1);
                merged.setPosition(iWord.getPosition());
                merged.setEntity(unit.getEntity());
                return merged;
            }
            // Not mergeable: hand the look-ahead word out on the following next() call.
            this.eWordPool.push(next);
            return null;
        }

        IWord after = super.next();
        if (after != null) {
            String afterValue = after.getValue();
            int afterLength = afterValue.length();
            // NOTE(review): this lookup uses the FIRST character while the branch above uses
            // the LAST one; with the numeric-prefix check below only single-character words
            // can realistically match. Kept as found - confirm against upstream.
            IWord unit = this.dic.get(10, "" + afterValue.charAt(0));
            if (unit != null
                    && (afterLength == 1
                        || NumericUtil.isCNNumericString(afterValue, 0, afterLength - 1)
                        || StringUtil.isDigit(afterValue, 0, afterLength - 1))) {
                Word merged = new Word(iWord.getValue() + nextValue + afterValue, 1);
                merged.setPosition(iWord.getPosition());
                merged.setEntity(unit.getEntity());
                return merged;
            }
            this.eWordPool.push(after);
        }

        // Only prefix + number could be merged (or the input ended after the number).
        Word ordinal = new Word(iWord.getValue() + nextValue, 1);
        ordinal.setPosition(iWord.getPosition());
        ordinal.setEntity(Entity.E_THE_NUMBER_A);
        return ordinal;
    }

    /**
     * Merges {@code iWord} with the time/datetime words that follow it into one word.
     *
     * <p>The entity at index {@code i} selects the starting slot of a date-time pool
     * created by {@code TimeUtil}. Following words are read from the parent segmenter and
     * placed into the pool while they carry a {@code "time.a"}, {@code Entity.E_DATETIME_HI}
     * or {@code Entity.E_DATETIME_P} entity and the pool accepts them. The first word that
     * does not fit is queued in {@code eWordPool} exactly once and ends the merge.
     *
     * <p>Fix over the previous body: the loop used to be followed by an unconditional
     * {@code eWordPool.push(next)}, which queued {@code null} when the input ended and queued
     * the same word twice on every exit path that had already pushed it.
     *
     * @param iWord the word the merge starts from
     * @param i     index of the entity of {@code iWord} that identifies its date-time slot
     * @return the merged word (type 5, part of speech {@code IWord.TIME_POSPEECH}), or
     *         {@code null} when the slot is unknown or no following word was merged
     * @throws IOException if reading from the parent segmenter fails
     */
    protected IWord getNextTimeMergedWord(IWord iWord, int i) throws IOException {
        int startSlot = TimeUtil.getDateTimeIndex(iWord.getEntity(i));
        if (startSlot == -1) {
            return null;
        }

        IWord[] pool = TimeUtil.createDateTimePool();
        TimeUtil.fillDateTimePool(pool, startSlot, iWord);

        int mergedCount = 0;
        IWord next;
        while ((next = super.next()) != null) {
            String[] entity = next.getEntity();
            if (entity == null) {
                this.eWordPool.push(next);
                break;
            }

            if (ArrayUtil.startsWith("time.a", entity) > -1) {
                if (TimeUtil.fillDateTimePool(pool, next) == -1) {
                    this.eWordPool.push(next);
                    break;
                }
            } else if (ArrayUtil.startsWith(Entity.E_DATETIME_HI, entity) > -1) {
                TimeUtil.fillTimeToPool(pool, next.getValue());
            } else if (ArrayUtil.startsWith(Entity.E_DATETIME_P, entity) > -1) {
                int slot = TimeUtil.fillDateTimePool(pool, next);
                // NOTE(review): pool[slot - 1] is read without checking slot > 0; confirm
                // that fillDateTimePool cannot return 0 on this path.
                if (slot == -1 || pool[slot - 1] == null) {
                    this.eWordPool.push(next);
                    break;
                }
            } else {
                this.eWordPool.push(next);
                break;
            }

            mergedCount++;
        }

        if (mergedCount == 0) {
            return null;
        }

        // Value: the filled slots in pool order; a space precedes every slot except the
        // first written one and the last slot of the pool.
        this.buffer.clear();
        for (int slot = 0; slot < pool.length; slot++) {
            if (pool[slot] != null) {
                if (this.buffer.length() > 0 && slot + 1 < pool.length) {
                    this.buffer.append(' ');
                }
                this.buffer.append(pool[slot].getValue());
            }
        }
        Word word = new Word(this.buffer.toString(), 5);
        word.setPosition(iWord.getPosition());
        word.setPartSpeech(IWord.TIME_POSPEECH);

        // Entity: the datetime prefix followed by the time key of every filled slot.
        this.buffer.clear().append(Entity.E_DATETIME_P);
        for (int slot = 0; slot < pool.length; slot++) {
            if (pool[slot] != null) {
                this.buffer.append(TimeUtil.getTimeKey(pool[slot]));
            }
        }
        word.setEntity(new String[]{this.buffer.toString()});
        return word;
    }

    /**
     * Tries to extend a date word with the time part that follows it.
     *
     * <p>One word is read from the parent segmenter. If it carries an
     * {@code Entity.E_DATETIME_H} entity it is used directly. If it carries a
     * {@code "time.a"} or {@code Entity.E_DATETIME_P} entity it is first expanded through
     * {@link #getNextTimeMergedWord(IWord, int)}, and the expansion is only accepted when its
     * first entity contains {@code ".h"} or {@code ".a"}. In every rejected case the word that
     * was read (or its expansion) is queued in {@code eWordPool} and {@code null} is returned.
     *
     * @param iWord the date word to extend
     * @param i     entity index supplied by the caller (not used by this method)
     * @return the combined word (type 5, part of speech {@code IWord.TIME_POSPEECH}), or
     *         {@code null} if no time part follows
     * @throws IOException if reading from the parent segmenter fails
     */
    protected IWord getNextDatetimeWord(IWord iWord, int i) throws IOException {
        IWord follower = super.next();
        if (follower == null) {
            return null;
        }

        String[] followerEntity = follower.getEntity();
        if (followerEntity == null) {
            this.eWordPool.add(follower);
            return null;
        }

        int entityIdx = ArrayUtil.startsWith(Entity.E_DATETIME_H, followerEntity);
        if (entityIdx < 0) {
            // No plain hour entity: look for something the time merger can start from.
            int timeIdx = ArrayUtil.startsWith("time.a", followerEntity);
            if (timeIdx < 0) {
                timeIdx = ArrayUtil.startsWith(Entity.E_DATETIME_P, followerEntity);
            }
            if (timeIdx < 0) {
                this.eWordPool.add(follower);
                return null;
            }

            IWord merged = getNextTimeMergedWord(follower, timeIdx);
            if (merged == null) {
                this.eWordPool.addFirst(follower);
                return null;
            }

            String mergedKey = merged.getEntity(0);
            boolean hasTimePart = mergedKey.contains(".h") || mergedKey.contains(".a");
            if (!hasTimePart) {
                this.eWordPool.addFirst(merged);
                return null;
            }

            // Continue with the merged word and its first entity.
            entityIdx = 0;
            follower = merged;
            followerEntity = follower.getEntity();
        }

        this.buffer.clear();
        this.buffer.append(iWord.getValue()).append(' ').append(follower.getValue());
        Word combined = new Word(this.buffer.toString(), 5);
        combined.setPosition(iWord.getPosition());
        combined.setPartSpeech(IWord.TIME_POSPEECH);

        // Entity: first entity of the date word plus the follower entity minus its
        // first 9 characters.
        this.buffer.clear();
        this.buffer.append(iWord.getEntity(0)).append(followerEntity[entityIdx].substring(9));
        combined.addEntity(this.buffer.toString());
        return combined;
    }

    /**
     * Builds a quantifier word from a numeric text and the unit word that follows it.
     *
     * <p>The value is {@code str} followed by the unit's value. The single entity is either
     * the unit's {@code Entity.E_TIME_P} entity with that prefix replaced by
     * {@code Entity.E_DATETIME_P}, or {@code Entity.E_NUC_PREFIX} followed by the unit's
     * first entity when the unit has no such time entity.
     *
     * @param str   textual form of the number
     * @param iWord the unit word
     * @return a new word of type 1 with part of speech {@code IWord.QUANTIFIER}
     */
    private IWord getNumericUnitComposedWord(String str, IWord iWord) {
        final IStringBuffer scratch = new IStringBuffer();

        scratch.clear();
        scratch.append(str);
        scratch.append(iWord.getValue());
        final Word composed = new Word(scratch.toString(), 1);

        final String[] unitEntities = iWord.getEntity();
        final int timeIdx = ArrayUtil.startsWith(Entity.E_TIME_P, unitEntities);

        scratch.clear();
        if (timeIdx < 0) {
            scratch.append(Entity.E_NUC_PREFIX).append(iWord.getEntity(0));
        } else {
            scratch.append(unitEntities[timeIdx].replace(Entity.E_TIME_P, Entity.E_DATETIME_P));
        }

        final String[] composedEntity = {scratch.toString()};
        composed.setEntity(composedEntity);
        composed.setPartSpeech(IWord.QUANTIFIER);
        scratch.clear();
        return composed;
    }

    /**
     * Integer convenience overload: renders {@code i} in decimal and delegates to the
     * string-based variant.
     *
     * @param i     the numeric value
     * @param iWord the unit word
     * @return the composed quantifier word
     */
    public IWord getNumericUnitComposedWord(int i, IWord iWord) {
        final String decimal = Integer.toString(i);
        return getNumericUnitComposedWord(decimal, iWord);
    }

    /**
     * Segments the next CJK sentence into {@code wordPool} and returns its first word.
     *
     * <p>The sentence obtained from {@code nextCJKSentence(i)} is walked left to right:
     * <ul>
     *   <li>At a Chinese numeric character the whole numeral is read with
     *       {@code nextCNNumeric}. Unless bit 2 of {@code ctrlMask} is set, the numeral is
     *       emitted as a plain number, as number + unit (unit looked up in lexicon 1), or as
     *       a longer lexicon-0 word starting with the numeral, whichever applies. When bit 2
     *       is set the numeral is emitted as a fraction word.</li>
     *   <li>Otherwise the first word of the best chunk is taken, optionally replaced by a
     *       recognised Chinese name or by a mixed word, and dropped when stop-word clearing
     *       is enabled and the word is in lexicon 7.</li>
     * </ul>
     *
     * <p>Fix over the previous body: with {@code CNFRA_TO_ARABIC} enabled the fraction word's
     * value is the converted text ("numerator/denominator"), whose length generally differs
     * from the source numeral. The cursor was nevertheless advanced by the length of the
     * converted value, so source characters were re-read or skipped. The consumed source
     * length is now recorded explicitly, as the other Arabic-conversion branches already do.
     *
     * @param i  value forwarded to {@code nextCJKSentence}
     * @param i2 position of the sentence start; word positions are {@code i2 + offset}
     * @return the first pooled word, or {@code null} if no word was produced
     * @throws IOException if reading the sentence or a helper lookup fails
     */
    @Override // org.lionsoul.jcseg.tokenizer.ASegment
    protected IWord getNextCJKWord(int i, int i2) throws IOException {
        char[] chars = nextCJKSentence(i);
        int cursor = 0;
        while (cursor < chars.length) {
            int numericCode = NumericUtil.isCNNumeric(chars[cursor]);
            if (numericCode > -1) {
                IWord numWord;
                IWord unit = null;
                // Number of source characters consumed; -1 means "same as the word length".
                int consumed = -1;
                String cnNumber = nextCNNumeric(chars, cursor);
                if ((this.ctrlMask & 2) == 0) {
                    IStringBuffer scratch = new IStringBuffer();
                    if (numericCode <= 10) {
                        // Longest lexicon-1 match directly after the numeral.
                        int offset = cnNumber.length();
                        for (int n = 0; cursor + offset < chars.length && n < this.config.MAX_UNIT_LENGTH; n++) {
                            scratch.append(chars[cursor + offset]);
                            String candidate = scratch.toString();
                            if (this.dic.match(1, candidate)) {
                                unit = this.dic.get(1, candidate);
                            }
                            offset++;
                        }
                    }

                    // Longest lexicon-0 word that starts with the numeral itself.
                    IWord dictWord = null;
                    scratch.clear().append(cnNumber);
                    for (int len = cnNumber.length(); cursor + len < chars.length && len < this.config.MAX_LENGTH; len++) {
                        scratch.append(chars[cursor + len]);
                        String candidate = scratch.toString();
                        if (this.dic.match(0, candidate)) {
                            dictWord = this.dic.get(0, candidate);
                        }
                    }

                    // The lexicon-0 word wins when there is no unit or it is strictly longer
                    // than numeral + unit.
                    boolean preferDictWord = dictWord != null
                            && (unit == null || dictWord.getLength() > cnNumber.length() + unit.getLength());
                    if (preferDictWord) {
                        numWord = dictWord.m0clone();
                    } else if (unit == null) {
                        if (this.config.CNNUM_TO_ARABIC) {
                            numWord = new Word(String.valueOf(NumericUtil.cnNumericToArabic(cnNumber, true)), 9);
                            numWord.setEntity(Entity.E_NUMERIC_ARABIC_A);
                            numWord.setPartSpeech(IWord.NUMERIC_POSPEECH);
                            consumed = cnNumber.length();
                        } else {
                            numWord = new Word(cnNumber, 9);
                            numWord.setEntity(Entity.E_NUMERIC_CN_A);
                            numWord.setPartSpeech(IWord.NUMERIC_POSPEECH);
                        }
                    } else if (this.config.CNNUM_TO_ARABIC) {
                        numWord = getNumericUnitComposedWord(NumericUtil.cnNumericToArabic(cnNumber, true), unit);
                        consumed = cnNumber.length() + unit.getLength();
                    } else {
                        numWord = getNumericUnitComposedWord(cnNumber, unit);
                    }
                } else if (this.config.CNFRA_TO_ARABIC) {
                    String[] split = cnNumber.split("分之");
                    numWord = new Word(NumericUtil.cnNumericToArabic(split[1], true) + "/" + NumericUtil.cnNumericToArabic(split[0], true), 9, Entity.E_NUMERIC_FRACTION_A);
                    numWord.setPartSpeech(IWord.NUMERIC_POSPEECH);
                    // The converted value is not as long as the source numeral: advance by
                    // what was actually read.
                    consumed = cnNumber.length();
                } else {
                    numWord = new Word(cnNumber, 9, Entity.E_NUMERIC_CN_FRACTION_A);
                    numWord.setPartSpeech(IWord.NUMERIC_POSPEECH);
                }
                this.wordPool.add(numWord);
                numWord.setPosition(i2 + cursor);
                cursor += consumed > 0 ? consumed : numWord.getLength();
            } else {
                IChunk chunk = getBestCJKChunk(chars, cursor);
                IWord word = chunk.getWords()[0];
                // Word type of a recognised name (3 or 4), or -1 when no name was found.
                int nameType = -1;
                if (this.config.I_CN_NAME && word.getLength() <= 2 && chunk.getWords().length > 1) {
                    StringBuilder name = new StringBuilder();
                    name.append(word.getValue());
                    String rest;
                    if (this.dic.match(2, word.getValue()) && (rest = findCHName(chars, 0, chunk)) != null) {
                        nameType = 3;
                        name.append(rest);
                    } else if (this.dic.match(6, word.getValue()) && chunk.getWords()[1].getLength() <= 2 && this.dic.match(2, chunk.getWords()[1].getValue())) {
                        nameType = 4;
                        name.append(chunk.getWords()[1].getValue());
                    }
                    if (nameType != -1) {
                        word = new Word(name.toString(), nameType);
                        word.addEntity(nameType == 4 ? Entity.E_NAME_NICKNAME : Entity.E_NAME_CN);
                        word.setPartSpeech(IWord.NAME_POSPEECH);
                    }
                }
                if (this.config.CLEAR_STOPWORD && this.dic.match(7, word.getValue())) {
                    // Stop word: skip it without pooling.
                    cursor += word.getLength();
                } else {
                    IWord mixed = null;
                    if ((this.ctrlMask & 1) != 0 && chars.length - cursor <= this.dic.mixPrefixLength) {
                        mixed = getNextMixedWord(chars, cursor);
                    }
                    if (mixed != null) {
                        word = mixed.m0clone();
                    } else if (nameType == -1) {
                        // Dictionary-owned word: work on a copy before setting the position.
                        word = word.m0clone();
                    }
                    word.setPosition(i2 + cursor);
                    this.wordPool.add(word);
                    cursor += word.getLength();
                    if (nameType == -1) {
                        appendWordFeatures(word);
                    }
                }
            }
        }
        if (this.wordPool.size() == 0) {
            return null;
        }
        return this.wordPool.remove();
    }

    /* JADX WARN: Removed duplicated region for block: B:252:0x03cc A[LOOP:6: B:216:0x036a->B:252:0x03cc, LOOP_END] */
    /* JADX WARN: Removed duplicated region for block: B:253:0x03b8 A[SYNTHETIC] */
    /**
     * Override of the Latin-word routine of {@code ASegment}.
     *
     * <p>WARNING: the decompiler could not recover this method. The body below is a
     * placeholder that unconditionally throws {@link UnsupportedOperationException}; the
     * real implementation (2021 bytecode instructions according to the decompiler note)
     * is not present in this file and has to be restored from the original class or
     * sources before this method can be called.
     *
     * @param r7 first int argument (meaning not recoverable from this file)
     * @param r8 second int argument (meaning not recoverable from this file)
     * @return never returns normally in this placeholder
     * @throws java.io.IOException declared by the overridden signature
     */
    @Override // org.lionsoul.jcseg.tokenizer.ASegment
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    protected org.lionsoul.jcseg.tokenizer.core.IWord nextLatinWord(int r7, int r8) throws java.io.IOException {
        /*
            Method dump skipped, instructions count: 2021
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: org.lionsoul.jcseg.tokenizer.NLPSeg.nextLatinWord(int, int):org.lionsoul.jcseg.tokenizer.core.IWord");
    }
}
