package com.hankcs.hanlp.classification.corpus;

import com.hankcs.hanlp.classification.collections.FrequencyMap;
import com.hankcs.hanlp.classification.models.AbstractModel;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;

/* loaded from: input_file:com/hankcs/hanlp/classification/corpus/FileDataSet.class */
public class FileDataSet extends AbstractDataSet {
    File cache;
    DataOutputStream out;
    int size;

    public FileDataSet(AbstractModel abstractModel, File file) throws FileNotFoundException {
        super(abstractModel);
        initCache(file);
    }

    public FileDataSet(AbstractModel abstractModel) throws IOException {
        this(abstractModel, File.createTempFile(String.valueOf(System.currentTimeMillis()), ".dat"));
    }

    public FileDataSet(File file) throws FileNotFoundException {
        initCache(file);
    }

    private void initCache(File file) throws FileNotFoundException {
        this.cache = file;
        this.out = new DataOutputStream(new FileOutputStream(file));
    }

    private void initCache() throws IOException {
        initCache(File.createTempFile(String.valueOf(System.currentTimeMillis()), ".dat"));
    }

    public FileDataSet() throws IOException {
        this(File.createTempFile(String.valueOf(System.currentTimeMillis()), ".dat"));
    }

    @Override // com.hankcs.hanlp.classification.corpus.IDataSet
    public Document add(String str, String str2) {
        Document convert = convert(str, str2);
        try {
            add(convert);
            return convert;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    private void add(Document document) throws IOException {
        this.out.writeInt(document.category);
        Set<Map.Entry<Integer, int[]>> entrySet = document.tfMap.entrySet();
        this.out.writeInt(entrySet.size());
        for (Map.Entry<Integer, int[]> entry : entrySet) {
            this.out.writeInt(entry.getKey().intValue());
            this.out.writeInt(entry.getValue()[0]);
        }
        this.size++;
    }

    @Override // com.hankcs.hanlp.classification.corpus.IDataSet
    public int size() {
        return this.size;
    }

    @Override // com.hankcs.hanlp.classification.corpus.IDataSet
    public void clear() {
        this.size = 0;
    }

    @Override // com.hankcs.hanlp.classification.corpus.IDataSet
    public IDataSet shrink(int[] iArr) {
        try {
            clear();
            Iterator<Document> it = iterator();
            initCache();
            while (it.hasNext()) {
                Document next = it.next();
                FrequencyMap<Integer> frequencyMap = new FrequencyMap<>();
                for (Map.Entry<Integer, int[]> entry : next.tfMap.entrySet()) {
                    Integer key = entry.getKey();
                    if (iArr[key.intValue()] != -1) {
                        frequencyMap.put(Integer.valueOf(iArr[key.intValue()]), entry.getValue());
                    }
                }
                if (frequencyMap.size() != 0) {
                    next.tfMap = frequencyMap;
                    add(next);
                }
            }
            return this;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @Override // java.lang.Iterable
    public Iterator<Document> iterator() {
        try {
            this.out.close();
            final DataInputStream dataInputStream = new DataInputStream(new FileInputStream(this.cache));
            return new Iterator<Document>() { // from class: com.hankcs.hanlp.classification.corpus.FileDataSet.1
                @Override // java.util.Iterator
                public void remove() {
                    throw new RuntimeException("不支持的操作");
                }

                @Override // java.util.Iterator
                public boolean hasNext() {
                    try {
                        boolean z = dataInputStream.available() > 0;
                        if (!z) {
                            dataInputStream.close();
                        }
                        return z;
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }

                /* JADX WARN: Can't rename method to resolve collision */
                @Override // java.util.Iterator
                public Document next() {
                    try {
                        return new Document(dataInputStream);
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }
            };
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
