package com.alibaba.cloud.ai.graph.node;

import com.alibaba.cloud.ai.document.JsonDocumentParser;
import com.alibaba.cloud.ai.document.TextDocumentParser;
import com.alibaba.cloud.ai.graph.OverAllState;
import com.alibaba.cloud.ai.graph.action.NodeAction;
import com.alibaba.cloud.ai.parser.bshtml.BsHtmlDocumentParser;
import com.alibaba.cloud.ai.parser.markdown.MarkdownDocumentParser;
import com.alibaba.cloud.ai.parser.tika.TikaDocumentParser;
import com.alibaba.cloud.ai.parser.yaml.YamlDocumentParser;
import java.io.InputStream;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import org.springframework.ai.document.Document;
import org.springframework.util.StringUtils;

/* loaded from: input_file:com/alibaba/cloud/ai/graph/node/DocumentExtractorNode.class */
/**
 * Graph node that loads files from the classpath and extracts their text.
 *
 * <p>The list of resource paths is read from the graph state under {@code paramsKey};
 * when the state has no value for that key (or no key was configured), the
 * {@code fileList} supplied at construction time is used instead. Each file is parsed
 * by the extractor registered for its file extension, and the text of the first
 * parsed document is collected.
 *
 * <p>The result map always contains the extracted texts under {@code "text"}, and
 * additionally under {@code outputKey} when that key is non-empty.
 */
public class DocumentExtractorNode implements NodeAction {
    private final String paramsKey;
    private final String outputKey;
    private final List<String> fileList;

    /** Extractors keyed by lower-case file extension including the leading dot (e.g. {@code ".pdf"}). */
    private final Map<String, Function<InputStream, List<Document>>> extractors = new HashMap<>();

    /** Fluent builder for {@link DocumentExtractorNode}. */
    public static class Builder {
        private String paramsKey;
        private String outputKey;
        private List<String> fileList;

        /**
         * Sets the state key under which the list of file paths is looked up.
         *
         * @param paramsKey state key, may be {@code null} if a file list is supplied
         * @return this builder
         */
        public Builder paramsKey(String paramsKey) {
            this.paramsKey = paramsKey;
            return this;
        }

        /**
         * Sets the additional key under which the extracted texts are returned.
         *
         * @param outputKey extra output key; ignored when {@code null} or empty
         * @return this builder
         */
        public Builder outputKey(String outputKey) {
            this.outputKey = outputKey;
            return this;
        }

        /**
         * Sets the fallback list of classpath resource paths.
         *
         * @param fileList resource paths used when the state holds no value for the params key
         * @return this builder
         */
        public Builder fileList(List<String> fileList) {
            this.fileList = fileList;
            return this;
        }

        /**
         * Creates the node from the values collected so far.
         *
         * @return a new {@link DocumentExtractorNode}
         */
        public DocumentExtractorNode build() {
            return new DocumentExtractorNode(this.paramsKey, this.outputKey, this.fileList);
        }
    }

    /**
     * Creates a node and registers the built-in extractors.
     *
     * @param paramsKey state key holding the list of file paths, may be {@code null}
     * @param outputKey additional key for the result, may be {@code null} or empty
     * @param fileList  fallback list of classpath resource paths, may be {@code null}
     */
    public DocumentExtractorNode(String paramsKey, String outputKey, List<String> fileList) {
        this.paramsKey = paramsKey;
        this.outputKey = outputKey;
        this.fileList = fileList;

        // A fresh parser is created for every invocation, exactly as before.
        register(in -> new TextDocumentParser().parse(in), ".txt");
        register(in -> new MarkdownDocumentParser().parse(in), ".markdown", ".md");
        register(in -> new BsHtmlDocumentParser().parse(in), ".html", ".htm", ".xml");
        register(in -> new JsonDocumentParser(new String[0]).parse(in), ".json");
        register(in -> new YamlDocumentParser().parse(in), ".yaml", ".yml");
        register(in -> new TikaDocumentParser().parse(in),
                ".pdf", ".doc", ".docx", ".csv", ".xls", ".xlsx", ".ppt", ".pptx");
    }

    /** Registers one extractor for each of the given dot-prefixed extensions. */
    private void register(Function<InputStream, List<Document>> extractor, String... extensions) {
        for (String extension : extensions) {
            this.extractors.put(extension, extractor);
        }
    }

    /**
     * Extracts the text of every configured file.
     *
     * @param overAllState graph state that may hold the file list under the params key
     * @return map with the list of extracted texts under {@code "text"} and, when an
     *         output key is configured, under that key as well
     * @throws RuntimeException if neither a params key nor a file list is configured, if
     *         the resolved file list is {@code null} or empty, or if a file cannot be
     *         found or parsed (the underlying failure is attached as the cause)
     */
    @Override // com.alibaba.cloud.ai.graph.action.NodeAction
    public Map<String, Object> apply(OverAllState overAllState) throws Exception {
        if (this.paramsKey == null && this.fileList == null) {
            throw new RuntimeException("File variable not found for selector");
        }

        // Only consult the state when a key is configured; otherwise use the fallback list.
        @SuppressWarnings("unchecked")
        List<String> paths = this.paramsKey == null
                ? this.fileList
                : (List<String>) overAllState.value(this.paramsKey).orElse(this.fileList);
        if (paths == null || paths.isEmpty()) {
            throw new RuntimeException("Variable fileList is not an ArrayFileSegment");
        }

        List<String> texts = new ArrayList<>(paths.size());
        for (String path : paths) {
            // try-with-resources closes the stream on both success and failure;
            // a null resource is tolerated and simply not closed.
            try (InputStream in = getClass().getClassLoader().getResourceAsStream(path)) {
                if (in == null) {
                    throw new IllegalArgumentException("File not found in resources: " + path);
                }
                texts.add(extractTextByFileExtension(in, getFileExtension(path)));
            } catch (Exception e) {
                throw new RuntimeException("Failed to parse test file: " + path, e);
            }
        }

        Map<String, Object> result = new HashMap<>();
        result.put("text", texts);
        if (StringUtils.hasLength(this.outputKey)) {
            result.put(this.outputKey, texts);
        }
        return result;
    }

    /**
     * Parses the stream with the extractor registered for the given extension and
     * returns the text of the first parsed document.
     *
     * @param inputStream   content to parse
     * @param fileExtension extension with or without the leading dot, any case
     * @return text of the first document produced by the extractor
     * @throws RuntimeException if no extractor is registered for the extension or the
     *         extractor produced no documents
     */
    private String extractTextByFileExtension(InputStream inputStream, String fileExtension) {
        Function<InputStream, List<Document>> extractor =
                this.extractors.get(normalizeExtension(fileExtension));
        if (extractor == null) {
            throw new RuntimeException("Unsupported Extension Type: " + fileExtension);
        }
        List<Document> documents = extractor.apply(inputStream);
        if (documents == null || documents.isEmpty()) {
            throw new RuntimeException("No document content extracted for extension: " + fileExtension);
        }
        return documents.get(0).getText();
    }

    /**
     * Converts an extension to the form used as registry key: lower-case with a
     * leading dot. A {@code null} or empty extension maps to the empty string, for
     * which no extractor is registered.
     */
    private static String normalizeExtension(String fileExtension) {
        if (fileExtension == null || fileExtension.isEmpty()) {
            return "";
        }
        String lower = fileExtension.toLowerCase(java.util.Locale.ROOT);
        return lower.startsWith(".") ? lower : "." + lower;
    }

    /**
     * Returns the part of the file name after its last dot, without the dot, or the
     * empty string when the file name contains no dot.
     */
    private String getFileExtension(String filePath) {
        String fileName = Paths.get(filePath).getFileName().toString();
        int dotIndex = fileName.lastIndexOf('.');
        if (dotIndex == -1) {
            return "";
        }
        return fileName.substring(dotIndex + 1);
    }

    /**
     * Creates a new builder.
     *
     * @return an empty {@link Builder}
     */
    public static Builder builder() {
        return new Builder();
    }
}
