package org.apache.any23.extractor.microdata;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.IssueReport;
import org.apache.any23.extractor.microdata.ItemPropValue;
import org.apache.any23.rdf.RDFUtils;
import org.eclipse.rdf4j.common.net.ParsedIRI;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.w3c.dom.Document;

/* loaded from: input_file:org/apache/any23/extractor/microdata/MicrodataExtractor.class */
/**
 * Extractor that turns the microdata item scopes detected in an HTML DOM
 * into RDF triples written to an {@link ExtractionResult}.
 *
 * <p>For every top-level item scope a {@code microdata#item} triple is written
 * with the document IRI as subject; types, properties and nested items of the
 * scope are then emitted recursively.
 */
public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
    /** Prefix shared by every hCard profile type IRI. */
    private static final String HCARD_PREFIX = "http://microformats.org/profile/hcard";

    /** Predicate linking the document to each top-level microdata item. */
    static final IRI MICRODATA_ITEM = RDFUtils.iri("http://www.w3.org/1999/xhtml/microdata#item");

    /** Relative reference used to derive the "document IRI + '#'" namespace. */
    private static final ParsedIRI EMPTY_FRAG = ParsedIRI.create("#");

    /** Namespace substituted for any type whose IRI starts with {@link #HCARD_PREFIX}. */
    private static final IRI HCARD_NAMESPACE_IRI = RDFUtils.iri("http://microformats.org/profile/hcard#");

    /** Predicate whose link values are additionally emitted as {@code rdf:type}. */
    private static final String SCHEMA_ADDITIONAL_TYPE = "http://schema.org/additionalType";

    /**
     * Returns the description provided by {@link MicrodataExtractorFactory}.
     *
     * @return the extractor description instance
     */
    @Override
    public ExtractorDescription getDescription() {
        return MicrodataExtractorFactory.getDescriptionInstance();
    }

    /**
     * Parses the microdata in {@code document} and writes the resulting triples.
     *
     * <p>Parser errors are reported as issues on {@code extractionResult}. If no
     * item scope is detected the method returns without writing anything.
     *
     * @param extractionParameters source of the {@code any23.microdata.strict} flag and the
     *                             {@code any23.microdata.ns.default} property
     * @param extractionContext    supplies the document IRI
     * @param document             DOM to scan for microdata
     * @param extractionResult     sink for triples and issues
     * @throws IOException              declared by the extractor interface
     * @throws ExtractionException      if a property value cannot be resolved to an IRI
     * @throws IllegalArgumentException if the configured default namespace has a non-empty local name
     */
    @Override
    public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, Document document, ExtractionResult extractionResult) throws IOException, ExtractionException {
        MicrodataParserReport report = MicrodataParser.getMicrodata(document);
        MicrodataParserException[] errors = report.getErrors();
        if (errors.length > 0) {
            notifyError(errors, extractionResult);
        }
        ItemScope[] itemScopes = report.getDetectedItemScopes();
        if (itemScopes.length == 0) {
            return;
        }

        IRI documentIRI = extractionContext.getDocumentIRI();
        ParsedIRI parsedDocumentIRI = ParsedIRI.create(documentIRI.stringValue());

        IRI defaultNamespace;
        if (extractionParameters.getFlag("any23.microdata.strict")) {
            // Strict mode: untyped properties live in "<document IRI>#".
            defaultNamespace = RDFUtils.iri(parsedDocumentIRI.resolve(EMPTY_FRAG).toString());
        } else {
            defaultNamespace = RDFUtils.iri(extractionParameters.getProperty("any23.microdata.ns.default"));
            // A usable namespace must end right at the namespace boundary.
            if (!defaultNamespace.getLocalName().isEmpty()) {
                throw new IllegalArgumentException("invalid namespace IRI: " + defaultNamespace);
            }
        }

        // Shared across the whole run so an item scope reached twice keeps one subject.
        Map<ItemScope, Resource> subjects = new HashMap<>();
        for (ItemScope itemScope : itemScopes) {
            Resource subject = processType(itemScope, parsedDocumentIRI, extractionResult, subjects, defaultNamespace);
            extractionResult.writeTriple(documentIRI, MICRODATA_ITEM, subject);
        }
    }

    /**
     * Emits the type and property triples of one item scope.
     *
     * @param itemScope        scope to process
     * @param parsedIRI        document IRI used to resolve relative references
     * @param extractionResult sink for triples
     * @param map              cache of subjects already assigned to item scopes
     * @param iri              namespace used for property names when the scope declares no type
     * @return the subject resource representing {@code itemScope}
     * @throws ExtractionException if a link value of a property is not a valid IRI reference
     */
    private Resource processType(ItemScope itemScope, ParsedIRI parsedIRI, ExtractionResult extractionResult, Map<ItemScope, Resource> map, IRI iri) throws ExtractionException {
        Resource subject = map.computeIfAbsent(itemScope, scope -> createSubjectForItemId(parsedIRI, scope.getItemId()));

        // The first declared type (if any) overrides the inherited namespace.
        IRI namespace = iri;
        List<IRI> types = itemScope.getTypes();
        if (!types.isEmpty()) {
            namespace = getNamespaceIRI(types.get(0));
            for (IRI type : types) {
                extractionResult.writeTriple(subject, RDF.TYPE, type);
            }
        }

        for (Map.Entry<String, List<ItemProp>> entry : itemScope.getProperties().entrySet()) {
            IRI predicate = getPredicate(namespace, entry.getKey());
            if (predicate == null) {
                // No namespace available to build a predicate from a relative name.
                continue;
            }
            for (ItemProp itemProp : entry.getValue()) {
                try {
                    processProperty(subject, predicate, itemProp, parsedIRI, map, extractionResult, namespace);
                } catch (URISyntaxException e) {
                    // Keep the syntax error as cause so callers can see what was malformed.
                    throw new ExtractionException("Error while processing on subject '" + subject + "' the itemProp: '" + itemProp + "' ", e);
                }
            }
        }
        return subject;
    }

    /**
     * Creates the subject for an item: the resolved item id, or a blank node
     * when the id is {@code null} or cannot be resolved against the document IRI.
     *
     * @param parsedIRI document IRI used as resolution base
     * @param str       item id, may be {@code null}
     * @return an IRI or a fresh blank node
     */
    public static Resource createSubjectForItemId(ParsedIRI parsedIRI, String str) {
        if (str == null) {
            return RDFUtils.bnode();
        }
        try {
            return toAbsoluteIRI(parsedIRI, str);
        } catch (URISyntaxException e) {
            // Deliberate fallback: an unusable item id yields an anonymous subject.
            return RDFUtils.bnode();
        }
    }

    /**
     * Writes the triple(s) for a single property value of an item.
     *
     * @param resource         subject of the enclosing item
     * @param iri              predicate for the property
     * @param itemProp         property whose value is emitted
     * @param parsedIRI        document IRI used to resolve link values
     * @param map              cache of subjects already assigned to item scopes
     * @param extractionResult sink for triples
     * @param iri2             namespace passed on to nested item scopes
     * @throws URISyntaxException  if a link value cannot be resolved
     * @throws ExtractionException if processing a nested item fails
     * @throws RuntimeException    if the value has no literal and is neither nested nor a link
     */
    private void processProperty(Resource resource, IRI iri, ItemProp itemProp, ParsedIRI parsedIRI, Map<ItemScope, Resource> map, ExtractionResult extractionResult, IRI iri2) throws URISyntaxException, ExtractionException {
        Object content = itemProp.getValue().getContent();
        ItemPropValue.Type type = itemProp.getValue().getType();

        Value value;
        if (itemProp.getValue().literal != null) {
            value = itemProp.getValue().literal;
        } else if (type.equals(ItemPropValue.Type.Nested)) {
            value = processType((ItemScope) content, parsedIRI, extractionResult, map, iri2);
        } else if (type.equals(ItemPropValue.Type.Link)) {
            value = toAbsoluteIRI(parsedIRI, (String) content);
            // schema.org additionalType links are also asserted as rdf:type.
            if (iri.stringValue().equals(SCHEMA_ADDITIONAL_TYPE)) {
                if (itemProp.reverse) {
                    extractionResult.writeTriple((Resource) value, RDF.TYPE, resource);
                } else {
                    extractionResult.writeTriple(resource, RDF.TYPE, value);
                }
            }
        } else {
            throw new RuntimeException("Invalid Type '" + type + "' for ItemPropValue with name: '" + iri + "'");
        }

        if (itemProp.reverse) {
            // NOTE(review): this cast fails if a reverse property carries a literal value;
            // presumably the parser never produces that combination - confirm.
            extractionResult.writeTriple((Resource) value, iri, resource);
        } else {
            extractionResult.writeTriple(resource, iri, value);
        }
    }

    /**
     * Maps a type IRI to the IRI whose namespace is used for its properties:
     * the fixed hCard namespace for hCard profile types, otherwise the type itself.
     *
     * @param iri item type
     * @return IRI to take the property namespace from
     */
    private static IRI getNamespaceIRI(IRI iri) {
        return iri.stringValue().startsWith(HCARD_PREFIX) ? HCARD_NAMESPACE_IRI : iri;
    }

    /**
     * Builds the predicate for a property name.
     *
     * @param iri IRI whose namespace qualifies relative names, may be {@code null}
     * @param str property name as found in the markup
     * @return the name itself when it is an absolute IRI, otherwise the namespace of
     *         {@code iri} plus the trimmed name, or {@code null} when {@code iri} is {@code null}
     */
    private static IRI getPredicate(IRI iri, String str) {
        return toAbsoluteIRI(str).orElseGet(() -> iri == null ? null : RDFUtils.iri(iri.getNamespace(), str.trim()));
    }

    /**
     * Interprets a string as an absolute IRI if possible.
     *
     * @param str candidate IRI, may be {@code null}
     * @return the IRI when {@code str} parses and is absolute, otherwise empty
     */
    private static Optional<IRI> toAbsoluteIRI(String str) {
        if (str == null) {
            return Optional.empty();
        }
        try {
            ParsedIRI parsed = ParsedIRI.create(str.trim());
            if (parsed.isAbsolute()) {
                return Optional.of(RDFUtils.iri(parsed.toString()));
            }
        } catch (RuntimeException ignored) {
            // Best effort: a string that does not parse is simply not an absolute IRI.
        }
        return Optional.empty();
    }

    /**
     * Resolves a (possibly relative) reference against a base IRI.
     *
     * @param parsedIRI base IRI
     * @param str       reference to resolve
     * @return the resolved IRI
     * @throws URISyntaxException if resolution fails; a {@link URISyntaxException} found as
     *                            cause of the runtime failure is rethrown unchanged
     */
    private static IRI toAbsoluteIRI(ParsedIRI parsedIRI, String str) throws URISyntaxException {
        try {
            return RDFUtils.iri(parsedIRI.resolve(str.trim()));
        } catch (RuntimeException e) {
            Throwable cause = e.getCause();
            if (cause instanceof URISyntaxException) {
                throw (URISyntaxException) cause;
            }
            String reason = e.getClass().getName() + (e.getMessage() != null ? ": " + e.getMessage() : "");
            throw new URISyntaxException(String.valueOf(str), reason);
        }
    }

    /**
     * Reports every parser error as an ERROR-level issue, using the JSON form of the
     * error as message and its begin row/column as location.
     *
     * @param microdataParserExceptionArr errors collected by the microdata parser
     * @param extractionResult            sink for the issue notifications
     */
    private void notifyError(MicrodataParserException[] microdataParserExceptionArr, ExtractionResult extractionResult) {
        for (MicrodataParserException error : microdataParserExceptionArr) {
            extractionResult.notifyIssue(IssueReport.IssueLevel.ERROR, error.toJSON(), error.getErrorLocationBeginRow(), error.getErrorLocationBeginCol());
        }
    }
}
