/*
 * Decompiled with CFR 0.152.
 */
package org.opensearch.neuralsearch.processor.semantic;

import com.google.common.annotations.VisibleForTesting;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import lombok.Generated;
import lombok.NonNull;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.opensearch.action.ActionRequest;
import org.opensearch.action.ActionType;
import org.opensearch.action.get.MultiGetAction;
import org.opensearch.action.get.MultiGetItemResponse;
import org.opensearch.action.get.MultiGetRequest;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.Nullable;
import org.opensearch.core.action.ActionListener;
import org.opensearch.env.Environment;
import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.ingest.AbstractBatchingSystemProcessor;
import org.opensearch.ingest.IngestDocument;
import org.opensearch.ingest.IngestDocumentWrapper;
import org.opensearch.ml.common.MLModel;
import org.opensearch.neuralsearch.mapper.dto.ChunkingConfig;
import org.opensearch.neuralsearch.mapper.dto.SparseEncodingConfig;
import org.opensearch.neuralsearch.ml.MLCommonsClientAccessor;
import org.opensearch.neuralsearch.processor.InferenceRequest;
import org.opensearch.neuralsearch.processor.TextInferenceRequest;
import org.opensearch.neuralsearch.processor.chunker.Chunker;
import org.opensearch.neuralsearch.processor.dto.SemanticFieldInfo;
import org.opensearch.neuralsearch.processor.util.ChunkUtils;
import org.opensearch.neuralsearch.processor.util.ProcessorUtils;
import org.opensearch.neuralsearch.stats.events.EventStatName;
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
import org.opensearch.neuralsearch.util.ProcessorDocumentUtils;
import org.opensearch.neuralsearch.util.SemanticMLModelUtils;
import org.opensearch.neuralsearch.util.SemanticMappingUtils;
import org.opensearch.neuralsearch.util.TokenWeightUtil;
import org.opensearch.neuralsearch.util.prune.PruneType;
import org.opensearch.neuralsearch.util.prune.PruneUtils;
import org.opensearch.transport.client.OpenSearchClient;

/**
 * Batching system ingest processor that handles the semantic fields of a document.
 *
 * <p>For every configured semantic field found in a document the processor chunks the
 * text (when chunking is enabled), optionally reuses embeddings from an already indexed
 * copy of the document, writes model info next to the field and finally generates and
 * stores the embeddings. Only the asynchronous and batch entry points are supported;
 * the synchronous {@code execute(IngestDocument)} always throws.
 */
public class SemanticFieldProcessor
extends AbstractBatchingSystemProcessor {
    @Generated
    private static final Logger log = LogManager.getLogger(SemanticFieldProcessor.class);
    /** Processor type name; also used in the error message of the synchronous execute method. */
    public static final String PROCESSOR_TYPE = "system_ingest_processor_semantic_field";
    /** Dot-separated path of each semantic field mapped to that field's config. */
    private final Map<String, Map<String, Object>> pathToFieldConfig;
    /** Models already fetched through the ML client; ids present here are not fetched again. */
    private final Map<String, MLModel> modelIdToModelMap = new ConcurrentHashMap<String, MLModel>();
    /** Model type per model id, filled in by setModelInfo and read by setInference. */
    private final Map<String, String> modelIdToModelTypeMap = new ConcurrentHashMap<String, String>();
    /** Client used to look up models (and, presumably, to run inference - not visible in this part of the file). */
    protected final MLCommonsClientAccessor mlCommonsClientAccessor;
    /** Supplies the settings passed to the max token count lookup when chunking. */
    private final Environment environment;
    /** Passed to the max token count lookup when chunking. */
    private final ClusterService clusterService;
    /** Handed to each SemanticFieldInfo together with its chunking config. */
    private final AnalysisRegistry analysisRegistry;
    /** Chunker used when chunking is enabled for a field but no chunker is configured. */
    private final Chunker defaultTextChunker;
    /** Default prune ratio; setInference falls back to this same value (0.1) when the field config has none. */
    private static final float DEFAULT_PRUNE_RATIO = 0.1f;
    /** Client presumably used to fetch existing documents for embedding reuse - confirm in getExistingDocs. */
    private final OpenSearchClient openSearchClient;

    /**
     * Creates the processor.
     *
     * @param tag                processor tag, may be null
     * @param description        processor description, may be null
     * @param batchSize          batch size forwarded to the batching base class
     * @param pathToFieldConfig  semantic field path to field config
     * @param mlClientAccessor   ML client accessor
     * @param environment        node environment
     * @param clusterService     cluster service
     * @param defaultTextChunker chunker used when a field enables chunking without configuring chunkers
     * @param analysisRegistry   analysis registry
     * @param openSearchClient   OpenSearch client
     * @throws NullPointerException if any argument marked non-null is null
     */
    public SemanticFieldProcessor(@Nullable String tag, @Nullable String description, int batchSize, @NonNull Map<String, Map<String, Object>> pathToFieldConfig, @NonNull MLCommonsClientAccessor mlClientAccessor, @NonNull Environment environment, @NonNull ClusterService clusterService, @NonNull Chunker defaultTextChunker, @NonNull AnalysisRegistry analysisRegistry, @NonNull OpenSearchClient openSearchClient) {
        super(tag, description, batchSize);
        // Validate and assign in one step; the check order matches the parameter order.
        this.pathToFieldConfig = Objects.requireNonNull(pathToFieldConfig, "pathToFieldConfig is marked non-null but is null");
        this.mlCommonsClientAccessor = Objects.requireNonNull(mlClientAccessor, "mlClientAccessor is marked non-null but is null");
        this.environment = Objects.requireNonNull(environment, "environment is marked non-null but is null");
        this.clusterService = Objects.requireNonNull(clusterService, "clusterService is marked non-null but is null");
        this.defaultTextChunker = Objects.requireNonNull(defaultTextChunker, "defaultTextChunker is marked non-null but is null");
        this.analysisRegistry = Objects.requireNonNull(analysisRegistry, "analysisRegistry is marked non-null but is null");
        this.openSearchClient = Objects.requireNonNull(openSearchClient, "openSearchClient is marked non-null but is null");
    }

    /**
     * Synchronous execution is not supported by this processor.
     *
     * @param ingestDocument ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
        final String message = String.format(Locale.ROOT, "Should not try to use %s to ingest a doc synchronously.", PROCESSOR_TYPE);
        throw new UnsupportedOperationException(message);
    }

    /**
     * Processes a single document asynchronously.
     *
     * <p>The document is unflattened and scanned for semantic fields. When none are found
     * the handler receives the document unchanged; otherwise processing continues with the
     * model lookup. Any exception thrown on this thread is reported through the handler.
     *
     * @param ingestDocument document to process
     * @param handler        receives either the processed document or the failure
     */
    public void execute(IngestDocument ingestDocument, BiConsumer<IngestDocument, Exception> handler) {
        EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_EXECUTIONS);
        try {
            ProcessorDocumentUtils.unflattenIngestDoc(ingestDocument);
            final List<SemanticFieldInfo> fieldInfos = this.getSemanticFieldInfo(ingestDocument);
            if (!fieldInfos.isEmpty()) {
                this.fetchModelInfoThenProcess(ingestDocument, fieldInfos, handler);
                return;
            }
            // Nothing to do for documents without semantic fields.
            handler.accept(ingestDocument, null);
        }
        catch (Exception failure) {
            handler.accept(null, failure);
        }
    }

    /**
     * Makes sure every model referenced by the given semantic fields is present in the
     * local model cache, then continues with {@code process}.
     *
     * <p>Model ids already cached are not requested again. When a lookup is needed, the
     * fetched models are added to the cache before processing; a lookup failure is
     * reported through the handler.
     *
     * @param ingestDocument        document being processed
     * @param semanticFieldInfoList semantic fields found in the document
     * @param handler               receives either the processed document or the failure
     */
    private void fetchModelInfoThenProcess(@NonNull IngestDocument ingestDocument, @NonNull List<SemanticFieldInfo> semanticFieldInfoList, @NonNull BiConsumer<IngestDocument, Exception> handler) {
        Objects.requireNonNull(ingestDocument, "ingestDocument is marked non-null but is null");
        Objects.requireNonNull(semanticFieldInfoList, "semanticFieldInfoList is marked non-null but is null");
        Objects.requireNonNull(handler, "handler is marked non-null but is null");
        // Collectors.toSet() gives no mutability guarantee, and the set is mutated below,
        // so collect into an explicit HashSet.
        final Set<String> modelIdsToGetModelInfo = semanticFieldInfoList.stream()
            .map(SemanticFieldInfo::getModelId)
            .collect(Collectors.toCollection(HashSet::new));
        for (String existingModelId : this.modelIdToModelMap.keySet()) {
            modelIdsToGetModelInfo.remove(existingModelId);
        }
        if (modelIdsToGetModelInfo.isEmpty()) {
            this.process(ingestDocument, semanticFieldInfoList, handler);
        } else {
            this.mlCommonsClientAccessor.getModels(modelIdsToGetModelInfo, modelIdToConfigMap -> {
                this.modelIdToModelMap.putAll((Map<String, MLModel>)modelIdToConfigMap);
                this.process(ingestDocument, semanticFieldInfoList, handler);
            }, e -> handler.accept(null, (Exception)e));
        }
    }

    /**
     * Runs the per-document pipeline: chunk the text, optionally reuse embeddings from the
     * existing copy of the document, write model info and generate the missing embeddings.
     *
     * @param ingestDocument        document being processed
     * @param semanticFieldInfoList semantic fields found in the document
     * @param handler               receives either the processed document or the failure
     */
    private void process(@NonNull IngestDocument ingestDocument, @NonNull List<SemanticFieldInfo> semanticFieldInfoList, @NonNull BiConsumer<IngestDocument, Exception> handler) {
        Objects.requireNonNull(ingestDocument, "ingestDocument is marked non-null but is null");
        Objects.requireNonNull(semanticFieldInfoList, "semanticFieldInfoList is marked non-null but is null");
        Objects.requireNonNull(handler, "handler is marked non-null but is null");
        if (this.chunk(ingestDocument, semanticFieldInfoList)) {
            EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_CHUNKING_EXECUTIONS);
        }
        final Set<String> reuseCandidateIds = this.getDocIdsToCheckReuse(List.of(semanticFieldInfoList));
        final Object indexName = ingestDocument.getSourceAndMetadata().get("_index");
        if (!this.shouldCheckExistDoc(reuseCandidateIds, indexName)) {
            // No reuse check needed: every field gets a freshly generated embedding.
            this.setModelInfo(ingestDocument, semanticFieldInfoList);
            this.generateAndSetEmbedding(ingestDocument, semanticFieldInfoList, handler);
            return;
        }
        this.getExistingDocs(reuseCandidateIds, (String)indexName, (existingDocs, exception) -> {
            if (exception != null) {
                handler.accept(null, this.wrapGetExistDocException(exception));
                return;
            }
            final List<SemanticFieldInfo> pendingFields = this.applyReusableEmbeddingsAndFilterUnprocessedFields(ingestDocument, semanticFieldInfoList, existingDocs);
            if (pendingFields.isEmpty()) {
                // Every field could reuse its existing semantic info.
                handler.accept(ingestDocument, null);
                return;
            }
            this.setModelInfo(ingestDocument, pendingFields);
            this.generateAndSetEmbedding(ingestDocument, pendingFields, handler);
        });
    }

    /**
     * Wraps a failure of the existing-document lookup in a RuntimeException whose message
     * explains the context; the original exception is kept as the cause.
     *
     * @param exception failure reported by the lookup
     * @return the wrapping exception
     */
    private Exception wrapGetExistDocException(@NonNull Exception exception) {
        Objects.requireNonNull(exception, "exception is marked non-null but is null");
        final String message = String.format(Locale.ROOT, "Failed to get existing docs to check embedding reusability for the semantic field. Error: %s", exception.getMessage());
        return new RuntimeException(message, exception);
    }

    /**
     * Copies reusable semantic info from the existing documents into the ingest document
     * and returns the fields that still need an embedding generated.
     *
     * @param ingestDocument        document being processed
     * @param semanticFieldInfoList semantic fields found in the document
     * @param existingDocs          existing document sources keyed by document id
     * @return unmodifiable list of the fields whose existing semantic info could not be reused
     */
    private List<SemanticFieldInfo> applyReusableEmbeddingsAndFilterUnprocessedFields(@NonNull IngestDocument ingestDocument, @NonNull List<SemanticFieldInfo> semanticFieldInfoList, @NonNull Map<String, Map<String, Object>> existingDocs) {
        Objects.requireNonNull(ingestDocument, "ingestDocument is marked non-null but is null");
        Objects.requireNonNull(semanticFieldInfoList, "semanticFieldInfoList is marked non-null but is null");
        Objects.requireNonNull(existingDocs, "existingDocs is marked non-null but is null");
        return semanticFieldInfoList.stream()
            .filter(info -> !this.reuseExistingSemanticInfo(ingestDocument, info, existingDocs))
            .toList();
    }

    /**
     * Reuses the semantic info of the existing document for one field when the field opted
     * in, and the raw value, the model id and (with chunking enabled) the chunk texts are
     * all unchanged. On success the existing semantic info is written into the ingest document.
     *
     * @return true when the existing semantic info was reused, false when a new embedding is needed
     */
    private boolean reuseExistingSemanticInfo(IngestDocument ingestDocument, SemanticFieldInfo info, Map<String, Map<String, Object>> existingDocs) {
        if (!info.getSkipExistingEmbedding().booleanValue() || info.getDocId() == null) {
            return false;
        }
        final Map<String, Object> existingDoc = existingDocs.get(info.getDocId());
        if (existingDoc == null) {
            return false;
        }
        // The raw field value must be unchanged.
        final Object existingValue = ProcessorUtils.getValueFromSourceByFullPath(existingDoc, info.getSemanticFieldFullPathInDoc());
        if (!Objects.equals(info.getValue(), existingValue)) {
            return false;
        }
        // The existing embedding must come from the same model.
        final Object modelInfo = ProcessorUtils.getValueFromSourceByFullPath(existingDoc, info.getFullPathForModelInfoInDoc());
        if (!(modelInfo instanceof Map<?, ?> modelMap)) {
            return false;
        }
        if (!Objects.equals(modelMap.get("id"), info.getModelId())) {
            return false;
        }
        if (info.getChunkingEnabled().booleanValue()) {
            // With chunking, the stored chunk texts must match the freshly computed chunks.
            final Object chunksObj = ProcessorUtils.getValueFromSourceByFullPath(existingDoc, info.getFullPathForChunksInDoc());
            if (!(chunksObj instanceof List<?> existingChunks)) {
                return false;
            }
            final List<Object> existingChunkedTexts = new ArrayList<>();
            for (Object existingChunk : existingChunks) {
                if (existingChunk instanceof Map<?, ?> chunkMap) {
                    existingChunkedTexts.add(chunkMap.get("text"));
                }
            }
            if (!Objects.equals(existingChunkedTexts, info.getChunks())) {
                return false;
            }
        }
        final Object semanticInfo = ProcessorUtils.getValueFromSourceByFullPath(existingDoc, info.getSemanticInfoFullPathInDoc());
        ingestDocument.setFieldValue(info.getSemanticInfoFullPathInDoc(), semanticInfo);
        return true;
    }

    /**
     * Tells whether the existing documents have to be fetched for an embedding reuse check.
     *
     * @param docIdsToCheckReuse ids of the documents that opted into reuse
     * @param index              value of the {@code _index} metadata entry
     * @return true when there is at least one id and the index is a String
     */
    private boolean shouldCheckExistDoc(@NonNull Set<String> docIdsToCheckReuse, Object index) {
        Objects.requireNonNull(docIdsToCheckReuse, "docIdsToCheckReuse is marked non-null but is null");
        if (docIdsToCheckReuse.isEmpty()) {
            return false;
        }
        return index instanceof String;
    }

    /**
     * Collects the ids of all documents having at least one semantic field that opted
     * into reusing existing embeddings and carries a document id.
     *
     * @param semanticFieldInfoList semantic field lists, one per document
     * @return mutable set of document ids to look up
     */
    private Set<String> getDocIdsToCheckReuse(@NonNull Collection<List<SemanticFieldInfo>> semanticFieldInfoList) {
        Objects.requireNonNull(semanticFieldInfoList, "semanticFieldInfoList is marked non-null but is null");
        return semanticFieldInfoList.stream()
            .flatMap(List::stream)
            .filter(info -> info.getSkipExistingEmbedding().booleanValue() && info.getDocId() != null)
            .map(SemanticFieldInfo::getDocId)
            .collect(Collectors.toCollection(HashSet::new));
    }

    /**
     * Writes the model info (id, type, name) of each field's model into the document.
     *
     * <p>The info is built for every cached model; model types are computed once and
     * remembered in the model type cache.
     *
     * @param ingestDocument        document being processed
     * @param semanticFieldInfoList semantic fields that need model info
     */
    private void setModelInfo(@NonNull IngestDocument ingestDocument, @NonNull List<SemanticFieldInfo> semanticFieldInfoList) {
        Objects.requireNonNull(ingestDocument, "ingestDocument is marked non-null but is null");
        Objects.requireNonNull(semanticFieldInfoList, "semanticFieldInfoList is marked non-null but is null");
        final Map<String, Map<String, String>> infoByModelId = new HashMap<>();
        for (Map.Entry<String, MLModel> cachedModel : this.modelIdToModelMap.entrySet()) {
            final String id = cachedModel.getKey();
            final MLModel model = cachedModel.getValue();
            // The type cache never holds null values, so a null lookup means "not cached yet".
            String type = this.modelIdToModelTypeMap.get(id);
            if (type == null) {
                type = SemanticMLModelUtils.getModelType(model);
                this.modelIdToModelTypeMap.put(id, type);
            }
            final Map<String, String> info = new HashMap<>();
            info.put("id", id);
            info.put("type", type);
            info.put("name", model.getName());
            infoByModelId.put(id, info);
        }
        for (SemanticFieldInfo fieldInfo : semanticFieldInfoList) {
            final Object modelInfo = infoByModelId.get(fieldInfo.getModelId());
            ingestDocument.setFieldValue(fieldInfo.getFullPathForModelInfoInDoc(), modelInfo);
        }
    }

    /**
     * Generates the embeddings for the given fields and stores them in the document.
     *
     * <p>The handler is invoked exactly once from the embedding callback: with the failure
     * when storing the embeddings throws, otherwise with the processed document.
     *
     * @param ingestDocument        document being processed
     * @param semanticFieldInfoList semantic fields that need an embedding
     * @param handler               receives either the processed document or the failure
     */
    private void generateAndSetEmbedding(@NonNull IngestDocument ingestDocument, @NonNull List<SemanticFieldInfo> semanticFieldInfoList, @NonNull BiConsumer<IngestDocument, Exception> handler) {
        Objects.requireNonNull(ingestDocument, "ingestDocument is marked non-null but is null");
        Objects.requireNonNull(semanticFieldInfoList, "semanticFieldInfoList is marked non-null but is null");
        Objects.requireNonNull(handler, "handler is marked non-null but is null");
        Map<String, Set<String>> modelIdToRawDataMap = this.groupRawDataByModelId(semanticFieldInfoList);
        this.generateEmbedding(modelIdToRawDataMap, modelIdValueToEmbeddingMap -> {
            try {
                this.setInference(ingestDocument, semanticFieldInfoList, (Map<Pair<String, String>, Pair<Object, Exception>>)modelIdValueToEmbeddingMap);
            }
            catch (Exception e) {
                handler.accept(null, e);
                // Stop here: falling through would report success for a failed document
                // and invoke the handler a second time.
                return;
            }
            handler.accept(ingestDocument, null);
        });
    }

    /**
     * Groups the chunk texts of all given fields by the id of the model that embeds them.
     * Duplicate texts for the same model are collapsed.
     *
     * @param semanticFieldInfoLists semantic field lists, one per document
     * @return model id mapped to the distinct texts to embed with that model
     */
    private Map<String, Set<String>> groupRawDataByModelId(@NonNull Collection<List<SemanticFieldInfo>> semanticFieldInfoLists) {
        Objects.requireNonNull(semanticFieldInfoLists, "semanticFieldInfoLists is marked non-null but is null");
        final Map<String, Set<String>> rawDataByModelId = new HashMap<>();
        for (List<SemanticFieldInfo> fieldInfos : semanticFieldInfoLists) {
            for (SemanticFieldInfo fieldInfo : fieldInfos) {
                final String modelId = fieldInfo.getModelId();
                Set<String> rawData = rawDataByModelId.get(modelId);
                if (rawData == null) {
                    rawData = new HashSet<>();
                    rawDataByModelId.put(modelId, rawData);
                }
                rawData.addAll(fieldInfo.getChunks());
            }
        }
        return rawDataByModelId;
    }

    /**
     * Single-document convenience overload: groups the chunk texts of one field list by model id.
     *
     * @param semanticFieldInfoList semantic fields of one document
     * @return model id mapped to the distinct texts to embed with that model
     */
    private Map<String, Set<String>> groupRawDataByModelId(@NonNull List<SemanticFieldInfo> semanticFieldInfoList) {
        Objects.requireNonNull(semanticFieldInfoList, "semanticFieldInfoList is marked non-null but is null");
        // Typed as Collection so the call below resolves to the collection overload.
        final Collection<List<SemanticFieldInfo>> singleDocument = Collections.singleton(semanticFieldInfoList);
        return this.groupRawDataByModelId(singleDocument);
    }

    /**
     * Stores the generated embedding of every chunk of every field in the document.
     *
     * <p>For models that are not dense, the embedding is pruned first unless the field's
     * prune type is {@code NONE}. The prune type defaults to {@code MAX_RATIO} and the
     * prune ratio to {@code DEFAULT_PRUNE_RATIO} when the field config does not set them.
     *
     * @param ingestDocument             document being processed
     * @param semanticFieldInfoList      semantic fields that need an embedding
     * @param modelIdValueToEmbeddingMap inference outcome keyed by (model id, text); the left
     *                                   side is the embedding, the right side the failure
     * @throws Exception the failure recorded for a chunk, or IllegalStateException when no
     *                   outcome exists for a chunk
     */
    private void setInference(@NonNull IngestDocument ingestDocument, @NonNull List<SemanticFieldInfo> semanticFieldInfoList, @NonNull Map<Pair<String, String>, Pair<Object, Exception>> modelIdValueToEmbeddingMap) throws Exception {
        Objects.requireNonNull(ingestDocument, "ingestDocument is marked non-null but is null");
        Objects.requireNonNull(semanticFieldInfoList, "semanticFieldInfoList is marked non-null but is null");
        Objects.requireNonNull(modelIdValueToEmbeddingMap, "modelIdValueToEmbeddingMap is marked non-null but is null");
        for (SemanticFieldInfo semanticFieldInfo : semanticFieldInfoList) {
            final String modelId = semanticFieldInfo.getModelId();
            final boolean isDenseModel = SemanticMLModelUtils.isDenseModel(this.modelIdToModelTypeMap.get(modelId));
            final List<String> chunks = semanticFieldInfo.getChunks();
            for (int i = 0; i < chunks.size(); ++i) {
                final String chunk = chunks.get(i);
                // Look the outcome up once and fail with context instead of a bare NPE when it is missing.
                final Pair<Object, Exception> outcome = modelIdValueToEmbeddingMap.get(Pair.of(modelId, chunk));
                if (outcome == null) {
                    throw new IllegalStateException(String.format(Locale.ROOT, "No embedding result found for chunk %d of the semantic field at path: %s with model: %s.", i, semanticFieldInfo.getSemanticFieldFullPathInDoc(), modelId));
                }
                final Exception exception = outcome.getRight();
                if (exception != null) {
                    throw exception;
                }
                // Kept as Object: only embeddings of non-dense models are treated as token-weight maps.
                Object embedding = outcome.getLeft();
                if (!isDenseModel) {
                    final SparseEncodingConfig sparseEncodingConfig = semanticFieldInfo.getSparseEncodingConfig();
                    final PruneType pruneType = sparseEncodingConfig != null && sparseEncodingConfig.getPruneType() != null ? sparseEncodingConfig.getPruneType() : PruneType.MAX_RATIO;
                    if (!PruneType.NONE.equals((Object)pruneType)) {
                        final float pruneRatio = sparseEncodingConfig != null && sparseEncodingConfig.getPruneRatio() != null ? sparseEncodingConfig.getPruneRatio().floatValue() : DEFAULT_PRUNE_RATIO;
                        @SuppressWarnings("unchecked")
                        final Map<String, Float> sparseVector = (Map<String, Float>)embedding;
                        embedding = PruneUtils.pruneSparseVector(pruneType, pruneRatio, sparseVector);
                    }
                }
                final String embeddingFullPath = semanticFieldInfo.getFullPathForEmbeddingInDoc(i);
                ingestDocument.setFieldValue(embeddingFullPath, embedding);
            }
        }
    }

    /**
     * Walks the document along every configured semantic field path and collects one
     * SemanticFieldInfo per occurrence found.
     *
     * @param ingestDocument document to scan
     * @return the semantic fields found, possibly empty
     */
    private List<SemanticFieldInfo> getSemanticFieldInfo(IngestDocument ingestDocument) {
        final List<SemanticFieldInfo> collected = new ArrayList<>();
        final Map<String, Object> sourceAndMetadata = ingestDocument.getSourceAndMetadata();
        final String docId = sourceAndMetadata != null ? (String)sourceAndMetadata.get("_id") : null;
        for (Map.Entry<String, Map<String, Object>> fieldEntry : this.pathToFieldConfig.entrySet()) {
            // Paths are dot-separated; each part is one level in the document.
            final String[] pathParts = fieldEntry.getKey().split("\\.");
            this.collectSemanticFieldInfo(sourceAndMetadata, pathParts, fieldEntry.getValue(), 0, "", collected, docId);
        }
        return collected;
    }

    /**
     * Computes the chunks of every field. Fields with chunking enabled are run through
     * their chunkers (the default chunker when none is configured) and the chunk texts
     * are written to the document; other fields get their whole value as a single chunk.
     *
     * @param ingestDocument        document being processed
     * @param semanticFieldInfoList semantic fields found in the document
     * @return true when at least one field has chunking enabled
     */
    private boolean chunk(@NonNull IngestDocument ingestDocument, @NonNull List<SemanticFieldInfo> semanticFieldInfoList) {
        Objects.requireNonNull(ingestDocument, "ingestDocument is marked non-null but is null");
        Objects.requireNonNull(semanticFieldInfoList, "semanticFieldInfoList is marked non-null but is null");
        final int maxTokenCount = ProcessorUtils.getMaxTokenCount(ingestDocument.getSourceAndMetadata(), this.environment.settings(), this.clusterService);
        boolean anyFieldChunked = false;
        for (SemanticFieldInfo fieldInfo : semanticFieldInfoList) {
            if (!fieldInfo.getChunkingEnabled().booleanValue()) {
                // Without chunking the whole value is embedded as one piece.
                fieldInfo.setChunks(List.of(fieldInfo.getValue()));
                continue;
            }
            anyFieldChunked = true;
            final boolean hasNoChunkers = fieldInfo.getChunkers() == null || fieldInfo.getChunkers().isEmpty();
            if (hasNoChunkers) {
                fieldInfo.setChunkers(List.of(this.defaultTextChunker));
            }
            this.executeChunkers(fieldInfo, maxTokenCount);
            this.setChunkedText(ingestDocument, fieldInfo);
        }
        return anyFieldChunked;
    }

    /**
     * Writes the chunk texts of a field into the document as a list of
     * {@code {"text": ...}} entries at the field's chunks path.
     *
     * @param ingestDocument    document being processed
     * @param semanticFieldInfo field whose chunks are written
     */
    private void setChunkedText(@NonNull IngestDocument ingestDocument, @NonNull SemanticFieldInfo semanticFieldInfo) {
        Objects.requireNonNull(ingestDocument, "ingestDocument is marked non-null but is null");
        Objects.requireNonNull(semanticFieldInfo, "semanticFieldInfo is marked non-null but is null");
        final List<Map<String, Object>> chunkEntries = new ArrayList<>();
        for (String chunkText : semanticFieldInfo.getChunks()) {
            // Mutable maps on purpose: further entries are written under chunk paths later on.
            final Map<String, Object> chunkEntry = new HashMap<>();
            chunkEntry.put("text", chunkText);
            chunkEntries.add(chunkEntry);
        }
        ingestDocument.setFieldValue(semanticFieldInfo.getFullPathForChunksInDoc(), chunkEntries);
    }

    /**
     * Applies the field's chunkers one after another. The first chunker splits the raw
     * field value; each following chunker splits the chunks produced so far. The result
     * of every step replaces the field's chunks.
     *
     * @param semanticFieldInfo field to chunk
     * @param maxTokenCount     max token count passed to every chunker as a runtime parameter
     */
    private void executeChunkers(@NonNull SemanticFieldInfo semanticFieldInfo, int maxTokenCount) {
        Objects.requireNonNull(semanticFieldInfo, "semanticFieldInfo is marked non-null but is null");
        for (Chunker chunker : semanticFieldInfo.getChunkers()) {
            final List<String> previousChunks = semanticFieldInfo.getChunks();
            // No chunks yet means this is the first chunker and the input is the single raw value.
            final boolean firstPass = previousChunks == null;
            final Map<String, Object> runtimeParameters = new HashMap<>();
            runtimeParameters.put("max_token_count", maxTokenCount);
            runtimeParameters.put(Chunker.CHUNK_STRING_COUNT_FIELD, firstPass ? 1 : previousChunks.size());
            runtimeParameters.put(Chunker.MAX_CHUNK_LIMIT_FIELD, chunker.getMaxChunkLimit());
            final List<String> producedChunks = new ArrayList<String>(
                firstPass
                    ? ChunkUtils.chunkString(chunker, semanticFieldInfo.getValue(), runtimeParameters)
                    : ChunkUtils.chunkList(chunker, previousChunks, runtimeParameters)
            );
            semanticFieldInfo.setChunks(producedChunks);
        }
    }

    /**
     * Recursively follows one semantic field path through the document and records a
     * SemanticFieldInfo for every value found at the end of the path.
     *
     * <p>Maps are descended by the path part at the current depth. Lists are fanned out
     * element by element at the same depth, with the element index appended to the path
     * in the document. Null nodes and nodes that are neither map nor list before the end
     * of the path are ignored.
     *
     * @param node                  current node in the document, may be null
     * @param pathParts             parts of the semantic field path in the mapping
     * @param fieldConfig           config of the semantic field
     * @param depth                 number of path parts already consumed
     * @param currentPath           path of the current node in the document
     * @param semanticFieldInfoList accumulator for the fields found
     * @param docId                 id of the document, may be null
     * @throws IllegalArgumentException when the value at the end of the path is not a String
     */
    private void collectSemanticFieldInfo(@Nullable Object node, @NonNull String[] pathParts, @NonNull Map<String, Object> fieldConfig, int depth, @NonNull String currentPath, @NonNull List<SemanticFieldInfo> semanticFieldInfoList, @Nullable String docId) {
        Objects.requireNonNull(pathParts, "pathParts is marked non-null but is null");
        Objects.requireNonNull(fieldConfig, "fieldConfig is marked non-null but is null");
        Objects.requireNonNull(currentPath, "currentPath is marked non-null but is null");
        Objects.requireNonNull(semanticFieldInfoList, "semanticFieldInfoList is marked non-null but is null");
        if (node == null || depth > pathParts.length) {
            return;
        }
        if (depth == pathParts.length) {
            // End of the path: the node is the semantic field value itself.
            final String pathToSemanticField = String.join((CharSequence)".", pathParts);
            if (!(node instanceof String)) {
                throw new IllegalArgumentException(String.format(Locale.ROOT, "Expect the semantic field at path: %s to be a string but found: %s.", pathToSemanticField, node.getClass()));
            }
            final SemanticFieldInfo leafInfo = SemanticFieldInfo.builder()
                .value(node.toString())
                .modelId(SemanticMappingUtils.getModelId(fieldConfig, pathToSemanticField))
                .semanticFieldFullPathInMapping(String.join((CharSequence)".", pathParts))
                .semanticFieldFullPathInDoc(currentPath)
                .semanticInfoFullPathInDoc(SemanticMappingUtils.getSemanticInfoFieldFullPath(fieldConfig, currentPath, pathToSemanticField))
                .chunkingEnabled(SemanticMappingUtils.isChunkingEnabled(fieldConfig, pathToSemanticField))
                .sparseEncodingConfig(new SparseEncodingConfig(fieldConfig))
                .skipExistingEmbedding(SemanticMappingUtils.isSkipExistingEmbeddingEnabled(fieldConfig, pathToSemanticField))
                .docId(docId)
                .build();
            leafInfo.setChunkingConfig(new ChunkingConfig(fieldConfig), this.analysisRegistry);
            semanticFieldInfoList.add(leafInfo);
            return;
        }
        // From here on depth < pathParts.length, so there is a path part left to consume.
        if (node instanceof Map<?, ?> mapNode) {
            final String pathPart = pathParts[depth];
            final String childPath = currentPath.isEmpty() ? pathPart : currentPath + "." + pathPart;
            this.collectSemanticFieldInfo(mapNode.get(pathPart), pathParts, fieldConfig, depth + 1, childPath, semanticFieldInfoList, docId);
        } else if (node instanceof List<?> listNode) {
            for (int position = 0; position < listNode.size(); ++position) {
                // Same depth: a list does not consume a path part, only the doc path gets the index.
                final String elementPath = currentPath + "." + position;
                this.collectSemanticFieldInfo(listNode.get(position), pathParts, fieldConfig, depth, elementPath, semanticFieldInfoList, docId);
            }
        }
    }

    /**
     * Processes a sub-batch of documents.
     *
     * <p>Documents without an ingest document or without semantic fields are passed
     * through untouched. Failures are recorded on the impacted wrappers and the handler
     * is still invoked with the full wrapper list.
     *
     * @param ingestDocumentWrappers wrappers of the documents in this sub-batch
     * @param handler                receives the wrapper list once processing is done
     */
    public void subBatchExecute(List<IngestDocumentWrapper> ingestDocumentWrappers, Consumer<List<IngestDocumentWrapper>> handler) {
        EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_EXECUTIONS);
        if (ingestDocumentWrappers == null || ingestDocumentWrappers.isEmpty()) {
            handler.accept(ingestDocumentWrappers);
            return;
        }
        try {
            final Map<IngestDocumentWrapper, List<SemanticFieldInfo>> fieldInfosByDoc = new HashMap<>();
            for (IngestDocumentWrapper wrapper : ingestDocumentWrappers) {
                final IngestDocument document = wrapper.getIngestDocument();
                if (document != null) {
                    ProcessorDocumentUtils.unflattenIngestDoc(document);
                    final List<SemanticFieldInfo> fieldInfos = this.getSemanticFieldInfo(document);
                    if (!fieldInfos.isEmpty()) {
                        fieldInfosByDoc.put(wrapper, fieldInfos);
                    }
                }
            }
            if (fieldInfosByDoc.isEmpty()) {
                // No document in this sub-batch has a semantic field.
                handler.accept(ingestDocumentWrappers);
                return;
            }
            try {
                this.fetchModelInfoThenBatchProcess(ingestDocumentWrappers, fieldInfosByDoc, handler);
            }
            catch (Exception processingFailure) {
                // Only the documents that actually have semantic fields are impacted.
                this.addExceptionToImpactedDocs(fieldInfosByDoc.keySet(), processingFailure);
                handler.accept(ingestDocumentWrappers);
            }
        }
        catch (Exception failure) {
            this.addExceptionToImpactedDocs(new HashSet<IngestDocumentWrapper>(ingestDocumentWrappers), failure);
            handler.accept(ingestDocumentWrappers);
        }
    }

    /**
     * Batch counterpart of the model lookup: makes sure every model referenced by the
     * sub-batch is cached, then continues with {@code batchProcess}. A lookup failure is
     * recorded on every document that has semantic fields and the handler is invoked.
     *
     * @param ingestDocumentWrappers    all wrappers of the sub-batch
     * @param docToSemanticFieldInfoMap semantic fields per document that has any
     * @param handler                   receives the wrapper list once processing is done
     */
    private void fetchModelInfoThenBatchProcess(@NonNull List<IngestDocumentWrapper> ingestDocumentWrappers, @NonNull Map<IngestDocumentWrapper, List<SemanticFieldInfo>> docToSemanticFieldInfoMap, @NonNull Consumer<List<IngestDocumentWrapper>> handler) {
        Objects.requireNonNull(ingestDocumentWrappers, "ingestDocumentWrappers is marked non-null but is null");
        Objects.requireNonNull(docToSemanticFieldInfoMap, "docToSemanticFieldInfoMap is marked non-null but is null");
        Objects.requireNonNull(handler, "handler is marked non-null but is null");
        final Set<String> missingModelIds = new HashSet<>();
        for (List<SemanticFieldInfo> fieldInfos : docToSemanticFieldInfoMap.values()) {
            for (SemanticFieldInfo fieldInfo : fieldInfos) {
                missingModelIds.add(fieldInfo.getModelId());
            }
        }
        // Models that are already cached do not need to be fetched again.
        this.modelIdToModelMap.keySet().forEach(missingModelIds::remove);
        if (missingModelIds.isEmpty()) {
            this.batchProcess(ingestDocumentWrappers, docToSemanticFieldInfoMap, handler);
            return;
        }
        this.mlCommonsClientAccessor.getModels(missingModelIds, modelIdToConfigMap -> {
            this.modelIdToModelMap.putAll((Map<String, MLModel>)modelIdToConfigMap);
            this.batchProcess(ingestDocumentWrappers, docToSemanticFieldInfoMap, handler);
        }, e -> {
            this.addExceptionToImpactedDocs((Collection<IngestDocumentWrapper>)docToSemanticFieldInfoMap.keySet(), (Exception)e);
            handler.accept(ingestDocumentWrappers);
        });
    }

    /**
     * Runs the batch pipeline: chunk every document, optionally reuse embeddings from the
     * existing copies of the documents, write model info and generate the missing embeddings.
     *
     * <p>A chunking failure is logged and recorded on the affected wrapper only.
     * NOTE(review): such a document stays in {@code docToSemanticFieldInfoMap} and is still
     * handed to the later steps - confirm those steps tolerate it.
     *
     * @param ingestDocumentWrappers    all wrappers of the sub-batch
     * @param docToSemanticFieldInfoMap semantic fields per document that has any
     * @param handler                   receives the wrapper list once processing is done
     */
    private void batchProcess(@NonNull List<IngestDocumentWrapper> ingestDocumentWrappers, @NonNull Map<IngestDocumentWrapper, List<SemanticFieldInfo>> docToSemanticFieldInfoMap, @NonNull Consumer<List<IngestDocumentWrapper>> handler) {
        Objects.requireNonNull(ingestDocumentWrappers, "ingestDocumentWrappers is marked non-null but is null");
        Objects.requireNonNull(docToSemanticFieldInfoMap, "docToSemanticFieldInfoMap is marked non-null but is null");
        Objects.requireNonNull(handler, "handler is marked non-null but is null");
        boolean isChunked = false;
        for (Map.Entry<IngestDocumentWrapper, List<SemanticFieldInfo>> entry : docToSemanticFieldInfoMap.entrySet()) {
            try {
                if (this.chunk(entry.getKey().getIngestDocument(), entry.getValue())) {
                    isChunked = true;
                }
            }
            catch (Exception e) {
                this.logAndUpdate(entry.getKey(), "chunk", e);
            }
        }
        if (isChunked) {
            EventStatsManager.increment(EventStatName.SEMANTIC_FIELD_PROCESSOR_CHUNKING_EXECUTIONS);
        }
        final Set<String> docIdsToCheckReuse = this.getDocIdsToCheckReuse(docToSemanticFieldInfoMap.values());
        // Wrappers may carry a null ingest document (subBatchExecute skips those), so read
        // the index from the first wrapper that has one instead of blindly using the first.
        final Object index = ingestDocumentWrappers.stream()
            .map(IngestDocumentWrapper::getIngestDocument)
            .filter(Objects::nonNull)
            .findFirst()
            .map(doc -> doc.getSourceAndMetadata().get("_index"))
            .orElse(null);
        if (this.shouldCheckExistDoc(docIdsToCheckReuse, index)) {
            this.getExistingDocs(docIdsToCheckReuse, (String)index, (existingDocs, exception) -> {
                if (exception != null) {
                    this.addExceptionToImpactedDocs(docToSemanticFieldInfoMap.keySet(), this.wrapGetExistDocException(exception));
                    handler.accept(ingestDocumentWrappers);
                    return;
                }
                final Map<IngestDocumentWrapper, List<SemanticFieldInfo>> docToFieldsNeedingEmbedding = new HashMap<>();
                docToSemanticFieldInfoMap.forEach((docWrapper, infos) -> {
                    List<SemanticFieldInfo> fieldsNeedingEmbedding = this.applyReusableEmbeddingsAndFilterUnprocessedFields(docWrapper.getIngestDocument(), infos, existingDocs);
                    if (!fieldsNeedingEmbedding.isEmpty()) {
                        docToFieldsNeedingEmbedding.put(docWrapper, fieldsNeedingEmbedding);
                    }
                });
                if (docToFieldsNeedingEmbedding.isEmpty()) {
                    // Every field of every document could reuse its existing semantic info.
                    handler.accept(ingestDocumentWrappers);
                } else {
                    this.batchSetModelInfo(docToFieldsNeedingEmbedding);
                    this.batchGenerateAndSetEmbedding(ingestDocumentWrappers, docToFieldsNeedingEmbedding, handler);
                }
            });
            return;
        }
        this.batchSetModelInfo(docToSemanticFieldInfoMap);
        this.batchGenerateAndSetEmbedding(ingestDocumentWrappers, docToSemanticFieldInfoMap, handler);
    }

    /**
     * Calls {@code setModelInfo} for every document in the map, isolating failures per document.
     *
     * <p>An exception raised for one document is handed to {@code logAndUpdate} with the
     * operation name {@code "set model info"}; the remaining documents are still processed.
     *
     * @param docToFieldsNeedingEmbedding documents mapped to the semantic fields to set model info for
     */
    private void batchSetModelInfo(Map<IngestDocumentWrapper, List<SemanticFieldInfo>> docToFieldsNeedingEmbedding) {
        for (Map.Entry<IngestDocumentWrapper, List<SemanticFieldInfo>> docEntry : docToFieldsNeedingEmbedding.entrySet()) {
            IngestDocumentWrapper wrapper = docEntry.getKey();
            try {
                this.setModelInfo(wrapper.getIngestDocument(), docEntry.getValue());
            } catch (Exception failure) {
                this.logAndUpdate(wrapper, "set model info", failure);
            }
        }
    }

    /**
     * Logs a per-document failure and records it on the wrapper if no exception is recorded yet.
     *
     * <p>The error is always logged. The wrapper is only updated when it does not already carry
     * an exception, so the first recorded failure is kept.
     *
     * @param wrapper   wrapper of the document the operation failed for
     * @param operation short description of the failed operation, inserted into the log message
     * @param e         the failure
     */
    private void logAndUpdate(@NonNull IngestDocumentWrapper wrapper, @NonNull String operation, @NonNull Exception e) {
        Objects.requireNonNull(wrapper, "wrapper is marked non-null but is null");
        Objects.requireNonNull(operation, "operation is marked non-null but is null");
        Objects.requireNonNull(e, "e is marked non-null but is null");
        IngestDocument failedDocument = wrapper.getIngestDocument();
        String message = String.format(
            Locale.ROOT,
            "Failed to %s ingest document %s. Root cause: %s",
            operation,
            failedDocument.toString(),
            e.getMessage()
        );
        log.error(message, (Throwable)e);
        if (wrapper.getException() != null) {
            // An earlier failure is already recorded for this document; keep it.
            return;
        }
        wrapper.update(failedDocument, e);
    }

    /**
     * Looks up the given document ids in {@code index} with a single multi-get request.
     *
     * <p>The request is executed through {@code openSearchClient} and the outcome is reported
     * via {@code handler}:
     * <ul>
     *   <li>on success: a map from document id to its source map, and a {@code null} exception.
     *       Items without a response, or whose response reports that the document does not exist,
     *       are left out. An empty map is passed when the response has no items;</li>
     *   <li>on failure: a {@code null} map and the exception.</li>
     * </ul>
     *
     * @param docIds  ids of the documents to fetch
     * @param index   index to fetch the documents from
     * @param handler callback receiving either the found documents or the failure
     */
    private void getExistingDocs(@NonNull Set<String> docIds, @NonNull String index, @NonNull BiConsumer<Map<String, Map<String, Object>>, Exception> handler) {
        Objects.requireNonNull(docIds, "docIds is marked non-null but is null");
        Objects.requireNonNull(index, "index is marked non-null but is null");
        Objects.requireNonNull(handler, "handler is marked non-null but is null");
        MultiGetRequest multiGetRequest = new MultiGetRequest();
        for (String docId : docIds) {
            multiGetRequest.add(index, docId);
        }
        // No raw casts on the action/request: they would erase the response type the lambda relies on.
        this.openSearchClient.execute(MultiGetAction.INSTANCE, multiGetRequest, ActionListener.wrap(response -> {
            MultiGetItemResponse[] items = response.getResponses();
            if (items == null || items.length == 0) {
                handler.accept(Collections.emptyMap(), null);
                return;
            }
            Map<String, Map<String, Object>> existingDocs = new HashMap<>();
            for (MultiGetItemResponse item : items) {
                if (item.getResponse() == null || !item.getResponse().isExists()) {
                    continue;
                }
                existingDocs.put(item.getId(), item.getResponse().getSourceAsMap());
            }
            handler.accept(existingDocs, null);
        }, e -> handler.accept(null, e)));
    }

    /**
     * Generates embeddings for the given raw texts, issuing one inference request per model id.
     *
     * <p>Each result is stored under the key {@code (modelId, rawText)}. The value is a pair of
     * {@code (embedding, null)} on success or {@code (null, exception)} when the request for that
     * model failed. {@code onComplete} is invoked once, after the listener of every request has
     * reported back.
     *
     * <p>When {@code modelIdToRawDataMap} is empty no request is sent, so {@code onComplete} is
     * invoked immediately with an empty map; otherwise the callback would never fire.
     *
     * @param modelIdToRawDataMap raw texts to embed, grouped by the id of the model to use
     * @param onComplete          callback receiving the per-(model id, text) results
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private void generateEmbedding(@NonNull Map<String, Set<String>> modelIdToRawDataMap, @NonNull Consumer<Map<Pair<String, String>, Pair<Object, Exception>>> onComplete) {
        Objects.requireNonNull(modelIdToRawDataMap, "modelIdToRawDataMap is marked non-null but is null");
        Objects.requireNonNull(onComplete, "onComplete is marked non-null but is null");
        Map<Pair<String, String>, Pair<Object, Exception>> modelIdValueToEmbeddingMap = new ConcurrentHashMap<>();
        if (modelIdToRawDataMap.isEmpty()) {
            // Nothing to infer: complete right away instead of waiting for listeners that never run.
            onComplete.accept(modelIdValueToEmbeddingMap);
            return;
        }
        AtomicInteger pendingRequests = new AtomicInteger(modelIdToRawDataMap.size());
        // Shared by the success and failure paths: the last request to finish triggers the callback.
        Runnable markRequestDone = () -> {
            if (pendingRequests.decrementAndGet() == 0) {
                onComplete.accept(modelIdValueToEmbeddingMap);
            }
        };
        for (Map.Entry<String, Set<String>> entry : modelIdToRawDataMap.entrySet()) {
            String modelId = entry.getKey();
            boolean isDenseModel = SemanticMLModelUtils.isDenseModel(this.modelIdToModelTypeMap.get(modelId));
            // Copy into a list so results can be matched to inputs by position.
            List<String> values = new ArrayList<>(entry.getValue());
            TextInferenceRequest textInferenceRequest = TextInferenceRequest.builder().inputTexts(values).modelId(modelId).build();
            // The same listener serves both inference calls, whose result element types differ,
            // so it is kept raw and the result is inspected as a plain list.
            ActionListener listener = ActionListener.wrap(embeddings -> {
                List<?> formattedEmbeddings;
                if (isDenseModel) {
                    formattedEmbeddings = (List<?>) embeddings;
                } else {
                    formattedEmbeddings = TokenWeightUtil.fetchListOfTokenWeightMap((List) embeddings);
                }
                for (int i = 0; i < values.size(); ++i) {
                    modelIdValueToEmbeddingMap.put(Pair.of(modelId, values.get(i)), Pair.<Object, Exception>of(formattedEmbeddings.get(i), null));
                }
                markRequestDone.run();
            }, e -> {
                for (String value : values) {
                    modelIdValueToEmbeddingMap.put(Pair.of(modelId, value), Pair.<Object, Exception>of(null, e));
                }
                markRequestDone.run();
            });
            if (isDenseModel) {
                this.mlCommonsClientAccessor.inferenceSentences(textInferenceRequest, (ActionListener<List<List<Number>>>) listener);
            } else {
                this.mlCommonsClientAccessor.inferenceSentencesWithMapResult(textInferenceRequest, listener);
            }
        }
    }

    /**
     * Generates embeddings for the semantic fields of a batch and writes them back to the documents.
     *
     * <p>The raw data of all fields is grouped by model id and passed to {@code generateEmbedding}.
     * Once the results arrive they are applied through {@code batchSetInference}, and then
     * {@code handler} is called with the full list of wrappers.
     *
     * @param ingestDocumentWrappers    all documents of the batch, handed to {@code handler} at the end
     * @param docToSemanticFieldInfoMap documents mapped to the semantic fields that need embeddings
     * @param handler                   callback invoked after the embeddings have been applied
     */
    private void batchGenerateAndSetEmbedding(@NonNull List<IngestDocumentWrapper> ingestDocumentWrappers, @NonNull Map<IngestDocumentWrapper, List<SemanticFieldInfo>> docToSemanticFieldInfoMap, @NonNull Consumer<List<IngestDocumentWrapper>> handler) {
        Objects.requireNonNull(ingestDocumentWrappers, "ingestDocumentWrappers is marked non-null but is null");
        Objects.requireNonNull(docToSemanticFieldInfoMap, "docToSemanticFieldInfoMap is marked non-null but is null");
        Objects.requireNonNull(handler, "handler is marked non-null but is null");
        Consumer<Map<Pair<String, String>, Pair<Object, Exception>>> applyEmbeddings = embeddingResults -> {
            this.batchSetInference(docToSemanticFieldInfoMap, embeddingResults);
            handler.accept(ingestDocumentWrappers);
        };
        this.generateEmbedding(this.groupRawDataByModelId(docToSemanticFieldInfoMap.values()), applyEmbeddings);
    }

    /**
     * Applies the generated embeddings to every document in the map, isolating failures per document.
     *
     * <p>A failure of {@code setInference} for one document is reported through
     * {@code logAndUpdate}, the same path used by the other per-document steps of this class.
     * That logs the error and keeps any exception already recorded on the wrapper rather than
     * overwriting it. The remaining documents are still processed.
     *
     * @param docToSemanticFieldInfoMap  documents mapped to the semantic fields to populate
     * @param modelIdValueToEmbeddingMap inference results keyed by (model id, raw text)
     */
    private void batchSetInference(@NonNull Map<IngestDocumentWrapper, List<SemanticFieldInfo>> docToSemanticFieldInfoMap, @NonNull Map<Pair<String, String>, Pair<Object, Exception>> modelIdValueToEmbeddingMap) {
        Objects.requireNonNull(docToSemanticFieldInfoMap, "docToSemanticFieldInfoMap is marked non-null but is null");
        Objects.requireNonNull(modelIdValueToEmbeddingMap, "modelIdValueToEmbeddingMap is marked non-null but is null");
        for (Map.Entry<IngestDocumentWrapper, List<SemanticFieldInfo>> entry : docToSemanticFieldInfoMap.entrySet()) {
            IngestDocumentWrapper ingestDocumentWrapper = entry.getKey();
            IngestDocument ingestDocument = ingestDocumentWrapper.getIngestDocument();
            List<SemanticFieldInfo> semanticFieldInfoList = entry.getValue();
            try {
                this.setInference(ingestDocument, semanticFieldInfoList, modelIdValueToEmbeddingMap);
            } catch (Exception e) {
                // Log and record only if no earlier step has already failed this document.
                this.logAndUpdate(ingestDocumentWrapper, "set inference", e);
            }
        }
    }

    /**
     * Records the given exception on every impacted document that has no exception yet.
     *
     * <p>Wrappers that already carry an exception are left untouched.
     *
     * @param impactedDocs wrappers of the documents affected by the failure
     * @param e            the failure to record
     */
    private void addExceptionToImpactedDocs(@NonNull Collection<IngestDocumentWrapper> impactedDocs, @NonNull Exception e) {
        Objects.requireNonNull(impactedDocs, "impactedDocs is marked non-null but is null");
        Objects.requireNonNull(e, "e is marked non-null but is null");
        impactedDocs.stream()
            .filter(wrapper -> wrapper.getException() == null)
            .forEach(wrapper -> wrapper.update(wrapper.getIngestDocument(), e));
    }

    /**
     * Returns the type name of this processor.
     *
     * @return the value of the {@code PROCESSOR_TYPE} constant
     */
    public String getType() {
        return PROCESSOR_TYPE;
    }

    /**
     * Returns the batch size this processor holds in its {@code batchSize} field.
     *
     * <p>Marked {@code @VisibleForTesting}: exposed so tests can inspect the value.
     *
     * @return the current value of {@code batchSize}
     */
    @VisibleForTesting
    public int getBatchSize() {
        return this.batchSize;
    }
}

