package com.cloudera.nav.hive.extractor;

import com.cloudera.nav.core.model.EntityHolder;
import com.cloudera.nav.core.model.EntityType;
import com.cloudera.nav.core.model.Operation;
import com.cloudera.nav.core.model.OperationExecution;
import com.cloudera.nav.core.model.Relation;
import com.cloudera.nav.core.model.Source;
import com.cloudera.nav.core.model.SourceType;
import com.cloudera.nav.core.model.relations.ControlFlowRelation;
import com.cloudera.nav.core.model.relations.DataFlowRelation;
import com.cloudera.nav.extractors.model.EngineType;
import com.cloudera.nav.hive.model.HQuery;
import com.cloudera.nav.hive.model.HQueryExecution;
import com.cloudera.nav.hive.model.HQueryPart;
import com.cloudera.nav.hive.queryparser.Column;
import com.cloudera.nav.hive.queryparser.HiveParserDao;
import com.cloudera.nav.hive.queryparser.ParserContext;
import com.cloudera.nav.hive.queryparser.QueryParser;
import com.cloudera.nav.hive.queryparser.Table;
import com.cloudera.nav.idgenerator.SequenceGenerator;
import com.cloudera.nav.mapreduce.MRExtractorContext;
import com.cloudera.nav.persist.RelationManager;
import com.cloudera.nav.utils.ExtractorUtils;
import com.cloudera.nav.utils.FilterUtil;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.commons.configuration.MapConfiguration;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.javatuples.Pair;
import org.joda.time.Instant;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/cloudera/nav/hive/extractor/HiveOperationExtractor.class */
/**
 * Turns the Hive query found in a job configuration into lineage entities.
 *
 * <p>For each extraction this class looks up or creates the {@link HQuery} for the query text,
 * creates one {@link HQueryPart} per produced column together with the relations that link the
 * part to the columns it reads and writes, creates an {@link HQueryExecution} for this run, and
 * persists everything through the entity manager and relation manager exposed by the
 * {@link MRExtractorContext}.
 */
public class HiveOperationExtractor {
    private static final Logger LOG = LoggerFactory.getLogger(HiveOperationExtractor.class);

    /** Longest query text that is used unabridged as the query's display name. */
    private static final int MAX_QUERY_TEXT_LENGTH = 55;

    /** Marker appended to a display name that had to be shortened. */
    private static final String TRUNCATION_SUFFIX = "...";

    private final HiveExtractorHelper helper;
    private final HiveIdGenerator hiveIdGenerator;
    private final SequenceGenerator sequenceGenerator;
    private final QueryParser parser;
    private final HiveExtractorDao extractorDao;
    private final String extractorRunId;

    /**
     * Creates an extractor backed by a default {@link HiveExtractorDao}.
     *
     * @param hiveIdGenerator generator for query, query-part and query-execution identities
     * @param sequenceGenerator generator for relation ids
     * @param str extractor run id stamped on every entity and relation created by this instance
     */
    public HiveOperationExtractor(HiveIdGenerator hiveIdGenerator, SequenceGenerator sequenceGenerator, String str) {
        this(hiveIdGenerator, sequenceGenerator, new HiveExtractorDao(), str);
    }

    /**
     * Creates an extractor with an explicitly supplied DAO.
     *
     * @param hiveIdGenerator generator for query, query-part and query-execution identities
     * @param sequenceGenerator generator for relation ids
     * @param hiveExtractorDao DAO kept by this instance
     * @param str extractor run id stamped on every entity and relation created by this instance
     */
    @VisibleForTesting
    HiveOperationExtractor(HiveIdGenerator hiveIdGenerator, SequenceGenerator sequenceGenerator, HiveExtractorDao hiveExtractorDao, String str) {
        this.hiveIdGenerator = hiveIdGenerator;
        this.sequenceGenerator = sequenceGenerator;
        this.extractorDao = hiveExtractorDao;
        this.parser = new QueryParser();
        this.extractorRunId = str;
        this.helper = HiveExtractorHelperFactory.newHelper();
    }

    /**
     * Opens a metastore connection from the given configuration, runs the extraction against it
     * and always closes the connection afterwards.
     *
     * @param mRExtractorContext extraction context supplying the entity manager, relation manager,
     *     options, operation map and element-id sequence
     * @param source source the query execution is attributed to
     * @param source2 source the query and its query parts are attributed to
     * @param mapConfiguration job configuration holding the query text and Hive settings
     * @param str value handed unchanged to the {@link HQueryExecution} constructor
     * @param j instant handed to the {@link HQueryExecution} constructor, as epoch milliseconds
     * @return the query (with a flag telling whether it already existed) paired with the new
     *     execution, or absent if the extraction failed
     * @throws RuntimeException wrapping the {@link MetaException} if the metastore connection
     *     cannot be established
     */
    public Optional<Pair<EntityHolder<Operation>, OperationExecution>> extract(MRExtractorContext mRExtractorContext, Source source, Source source2, MapConfiguration mapConfiguration, String str, long j) {
        HiveMetaStoreClient metaStoreClient = null;
        try {
            metaStoreClient = this.helper.setUpConnection(mapConfiguration);
            HiveParserDao parserDao = new HiveParserDao(metaStoreClient, mapConfiguration);
            return extract(mRExtractorContext, source, parserDao, mapConfiguration, str, j, source2);
        } catch (MetaException e) {
            LOG.error("Error establishing connection to metastore", e);
            throw Throwables.propagate(e);
        } finally {
            // Single release point for the connection on every exit path.
            if (metaStoreClient != null) {
                metaStoreClient.close();
            }
        }
    }

    /**
     * Runs the extraction using an already prepared parser DAO.
     *
     * <p>If the operation map or the entity manager already knows the query id, the existing
     * {@link HQuery} is reused; otherwise a new one is created and parsed. In both cases a new
     * {@link HQueryExecution} is created and persisted and all collected relations are persisted.
     * Any exception raised while doing so is logged and turned into an absent result.
     *
     * @param mRExtractorContext extraction context supplying the entity manager, relation manager,
     *     options, operation map and element-id sequence
     * @param source source the query execution is attributed to
     * @param hiveParserDao DAO the query parser uses for metadata lookups
     * @param mapConfiguration job configuration holding the query text and Hive settings
     * @param str value handed unchanged to the {@link HQueryExecution} constructor
     * @param j instant handed to the {@link HQueryExecution} constructor, as epoch milliseconds
     * @param source2 source the query and its query parts are attributed to
     * @return the query paired with the new execution, or absent on failure
     */
    @VisibleForTesting
    Optional<Pair<EntityHolder<Operation>, OperationExecution>> extract(MRExtractorContext mRExtractorContext, Source source, HiveParserDao hiveParserDao, MapConfiguration mapConfiguration, String str, long j, Source source2) {
        String queryString = HiveUtils.getQueryString(mapConfiguration);
        String queryId = this.hiveIdGenerator.generateQueryId(source2.getIdentity(), queryString);
        // NOTE(review): kept as a raw Map because the map's declared type arguments are not
        // visible from this file.
        Map operationMap = mRExtractorContext.getOperationMap();
        Optional<?> existing = ExtractorUtils.getEntity(mRExtractorContext.getEm(), operationMap, queryId);
        try {
            ArrayList<Relation> relations = Lists.newArrayList();
            HQuery query;
            if (existing.isPresent()) {
                query = (HQuery) existing.get();
            } else {
                query = createQuery(mRExtractorContext, source2, hiveParserDao, mapConfiguration, queryId, queryString, relations);
            }
            operationMap.put(queryId, query);

            HQueryExecution execution = new HQueryExecution(this.hiveIdGenerator.generateQueryExecId(source, mapConfiguration), Long.valueOf(mRExtractorContext.getSequenceGenerator().getNextElementId()), source.getId(), new Instant(j), str, EngineType.MR);
            execution.setOriginalName(query.getOriginalName() + " " + execution.getStarted().toString());
            execution.setExtractorRunId(this.extractorRunId);
            relations.add(RelationsFactory.getQueryExecRelationBuilder(this.sequenceGenerator, query, execution, this.extractorRunId).build());
            mRExtractorContext.getEm().persist(execution, false);

            RelationManager relationManager = mRExtractorContext.getRm();
            for (Relation relation : relations) {
                relationManager.persist(relation, true);
            }
            return Optional.of(Pair.with(EntityHolder.withInstance(query).setPresentInSolr(existing.isPresent()).build(), execution));
        } catch (Exception e) {
            // NOTE(review): this logs the unmasked query text even when PII masking is enabled
            // in the options; consider logging the masked text instead.
            LOG.error("Unable to parse query " + queryString, e);
            return Optional.absent();
        }
    }

    /**
     * Creates a new query entity, parses the query text and, if parsing succeeds, fills in the
     * name, text, inputs and outputs, collects the query parts and persists the query and parts.
     *
     * <p>A failure during parsing or population does not abort the extraction: the partially
     * populated query is still returned, flagged as unparsed when the options ask for unparsed
     * operations to be stored.
     */
    private HQuery createQuery(MRExtractorContext context, Source hiveSource, HiveParserDao parserDao, MapConfiguration config, String queryId, String queryString, Collection<Relation> relations) {
        HQuery query = new HQuery(queryId, Long.valueOf(context.getSequenceGenerator().getNextElementId()), hiveSource.getId());
        try {
            ParserContext parsed = this.parser.parse(queryString, getDatabase(config), parserDao, this.helper.convertConfigMapToHadoopConfig(config));
            String storedText = queryString;
            if (context.getOptions().isPiiMaskingEnabled()) {
                storedText = FilterUtil.maskText(queryString, context.getOptions().getPiiMaskingRegex());
            }
            query.setOriginalName(toDisplayName(storedText));
            query.setQueryText(storedText);
            query.setExtractorRunId(this.extractorRunId);
            setOutputs(query, parsed);
            setInputs(query, parsed);
            ArrayList<HQueryPart> queryParts = Lists.newArrayList();
            // Identities of the parts are derived from the original, unmasked query text.
            collectQueryParts(query, parsed, context, hiveSource, queryParts, relations, queryString);
            context.getEm().persist(query, false);
            context.getEm().persist(queryParts, false);
        } catch (Exception e) {
            if (context.getOptions().isStoreUnparsedOps()) {
                LOG.debug("Storing an unparsed operation", e);
                query.setUnparsed(true);
            } else {
                // Previously swallowed without a trace; processing continues exactly as before.
                LOG.debug("Query processing failed; operation is not flagged as unparsed because storing unparsed operations is disabled", e);
            }
        }
        return query;
    }

    /** Shortens text longer than {@link #MAX_QUERY_TEXT_LENGTH} so that, suffix included, it fits. */
    private static String toDisplayName(String text) {
        if (text.length() <= MAX_QUERY_TEXT_LENGTH) {
            return text;
        }
        return text.substring(0, MAX_QUERY_TEXT_LENGTH - TRUNCATION_SUFFIX.length()) + TRUNCATION_SUFFIX;
    }

    /**
     * Reads the current database from {@code hive.current.db}, then from
     * {@code hive.current.database}, falling back to {@code default}.
     */
    private String getDatabase(MapConfiguration config) {
        String database = config.getString("hive.current.db");
        if (database != null) {
            return database;
        }
        return config.getString("hive.current.database", "default");
    }

    /** Records the names of the parsed target tables as the query's outputs; no-op when there are none. */
    private void setOutputs(HQuery query, ParserContext parsed) {
        Set<Table> targetTables = parsed.getTgtTables();
        if (targetTables.isEmpty()) {
            return;
        }
        ArrayList<String> names = Lists.newArrayListWithCapacity(targetTables.size());
        collectNamesAndIds(targetTables, names);
        query.setOutputs(names);
    }

    /** Appends the name of every table to {@code names}. Despite the method name, only names are collected. */
    private void collectNamesAndIds(Collection<Table> tables, Collection<String> names) {
        for (Table table : tables) {
            names.add(table.getName());
        }
    }

    /** Records the names of the parsed source tables as the query's inputs (possibly an empty list). */
    private void setInputs(HQuery query, ParserContext parsed) {
        Set<Table> sourceTables = parsed.getSrcTables();
        ArrayList<String> names = Lists.newArrayListWithCapacity(sourceTables.size());
        collectNamesAndIds(sourceTables, names);
        query.setInputs(names);
    }

    /**
     * Creates the query parts of a parsed query and the relations attached to them.
     *
     * <p>Without target tables, one part is created per projected column. With target tables, one
     * part is created per non-partition target column that has projection or predicate
     * dependencies. Parts are numbered consecutively starting at 1 across all tables.
     *
     * @param queryParts receives the created parts
     * @param relations receives the created relations
     * @param queryString original query text used to derive part identities
     */
    private void collectQueryParts(HQuery query, ParserContext parsed, MRExtractorContext context, Source hiveSource, Collection<HQueryPart> queryParts, Collection<Relation> relations, String queryString) {
        Set<Table> targetTables = parsed.getTgtTables();
        int partNumber = 1;
        if (targetTables.isEmpty()) {
            Iterable<? extends Column> projectedColumns = parsed.getProjectedColumns();
            for (Column projected : projectedColumns) {
                collectSingleQueryPart(query, context, hiveSource, partNumber++, Collections.singleton(projected), parsed.getPredicateColumns(), null, queryParts, relations, queryString);
            }
            return;
        }
        for (Table target : targetTables) {
            Collection<Column> targetColumns = target.getNonPartCols();
            Preconditions.checkState(targetColumns.size() <= target.getProjectionDeps().size());
            int position = 0;
            for (Column targetColumn : targetColumns) {
                Collection<Column> projectionDeps = target.getProjectionDeps().get(position);
                // Advance for every column, including skipped ones. Advancing only when a part
                // was emitted left the index stuck on the first dependency-free column, so all
                // later columns were matched against the wrong projection entry.
                position++;
                if (!projectionDeps.isEmpty() || !target.getPredicateDeps().isEmpty()) {
                    collectSingleQueryPart(query, context, hiveSource, partNumber++, projectionDeps, target.getPredicateDeps(), targetColumn, queryParts, relations, queryString);
                }
            }
        }
    }

    /**
     * Creates one query part and its relations.
     *
     * <p>Relations added, in order: an unlinked data-flow relation from the projection columns to
     * the part and an unlinked control-flow relation from the predicate columns to the part (each
     * only if provider entity ids resolve for the columns), an unlinked data-flow relation from
     * the part to the target column (only if a target column is given), and finally the relation
     * between the query and the part.
     *
     * @param partNumber 1-based number of the part within its query
     * @param projectionColumns columns the part's value is computed from
     * @param predicateColumns columns that influence the part through predicates
     * @param targetColumn column the part writes to, or {@code null} if there is none
     * @param queryParts receives the created part
     * @param relations receives the created relations
     * @param queryString original query text used to derive the part identity
     */
    private void collectSingleQueryPart(HQuery query, MRExtractorContext context, Source hiveSource, int partNumber, Collection<Column> projectionColumns, Collection<Column> predicateColumns, Column targetColumn, Collection<HQueryPart> queryParts, Collection<Relation> relations, String queryString) {
        HQueryPart part = new HQueryPart(this.hiveIdGenerator.generateQueryPartIdentity(hiveSource.getIdentity(), queryString, partNumber), Long.valueOf(context.getSequenceGenerator().getNextElementId()), hiveSource.getId());
        part.setFirstClassParentId(query.getId());
        part.setOriginalName(String.format("%1$s Part %2$d", query.getOriginalName(), partNumber));
        part.setExtractorRunId(this.extractorRunId);

        if (!projectionColumns.isEmpty()) {
            Collection<String> projectionIds = this.helper.getProviderEntityIdsForParserColumns(projectionColumns, hiveSource.getIdentity(), this.hiveIdGenerator);
            if (!projectionIds.isEmpty()) {
                relations.add(DataFlowRelation.builder()
                        .id(this.sequenceGenerator.getNextRelationId())
                        .unlinkedSourceIds(projectionIds)
                        .sourceSourceType(SourceType.HIVE)
                        .targetSourceType(SourceType.HIVE)
                        .targetId(part.getId())
                        .sourceSourceId(hiveSource.getId())
                        .targetSourceId(hiveSource.getId())
                        .sourceType(EntityType.FIELD)
                        .targetType(part.getType())
                        .extractorRunId(this.extractorRunId)
                        .isUnlinked(true)
                        .build());
            }
        }

        if (!predicateColumns.isEmpty()) {
            Collection<String> predicateIds = this.helper.getProviderEntityIdsForParserColumns(predicateColumns, hiveSource.getIdentity(), this.hiveIdGenerator);
            if (!predicateIds.isEmpty()) {
                relations.add(ControlFlowRelation.builder()
                        .id(this.sequenceGenerator.getNextRelationId())
                        .unlinkedSourceIds(predicateIds)
                        .sourceSourceType(SourceType.HIVE)
                        .targetSourceType(SourceType.HIVE)
                        .targetId(part.getId())
                        .sourceSourceId(hiveSource.getId())
                        .targetSourceId(hiveSource.getId())
                        .sourceType(EntityType.FIELD)
                        .targetType(part.getType())
                        .extractorRunId(this.extractorRunId)
                        .isUnlinked(true)
                        .build());
            }
        }

        if (targetColumn != null) {
            // Unlike the two relations above, this one is added even if no provider entity id
            // resolves for the target column.
            relations.add(DataFlowRelation.builder()
                    .id(this.sequenceGenerator.getNextRelationId())
                    .sourceId(part.getId())
                    .sourceSourceType(SourceType.HIVE)
                    .targetSourceType(SourceType.HIVE)
                    .unlinkedTargetIds(this.helper.getProviderEntityIdsForParserColumns(Collections.singleton(targetColumn), hiveSource.getIdentity(), this.hiveIdGenerator))
                    .sourceSourceId(hiveSource.getId())
                    .targetSourceId(hiveSource.getId())
                    .sourceType(part.getType())
                    .targetType(EntityType.FIELD)
                    .extractorRunId(this.extractorRunId)
                    .isUnlinked(true)
                    .build());
        }

        queryParts.add(part);
        relations.add(RelationsFactory.getQueryPartRelationBuilder(this.sequenceGenerator, query, part, this.extractorRunId).build());
    }
}
