package org.apache.hadoop.hive.ql.exec.tez;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcSplit;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.txn.compactor.CompactorUtil;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.split.SplitLocationProvider;
import org.apache.hadoop.mapred.split.TezGroupedSplit;
import org.apache.hadoop.mapred.split.TezMapredSplitsGrouper;
import org.apache.tez.dag.api.TaskLocationHint;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.class */
public class SplitGrouper {
    /** Class-wide SLF4J logger; assigned in the static initializer of this class. */
    private static final Logger LOG;
    /**
     * Shared cache passed to {@code HiveFileFormatUtils.getFromPathRecursively} by
     * {@code schemaEvolved}; assigned a {@code ConcurrentHashMap} in the static initializer.
     */
    private static final Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache;
    /** Tez grouper that {@code group} delegates the per-bucket split grouping to. */
    private final TezMapredSplitsGrouper tezGrouper = new TezMapredSplitsGrouper();
    /**
     * Decompiler-exposed assertion switch: set in the static initializer to the negation of
     * {@code SplitGrouper.class.desiredAssertionStatus()} and consulted by
     * {@code getCompactorSplitGroups} before it throws an {@code AssertionError}.
     */
    static final /* synthetic */ boolean $assertionsDisabled;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/apache/hadoop/hive/ql/exec/tez/SplitGrouper$ComparatorCompactor.class */
    /**
     * Total ordering over {@code HiveInputSplit}s that wrap {@code OrcSplit}s, used to sort
     * splits before compactor grouping.
     *
     * <p>Splits are compared, in order, by bucket id, write id, statement id, synthetic ACID
     * row-id offset (treated as 0 when the split carries no synthetic ACID properties) and
     * finally the start offset of the split.
     */
    public static class ComparatorCompactor implements Comparator<HiveInputFormat.HiveInputSplit>, Serializable {
        ComparatorCompactor() {
        }

        /**
         * Compares two splits key by key.
         *
         * @return 0 only when both arguments are the same object; otherwise a negative or
         *         positive value decided by the first key that differs
         * @throws RuntimeException if two distinct split objects are equal on every key
         * @throws ClassCastException if either wrapped split is not an {@code OrcSplit}
         */
        @Override // java.util.Comparator
        public int compare(HiveInputFormat.HiveInputSplit hiveInputSplit, HiveInputFormat.HiveInputSplit hiveInputSplit2) {
            if (hiveInputSplit == hiveInputSplit2) {
                return 0;
            }
            // The wrapped split is narrowed explicitly; the comparison keys are OrcSplit-specific.
            OrcSplit left = (OrcSplit) hiveInputSplit.getInputSplit();
            OrcSplit right = (OrcSplit) hiveInputSplit2.getInputSplit();

            int order = Integer.compare(left.getBucketId(), right.getBucketId());
            if (order != 0) {
                return order;
            }
            order = Long.compare(left.getWriteId(), right.getWriteId());
            if (order != 0) {
                return order;
            }
            order = Long.compare(left.getStatementId(), right.getStatementId());
            if (order != 0) {
                return order;
            }
            order = Long.compare(rowIdOffsetOf(left), rowIdOffsetOf(right));
            if (order != 0) {
                return order;
            }
            order = Long.compare(left.getStart(), right.getStart());
            if (order != 0) {
                return order;
            }
            // Two different objects describing the very same split are treated as an error.
            throw new RuntimeException("Found 2 equal splits: " + left + " and " + right);
        }

        /** Returns the synthetic ACID row-id offset of the split, or 0 when it has no such properties. */
        private static long rowIdOffsetOf(OrcSplit split) {
            return split.getSyntheticAcidProps() == null ? 0L : split.getSyntheticAcidProps().getRowIdOffset();
        }
    }

    /**
     * Groups the splits of each bucket separately with the Tez splits grouper.
     *
     * <p>The desired number of groups per bucket comes from {@code estimateBucketSizes}; the
     * grouped splits are collected in an {@code ArrayListMultimap} under the same bucket key.
     *
     * @param configuration configuration handed to the Tez grouper
     * @param multimap bucket key to the splits that belong to that bucket
     * @param i first sizing argument forwarded to {@code estimateBucketSizes}
     * @param f second sizing argument forwarded to {@code estimateBucketSizes}
     * @param splitLocationProvider location provider handed to the Tez grouper
     * @return bucket key to the grouped splits produced for that bucket
     * @throws IOException declared by this method's signature
     */
    public Multimap<Integer, InputSplit> group(Configuration configuration, Multimap<Integer, InputSplit> multimap, int i, float f, SplitLocationProvider splitLocationProvider) throws IOException {
        Map<Integer, Integer> desiredCounts = estimateBucketSizes(i, f, multimap.asMap());
        Multimap<Integer, InputSplit> groupedByBucket = ArrayListMultimap.create();
        for (Integer bucketKey : multimap.keySet()) {
            int bucket = bucketKey.intValue();
            Collection<InputSplit> members = multimap.get(bucketKey);
            InputSplit[] originalSplits = members.toArray(new InputSplit[0]);
            int desired = desiredCounts.get(bucketKey).intValue();
            InputSplit[] groupedSplits = this.tezGrouper.getGroupedSplits(
                    configuration,
                    originalSplits,
                    desired,
                    HiveInputFormat.class.getName(),
                    new ColumnarSplitSizeEstimator(),
                    splitLocationProvider);
            LOG.info("Original split count is " + originalSplits.length + " grouped split count is " + groupedSplits.length + ", for bucket: " + bucket);
            groupedByBucket.putAll(bucketKey, Arrays.asList(groupedSplits));
        }
        return groupedByBucket;
    }

    /**
     * Builds one {@code TaskLocationHint} per input split, in the order of the input array.
     *
     * <ul>
     *   <li>A {@code TezGroupedSplit} with a non-null rack yields a rack-only hint.</li>
     *   <li>A split without locations yields a hint with neither nodes nor racks.</li>
     *   <li>When {@code z} is set and the split is a {@code FileSplit} with more than one
     *       location, the locations are sorted and rotated by a start index derived from the
     *       hash of the file path and start offset.</li>
     *   <li>Otherwise the locations are used in the order the split reports them.</li>
     * </ul>
     *
     * @param inputSplitArr splits to create hints for
     * @param z whether to use the hash-rotated ordering for multi-location file splits
     * @return list of hints, same length and order as {@code inputSplitArr}
     * @throws IOException if a split fails to report its locations
     */
    public List<TaskLocationHint> createTaskLocationHints(InputSplit[] inputSplitArr, boolean z) throws IOException {
        List<TaskLocationHint> hints = Lists.newArrayListWithCapacity(inputSplitArr.length);
        for (InputSplit inputSplit : inputSplitArr) {
            String rack = inputSplit instanceof TezGroupedSplit ? ((TezGroupedSplit) inputSplit).getRack() : null;
            if (rack != null) {
                hints.add(TaskLocationHint.createTaskLocationHint((Set<String>) null, Collections.singleton(rack)));
                continue;
            }
            String[] locations = inputSplit.getLocations();
            if (locations == null || locations.length == 0) {
                hints.add(TaskLocationHint.createTaskLocationHint((Set<String>) null, (Set<String>) null));
            } else if (z && locations.length > 1 && inputSplit instanceof FileSplit) {
                Arrays.sort(locations);
                FileSplit fileSplit = (FileSplit) inputSplit;
                int startIndex = Objects.hash(fileSplit.getPath(), Long.valueOf(fileSplit.getStart())) % locations.length;
                if (startIndex < 0) {
                    // Java's % keeps the sign of the dividend; a negative hash would otherwise
                    // produce a negative array index below.
                    startIndex += locations.length;
                }
                Set<String> rotated = new LinkedHashSet<String>(locations.length);
                // Walk the sorted locations from startIndex, wrapping around the end of the array.
                for (int offset = 0; offset < locations.length; offset++) {
                    rotated.add(locations[(startIndex + offset) % locations.length]);
                }
                hints.add(TaskLocationHint.createTaskLocationHint(rotated, (Set<String>) null));
            } else {
                Set<String> asReported = new LinkedHashSet<String>(Arrays.asList(locations));
                hints.add(TaskLocationHint.createTaskLocationHint(asReported, (Set<String>) null));
            }
        }
        return hints;
    }

    /**
     * Convenience overload that delegates to the eight-argument
     * {@code generateGroupedSplits} with a {@code null} name and the boolean flag set to
     * {@code true}.
     *
     * @return whatever the eight-argument overload returns for these arguments
     * @throws Exception propagated from the eight-argument overload
     */
    public Multimap<Integer, InputSplit> generateGroupedSplits(JobConf jobConf, Configuration configuration, InputSplit[] inputSplitArr, float f, int i, SplitLocationProvider splitLocationProvider) throws Exception {
        final String noName = null;
        final boolean flagDefault = true;
        return generateGroupedSplits(
                jobConf, configuration, inputSplitArr, f, i, noName, flagDefault, splitLocationProvider);
    }

    /**
     * Partitions the splits into source groups and groups each of them.
     *
     * <p>When the configured split grouping mode equals {@code CompactorUtil.COMPACTOR}
     * (case-insensitively), every input path that maps to exactly one alias whose operator is
     * a {@code TableScanOperator} is checked, and the splits are handed to
     * {@code getCompactorSplitGroups}. Otherwise consecutive splits are put in the same source
     * group until {@code schemaEvolved} reports a change, and the result is passed to
     * {@code group}.
     *
     * @param jobConf job configuration; source of the grouping mode and the map work
     * @param configuration configuration forwarded to {@code getCompactorSplitGroups}
     * @param inputSplitArr splits to group
     * @param f forwarded to {@code group}
     * @param i forwarded to {@code group}
     * @param str name forwarded to {@code populateMapWork}; may be {@code null}
     * @param z forwarded to {@code schemaEvolved}
     * @param splitLocationProvider forwarded to {@code group}
     * @return source/bucket key to grouped splits
     * @throws RuntimeException in compactor mode when a checked table scan is neither on a
     *         transactional table nor (so far) only on compaction tables
     * @throws Exception declared by this method's signature
     */
    public Multimap<Integer, InputSplit> generateGroupedSplits(JobConf jobConf, Configuration configuration, InputSplit[] inputSplitArr, float f, int i, String str, boolean z, SplitLocationProvider splitLocationProvider) throws Exception {
        MapWork work = populateMapWork(jobConf, str);
        Multimap<Integer, InputSplit> bySourceGroup = ArrayListMultimap.create();
        String groupingMode = HiveConf.getVar(jobConf, HiveConf.ConfVars.SPLIT_GROUPING_MODE);

        if (groupingMode.equalsIgnoreCase(CompactorUtil.COMPACTOR)) {
            // Stays true only while every checked alias refers to a compaction table.
            boolean onlyCompactionTables = true;
            for (Path inputPath : Utilities.getInputPathsTez(jobConf, work)) {
                List<String> aliases = work.getPathToAliases().get(inputPath);
                if (aliases == null || aliases.size() != 1) {
                    continue;
                }
                String alias = aliases.get(0);
                Operator<? extends OperatorDesc> root = work.getAliasToWork().get(alias);
                if (!(root instanceof TableScanOperator)) {
                    continue;
                }
                TableScanOperator scan = (TableScanOperator) root;
                onlyCompactionTables &= AcidUtils.isCompactionTable(work.getAliasToPartnInfo().get(alias).getTableDesc().getProperties());
                if (!(scan.getConf().isTranscationalTable() || onlyCompactionTables)) {
                    String message = "Compactor split grouping is enabled only for transactional tables. Please check the path: " + getFirstSplitPath(inputSplitArr);
                    LOG.error(message);
                    throw new RuntimeException(message);
                }
            }
            return getCompactorSplitGroups(inputSplitArr, configuration, onlyCompactionTables);
        }

        int sourceGroup = 0;
        // First split of the current source group; every later split is compared against it.
        InputSplit groupHead = null;
        for (InputSplit candidate : inputSplitArr) {
            if (schemaEvolved(candidate, groupHead, z, work)) {
                sourceGroup++;
                groupHead = candidate;
            }
            bySourceGroup.put(Integer.valueOf(sourceGroup), candidate);
        }
        LOG.info("# Src groups for split generation: " + (sourceGroup + 1));
        return group(jobConf, bySourceGroup, i, f, splitLocationProvider);
    }

    /**
     * Returns the path of the first split as a string.
     *
     * @param inputSplitArr splits being grouped; the first element is cast to {@code FileSplit}
     * @return string form of the first split's path
     * @throws RuntimeException if the array is empty
     */
    private String getFirstSplitPath(InputSplit[] inputSplitArr) {
        if (inputSplitArr.length > 0) {
            FileSplit first = (FileSplit) inputSplitArr[0];
            return first.getPath().toString();
        }
        throw new RuntimeException("The list of splits provided for grouping is empty.");
    }

    /**
     * Groups splits for the compactor: one {@code TezGroupedSplit} per run of equal bucket ids.
     *
     * <p>Each split is first parsed, then all splits are sorted with
     * {@code ComparatorCompactor}. Consecutive splits with the same bucket id are then added to
     * the same {@code TezGroupedSplit}, which is stored under that bucket id.
     *
     * @param inputSplitArr splits to group; every element must be a {@code HiveInputSplit}
     *        wrapping an {@code OrcSplit}
     * @param configuration configuration passed to {@code OrcSplit.parse}
     * @param z when {@code true}, each split is parsed against the parent of its root directory;
     *        when {@code false}, it is parsed with the configuration only and, if assertions
     *        are enabled, all splits must share the same root directory
     * @return bucket id to the grouped split(s) created for it
     * @throws RuntimeException wrapping the {@code IOException} if a split cannot be parsed
     */
    Multimap<Integer, InputSplit> getCompactorSplitGroups(InputSplit[] inputSplitArr, Configuration configuration, boolean z) {
        Multimap<Integer, InputSplit> groupsByBucket = ArrayListMultimap.create();
        HiveInputFormat.HiveInputSplit[] sorted = new HiveInputFormat.HiveInputSplit[inputSplitArr.length];
        for (int index = 0; index < inputSplitArr.length; index++) {
            HiveInputFormat.HiveInputSplit hiveSplit = (HiveInputFormat.HiveInputSplit) inputSplitArr[index];
            OrcSplit orcSplit = (OrcSplit) hiveSplit.getInputSplit();
            try {
                if (z) {
                    orcSplit.parse(configuration, orcSplit.getRootDir().getParent());
                } else {
                    orcSplit.parse(configuration);
                }
            } catch (IOException e) {
                // Keep the cause so the failing split and the underlying I/O error stay diagnosable.
                throw new RuntimeException("Failed to parse split " + orcSplit, e);
            }
            sorted[index] = hiveSplit;
        }
        Arrays.sort(sorted, new ComparatorCompactor());

        TezGroupedSplit currentGroup = null;
        int previousBucket = Integer.MIN_VALUE;
        Path expectedRoot = null;
        for (HiveInputFormat.HiveInputSplit hiveSplit : sorted) {
            OrcSplit orcSplit = (OrcSplit) hiveSplit.getInputSplit();
            int bucketId = orcSplit.getBucketId();
            if (!z) {
                Path rootDir = orcSplit.getRootDir();
                if (expectedRoot == null) {
                    expectedRoot = rootDir;
                }
                if (!$assertionsDisabled && !expectedRoot.equals(rootDir)) {
                    throw new AssertionError();
                }
            }
            // The null check covers a first split whose bucket id equals the sentinel value.
            if (currentGroup == null || bucketId != previousBucket) {
                currentGroup = new TezGroupedSplit(1, "org.apache.hadoop.hive.ql.io.HiveInputFormat", (String[]) null, (String) null);
                groupsByBucket.put(Integer.valueOf(bucketId), currentGroup);
            }
            currentGroup.addSplit(hiveSplit);
            previousBucket = bucketId;
        }
        return groupsByBucket;
    }

    /**
     * Estimates how many tasks (grouped splits) each bucket should get.
     *
     * <p>If every split is a {@code FileSplit}, a bucket's share is
     * {@code i * f * bucketBytes / totalBytes}, truncated to an int and raised to at least 1.
     * If any split is not a {@code FileSplit}, sizes cannot be computed and every bucket is
     * assigned {@code (int) (i * f)}.
     *
     * @param i first factor of the overall task budget
     * @param f second factor of the overall task budget
     * @param map bucket id to the splits of that bucket
     * @return bucket id to estimated task count, with one entry per key of {@code map}
     */
    private Map<Integer, Integer> estimateBucketSizes(int i, float f, Map<Integer, Collection<InputSplit>> map) {
        Map<Integer, Long> bytesPerBucket = new HashMap<Integer, Long>();
        long totalBytes = 0;
        boolean sawNonFileSplit = false;
        for (Map.Entry<Integer, Collection<InputSplit>> bucket : map.entrySet()) {
            long bucketBytes = 0;
            for (InputSplit split : bucket.getValue()) {
                if (split instanceof FileSplit) {
                    long length = ((FileSplit) split).getLength();
                    bucketBytes += length;
                    totalBytes += length;
                } else {
                    sawNonFileSplit = true;
                }
            }
            bytesPerBucket.put(bucket.getKey(), Long.valueOf(bucketBytes));
        }

        Map<Integer, Integer> tasksPerBucket = new HashMap<Integer, Integer>();
        if (sawNonFileSplit) {
            // Give every bucket the flat budget so that callers looking up any bucket key
            // find an entry, not only the buckets that contained a non-file split.
            Integer flatBudget = Integer.valueOf((int) (i * f));
            for (Integer bucketId : map.keySet()) {
                tasksPerBucket.put(bucketId, flatBudget);
            }
            return tasksPerBucket;
        }

        for (Map.Entry<Integer, Long> bucket : bytesPerBucket.entrySet()) {
            int tasks = 0;
            if (totalBytes != 0) {
                tasks = (int) (((i * f) * ((float) bucket.getValue().longValue())) / ((float) totalBytes));
            }
            LOG.info("Estimated number of tasks: " + tasks + " for bucket " + bucket.getKey());
            if (tasks == 0) {
                // Every bucket gets at least one task, even when its share rounds down to zero.
                tasks = 1;
            }
            tasksPerBucket.put(bucket.getKey(), Integer.valueOf(tasks));
        }
        return tasksPerBucket;
    }

    /**
     * Resolves the {@code MapWork} to use for split grouping.
     *
     * @param jobConf job configuration the work is looked up from
     * @param str name passed to {@code Utilities.getMergeWork}; when {@code null} the merge-work
     *        lookup is skipped
     * @return the merge work for {@code str} when a name is given and the lookup yields a
     *         non-null result, otherwise the result of {@code Utilities.getMapWork(jobConf)}
     */
    private static MapWork populateMapWork(JobConf jobConf, String str) {
        MapWork mergeWork = str == null ? null : (MapWork) Utilities.getMergeWork(jobConf, str);
        if (mergeWork != null) {
            return mergeWork;
        }
        return Utilities.getMapWork(jobConf);
    }

    /**
     * Decides whether {@code inputSplit} has to start a new source group.
     *
     * <p>With no previous split the deserializer comparison is made against {@code null}, which
     * yields {@code true}. With a previous split and {@code z} unset, the answer is simply
     * whether the two paths differ. Otherwise a new group starts when the input format class
     * or the deserializer class name of the two partitions differ.
     *
     * @param inputSplit split being placed; cast to {@code FileSplit}
     * @param inputSplit2 split the current group started with, or {@code null}
     * @param z when {@code false}, only the file paths of the two splits are compared
     * @param mapWork source of the path-to-partition mapping
     * @return {@code true} if a new source group must be started for {@code inputSplit}
     * @throws IOException declared by this method's signature
     */
    private boolean schemaEvolved(InputSplit inputSplit, InputSplit inputSplit2, boolean z, MapWork mapWork) throws IOException {
        Path currentPath = ((FileSplit) inputSplit).getPath();
        PartitionDesc currentDesc = (PartitionDesc) HiveFileFormatUtils.getFromPathRecursively(mapWork.getPathToPartitionInfo(), currentPath, cache);
        String currentDeserializer = currentDesc.getDeserializerClassName();
        Class<? extends InputFormat> currentFormat = currentDesc.getInputFileFormatClass();

        String previousDeserializer = null;
        Class<? extends InputFormat> previousFormat = null;
        if (inputSplit2 != null) {
            Path previousPath = ((FileSplit) inputSplit2).getPath();
            if (!z) {
                return !currentPath.equals(previousPath);
            }
            PartitionDesc previousDesc = (PartitionDesc) HiveFileFormatUtils.getFromPathRecursively(mapWork.getPathToPartitionInfo(), previousPath, cache);
            previousDeserializer = previousDesc.getDeserializerClassName();
            previousFormat = previousDesc.getInputFileFormatClass();
        }

        boolean sameSchema = currentFormat == previousFormat && currentDeserializer.equals(previousDeserializer);
        boolean startsNewGroup = !sameSchema;
        if (LOG.isDebugEnabled()) {
            LOG.debug("Adding split " + currentPath + " to src new group? " + startsNewGroup);
        }
        return startsNewGroup;
    }

    // Static initializer: assigns the blank final static fields declared at the top of the class.
    static {
        // True when assertions are NOT enabled for this class; checked before throwing AssertionError.
        $assertionsDisabled = !SplitGrouper.class.desiredAssertionStatus();
        LOG = LoggerFactory.getLogger(SplitGrouper.class);
        // Cache passed to HiveFileFormatUtils.getFromPathRecursively in schemaEvolved.
        cache = new ConcurrentHashMap();
    }
}
