package com.cloudera.nav.hdfs.datasets;

import com.cloudera.cmf.cdhclient.HadoopConfiguration;
import com.cloudera.cmf.cdhclient.common.security.UserGroupInformation;
import com.cloudera.nav.core.model.DatasetType;
import com.cloudera.nav.core.model.FileFormat;
import com.cloudera.nav.hdfs.HdfsExtractorUtils;
import com.cloudera.nav.hdfs.extractor.HdfsReader;
import com.cloudera.nav.hdfs.model.FSEntity;
import com.cloudera.nav.server.NavOptions;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.common.collect.UnmodifiableIterator;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.Callable;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.kitesdk.data.CompressionType;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetIOException;
import org.kitesdk.data.DatasetNotFoundException;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.Formats;
import org.kitesdk.data.ValidationException;
import org.kitesdk.data.spi.DefaultConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/cloudera/nav/hdfs/datasets/KiteDatasetReader.class */
/**
 * Reads Kite dataset descriptors stored on HDFS and converts them into {@link NavDatasetDesc}
 * instances.
 *
 * <p>Schema reads are queued on an {@link HdfsReader} through {@link #requestSchema(FSEntity)};
 * {@link #waitForCompletion(Function)} delegates to the reader to wait for the queued requests.
 */
public class KiteDatasetReader {
    private static final Logger LOG = LoggerFactory.getLogger(KiteDatasetReader.class);

    /**
     * Shared JSON mapper. It is never reconfigured after construction and is only used for reads,
     * so a single instance is reused instead of building a new mapper for every parse.
     */
    private static final ObjectMapper JSON_MAPPER = new ObjectMapper();

    private final URI hdfsUri;
    private final HdfsReader<NavDatasetDesc> reader;
    private final Configuration config;

    /** Name/type pair describing a single partitioner of a Kite partition strategy. */
    private static final class PartitionField {
        final String field;
        final String type;

        PartitionField(String field, String type) {
            this.field = field;
            this.type = type;
        }
    }

    /**
     * Creates a reader bound to the file system named by {@code fs.defaultFS}.
     *
     * @param hadoopConfiguration source of {@code fs.defaultFS} and of the Hadoop settings copied
     *     into the {@link Configuration} handed to Kite
     * @param userGroupInformation user passed to the underlying {@link HdfsReader}
     * @param navOptions supplies the reader thread pool size
     * @throws RuntimeException if {@code fs.defaultFS} is not a syntactically valid URI
     */
    public KiteDatasetReader(HadoopConfiguration hadoopConfiguration, UserGroupInformation userGroupInformation, NavOptions navOptions) {
        // Validate the configuration first: a malformed fs.defaultFS should fail construction
        // before the reader is created (NOTE(review): HdfsReader presumably owns a thread pool,
        // which would otherwise be left behind -- confirm).
        this.hdfsUri = getHdfsUri(hadoopConfiguration);
        this.config = convertConfig(hadoopConfiguration);
        this.reader = newReader(userGroupInformation, navOptions);
    }

    /**
     * Submits a request to the reader that parses the dataset rooted at {@code fSEntity}.
     *
     * @param fSEntity file system entity expected to contain a Kite dataset
     */
    public void requestSchema(final FSEntity fSEntity) {
        this.reader.submitReadRequest(new Callable<NavDatasetDesc>() {
            @Override
            public NavDatasetDesc call() {
                return KiteDatasetReader.this.parseDataset(fSEntity);
            }
        });
    }

    /**
     * Delegates to {@link HdfsReader#waitForCompletion}, passing {@code function} through.
     *
     * @param function callback handed to the reader
     */
    public void waitForCompletion(Function<NavDatasetDesc, Integer> function) {
        this.reader.waitForCompletion(function);
    }

    /**
     * Loads the Kite descriptor found at the entity's path and maps it onto a
     * {@link NavDatasetDesc}.
     *
     * @param fSEntity entity whose file system path is loaded as a Kite dataset
     * @return the populated description, or {@code null} when the dataset is missing or cannot be
     *     read (the failure is logged)
     */
    @VisibleForTesting
    NavDatasetDesc parseDataset(FSEntity fSEntity) {
        String fileSystemPath = fSEntity.getFileSystemPath();
        LOG.debug("Attempting to extract schema from {}", fileSystemPath);
        try {
            DatasetDescriptor descriptor = readDataset(fileSystemPath);
            NavDatasetDesc datasetDesc = new NavDatasetDesc();
            datasetDesc.setDatasetType(DatasetType.KITE);

            // Fall back to the directory name when the entity carries no original name.
            String name = fSEntity.getOriginalName();
            if (StringUtils.isEmpty(name)) {
                name = HdfsExtractorUtils.getDirName(fileSystemPath);
            }
            datasetDesc.setName(name);
            datasetDesc.setContainer(fSEntity);
            datasetDesc.setCompressionType(getCompressionType(descriptor));
            datasetDesc.setFileFormat(getFileFormat(descriptor));
            datasetDesc.setSchema(descriptor.getSchema());
            setPartitionStrategy(descriptor, datasetDesc);
            for (String property : descriptor.listProperties()) {
                datasetDesc.setProperty(property, descriptor.getProperty(property));
            }
            return datasetDesc;
        } catch (DatasetIOException e) {
            LOG.error("Error reading dataset " + fileSystemPath, e);
            return null;
        } catch (DatasetNotFoundException e) {
            LOG.error("Dataset expected but not found at " + fileSystemPath, e);
            return null;
        }
    }

    /**
     * Loads the dataset at {@code str} through Kite and returns its descriptor.
     *
     * <p>Note that this installs {@link #config} as Kite's process-wide default configuration on
     * every call.
     *
     * @param str dataset path, resolved against the HDFS URI
     * @return the descriptor of the loaded dataset
     */
    @VisibleForTesting
    DatasetDescriptor readDataset(String str) {
        URI datasetUri = getDatasetURI(str);
        DefaultConfiguration.set(this.config);
        return Datasets.load(datasetUri).getDataset().getDescriptor();
    }

    /**
     * Builds the {@code dataset:} URI for a path by resolving it against the HDFS URI.
     *
     * @param str dataset path; it is parsed as a URI reference by {@link URI#resolve(String)}
     * @return the URI {@code dataset:<resolved hdfs uri>}
     */
    @VisibleForTesting
    URI getDatasetURI(String str) {
        return URI.create(String.format("dataset:%s", this.hdfsUri.resolve(str).toString()));
    }

    /** Copies every key/value pair of {@code hadoopConfiguration} into a new {@link Configuration}. */
    private static Configuration convertConfig(HadoopConfiguration hadoopConfiguration) {
        Configuration configuration = new Configuration();
        Map<String, String> properties = hadoopConfiguration.asStringMap();
        for (Map.Entry<String, String> property : properties.entrySet()) {
            configuration.set(property.getKey(), property.getValue());
        }
        return configuration;
    }

    /**
     * Reads {@code fs.defaultFS} and parses it as a URI.
     *
     * @throws RuntimeException wrapping the {@link URISyntaxException} when the value is malformed
     */
    private static URI getHdfsUri(HadoopConfiguration hadoopConfiguration) {
        try {
            return new URI(hadoopConfiguration.safeGetConfig("fs.defaultFS"));
        } catch (URISyntaxException e) {
            throw new RuntimeException(e);
        }
    }

    /** Creates the reader using the thread pool size configured in {@code navOptions}. */
    private static HdfsReader<NavDatasetDesc> newReader(UserGroupInformation userGroupInformation, NavOptions navOptions) {
        return new HdfsReader<>(navOptions.getHdfsExtractReaderThreadPoolSize(), userGroupInformation);
    }

    /**
     * Maps the Kite compression type of a descriptor onto the Navigator model.
     *
     * @return the matching model value, or {@code null} for any Kite compression type other than
     *     Lzo, Bzip2, Deflate and Snappy
     */
    private static com.cloudera.nav.core.model.CompressionType getCompressionType(DatasetDescriptor datasetDescriptor) {
        switch (datasetDescriptor.getCompressionType()) {
            case Lzo:
                return com.cloudera.nav.core.model.CompressionType.LZO;
            case Bzip2:
                return com.cloudera.nav.core.model.CompressionType.BZIP2;
            case Deflate:
                return com.cloudera.nav.core.model.CompressionType.DEFLATE;
            case Snappy:
                return com.cloudera.nav.core.model.CompressionType.SNAPPY;
            default:
                return null;
        }
    }

    /**
     * Maps the Kite format of a descriptor onto the Navigator model; anything other than Avro,
     * Parquet and CSV is reported as {@link FileFormat#CUSTOM}.
     */
    private static FileFormat getFileFormat(DatasetDescriptor datasetDescriptor) {
        if (datasetDescriptor.getFormat() == Formats.AVRO) {
            return FileFormat.AVRO;
        }
        if (datasetDescriptor.getFormat() == Formats.PARQUET) {
            return FileFormat.PARQUET;
        }
        if (datasetDescriptor.getFormat() == Formats.CSV) {
            return FileFormat.CSV;
        }
        return FileFormat.CUSTOM;
    }

    /**
     * Copies the partition field names and types of a partitioned descriptor into
     * {@code navDatasetDesc}.
     *
     * <p>The strategy is obtained as JSON from the partition strategy's {@code toString()}. When
     * that JSON cannot be parsed or any partitioner is invalid, the error is logged and the
     * target is left without partition information (nothing is set partially).
     */
    private static void setPartitionStrategy(DatasetDescriptor datasetDescriptor, NavDatasetDesc navDatasetDesc) {
        if (!datasetDescriptor.isPartitioned()) {
            return;
        }
        ArrayList<String> fields = new ArrayList<>();
        ArrayList<String> types = new ArrayList<>();
        try {
            JsonNode strategy = parseJson(datasetDescriptor.getPartitionStrategy().toString());
            // Iterating a JsonNode walks its child elements.
            for (JsonNode partitioner : strategy) {
                PartitionField partitionField = readPartitionField(partitioner);
                fields.add(partitionField.field);
                types.add(partitionField.type);
            }
            navDatasetDesc.setPartitionFields(fields);
            navDatasetDesc.setPartitionTypes(types);
        } catch (DatasetIOException e) {
            LOG.error("Skipping partitioning strategy due to io error", e);
        } catch (ValidationException e) {
            LOG.error("Skipping partitioning strategy due to validation error", e);
        }
    }

    /**
     * Extracts the field name and type of one partitioner.
     *
     * <p>For a {@code provided} partitioner the field is its {@code name} and the type is its
     * {@code values} entry when present (otherwise {@code provided}); for every other
     * partitioner the field is its {@code source} and the type is its {@code type}.
     *
     * @throws ValidationException if the node is not an object or lacks a required attribute
     */
    private static PartitionField readPartitionField(JsonNode jsonNode) {
        ValidationException.check(jsonNode.isObject(), "A partitioner must be a JSON record");
        ValidationException.check(jsonNode.has("type"), "Partitioners must have a type");
        String type = jsonNode.get("type").asText();
        boolean provided = "provided".equals(type);
        ValidationException.check(provided || jsonNode.has("source"), "Partitioners must have a source");
        if (!provided) {
            return new PartitionField(jsonNode.get("source").asText(), type);
        }
        ValidationException.check(jsonNode.has("name"), "Provided partitioners must have a name");
        String name = jsonNode.get("name").asText();
        if (jsonNode.has("values")) {
            type = jsonNode.get("values").asText();
        }
        return new PartitionField(name, type);
    }

    /**
     * Parses {@code str} into a JSON tree.
     *
     * @throws ValidationException if the text is not valid JSON
     * @throws DatasetIOException on any other I/O failure while reading
     */
    private static JsonNode parseJson(String str) {
        try {
            return JSON_MAPPER.readValue(str, JsonNode.class);
        } catch (JsonParseException | JsonMappingException e) {
            // Both are IOException subclasses, so they must be caught before IOException.
            throw new ValidationException("Invalid JSON", e);
        } catch (IOException e) {
            throw new DatasetIOException("Cannot initialize JSON parser", e);
        }
    }
}
