package edu.cmu.lemurproject;

import java.io.DataInputStream;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.MultiFileSplit;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.util.ReflectionUtils;

/* loaded from: input_file:edu/cmu/lemurproject/WarcFileRecordReader.class */
/**
 * Record reader (old {@code org.apache.hadoop.mapred} API) that walks the files of a
 * {@link FileSplit} or {@link MultiFileSplit} in order and emits one
 * {@link WritableWarcRecord} per WARC record, keyed by a running record number that
 * starts at 1.
 *
 * <p>Files whose name ends in {@code "gz"} are read through the Gzip codec when that
 * codec could be loaded; all other files are read as-is.
 *
 * <p>The type parameters {@code K} and {@code V} are not used by this class; they are
 * kept so existing parameterized references continue to compile.
 */
public class WarcFileRecordReader<K extends WritableComparable, V extends Writable> implements RecordReader<LongWritable, WritableWarcRecord> {
    public static final Log LOG = LogFactory.getLog(WarcFileRecordReader.class);

    /** Files to read, in the order they are consumed. */
    private Path[] filePathList;
    /** Gzip codec instance, or {@code null} when the codec class could not be loaded. */
    private CompressionCodec compressionCodec;
    private Configuration conf;
    /** Key handed out with the next record; incremented after every emitted record. */
    private long recordNumber = 1;
    /** Index into {@link #filePathList} of the file currently open; -1 before the first open. */
    private int currentFilePath = -1;
    /** Raw stream of the currently open file, or {@code null} when none is open. */
    private FSDataInputStream currentFile = null;
    /** Decompressing wrapper around {@link #currentFile}, or {@code null} for uncompressed files. */
    private DataInputStream compressionInput = null;
    /** Sum of the lengths of all files in {@link #filePathList}, as reported by the file system. */
    private long totalFileSize = 0;
    /** Sum of {@code getTotalRecordLength()} over all records emitted so far. */
    private long totalNumBytesRead = 0;

    /**
     * Creates a reader over the file(s) of the given split and opens the first file.
     *
     * @param configuration job configuration used to resolve file systems and the codec
     * @param inputSplit    a {@link FileSplit} or {@link MultiFileSplit}
     * @throws IOException if the split is of any other type, or if a file status lookup fails
     */
    public WarcFileRecordReader(Configuration configuration, InputSplit inputSplit) throws IOException {
        this.conf = configuration;
        if (inputSplit instanceof FileSplit) {
            this.filePathList = new Path[] {((FileSplit) inputSplit).getPath()};
        } else if (inputSplit instanceof MultiFileSplit) {
            this.filePathList = ((MultiFileSplit) inputSplit).getPaths();
        } else {
            throw new IOException("InputSplit is not a file split or a multi-file split - aborting");
        }
        for (Path path : this.filePathList) {
            // Resolve the file system from the path itself, exactly as openNextFile() does,
            // so sizing and reading agree even when a path is not on the default file system.
            FileSystem pathFs = path.getFileSystem(configuration);
            this.totalFileSize += pathFs.getFileStatus(path).getLen();
        }
        try {
            this.compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(configuration.getClassByName("org.apache.hadoop.io.compress.GzipCodec").asSubclass(CompressionCodec.class), configuration);
        } catch (ClassNotFoundException e) {
            // Without the codec every file is read uncompressed.
            this.compressionCodec = null;
            LOG.info("!!! ClassNotFound Exception thrown setting Gzip codec");
        }
        openNextFile();
    }

    /**
     * Returns the stream records should currently be read from: the decompressing
     * stream when present, otherwise the raw file stream, otherwise {@code null}.
     */
    private DataInputStream activeStream() {
        return this.compressionInput != null ? this.compressionInput : this.currentFile;
    }

    /**
     * Closes whichever stream is currently open and always clears both stream fields,
     * even when closing fails, so a stale handle is never read from again.
     */
    private void closeCurrentStreams() throws IOException {
        try {
            if (this.compressionInput != null) {
                this.compressionInput.close();
            } else if (this.currentFile != null) {
                this.currentFile.close();
            }
        } finally {
            this.currentFile = null;
            this.compressionInput = null;
        }
    }

    /**
     * Closes the current file (if any) and opens the next one in the list.
     *
     * @return {@code true} if a new file was opened; {@code false} if the list is
     *         exhausted or an I/O error occurred (the error is logged, not thrown)
     */
    private boolean openNextFile() {
        try {
            closeCurrentStreams();
            this.currentFilePath++;
            if (this.currentFilePath >= this.filePathList.length) {
                return false;
            }
            Path path = this.filePathList[this.currentFilePath];
            this.currentFile = path.getFileSystem(this.conf).open(path);
            LOG.debug(path);
            if (this.compressionCodec != null && path.getName().endsWith("gz")) {
                this.compressionInput = new DataInputStream(this.compressionCodec.createInputStream(this.currentFile));
                LOG.info("Compression enabled");
            }
            return true;
        } catch (IOException e) {
            // Best-effort: an unreadable file ends iteration instead of failing the task.
            LOG.info("IOError opening " + this.filePathList[this.currentFilePath].toString() + " - message: " + e.getMessage(), e);
            return false;
        }
    }

    /**
     * Reads the next WARC record, advancing to following files as each one is exhausted.
     *
     * @param longWritable       receives the 1-based running record number
     * @param writableWarcRecord receives the record that was read
     * @return {@code true} if a record was produced, {@code false} when no input remains
     * @throws IOException if reading a record fails
     */
    @Override
    public boolean next(LongWritable longWritable, WritableWarcRecord writableWarcRecord) throws IOException {
        DataInputStream in = activeStream();
        while (in != null) {
            WarcRecord record = WarcRecord.readNextWarcRecord(in);
            if (record != null) {
                this.totalNumBytesRead += record.getTotalRecordLength();
                record.setWarcFilePath(this.filePathList[this.currentFilePath].toString());
                writableWarcRecord.setRecord(record);
                longWritable.set(this.recordNumber);
                this.recordNumber++;
                return true;
            }
            // A null record is treated as end of the current file. Move on and re-select
            // the stream: the previous one has just been closed by openNextFile().
            if (!openNextFile()) {
                return false;
            }
            in = activeStream();
        }
        return false;
    }

    /** Creates an empty key object for use with {@link #next}. */
    @Override
    public LongWritable createKey() {
        return new LongWritable();
    }

    /** Creates an empty value object for use with {@link #next}. */
    @Override
    public WritableWarcRecord createValue() {
        return new WritableWarcRecord();
    }

    /**
     * Decompiler-generated alias of {@link #createKey()}, retained for compatibility.
     *
     * @deprecated use {@link #createKey()}
     */
    @Deprecated
    public LongWritable m31createKey() {
        return createKey();
    }

    /**
     * Decompiler-generated alias of {@link #createValue()}, retained for compatibility.
     *
     * @deprecated use {@link #createValue()}
     */
    @Deprecated
    public WritableWarcRecord m30createValue() {
        return createValue();
    }

    /** Returns the accumulated record length of everything emitted so far. */
    @Override
    public long getPos() throws IOException {
        return this.totalNumBytesRead;
    }

    /**
     * Closes the currently open stream, if any, and marks the input as fully consumed.
     * Calling it again is a no-op because the stream fields are cleared.
     */
    @Override
    public void close() throws IOException {
        this.totalNumBytesRead = this.totalFileSize;
        closeCurrentStreams();
    }

    /**
     * Estimates progress as a fraction in {@code [0, 1]}.
     *
     * <p>While a compressed file is open, progress is the fraction of files already
     * finished (record lengths are not comparable to on-disk compressed sizes).
     * Otherwise it is accumulated record bytes over total file size, or 0 when the
     * total size is 0.
     */
    @Override
    public float getProgress() throws IOException {
        if (this.compressionInput != null) {
            if (this.filePathList.length == 0) {
                return 1.0f;
            }
            // Cast before dividing: int / int would truncate to 0 for every file but the last.
            return Math.min(1.0f, ((float) this.currentFilePath) / ((float) this.filePathList.length));
        }
        if (this.totalFileSize == 0) {
            return 0.0f;
        }
        return Math.min(1.0f, ((float) this.totalNumBytesRead) / ((float) this.totalFileSize));
    }
}
