org.apache.hadoop.hive.ql.io
Class HiveInputFormat<K extends org.apache.hadoop.io.WritableComparable,V extends org.apache.hadoop.io.Writable>

java.lang.Object
  extended by org.apache.hadoop.hive.ql.io.HiveInputFormat<K,V>
All Implemented Interfaces:
org.apache.hadoop.mapred.InputFormat<K,V>, org.apache.hadoop.mapred.JobConfigurable
Direct Known Subclasses:
BucketizedHiveInputFormat, CombineHiveInputFormat, HiveIndexedInputFormat

public class HiveInputFormat<K extends org.apache.hadoop.io.WritableComparable,V extends org.apache.hadoop.io.Writable>
extends Object
implements org.apache.hadoop.mapred.InputFormat<K,V>, org.apache.hadoop.mapred.JobConfigurable

HiveInputFormat is a parameterized InputFormat that looks at the path name and determines the correct InputFormat for that path name from mapredPlan.pathToPartitionInfo(). It can be used to read files with different input formats in the same map-reduce job.


Nested Class Summary
static class HiveInputFormat.HiveInputSplit
          HiveInputSplit encapsulates an InputSplit with its corresponding inputFormatClass.
 
Field Summary
protected static Map<Class,org.apache.hadoop.mapred.InputFormat<org.apache.hadoop.io.WritableComparable,org.apache.hadoop.io.Writable>> inputFormats
          A cache of InputFormat instances.
static org.apache.commons.logging.Log LOG
           
protected  Map<String,PartitionDesc> pathToPartitionInfo
           
 
Constructor Summary
HiveInputFormat()
           
 
Method Summary
 void configure(org.apache.hadoop.mapred.JobConf job)
           
static org.apache.hadoop.mapred.InputFormat<org.apache.hadoop.io.WritableComparable,org.apache.hadoop.io.Writable> getInputFormatFromCache(Class inputFormatClass, org.apache.hadoop.mapred.JobConf job)
           
protected static PartitionDesc getPartitionDescFromPath(Map<String,PartitionDesc> pathToPartitionInfo, org.apache.hadoop.fs.Path dir)
           
 org.apache.hadoop.mapred.RecordReader getRecordReader(org.apache.hadoop.mapred.InputSplit split, org.apache.hadoop.mapred.JobConf job, org.apache.hadoop.mapred.Reporter reporter)
           
 org.apache.hadoop.mapred.InputSplit[] getSplits(org.apache.hadoop.mapred.JobConf job, int numSplits)
           
protected  void init(org.apache.hadoop.mapred.JobConf job)
           
protected  void pushFilters(org.apache.hadoop.mapred.JobConf jobConf, TableScanOperator tableScan)
           
protected  void pushProjectionsAndFilters(org.apache.hadoop.mapred.JobConf jobConf, Class inputFormatClass, String splitPath, String splitPathWithNoSchema)
           
protected  void pushProjectionsAndFilters(org.apache.hadoop.mapred.JobConf jobConf, Class inputFormatClass, String splitPath, String splitPathWithNoSchema, boolean nonNative)
           
 void validateInput(org.apache.hadoop.mapred.JobConf job)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

LOG

public static final org.apache.commons.logging.Log LOG

inputFormats

protected static Map<Class,org.apache.hadoop.mapred.InputFormat<org.apache.hadoop.io.WritableComparable,org.apache.hadoop.io.Writable>> inputFormats
A cache of InputFormat instances.


pathToPartitionInfo

protected Map<String,PartitionDesc> pathToPartitionInfo
Constructor Detail

HiveInputFormat

public HiveInputFormat()
Method Detail

configure

public void configure(org.apache.hadoop.mapred.JobConf job)
Specified by:
configure in interface org.apache.hadoop.mapred.JobConfigurable

getInputFormatFromCache

public static org.apache.hadoop.mapred.InputFormat<org.apache.hadoop.io.WritableComparable,org.apache.hadoop.io.Writable> getInputFormatFromCache(Class inputFormatClass,
                                                                                                                                                  org.apache.hadoop.mapred.JobConf job)
                                                                                                                                           throws IOException
Throws:
IOException

getRecordReader

public org.apache.hadoop.mapred.RecordReader getRecordReader(org.apache.hadoop.mapred.InputSplit split,
                                                             org.apache.hadoop.mapred.JobConf job,
                                                             org.apache.hadoop.mapred.Reporter reporter)
                                                      throws IOException
Specified by:
getRecordReader in interface org.apache.hadoop.mapred.InputFormat<K extends org.apache.hadoop.io.WritableComparable,V extends org.apache.hadoop.io.Writable>
Throws:
IOException

init

protected void init(org.apache.hadoop.mapred.JobConf job)

getSplits

public org.apache.hadoop.mapred.InputSplit[] getSplits(org.apache.hadoop.mapred.JobConf job,
                                                       int numSplits)
                                                throws IOException
Specified by:
getSplits in interface org.apache.hadoop.mapred.InputFormat<K extends org.apache.hadoop.io.WritableComparable,V extends org.apache.hadoop.io.Writable>
Throws:
IOException

validateInput

public void validateInput(org.apache.hadoop.mapred.JobConf job)
                   throws IOException
Throws:
IOException

getPartitionDescFromPath

protected static PartitionDesc getPartitionDescFromPath(Map<String,PartitionDesc> pathToPartitionInfo,
                                                        org.apache.hadoop.fs.Path dir)
                                                 throws IOException
Throws:
IOException

pushFilters

protected void pushFilters(org.apache.hadoop.mapred.JobConf jobConf,
                           TableScanOperator tableScan)

pushProjectionsAndFilters

protected void pushProjectionsAndFilters(org.apache.hadoop.mapred.JobConf jobConf,
                                         Class inputFormatClass,
                                         String splitPath,
                                         String splitPathWithNoSchema)

pushProjectionsAndFilters

protected void pushProjectionsAndFilters(org.apache.hadoop.mapred.JobConf jobConf,
                                         Class inputFormatClass,
                                         String splitPath,
                                         String splitPathWithNoSchema,
                                         boolean nonNative)


Copyright © 2011 The Apache Software Foundation