001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.mapred.lib;
020    
021    import java.io.*;
022    import java.lang.reflect.*;
023    
024    import org.apache.hadoop.fs.FileSystem;
025    
026    import org.apache.hadoop.mapred.*;
027    import org.apache.hadoop.classification.InterfaceAudience;
028    import org.apache.hadoop.classification.InterfaceStability;
029    import org.apache.hadoop.conf.Configuration;
030    
031    /**
032     * A generic RecordReader that can hand out different recordReaders
033     * for each chunk in a {@link CombineFileSplit}.
034     * A CombineFileSplit can combine data chunks from multiple files. 
035     * This class allows using different RecordReaders for processing
036     * these data chunks from different files.
037     * @see CombineFileSplit
038     * @deprecated Use
039     * {@link org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader}
040     */
041    @Deprecated
042    @InterfaceAudience.Public
043    @InterfaceStability.Stable
044    public class CombineFileRecordReader<K, V> implements RecordReader<K, V> {
045    
046      static final Class [] constructorSignature = new Class [] 
047                                             {CombineFileSplit.class, 
048                                              Configuration.class, 
049                                              Reporter.class,
050                                              Integer.class};
051    
052      protected CombineFileSplit split;
053      protected JobConf jc;
054      protected Reporter reporter;
055      protected Class<RecordReader<K, V>> rrClass;
056      protected Constructor<RecordReader<K, V>> rrConstructor;
057      protected FileSystem fs;
058      
059      protected int idx;
060      protected long progress;
061      protected RecordReader<K, V> curReader;
062      
063      public boolean next(K key, V value) throws IOException {
064    
065        while ((curReader == null) || !curReader.next(key, value)) {
066          if (!initNextRecordReader()) {
067            return false;
068          }
069        }
070        return true;
071      }
072    
073      public K createKey() {
074        return curReader.createKey();
075      }
076      
077      public V createValue() {
078        return curReader.createValue();
079      }
080      
081      /**
082       * return the amount of data processed
083       */
084      public long getPos() throws IOException {
085        return progress;
086      }
087      
088      public void close() throws IOException {
089        if (curReader != null) {
090          curReader.close();
091          curReader = null;
092        }
093      }
094      
095      /**
096       * return progress based on the amount of data processed so far.
097       */
098      public float getProgress() throws IOException {
099        return Math.min(1.0f,  progress/(float)(split.getLength()));
100      }
101      
102      /**
103       * A generic RecordReader that can hand out different recordReaders
104       * for each chunk in the CombineFileSplit.
105       */
106      public CombineFileRecordReader(JobConf job, CombineFileSplit split, 
107                                     Reporter reporter,
108                                     Class<RecordReader<K, V>> rrClass)
109        throws IOException {
110        this.split = split;
111        this.jc = job;
112        this.rrClass = rrClass;
113        this.reporter = reporter;
114        this.idx = 0;
115        this.curReader = null;
116        this.progress = 0;
117    
118        try {
119          rrConstructor = rrClass.getDeclaredConstructor(constructorSignature);
120          rrConstructor.setAccessible(true);
121        } catch (Exception e) {
122          throw new RuntimeException(rrClass.getName() + 
123                                     " does not have valid constructor", e);
124        }
125        initNextRecordReader();
126      }
127      
128      /**
129       * Get the record reader for the next chunk in this CombineFileSplit.
130       */
131      protected boolean initNextRecordReader() throws IOException {
132    
133        if (curReader != null) {
134          curReader.close();
135          curReader = null;
136          if (idx > 0) {
137            progress += split.getLength(idx-1);    // done processing so far
138          }
139        }
140    
141        // if all chunks have been processed, nothing more to do.
142        if (idx == split.getNumPaths()) {
143          return false;
144        }
145    
146        // get a record reader for the idx-th chunk
147        try {
148          curReader =  rrConstructor.newInstance(new Object [] 
149                                {split, jc, reporter, Integer.valueOf(idx)});
150    
151          // setup some helper config variables.
152          jc.set(JobContext.MAP_INPUT_FILE, split.getPath(idx).toString());
153          jc.setLong(JobContext.MAP_INPUT_START, split.getOffset(idx));
154          jc.setLong(JobContext.MAP_INPUT_PATH, split.getLength(idx));
155        } catch (Exception e) {
156          throw new RuntimeException (e);
157        }
158        idx++;
159        return true;
160      }
161    }