001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.mapred.lib; 020 021 import java.io.*; 022 import java.lang.reflect.*; 023 024 import org.apache.hadoop.fs.FileSystem; 025 026 import org.apache.hadoop.mapred.*; 027 import org.apache.hadoop.classification.InterfaceAudience; 028 import org.apache.hadoop.classification.InterfaceStability; 029 import org.apache.hadoop.conf.Configuration; 030 031 /** 032 * A generic RecordReader that can hand out different recordReaders 033 * for each chunk in a {@link CombineFileSplit}. 034 * A CombineFileSplit can combine data chunks from multiple files. 035 * This class allows using different RecordReaders for processing 036 * these data chunks from different files. 037 * @see CombineFileSplit 038 * @deprecated Use 039 * {@link org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader} 040 */ 041 @Deprecated 042 @InterfaceAudience.Public 043 @InterfaceStability.Stable 044 public class CombineFileRecordReader<K, V> implements RecordReader<K, V> { 045 046 static final Class [] constructorSignature = new Class [] 047 {CombineFileSplit.class, 048 Configuration.class, 049 Reporter.class, 050 Integer.class}; 051 052 protected CombineFileSplit split; 053 protected JobConf jc; 054 protected Reporter reporter; 055 protected Class<RecordReader<K, V>> rrClass; 056 protected Constructor<RecordReader<K, V>> rrConstructor; 057 protected FileSystem fs; 058 059 protected int idx; 060 protected long progress; 061 protected RecordReader<K, V> curReader; 062 063 public boolean next(K key, V value) throws IOException { 064 065 while ((curReader == null) || !curReader.next(key, value)) { 066 if (!initNextRecordReader()) { 067 return false; 068 } 069 } 070 return true; 071 } 072 073 public K createKey() { 074 return curReader.createKey(); 075 } 076 077 public V createValue() { 078 return curReader.createValue(); 079 } 080 081 /** 082 * return the amount of data processed 083 */ 084 public long getPos() throws IOException { 085 return progress; 086 } 087 088 public void close() throws IOException { 089 if (curReader != null) { 090 curReader.close(); 091 curReader = null; 092 } 093 } 094 095 /** 096 * return progress based on the amount of data processed so far. 097 */ 098 public float getProgress() throws IOException { 099 return Math.min(1.0f, progress/(float)(split.getLength())); 100 } 101 102 /** 103 * A generic RecordReader that can hand out different recordReaders 104 * for each chunk in the CombineFileSplit. 105 */ 106 public CombineFileRecordReader(JobConf job, CombineFileSplit split, 107 Reporter reporter, 108 Class<RecordReader<K, V>> rrClass) 109 throws IOException { 110 this.split = split; 111 this.jc = job; 112 this.rrClass = rrClass; 113 this.reporter = reporter; 114 this.idx = 0; 115 this.curReader = null; 116 this.progress = 0; 117 118 try { 119 rrConstructor = rrClass.getDeclaredConstructor(constructorSignature); 120 rrConstructor.setAccessible(true); 121 } catch (Exception e) { 122 throw new RuntimeException(rrClass.getName() + 123 " does not have valid constructor", e); 124 } 125 initNextRecordReader(); 126 } 127 128 /** 129 * Get the record reader for the next chunk in this CombineFileSplit. 130 */ 131 protected boolean initNextRecordReader() throws IOException { 132 133 if (curReader != null) { 134 curReader.close(); 135 curReader = null; 136 if (idx > 0) { 137 progress += split.getLength(idx-1); // done processing so far 138 } 139 } 140 141 // if all chunks have been processed, nothing more to do. 142 if (idx == split.getNumPaths()) { 143 return false; 144 } 145 146 // get a record reader for the idx-th chunk 147 try { 148 curReader = rrConstructor.newInstance(new Object [] 149 {split, jc, reporter, Integer.valueOf(idx)}); 150 151 // setup some helper config variables. 152 jc.set(JobContext.MAP_INPUT_FILE, split.getPath(idx).toString()); 153 jc.setLong(JobContext.MAP_INPUT_START, split.getOffset(idx)); 154 jc.setLong(JobContext.MAP_INPUT_PATH, split.getLength(idx)); 155 } catch (Exception e) { 156 throw new RuntimeException (e); 157 } 158 idx++; 159 return true; 160 } 161 }