/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.IOException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;

/**
 * InputFormat reading keys, values from SequenceFiles in binary (raw)
 * format.
 * @deprecated Use
 *   {@link org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat}
 *   instead
 */
@Deprecated
@InterfaceAudience.Public
@InterfaceStability.Stable
public class SequenceFileAsBinaryInputFormat
    extends SequenceFileInputFormat<BytesWritable,BytesWritable> {

  public SequenceFileAsBinaryInputFormat() {
    super();
  }

  public RecordReader<BytesWritable,BytesWritable> getRecordReader(
      InputSplit split, JobConf job, Reporter reporter)
      throws IOException {
    return new SequenceFileAsBinaryRecordReader(job, (FileSplit)split);
  }

  /**
   * Read records from a SequenceFile as binary (raw) bytes.
   */
  public static class SequenceFileAsBinaryRecordReader
      implements RecordReader<BytesWritable,BytesWritable> {
    private SequenceFile.Reader in;
    private long start;
    private long end;
    private boolean done = false;
    private DataOutputBuffer buffer = new DataOutputBuffer();
    private SequenceFile.ValueBytes vbytes;

    public SequenceFileAsBinaryRecordReader(Configuration conf, FileSplit split)
        throws IOException {
      Path path = split.getPath();
      FileSystem fs = path.getFileSystem(conf);
      this.in = new SequenceFile.Reader(fs, path, conf);
      this.end = split.getStart() + split.getLength();
      if (split.getStart() > in.getPosition())
        in.sync(split.getStart());                  // sync to start
      this.start = in.getPosition();
      vbytes = in.createValueBytes();
      done = start >= end;
    }

    public BytesWritable createKey() {
      return new BytesWritable();
    }

    public BytesWritable createValue() {
      return new BytesWritable();
    }

    /**
     * Retrieve the name of the key class for this SequenceFile.
     * @see org.apache.hadoop.io.SequenceFile.Reader#getKeyClassName
     */
    public String getKeyClassName() {
      return in.getKeyClassName();
    }

    /**
     * Retrieve the name of the value class for this SequenceFile.
     * @see org.apache.hadoop.io.SequenceFile.Reader#getValueClassName
     */
    public String getValueClassName() {
      return in.getValueClassName();
    }

    /**
     * Read raw bytes from a SequenceFile.
     */
    public synchronized boolean next(BytesWritable key, BytesWritable val)
        throws IOException {
      if (done) return false;
      long pos = in.getPosition();
      boolean eof = -1 == in.nextRawKey(buffer);
      if (!eof) {
        // Copy the raw key bytes, then the raw (uncompressed) value bytes,
        // into the caller's BytesWritables.
        key.set(buffer.getData(), 0, buffer.getLength());
        buffer.reset();
        in.nextRawValue(vbytes);
        vbytes.writeUncompressedBytes(buffer);
        val.set(buffer.getData(), 0, buffer.getLength());
        buffer.reset();
      }
      // Stop at EOF, or after the read position passes the split end and a
      // sync mark has been seen.
      return !(done = (eof || (pos >= end && in.syncSeen())));
    }

    public long getPos() throws IOException {
      return in.getPosition();
    }

    public void close() throws IOException {
      in.close();
    }

    /**
     * Return the progress within the input split.
     * @return 0.0 to 1.0 of the input byte range
     */
    public float getProgress() throws IOException {
      if (end == start) {
        return 0.0f;
      } else {
        return Math.min(1.0f, (float)((in.getPosition() - start) /
                                      (double)(end - start)));
      }
    }
  }
}
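
/*
 * Illustrative usage sketch (not part of the upstream class): a minimal
 * map-only driver that copies raw SequenceFile records using this input
 * format. The example class name, job name, and argument layout
 * (args[0] = input dir, args[1] = output dir) are assumptions made for
 * this sketch only.
 */
class SequenceFileAsBinaryCopyExample {
  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(SequenceFileAsBinaryCopyExample.class);
    job.setJobName("raw-sequencefile-copy");

    // Read every key and value as opaque bytes.
    job.setInputFormat(SequenceFileAsBinaryInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));

    // Map-only pass-through: IdentityMapper emits each record unchanged.
    job.setMapperClass(org.apache.hadoop.mapred.lib.IdentityMapper.class);
    job.setNumReduceTasks(0);

    // Both sides of every record surface as BytesWritable blobs.
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    JobClient.runJob(job);
  }
}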