001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.mapred; 020 021 import java.io.IOException; 022 import java.util.Arrays; 023 024 import org.apache.hadoop.fs.FileSystem; 025 import org.apache.hadoop.fs.Path; 026 import org.apache.hadoop.fs.FileUtil; 027 028 import org.apache.hadoop.io.SequenceFile; 029 import org.apache.hadoop.io.SequenceFile.CompressionType; 030 import org.apache.hadoop.io.compress.CompressionCodec; 031 import org.apache.hadoop.io.compress.DefaultCodec; 032 import org.apache.hadoop.classification.InterfaceAudience; 033 import org.apache.hadoop.classification.InterfaceStability; 034 import org.apache.hadoop.conf.Configuration; 035 import org.apache.hadoop.util.*; 036 037 /** An {@link OutputFormat} that writes {@link SequenceFile}s. 038 * @deprecated Use 039 * {@link org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat} 040 * instead. 041 */ 042 @Deprecated 043 @InterfaceAudience.Public 044 @InterfaceStability.Stable 045 public class SequenceFileOutputFormat <K,V> extends FileOutputFormat<K, V> { 046 047 public RecordWriter<K, V> getRecordWriter( 048 FileSystem ignored, JobConf job, 049 String name, Progressable progress) 050 throws IOException { 051 // get the path of the temporary output file 052 Path file = FileOutputFormat.getTaskOutputPath(job, name); 053 054 FileSystem fs = file.getFileSystem(job); 055 CompressionCodec codec = null; 056 CompressionType compressionType = CompressionType.NONE; 057 if (getCompressOutput(job)) { 058 // find the kind of compression to do 059 compressionType = getOutputCompressionType(job); 060 061 // find the right codec 062 Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, 063 DefaultCodec.class); 064 codec = ReflectionUtils.newInstance(codecClass, job); 065 } 066 final SequenceFile.Writer out = 067 SequenceFile.createWriter(fs, job, file, 068 job.getOutputKeyClass(), 069 job.getOutputValueClass(), 070 compressionType, 071 codec, 072 progress); 073 074 return new RecordWriter<K, V>() { 075 076 public void write(K key, V value) 077 throws IOException { 078 079 out.append(key, value); 080 } 081 082 public void close(Reporter reporter) throws IOException { out.close();} 083 }; 084 } 085 086 /** Open the output generated by this format. */ 087 public static SequenceFile.Reader[] getReaders(Configuration conf, Path dir) 088 throws IOException { 089 FileSystem fs = dir.getFileSystem(conf); 090 Path[] names = FileUtil.stat2Paths(fs.listStatus(dir)); 091 092 // sort names, so that hash partitioning works 093 Arrays.sort(names); 094 095 SequenceFile.Reader[] parts = new SequenceFile.Reader[names.length]; 096 for (int i = 0; i < names.length; i++) { 097 parts[i] = new SequenceFile.Reader(fs, names[i], conf); 098 } 099 return parts; 100 } 101 102 /** 103 * Get the {@link CompressionType} for the output {@link SequenceFile}. 104 * @param conf the {@link JobConf} 105 * @return the {@link CompressionType} for the output {@link SequenceFile}, 106 * defaulting to {@link CompressionType#RECORD} 107 */ 108 public static CompressionType getOutputCompressionType(JobConf conf) { 109 String val = conf.get(org.apache.hadoop.mapreduce.lib.output. 110 FileOutputFormat.COMPRESS_TYPE, CompressionType.RECORD.toString()); 111 return CompressionType.valueOf(val); 112 } 113 114 /** 115 * Set the {@link CompressionType} for the output {@link SequenceFile}. 116 * @param conf the {@link JobConf} to modify 117 * @param style the {@link CompressionType} for the output 118 * {@link SequenceFile} 119 */ 120 public static void setOutputCompressionType(JobConf conf, 121 CompressionType style) { 122 setCompressOutput(conf, true); 123 conf.set(org.apache.hadoop.mapreduce.lib.output. 124 FileOutputFormat.COMPRESS_TYPE, style.toString()); 125 } 126 127 } 128