View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations under
15   * the License.
16   */
17  package org.apache.hadoop.hbase.io.hfile;
18  
19  import java.io.BufferedInputStream;
20  import java.io.BufferedOutputStream;
21  import java.io.FilterOutputStream;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.io.OutputStream;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.conf.Configurable;
30  import org.apache.hadoop.io.compress.CodecPool;
31  import org.apache.hadoop.io.compress.CompressionCodec;
32  import org.apache.hadoop.io.compress.CompressionInputStream;
33  import org.apache.hadoop.io.compress.CompressionOutputStream;
34  import org.apache.hadoop.io.compress.Compressor;
35  import org.apache.hadoop.io.compress.Decompressor;
36  import org.apache.hadoop.io.compress.GzipCodec;
37  import org.apache.hadoop.io.compress.DefaultCodec;
38  import org.apache.hadoop.util.ReflectionUtils;
39  
/**
 * Compression abstraction used by HFile: the {@link Compression.Algorithm}
 * enum wires each supported codec (LZO, GZ, NONE) to buffered compression and
 * decompression streams and pooled (de)compressors.
 * Copied from the hadoop-3315 TFile implementation.
 */
44  public final class Compression {
45    static final Log LOG = LogFactory.getLog(Compression.class);
46  
47    /**
48     * Prevent the instantiation of class.
49     */
50    private Compression() {
51      super();
52    }
53  
54    static class FinishOnFlushCompressionStream extends FilterOutputStream {
55      public FinishOnFlushCompressionStream(CompressionOutputStream cout) {
56        super(cout);
57      }
58  
59      @Override
60      public void write(byte b[], int off, int len) throws IOException {
61        out.write(b, off, len);
62      }
63  
64      @Override
65      public void flush() throws IOException {
66        CompressionOutputStream cout = (CompressionOutputStream) out;
67        cout.finish();
68        cout.flush();
69        cout.resetState();
70      }
71    }
72  
73    /**
74     * Compression algorithms. The ordinal of these cannot change or else you
75     * risk breaking all existing HFiles out there.  Even the ones that are
76     * not compressed! (They use the NONE algorithm)
77     */
78    public static enum Algorithm {
79      LZO("lzo") {
80        // Use base type to avoid compile-time dependencies.
81        private transient CompressionCodec lzoCodec;
82  
83        @Override
84        CompressionCodec getCodec() {
85          if (lzoCodec == null) {
86            Configuration conf = new Configuration();
87            conf.setBoolean("hadoop.native.lib", true);
88            try {
89              Class<?> externalCodec =
90                  ClassLoader.getSystemClassLoader().loadClass("com.hadoop.compression.lzo.LzoCodec");
91              lzoCodec = (CompressionCodec) ReflectionUtils.newInstance(externalCodec, conf);
92            } catch (ClassNotFoundException e) {
93              throw new RuntimeException(e);
94            }
95          }
96          return lzoCodec;
97        }
98      },
99      GZ("gz") {
100       private transient GzipCodec codec;
101 
102       @Override
103       DefaultCodec getCodec() {
104         if (codec == null) {
105           Configuration conf = new Configuration();
106           conf.setBoolean("hadoop.native.lib", true);
107           codec = new GzipCodec();
108           codec.setConf(conf);
109         }
110 
111         return codec;
112       }
113     },
114 
115     NONE("none") {
116       @Override
117       DefaultCodec getCodec() {
118         return null;
119       }
120 
121       @Override
122       public synchronized InputStream createDecompressionStream(
123           InputStream downStream, Decompressor decompressor,
124           int downStreamBufferSize) throws IOException {
125         if (downStreamBufferSize > 0) {
126           return new BufferedInputStream(downStream, downStreamBufferSize);
127         }
128         // else {
129           // Make sure we bypass FSInputChecker buffer.
130         // return new BufferedInputStream(downStream, 1024);
131         // }
132         // }
133         return downStream;
134       }
135 
136       @Override
137       public synchronized OutputStream createCompressionStream(
138           OutputStream downStream, Compressor compressor,
139           int downStreamBufferSize) throws IOException {
140         if (downStreamBufferSize > 0) {
141           return new BufferedOutputStream(downStream, downStreamBufferSize);
142         }
143 
144         return downStream;
145       }
146     };
147 
148     private final String compressName;
149 	// data input buffer size to absorb small reads from application.
150     private static final int DATA_IBUF_SIZE = 1 * 1024;
151 	// data output buffer size to absorb small writes from application.
152     private static final int DATA_OBUF_SIZE = 4 * 1024;
153 
154     Algorithm(String name) {
155       this.compressName = name;
156     }
157 
158     abstract CompressionCodec getCodec();
159 
160     public InputStream createDecompressionStream(
161         InputStream downStream, Decompressor decompressor,
162         int downStreamBufferSize) throws IOException {
163       CompressionCodec codec = getCodec();
164       // Set the internal buffer size to read from down stream.
165       if (downStreamBufferSize > 0) {
166         Configurable c = (Configurable) codec;
167         c.getConf().setInt("io.file.buffer.size", downStreamBufferSize);
168       }
169       CompressionInputStream cis =
170           codec.createInputStream(downStream, decompressor);
171       BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
172       return bis2;
173 
174     }
175 
176     public OutputStream createCompressionStream(
177         OutputStream downStream, Compressor compressor, int downStreamBufferSize)
178         throws IOException {
179       CompressionCodec codec = getCodec();
180       OutputStream bos1 = null;
181       if (downStreamBufferSize > 0) {
182         bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
183       }
184       else {
185         bos1 = downStream;
186       }
187       Configurable c = (Configurable) codec;
188       c.getConf().setInt("io.file.buffer.size", 32 * 1024);
189       CompressionOutputStream cos =
190           codec.createOutputStream(bos1, compressor);
191       BufferedOutputStream bos2 =
192           new BufferedOutputStream(new FinishOnFlushCompressionStream(cos),
193               DATA_OBUF_SIZE);
194       return bos2;
195     }
196 
197     public Compressor getCompressor() {
198       CompressionCodec codec = getCodec();
199       if (codec != null) {
200         Compressor compressor = CodecPool.getCompressor(codec);
201         if (compressor != null) {
202           if (compressor.finished()) {
203             // Somebody returns the compressor to CodecPool but is still using
204             // it.
205             LOG
206                 .warn("Compressor obtained from CodecPool is already finished()");
207             // throw new AssertionError(
208             // "Compressor obtained from CodecPool is already finished()");
209           }
210           compressor.reset();
211         }
212         return compressor;
213       }
214       return null;
215     }
216 
217     public void returnCompressor(Compressor compressor) {
218       if (compressor != null) {
219         CodecPool.returnCompressor(compressor);
220       }
221     }
222 
223     public Decompressor getDecompressor() {
224       CompressionCodec codec = getCodec();
225       if (codec != null) {
226         Decompressor decompressor = CodecPool.getDecompressor(codec);
227         if (decompressor != null) {
228           if (decompressor.finished()) {
229             // Somebody returns the decompressor to CodecPool but is still using
230             // it.
231             LOG
232                 .warn("Deompressor obtained from CodecPool is already finished()");
233             // throw new AssertionError(
234             // "Decompressor obtained from CodecPool is already finished()");
235           }
236           decompressor.reset();
237         }
238         return decompressor;
239       }
240 
241       return null;
242     }
243 
244     public void returnDecompressor(Decompressor decompressor) {
245       if (decompressor != null) {
246         CodecPool.returnDecompressor(decompressor);
247       }
248     }
249 
250     public String getName() {
251       return compressName;
252     }
253   }
254 
255   public static Algorithm getCompressionAlgorithmByName(String compressName) {
256     Algorithm[] algos = Algorithm.class.getEnumConstants();
257 
258     for (Algorithm a : algos) {
259       if (a.getName().equals(compressName)) {
260         return a;
261       }
262     }
263 
264     throw new IllegalArgumentException(
265         "Unsupported compression algorithm name: " + compressName);
266   }
267 
268   static String[] getSupportedAlgorithms() {
269     Algorithm[] algos = Algorithm.class.getEnumConstants();
270 
271     String[] ret = new String[algos.length];
272     int i = 0;
273     for (Algorithm a : algos) {
274       ret[i++] = a.getName();
275     }
276 
277     return ret;
278   }
279 }