1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
19  import java.io.DataInputStream;
20  import java.io.IOException;
21  import java.nio.ByteBuffer;
22  
23  import org.apache.hadoop.classification.InterfaceAudience;
24  import org.apache.hadoop.hbase.KeyValue;
25  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
26  import org.apache.hadoop.io.RawComparator;
27  
28  /**
29   * Encoding of KeyValue. It aims to be fast and efficient using assumptions:
30   * <ul>
31   * <li>the KeyValues are stored sorted by key</li>
32   * <li>we know the structure of KeyValue</li>
33   * <li>the values are always iterated forward from beginning of block</li>
34   * <li>knowledge of Key Value format</li>
35   * </ul>
36   * It is designed to work fast enough to be feasible as in memory compression.
37   *
38   * After encoding, it also optionally compresses the encoded data if a
39   * compression algorithm is specified in HFileBlockEncodingContext argument of
40   * {@link #encodeKeyValues(ByteBuffer, boolean, HFileBlockEncodingContext)}.
41   */
42  @InterfaceAudience.Private
43  public interface DataBlockEncoder {
44  
45    /**
46     * Encodes KeyValues. It will first encode key value pairs, and then
47     * optionally do the compression for the encoded data.
48     *
49     * @param in
50     *          Source of KeyValue for compression.
51     * @param includesMemstoreTS
52     *          true if including memstore timestamp after every key-value pair
53     * @param encodingContext
54     *          the encoding context which will contain encoded uncompressed bytes
55     *          as well as compressed encoded bytes if compression is enabled, and
56     *          also it will reuse resources across multiple calls.
57     * @throws IOException
58     *           If there is an error writing to output stream.
59     */
60    void encodeKeyValues(
61      ByteBuffer in, boolean includesMemstoreTS, HFileBlockEncodingContext encodingContext
62    ) throws IOException;
63  
64    /**
65     * Decode.
66     * @param source Compressed stream of KeyValues.
67     * @param includesMemstoreTS true if including memstore timestamp after every
68     *          key-value pair
69     * @return Uncompressed block of KeyValues.
70     * @throws IOException If there is an error in source.
71     */
72    ByteBuffer decodeKeyValues(
73      DataInputStream source, boolean includesMemstoreTS
74    ) throws IOException;
75  
76    /**
77     * Uncompress.
78     * @param source encoded stream of KeyValues.
79     * @param allocateHeaderLength allocate this many bytes for the header.
80     * @param skipLastBytes Do not copy n last bytes.
81     * @param includesMemstoreTS true if including memstore timestamp after every
82     *          key-value pair
83     * @return Uncompressed block of KeyValues.
84     * @throws IOException If there is an error in source.
85     */
86    ByteBuffer decodeKeyValues(
87      DataInputStream source, int allocateHeaderLength, int skipLastBytes, boolean includesMemstoreTS
88    )
89        throws IOException;
90  
91    /**
92     * Return first key in block. Useful for indexing. Typically does not make
93     * a deep copy but returns a buffer wrapping a segment of the actual block's
94     * byte array. This is because the first key in block is usually stored
95     * unencoded.
96     * @param block encoded block we want index, the position will not change
97     * @return First key in block.
98     */
99    ByteBuffer getFirstKeyInBlock(ByteBuffer block);
100 
101   /**
102    * Create a HFileBlock seeker which find KeyValues within a block.
103    * @param comparator what kind of comparison should be used
104    * @param includesMemstoreTS true if including memstore timestamp after every
105    *          key-value pair
106    * @return A newly created seeker.
107    */
108   EncodedSeeker createSeeker(
109     RawComparator<byte[]> comparator, boolean includesMemstoreTS
110   );
111 
112   /**
113    * Creates a encoder specific encoding context
114    *
115    * @param compressionAlgorithm
116    *          compression algorithm used if the final data needs to be
117    *          compressed
118    * @param encoding
119    *          encoding strategy used
120    * @param headerBytes
121    *          header bytes to be written, put a dummy header here if the header
122    *          is unknown
123    * @return a newly created encoding context
124    */
125   HFileBlockEncodingContext newDataBlockEncodingContext(
126     Algorithm compressionAlgorithm, DataBlockEncoding encoding, byte[] headerBytes
127   );
128 
129   /**
130    * Creates an encoder specific decoding context, which will prepare the data
131    * before actual decoding
132    *
133    * @param compressionAlgorithm
134    *          compression algorithm used if the data needs to be decompressed
135    * @return a newly created decoding context
136    */
137   HFileBlockDecodingContext newDataBlockDecodingContext(
138     Algorithm compressionAlgorithm
139   );
140 
141   /**
142    * An interface which enable to seek while underlying data is encoded.
143    *
144    * It works on one HFileBlock, but it is reusable. See
145    * {@link #setCurrentBuffer(ByteBuffer)}.
146    */
147   interface EncodedSeeker {
148     /**
149      * Set on which buffer there will be done seeking.
150      * @param buffer Used for seeking.
151      */
152     void setCurrentBuffer(ByteBuffer buffer);
153 
154     /**
155      * Does a deep copy of the key at the current position. A deep copy is
156      * necessary because buffers are reused in the decoder.
157      * @return key at current position
158      */
159     ByteBuffer getKeyDeepCopy();
160 
161     /**
162      * Does a shallow copy of the value at the current position. A shallow
163      * copy is possible because the returned buffer refers to the backing array
164      * of the original encoded buffer.
165      * @return value at current position
166      */
167     ByteBuffer getValueShallowCopy();
168 
169     /** @return key value at current position with position set to limit */
170     ByteBuffer getKeyValueBuffer();
171 
172     /**
173      * @return the KeyValue object at the current position. Includes memstore
174      *         timestamp.
175      */
176     KeyValue getKeyValue();
177 
178     /** Set position to beginning of given block */
179     void rewind();
180 
181     /**
182      * Move to next position
183      * @return true on success, false if there is no more positions.
184      */
185     boolean next();
186 
187     /**
188      * Moves the seeker position within the current block to:
189      * <ul>
190      * <li>the last key that that is less than or equal to the given key if
191      * <code>seekBefore</code> is false</li>
192      * <li>the last key that is strictly less than the given key if <code>
193      * seekBefore</code> is true. The caller is responsible for loading the
194      * previous block if the requested key turns out to be the first key of the
195      * current block.</li>
196      * </ul>
197      * @param key byte array containing the key
198      * @param offset key position the array
199      * @param length key length in bytes
200      * @param seekBefore find the key strictly less than the given key in case
201      *          of an exact match. Does not matter in case of an inexact match.
202      * @return 0 on exact match, 1 on inexact match.
203      */
204     int seekToKeyInBlock(
205       byte[] key, int offset, int length, boolean seekBefore
206     );
207   }
208 }