1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with this
4 * work for additional information regarding copyright ownership. The ASF
5 * licenses this file to you under the Apache License, Version 2.0 (the
6 * "License"); you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 * License for the specific language governing permissions and limitations
15 * under the License.
16 */
17 package org.apache.hadoop.hbase.io.encoding;
18
19 import java.io.DataInputStream;
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22
23 import org.apache.hadoop.classification.InterfaceAudience;
24 import org.apache.hadoop.hbase.KeyValue;
25 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
26 import org.apache.hadoop.io.RawComparator;
27
28 /**
29 * Encoding of KeyValue. It aims to be fast and efficient using assumptions:
30 * <ul>
31 * <li>the KeyValues are stored sorted by key</li>
32 * <li>we know the structure of KeyValue</li>
33 * <li>the values are always iterated forward from beginning of block</li>
34 * <li>knowledge of Key Value format</li>
35 * </ul>
36 * It is designed to work fast enough to be feasible as in memory compression.
37 *
38 * After encoding, it also optionally compresses the encoded data if a
39 * compression algorithm is specified in HFileBlockEncodingContext argument of
40 * {@link #encodeKeyValues(ByteBuffer, boolean, HFileBlockEncodingContext)}.
41 */
42 @InterfaceAudience.Private
43 public interface DataBlockEncoder {
44
45 /**
46 * Encodes KeyValues. It will first encode key value pairs, and then
47 * optionally do the compression for the encoded data.
48 *
49 * @param in
50 * Source of KeyValue for compression.
51 * @param includesMemstoreTS
52 * true if including memstore timestamp after every key-value pair
53 * @param encodingContext
54 * the encoding context which will contain encoded uncompressed bytes
55 * as well as compressed encoded bytes if compression is enabled, and
56 * also it will reuse resources across multiple calls.
57 * @throws IOException
58 * If there is an error writing to output stream.
59 */
60 void encodeKeyValues(
61 ByteBuffer in, boolean includesMemstoreTS, HFileBlockEncodingContext encodingContext
62 ) throws IOException;
63
64 /**
65 * Decode.
66 * @param source Compressed stream of KeyValues.
67 * @param includesMemstoreTS true if including memstore timestamp after every
68 * key-value pair
69 * @return Uncompressed block of KeyValues.
70 * @throws IOException If there is an error in source.
71 */
72 ByteBuffer decodeKeyValues(
73 DataInputStream source, boolean includesMemstoreTS
74 ) throws IOException;
75
76 /**
77 * Uncompress.
78 * @param source encoded stream of KeyValues.
79 * @param allocateHeaderLength allocate this many bytes for the header.
80 * @param skipLastBytes Do not copy n last bytes.
81 * @param includesMemstoreTS true if including memstore timestamp after every
82 * key-value pair
83 * @return Uncompressed block of KeyValues.
84 * @throws IOException If there is an error in source.
85 */
86 ByteBuffer decodeKeyValues(
87 DataInputStream source, int allocateHeaderLength, int skipLastBytes, boolean includesMemstoreTS
88 )
89 throws IOException;
90
91 /**
92 * Return first key in block. Useful for indexing. Typically does not make
93 * a deep copy but returns a buffer wrapping a segment of the actual block's
94 * byte array. This is because the first key in block is usually stored
95 * unencoded.
96 * @param block encoded block we want index, the position will not change
97 * @return First key in block.
98 */
99 ByteBuffer getFirstKeyInBlock(ByteBuffer block);
100
101 /**
102 * Create a HFileBlock seeker which find KeyValues within a block.
103 * @param comparator what kind of comparison should be used
104 * @param includesMemstoreTS true if including memstore timestamp after every
105 * key-value pair
106 * @return A newly created seeker.
107 */
108 EncodedSeeker createSeeker(
109 RawComparator<byte[]> comparator, boolean includesMemstoreTS
110 );
111
112 /**
113 * Creates a encoder specific encoding context
114 *
115 * @param compressionAlgorithm
116 * compression algorithm used if the final data needs to be
117 * compressed
118 * @param encoding
119 * encoding strategy used
120 * @param headerBytes
121 * header bytes to be written, put a dummy header here if the header
122 * is unknown
123 * @return a newly created encoding context
124 */
125 HFileBlockEncodingContext newDataBlockEncodingContext(
126 Algorithm compressionAlgorithm, DataBlockEncoding encoding, byte[] headerBytes
127 );
128
129 /**
130 * Creates an encoder specific decoding context, which will prepare the data
131 * before actual decoding
132 *
133 * @param compressionAlgorithm
134 * compression algorithm used if the data needs to be decompressed
135 * @return a newly created decoding context
136 */
137 HFileBlockDecodingContext newDataBlockDecodingContext(
138 Algorithm compressionAlgorithm
139 );
140
141 /**
142 * An interface which enable to seek while underlying data is encoded.
143 *
144 * It works on one HFileBlock, but it is reusable. See
145 * {@link #setCurrentBuffer(ByteBuffer)}.
146 */
147 interface EncodedSeeker {
148 /**
149 * Set on which buffer there will be done seeking.
150 * @param buffer Used for seeking.
151 */
152 void setCurrentBuffer(ByteBuffer buffer);
153
154 /**
155 * Does a deep copy of the key at the current position. A deep copy is
156 * necessary because buffers are reused in the decoder.
157 * @return key at current position
158 */
159 ByteBuffer getKeyDeepCopy();
160
161 /**
162 * Does a shallow copy of the value at the current position. A shallow
163 * copy is possible because the returned buffer refers to the backing array
164 * of the original encoded buffer.
165 * @return value at current position
166 */
167 ByteBuffer getValueShallowCopy();
168
169 /** @return key value at current position with position set to limit */
170 ByteBuffer getKeyValueBuffer();
171
172 /**
173 * @return the KeyValue object at the current position. Includes memstore
174 * timestamp.
175 */
176 KeyValue getKeyValue();
177
178 /** Set position to beginning of given block */
179 void rewind();
180
181 /**
182 * Move to next position
183 * @return true on success, false if there is no more positions.
184 */
185 boolean next();
186
187 /**
188 * Moves the seeker position within the current block to:
189 * <ul>
190 * <li>the last key that that is less than or equal to the given key if
191 * <code>seekBefore</code> is false</li>
192 * <li>the last key that is strictly less than the given key if <code>
193 * seekBefore</code> is true. The caller is responsible for loading the
194 * previous block if the requested key turns out to be the first key of the
195 * current block.</li>
196 * </ul>
197 * @param key byte array containing the key
198 * @param offset key position the array
199 * @param length key length in bytes
200 * @param seekBefore find the key strictly less than the given key in case
201 * of an exact match. Does not matter in case of an inexact match.
202 * @return 0 on exact match, 1 on inexact match.
203 */
204 int seekToKeyInBlock(
205 byte[] key, int offset, int length, boolean seekBefore
206 );
207 }
208 }