1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.ByteArrayOutputStream;
21 import java.io.IOException;
22 import java.nio.ByteBuffer;
23 import java.util.zip.Checksum;
24
25 import org.apache.hadoop.fs.Path;
26 import org.apache.hadoop.io.DataOutputBuffer;
27 import org.apache.hadoop.hbase.HConstants;
28 import org.apache.hadoop.hbase.util.Bytes;
29 import org.apache.hadoop.hbase.util.ChecksumFactory;
30 import org.apache.hadoop.hbase.util.ChecksumType;
31
32 /**
33 * Utility methods to compute and validate checksums.
34 */
35 public class ChecksumUtil {
36
37 /** This is used to reserve space in a byte buffer */
38 private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
39
40 /**
41 * This is used by unit tests to make checksum failures throw an
42 * exception instead of returning null. Returning a null value from
43 * checksum validation will cause the higher layer to retry that
44 * read with hdfs-level checksums. Instead, we would like checksum
45 * failures to cause the entire unit test to fail.
46 */
47 private static boolean generateExceptions = false;
48
49 /**
50 * Generates a checksum for all the data in indata. The checksum is
51 * written to outdata.
52 * @param indata input data stream
53 * @param startOffset starting offset in the indata stream from where to
54 * compute checkums from
55 * @param endOffset ending offset in the indata stream upto
56 * which checksums needs to be computed
57 * @param outdata the output buffer where checksum values are written
58 * @param outOffset the starting offset in the outdata where the
59 * checksum values are written
60 * @param checksumType type of checksum
61 * @param bytesPerChecksum number of bytes per checksum value
62 */
63 static void generateChecksums(byte[] indata,
64 int startOffset, int endOffset,
65 byte[] outdata, int outOffset,
66 ChecksumType checksumType,
67 int bytesPerChecksum) throws IOException {
68
69 if (checksumType == ChecksumType.NULL) {
70 return; // No checkums for this block.
71 }
72
73 Checksum checksum = checksumType.getChecksumObject();
74 int bytesLeft = endOffset - startOffset;
75 int chunkNum = 0;
76
77 while (bytesLeft > 0) {
78 // generate the checksum for one chunk
79 checksum.reset();
80 int count = Math.min(bytesLeft, bytesPerChecksum);
81 checksum.update(indata, startOffset, count);
82
83 // write the checksum value to the output buffer.
84 int cksumValue = (int)checksum.getValue();
85 outOffset = Bytes.putInt(outdata, outOffset, cksumValue);
86 chunkNum++;
87 startOffset += count;
88 bytesLeft -= count;
89 }
90 }
91
92 /**
93 * Validates that the data in the specified HFileBlock matches the
94 * checksum. Generates the checksum for the data and
95 * then validate that it matches the value stored in the header.
96 * If there is a checksum mismatch, then return false. Otherwise
97 * return true.
98 * The header is extracted from the specified HFileBlock while the
99 * data-to-be-verified is extracted from 'data'.
100 */
101 static boolean validateBlockChecksum(Path path, HFileBlock block,
102 byte[] data, int hdrSize) throws IOException {
103
104 // If this is an older version of the block that does not have
105 // checksums, then return false indicating that checksum verification
106 // did not succeed. Actually, this methiod should never be called
107 // when the minorVersion is 0, thus this is a defensive check for a
108 // cannot-happen case. Since this is a cannot-happen case, it is
109 // better to return false to indicate a checksum validation failure.
110 if (block.getMinorVersion() < HFileBlock.MINOR_VERSION_WITH_CHECKSUM) {
111 return false;
112 }
113
114 // Get a checksum object based on the type of checksum that is
115 // set in the HFileBlock header. A ChecksumType.NULL indicates that
116 // the caller is not interested in validating checksums, so we
117 // always return true.
118 ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
119 if (cktype == ChecksumType.NULL) {
120 return true; // No checkums validations needed for this block.
121 }
122 Checksum checksumObject = cktype.getChecksumObject();
123 checksumObject.reset();
124
125 // read in the stored value of the checksum size from the header.
126 int bytesPerChecksum = block.getBytesPerChecksum();
127
128 // bytesPerChecksum is always larger than the size of the header
129 if (bytesPerChecksum < hdrSize) {
130 String msg = "Unsupported value of bytesPerChecksum. " +
131 " Minimum is " + hdrSize +
132 " but the configured value is " + bytesPerChecksum;
133 HFile.LOG.warn(msg);
134 return false; // cannot happen case, unable to verify checksum
135 }
136 // Extract the header and compute checksum for the header.
137 ByteBuffer hdr = block.getBufferWithHeader();
138 checksumObject.update(hdr.array(), hdr.arrayOffset(), hdrSize);
139
140 int off = hdrSize;
141 int consumed = hdrSize;
142 int bytesLeft = block.getOnDiskDataSizeWithHeader() - off;
143 int cksumOffset = block.getOnDiskDataSizeWithHeader();
144
145 // validate each chunk
146 while (bytesLeft > 0) {
147 int thisChunkSize = bytesPerChecksum - consumed;
148 int count = Math.min(bytesLeft, thisChunkSize);
149 checksumObject.update(data, off, count);
150
151 int storedChecksum = Bytes.toInt(data, cksumOffset);
152 if (storedChecksum != (int)checksumObject.getValue()) {
153 String msg = "File " + path +
154 " Stored checksum value of " + storedChecksum +
155 " at offset " + cksumOffset +
156 " does not match computed checksum " +
157 checksumObject.getValue() +
158 ", total data size " + data.length +
159 " Checksum data range offset " + off + " len " + count +
160 HFileBlock.toStringHeader(block.getBufferReadOnly());
161 HFile.LOG.warn(msg);
162 if (generateExceptions) {
163 throw new IOException(msg); // this is only for unit tests
164 } else {
165 return false; // checksum validation failure
166 }
167 }
168 cksumOffset += HFileBlock.CHECKSUM_SIZE;
169 bytesLeft -= count;
170 off += count;
171 consumed = 0;
172 checksumObject.reset();
173 }
174 return true; // checksum is valid
175 }
176
177 /**
178 * Returns the number of bytes needed to store the checksums for
179 * a specified data size
180 * @param datasize number of bytes of data
181 * @param bytesPerChecksum number of bytes in a checksum chunk
182 * @return The number of bytes needed to store the checksum values
183 */
184 static long numBytes(long datasize, int bytesPerChecksum) {
185 return numChunks(datasize, bytesPerChecksum) *
186 HFileBlock.CHECKSUM_SIZE;
187 }
188
189 /**
190 * Returns the number of checksum chunks needed to store the checksums for
191 * a specified data size
192 * @param datasize number of bytes of data
193 * @param bytesPerChecksum number of bytes in a checksum chunk
194 * @return The number of checksum chunks
195 */
196 static long numChunks(long datasize, int bytesPerChecksum) {
197 long numChunks = datasize/bytesPerChecksum;
198 if (datasize % bytesPerChecksum != 0) {
199 numChunks++;
200 }
201 return numChunks;
202 }
203
204 /**
205 * Write dummy checksums to the end of the specified bytes array
206 * to reserve space for writing checksums later
207 * @param baos OutputStream to write dummy checkum values
208 * @param numBytes Number of bytes of data for which dummy checksums
209 * need to be generated
210 * @param bytesPerChecksum Number of bytes per checksum value
211 */
212 static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
213 int numBytes, int bytesPerChecksum) throws IOException {
214 long numChunks = numChunks(numBytes, bytesPerChecksum);
215 long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
216 while (bytesLeft > 0) {
217 long count = Math.min(bytesLeft, DUMMY_VALUE.length);
218 baos.write(DUMMY_VALUE, 0, (int)count);
219 bytesLeft -= count;
220 }
221 }
222
223 /**
224 * Mechanism to throw an exception in case of hbase checksum
225 * failure. This is used by unit tests only.
226 * @param value Setting this to true will cause hbase checksum
227 * verification failures to generate exceptions.
228 */
229 public static void generateExceptionForChecksumFailureForTest(boolean value) {
230 generateExceptions = value;
231 }
232 }
233