1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with this
4 * work for additional information regarding copyright ownership. The ASF
5 * licenses this file to you under the Apache License, Version 2.0 (the
6 * "License"); you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 * License for the specific language governing permissions and limitations
15 * under the License.
16 */
17 package org.apache.hadoop.hbase.util.test;
18
19 import java.util.Random;
20
21 import org.apache.hadoop.hbase.util.Bytes;
22 import org.apache.hadoop.hbase.util.MD5Hash;
23
24 /**
25 * A generator of random keys and values for load testing. Keys are generated
26 * by converting numeric indexes to strings and prefixing them with an MD5
27 * hash. Values are generated by selecting value size in the configured range
28 * and generating a pseudo-random sequence of bytes seeded by key, column
29 * qualifier, and value size.
30 */
31 public class LoadTestKVGenerator {
32
33 /** A random number generator for determining value size */
34 private Random randomForValueSize = new Random();
35
36 private final int minValueSize;
37 private final int maxValueSize;
38
39 public LoadTestKVGenerator(int minValueSize, int maxValueSize) {
40 if (minValueSize <= 0 || maxValueSize <= 0) {
41 throw new IllegalArgumentException("Invalid min/max value sizes: " +
42 minValueSize + ", " + maxValueSize);
43 }
44 this.minValueSize = minValueSize;
45 this.maxValueSize = maxValueSize;
46 }
47
48 /**
49 * Verifies that the given byte array is the same as what would be generated
50 * for the given seed strings (row/cf/column/...). We are assuming that the
51 * value size is correct, and only verify the actual bytes. However, if the
52 * min/max value sizes are set sufficiently high, an accidental match should be
53 * extremely improbable.
54 */
55 public static boolean verify(byte[] value, byte[]... seedStrings) {
56 byte[] expectedData = getValueForRowColumn(value.length, seedStrings);
57 return Bytes.equals(expectedData, value);
58 }
59
60 /**
61 * Converts the given key to string, and prefixes it with the MD5 hash of
62 * the index's string representation.
63 */
64 public static String md5PrefixedKey(long key) {
65 String stringKey = Long.toString(key);
66 String md5hash = MD5Hash.getMD5AsHex(Bytes.toBytes(stringKey));
67
68 // flip the key to randomize
69 return md5hash + "-" + stringKey;
70 }
71
72 /**
73 * Generates a value for the given key index and column qualifier. Size is
74 * selected randomly in the configured range. The generated value depends
75 * only on the combination of the strings passed (key/cf/column/...) and the selected
76 * value size. This allows to verify the actual value bytes when reading, as done
77 * in {#verify(byte[], byte[]...)}
78 * This method is as thread-safe as Random class. It appears that the worst bug ever
79 * found with the latter is that multiple threads will get some duplicate values, which
80 * we don't care about.
81 */
82 public byte[] generateRandomSizeValue(byte[]... seedStrings) {
83 int dataSize = minValueSize;
84 if(minValueSize != maxValueSize) {
85 dataSize = minValueSize + randomForValueSize.nextInt(Math.abs(maxValueSize - minValueSize));
86 }
87 return getValueForRowColumn(dataSize, seedStrings);
88 }
89
90 /**
91 * Generates random bytes of the given size for the given row and column
92 * qualifier. The random seed is fully determined by these parameters.
93 */
94 private static byte[] getValueForRowColumn(int dataSize, byte[]... seedStrings) {
95 long seed = dataSize;
96 for (byte[] str : seedStrings) {
97 seed += Bytes.toString(str).hashCode();
98 }
99 Random seededRandom = new Random(seed);
100 byte[] randomBytes = new byte[dataSize];
101 seededRandom.nextBytes(randomBytes);
102 return randomBytes;
103 }
104
105 }