1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  
16  
17  
18  
19  package org.apache.hadoop.hbase;
20  
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  import java.util.Random;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.commons.math.random.RandomData;
28  import org.apache.commons.math.random.RandomDataImpl;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FileSystem;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
33  import org.apache.hadoop.hbase.io.compress.Compression;
34  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
35  import org.apache.hadoop.hbase.io.hfile.HFile;
36  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
37  import org.apache.hadoop.hbase.util.Bytes;
38  
39  
40  
41  
42  
43  
44  public class HFilePerformanceEvaluation {
45  
46    private static final int ROW_LENGTH = 10;
47    private static final int ROW_COUNT = 1000000;
48    private static final int RFILE_BLOCKSIZE = 8 * 1024;
49  
50    static final Log LOG =
51      LogFactory.getLog(HFilePerformanceEvaluation.class.getName());
52  
53    static byte [] format(final int i) {
54      String v = Integer.toString(i);
55      return Bytes.toBytes("0000000000".substring(v.length()) + v);
56    }
57  
58    static ImmutableBytesWritable format(final int i, ImmutableBytesWritable w) {
59      w.set(format(i));
60      return w;
61    }
62  
63    private void runBenchmarks() throws Exception {
64      final Configuration conf = new Configuration();
65      final FileSystem fs = FileSystem.get(conf);
66      final Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile"));
67      if (fs.exists(mf)) {
68        fs.delete(mf, true);
69      }
70  
71      runBenchmark(new SequentialWriteBenchmark(conf, fs, mf, ROW_COUNT),
72          ROW_COUNT);
73      PerformanceEvaluationCommons.concurrentReads(new Runnable() {
74        public void run() {
75          try {
76            runBenchmark(new UniformRandomSmallScan(conf, fs, mf, ROW_COUNT),
77              ROW_COUNT);
78          } catch (Exception e) {
79            e.printStackTrace();
80          }
81        }
82      });
83      PerformanceEvaluationCommons.concurrentReads(new Runnable() {
84        public void run() {
85          try {
86            runBenchmark(new UniformRandomReadBenchmark(conf, fs, mf, ROW_COUNT),
87                ROW_COUNT);
88          } catch (Exception e) {
89            e.printStackTrace();
90          }
91        }
92      });
93      PerformanceEvaluationCommons.concurrentReads(new Runnable() {
94        public void run() {
95          try {
96            runBenchmark(new GaussianRandomReadBenchmark(conf, fs, mf, ROW_COUNT),
97                ROW_COUNT);
98          } catch (Exception e) {
99            e.printStackTrace();
100         }
101       }
102     });
103     PerformanceEvaluationCommons.concurrentReads(new Runnable() {
104       public void run() {
105         try {
106           runBenchmark(new SequentialReadBenchmark(conf, fs, mf, ROW_COUNT),
107               ROW_COUNT);
108         } catch (Exception e) {
109           e.printStackTrace();
110         }
111       }
112     });
113 
114   }
115 
116   protected void runBenchmark(RowOrientedBenchmark benchmark, int rowCount)
117     throws Exception {
118     LOG.info("Running " + benchmark.getClass().getSimpleName() + " for " +
119         rowCount + " rows.");
120     long elapsedTime = benchmark.run();
121     LOG.info("Running " + benchmark.getClass().getSimpleName() + " for " +
122         rowCount + " rows took " + elapsedTime + "ms.");
123   }
124 
125   static abstract class RowOrientedBenchmark {
126 
127     protected final Configuration conf;
128     protected final FileSystem fs;
129     protected final Path mf;
130     protected final int totalRows;
131 
132     public RowOrientedBenchmark(Configuration conf, FileSystem fs, Path mf,
133         int totalRows) {
134       this.conf = conf;
135       this.fs = fs;
136       this.mf = mf;
137       this.totalRows = totalRows;
138     }
139 
140     void setUp() throws Exception {
141       
142     }
143 
144     abstract void doRow(int i) throws Exception;
145 
146     protected int getReportingPeriod() {
147       return this.totalRows / 10;
148     }
149 
150     void tearDown() throws Exception {
151       
152     }
153 
154     
155 
156 
157 
158 
159     long run() throws Exception {
160       long elapsedTime;
161       setUp();
162       long startTime = System.currentTimeMillis();
163       try {
164         for (int i = 0; i < totalRows; i++) {
165           if (i > 0 && i % getReportingPeriod() == 0) {
166             LOG.info("Processed " + i + " rows.");
167           }
168           doRow(i);
169         }
170         elapsedTime = System.currentTimeMillis() - startTime;
171       } finally {
172         tearDown();
173       }
174       return elapsedTime;
175     }
176 
177   }
178 
179   static class SequentialWriteBenchmark extends RowOrientedBenchmark {
180     protected HFile.Writer writer;
181     private Random random = new Random();
182     private byte[] bytes = new byte[ROW_LENGTH];
183 
184     public SequentialWriteBenchmark(Configuration conf, FileSystem fs, Path mf,
185         int totalRows) {
186       super(conf, fs, mf, totalRows);
187     }
188 
189     @Override
190     void setUp() throws Exception {
191       writer =
192         HFile.getWriterFactoryNoCache(conf)
193             .withPath(fs, mf)
194             .withBlockSize(RFILE_BLOCKSIZE)
195             .create();
196     }
197 
198     @Override
199     void doRow(int i) throws Exception {
200       writer.append(format(i), generateValue());
201     }
202 
203     private byte[] generateValue() {
204       random.nextBytes(bytes);
205       return bytes;
206     }
207 
208     @Override
209     protected int getReportingPeriod() {
210       return this.totalRows; 
211     }
212 
213     @Override
214     void tearDown() throws Exception {
215       writer.close();
216     }
217 
218   }
219 
220   static abstract class ReadBenchmark extends RowOrientedBenchmark {
221 
222     protected HFile.Reader reader;
223 
224     public ReadBenchmark(Configuration conf, FileSystem fs, Path mf,
225         int totalRows) {
226       super(conf, fs, mf, totalRows);
227     }
228 
229     @Override
230     void setUp() throws Exception {
231       reader = HFile.createReader(this.fs, this.mf, new CacheConfig(this.conf));
232       this.reader.loadFileInfo();
233     }
234 
235     @Override
236     void tearDown() throws Exception {
237       reader.close();
238     }
239 
240   }
241 
242   static class SequentialReadBenchmark extends ReadBenchmark {
243     private HFileScanner scanner;
244 
245     public SequentialReadBenchmark(Configuration conf, FileSystem fs,
246       Path mf, int totalRows) {
247       super(conf, fs, mf, totalRows);
248     }
249 
250     @Override
251     void setUp() throws Exception {
252       super.setUp();
253       this.scanner = this.reader.getScanner(false, false);
254       this.scanner.seekTo();
255     }
256 
257     @Override
258     void doRow(int i) throws Exception {
259       if (this.scanner.next()) {
260         ByteBuffer k = this.scanner.getKey();
261         PerformanceEvaluationCommons.assertKey(format(i + 1), k);
262         ByteBuffer v = scanner.getValue();
263         PerformanceEvaluationCommons.assertValueSize(v.limit(), ROW_LENGTH);
264       }
265     }
266 
267     @Override
268     protected int getReportingPeriod() {
269       return this.totalRows; 
270     }
271 
272   }
273 
274   static class UniformRandomReadBenchmark extends ReadBenchmark {
275 
276     private Random random = new Random();
277 
278     public UniformRandomReadBenchmark(Configuration conf, FileSystem fs,
279         Path mf, int totalRows) {
280       super(conf, fs, mf, totalRows);
281     }
282 
283     @Override
284     void doRow(int i) throws Exception {
285       HFileScanner scanner = this.reader.getScanner(false, true);
286       byte [] b = getRandomRow();
287       scanner.seekTo(b);
288       ByteBuffer k = scanner.getKey();
289       PerformanceEvaluationCommons.assertKey(b, k);
290       ByteBuffer v = scanner.getValue();
291       PerformanceEvaluationCommons.assertValueSize(v.limit(), ROW_LENGTH);
292     }
293 
294     private byte [] getRandomRow() {
295       return format(random.nextInt(totalRows));
296     }
297   }
298 
299   static class UniformRandomSmallScan extends ReadBenchmark {
300     private Random random = new Random();
301 
302     public UniformRandomSmallScan(Configuration conf, FileSystem fs,
303         Path mf, int totalRows) {
304       super(conf, fs, mf, totalRows/10);
305     }
306 
307     @Override
308     void doRow(int i) throws Exception {
309       HFileScanner scanner = this.reader.getScanner(false, false);
310       byte [] b = getRandomRow();
311       if (scanner.seekTo(b) != 0) {
312         System.out.println("Nonexistent row: " + new String(b));
313         return;
314       }
315       ByteBuffer k = scanner.getKey();
316       PerformanceEvaluationCommons.assertKey(b, k);
317       
318       for (int ii = 0; ii < 30; ii++) {
319         if (!scanner.next()) {
320           System.out.println("NOTHING FOLLOWS");
321         }
322         ByteBuffer v = scanner.getValue();
323         PerformanceEvaluationCommons.assertValueSize(v.limit(), ROW_LENGTH);
324       }
325     }
326 
327     private byte [] getRandomRow() {
328       return format(random.nextInt(totalRows));
329     }
330   }
331 
332   static class GaussianRandomReadBenchmark extends ReadBenchmark {
333 
334     private RandomData randomData = new RandomDataImpl();
335 
336     public GaussianRandomReadBenchmark(Configuration conf, FileSystem fs,
337         Path mf, int totalRows) {
338       super(conf, fs, mf, totalRows);
339     }
340 
341     @Override
342     void doRow(int i) throws Exception {
343       HFileScanner scanner = this.reader.getScanner(false, true);
344       scanner.seekTo(getGaussianRandomRowBytes());
345       for (int ii = 0; ii < 30; ii++) {
346         if (!scanner.next()) {
347           System.out.println("NOTHING FOLLOWS");
348         }
349         scanner.getKey();
350         scanner.getValue();
351       }
352     }
353 
354     private byte [] getGaussianRandomRowBytes() {
355       int r = (int) randomData.nextGaussian((double)totalRows / 2.0,
356           (double)totalRows / 10.0);
357       return format(r);
358     }
359   }
360 
361   
362 
363 
364 
365 
366   public static void main(String[] args) throws Exception {
367     new HFilePerformanceEvaluation().runBenchmarks();
368   }
369 }