/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.SortedSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionGroup;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
import org.apache.hadoop.hbase.io.hfile.HFilePrettyPrinter;
import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.LoadTestTool;
import org.apache.hadoop.hbase.util.MD5Hash;
import org.apache.hadoop.util.StringUtils;

/**
 * Tests HFile read/write workloads, such as merging HFiles and random reads.
 */
public class HFileReadWriteTest {
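
  /*
   * Example command lines (illustrative only; the option names are the ones
   * defined below, and input HFiles are passed as positional arguments):
   *
   *   hbase org.apache.hadoop.hbase.regionserver.HFileReadWriteTest \
   *       -merge -output_dir /tmp/merged -compression GZ hfile1 hfile2
   *
   *   hbase org.apache.hadoop.hbase.regionserver.HFileReadWriteTest \
   *       -read -duration 60 -num_threads 8 hfile1
   */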

  private static final String TABLE_NAME = "MyTable";

  private static enum Workload {
    MERGE("merge", "Merge the specified HFiles", 1, Integer.MAX_VALUE),
    RANDOM_READS("read", "Perform a random read benchmark on the given HFile",
        1, 1);

    private String option;
    private String description;

    public final int minNumInputFiles;
    public final int maxNumInputFiles;

    Workload(String option, String description, int minNumInputFiles,
        int maxNumInputFiles) {
      this.option = option;
      this.description = description;
      this.minNumInputFiles = minNumInputFiles;
      this.maxNumInputFiles = maxNumInputFiles;
    }

    static OptionGroup getOptionGroup() {
      OptionGroup optionGroup = new OptionGroup();
      for (Workload w : values())
        optionGroup.addOption(new Option(w.option, w.description));
      return optionGroup;
    }

    private static String getOptionListStr() {
      StringBuilder sb = new StringBuilder();
      for (Workload w : values()) {
        if (sb.length() > 0)
          sb.append(", ");
        sb.append("-" + w.option);
      }
      return sb.toString();
    }

    static Workload fromCmdLine(CommandLine cmdLine) {
      for (Workload w : values()) {
        if (cmdLine.hasOption(w.option))
          return w;
      }
      LOG.error("No workload specified. Specify one of the options: " +
          getOptionListStr());
      return null;
    }

    public String onlyUsedFor() {
      return ". Only used for the " + this + " workload.";
    }
  }

  private static final String OUTPUT_DIR_OPTION = "output_dir";
  private static final String COMPRESSION_OPTION = "compression";
  private static final String BLOOM_FILTER_OPTION = "bloom";
  private static final String BLOCK_SIZE_OPTION = "block_size";
  private static final String DURATION_OPTION = "duration";
  private static final String NUM_THREADS_OPTION = "num_threads";

  private static final Log LOG = LogFactory.getLog(HFileReadWriteTest.class);

  private Workload workload;
  private FileSystem fs;
  private Configuration conf;
  private CacheConfig cacheConf;
  private List<String> inputFileNames;
  private Path outputDir;
  private int numReadThreads;
  private int durationSec;
  private DataBlockEncoding dataBlockEncoding;
  private boolean encodeInCacheOnly;
  private HFileDataBlockEncoder dataBlockEncoder =
      NoOpDataBlockEncoder.INSTANCE;

  private StoreFile.BloomType bloomType = StoreFile.BloomType.NONE;
  private int blockSize;
  private Compression.Algorithm compression = Compression.Algorithm.NONE;

  private byte[] firstRow, lastRow;

  private AtomicLong numSeeks = new AtomicLong();
  private AtomicLong numKV = new AtomicLong();
  private AtomicLong totalBytes = new AtomicLong();

  private byte[] family;

  private long endTime = Long.MAX_VALUE;

  private SortedSet<String> keysRead = new ConcurrentSkipListSet<String>();
  private List<StoreFile> inputStoreFiles;

  public HFileReadWriteTest() {
    conf = HBaseConfiguration.create();
    cacheConf = new CacheConfig(conf);
  }

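  /**
   * Parses command-line options and populates the test configuration.
   *
   * @return true if parsing succeeded; false if the arguments were invalid or
   *         incomplete (usage is printed when no arguments are given)
   */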
  @SuppressWarnings("unchecked")
  public boolean parseOptions(String[] args) {

    Options options = new Options();
    options.addOption(OUTPUT_DIR_OPTION, true, "Output directory" +
        Workload.MERGE.onlyUsedFor());
    options.addOption(COMPRESSION_OPTION, true, "Compression type, one of "
        + Arrays.toString(Compression.Algorithm.values()) +
        Workload.MERGE.onlyUsedFor());
    options.addOption(BLOOM_FILTER_OPTION, true, "Bloom filter type, one of "
        + Arrays.toString(StoreFile.BloomType.values()) +
        Workload.MERGE.onlyUsedFor());
    options.addOption(BLOCK_SIZE_OPTION, true, "HFile block size" +
        Workload.MERGE.onlyUsedFor());
    options.addOption(DURATION_OPTION, true, "The amount of time to run the " +
        "random read workload for" + Workload.RANDOM_READS.onlyUsedFor());
    options.addOption(NUM_THREADS_OPTION, true, "The number of random " +
        "reader threads" + Workload.RANDOM_READS.onlyUsedFor());
    options.addOption(LoadTestTool.OPT_DATA_BLOCK_ENCODING, true,
        LoadTestTool.OPT_DATA_BLOCK_ENCODING_USAGE);
    options.addOption(LoadTestTool.OPT_ENCODE_IN_CACHE_ONLY, false,
        LoadTestTool.OPT_ENCODE_IN_CACHE_ONLY_USAGE);
    options.addOptionGroup(Workload.getOptionGroup());

    if (args.length == 0) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp(HFileReadWriteTest.class.getSimpleName(),
          options, true);
      return false;
    }

    CommandLineParser parser = new PosixParser();
    CommandLine cmdLine;
    try {
      cmdLine = parser.parse(options, args);
    } catch (ParseException ex) {
      LOG.error(ex);
      return false;
    }

    workload = Workload.fromCmdLine(cmdLine);
    if (workload == null)
      return false;

    inputFileNames = (List<String>) cmdLine.getArgList();

    if (inputFileNames.size() == 0) {
      LOG.error("No input file names specified");
      return false;
    }

    if (inputFileNames.size() < workload.minNumInputFiles) {
      LOG.error("Too few input files: at least " + workload.minNumInputFiles +
          " required");
      return false;
    }
    if (inputFileNames.size() > workload.maxNumInputFiles) {
      LOG.error("Too many input files: at most " + workload.maxNumInputFiles +
          " allowed");
      return false;
    }

    if (cmdLine.hasOption(COMPRESSION_OPTION)) {
      compression = Compression.Algorithm.valueOf(
          cmdLine.getOptionValue(COMPRESSION_OPTION));
    }

    if (cmdLine.hasOption(BLOOM_FILTER_OPTION)) {
      bloomType = StoreFile.BloomType.valueOf(cmdLine.getOptionValue(
          BLOOM_FILTER_OPTION));
    }

    encodeInCacheOnly =
        cmdLine.hasOption(LoadTestTool.OPT_ENCODE_IN_CACHE_ONLY);

    if (cmdLine.hasOption(LoadTestTool.OPT_DATA_BLOCK_ENCODING)) {
      dataBlockEncoding = DataBlockEncoding.valueOf(
          cmdLine.getOptionValue(LoadTestTool.OPT_DATA_BLOCK_ENCODING));
      // Optionally encode on disk, always encode in cache.
      dataBlockEncoder = new HFileDataBlockEncoderImpl(
          encodeInCacheOnly ? DataBlockEncoding.NONE : dataBlockEncoding,
          dataBlockEncoding);
    } else {
      if (encodeInCacheOnly) {
        LOG.error("The -" + LoadTestTool.OPT_ENCODE_IN_CACHE_ONLY +
            " option does not make sense without -" +
            LoadTestTool.OPT_DATA_BLOCK_ENCODING);
        return false;
      }
    }

    blockSize = conf.getInt("hfile.min.blocksize.size", 65536);
    if (cmdLine.hasOption(BLOCK_SIZE_OPTION))
      blockSize = Integer.parseInt(cmdLine.getOptionValue(BLOCK_SIZE_OPTION));

    if (workload == Workload.MERGE) {
      String outputDirStr = cmdLine.getOptionValue(OUTPUT_DIR_OPTION);
      if (outputDirStr == null) {
        LOG.error("Output directory is not specified");
        return false;
      }
      outputDir = new Path(outputDirStr);
      // Will be checked for existence in validateConfiguration.
    }

    if (workload == Workload.RANDOM_READS) {
      if (!requireOptions(cmdLine, new String[] { DURATION_OPTION,
          NUM_THREADS_OPTION })) {
        return false;
      }

      durationSec = Integer.parseInt(cmdLine.getOptionValue(DURATION_OPTION));
      numReadThreads = Integer.parseInt(
          cmdLine.getOptionValue(NUM_THREADS_OPTION));
    }

    Collections.sort(inputFileNames);

    return true;
  }

  /** @return true if all the given options are specified */
  private boolean requireOptions(CommandLine cmdLine,
      String[] requiredOptions) {
    for (String option : requiredOptions)
      if (!cmdLine.hasOption(option)) {
        LOG.error("Required option -" + option + " not specified");
        return false;
      }
    return true;
  }

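  /**
   * Verifies that all input files exist and are regular files, and that the
   * output directory, when required, exists and is a directory.
   *
   * @return true if the configuration is valid
   */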
  public boolean validateConfiguration() throws IOException {
    fs = FileSystem.get(conf);

    for (String inputFileName : inputFileNames) {
      Path path = new Path(inputFileName);
      if (!fs.exists(path)) {
        LOG.error("File " + inputFileName + " does not exist");
        return false;
      }

      if (fs.getFileStatus(path).isDir()) {
        LOG.error(inputFileName + " is a directory");
        return false;
      }
    }

    if (outputDir != null &&
        (!fs.exists(outputDir) || !fs.getFileStatus(outputDir).isDir())) {
      LOG.error(outputDir.toString() + " does not exist or is not a " +
          "directory");
      return false;
    }

    return true;
  }

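  /**
   * Merges the input HFiles into a single output HFile using a
   * compaction-style scan, then prints statistics and the HFile metadata of
   * the result.
   */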
  public void runMergeWorkload() throws IOException {
    long maxKeyCount = prepareForMerge();

    List<StoreFileScanner> scanners =
        StoreFileScanner.getScannersForStoreFiles(inputStoreFiles, false,
            false);

    HColumnDescriptor columnDescriptor = new HColumnDescriptor(
        HFileReadWriteTest.class.getSimpleName());
    columnDescriptor.setBlocksize(blockSize);
    columnDescriptor.setBloomFilterType(bloomType);
    columnDescriptor.setCompressionType(compression);
    columnDescriptor.setDataBlockEncoding(dataBlockEncoding);
    HRegionInfo regionInfo = new HRegionInfo();
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    HRegion region = new HRegion(outputDir, null, fs, conf, regionInfo, htd,
        null);
    Store store = new Store(outputDir, region, columnDescriptor, fs, conf);

    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf,
        new CacheConfig(conf), fs, blockSize)
            .withOutputDir(outputDir)
            .withCompression(compression)
            .withDataBlockEncoder(dataBlockEncoder)
            .withBloomType(bloomType)
            .withMaxKeyCount(maxKeyCount)
            .withChecksumType(HFile.DEFAULT_CHECKSUM_TYPE)
            .withBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM)
            .build();

    StatisticsPrinter statsPrinter = new StatisticsPrinter();
    statsPrinter.startThread();

    try {
      performMerge(scanners, store, writer);
      writer.close();
    } finally {
      statsPrinter.requestStop();
    }

    Path resultPath = writer.getPath();

    resultPath = tryUsingSimpleOutputPath(resultPath);

    long fileSize = fs.getFileStatus(resultPath).getLen();
    LOG.info("Created " + resultPath + ", size " + fileSize);

    System.out.println();
    System.out.println("HFile information for " + resultPath);
    System.out.println();

    HFilePrettyPrinter hfpp = new HFilePrettyPrinter();
    hfpp.run(new String[] { "-m", "-f", resultPath.toString() });
  }

  private Path tryUsingSimpleOutputPath(Path resultPath) throws IOException {
    if (inputFileNames.size() == 1) {
      // In case of only one input file, make the output file name consistent
      // with the input name.
      Path inputPath = new Path(inputFileNames.get(0));
      Path betterOutputPath = new Path(outputDir,
          inputPath.getName());
      if (!fs.exists(betterOutputPath)) {
        fs.rename(resultPath, betterOutputPath);
        resultPath = betterOutputPath;
      }
    }
    return resultPath;
  }

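  /**
   * Drains all key-values, including deletes, from the given scanners through
   * a major-compaction-style {@link StoreScanner} and appends them to the
   * output writer, accumulating the KV and byte counters along the way.
   */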
  private void performMerge(List<StoreFileScanner> scanners, Store store,
      StoreFile.Writer writer) throws IOException {
    InternalScanner scanner = null;
    try {
      Scan scan = new Scan();

      // Include deletes
      scanner = new StoreScanner(store, store.scanInfo, scan, scanners,
          ScanType.MAJOR_COMPACT, Long.MIN_VALUE, Long.MIN_VALUE);

      ArrayList<KeyValue> kvs = new ArrayList<KeyValue>();

      while (scanner.next(kvs) || kvs.size() != 0) {
        numKV.addAndGet(kvs.size());
        for (KeyValue kv : kvs) {
          totalBytes.addAndGet(kv.getLength());
          writer.append(kv);
        }
        kvs.clear();
      }
    } finally {
      if (scanner != null)
        scanner.close();
    }
  }

  /**
   * @return the total key count in the files being merged
   * @throws IOException
   */
  private long prepareForMerge() throws IOException {
    LOG.info("Merging " + inputFileNames);
    LOG.info("Using block size: " + blockSize);
    inputStoreFiles = new ArrayList<StoreFile>();

    long maxKeyCount = 0;
    for (String fileName : inputFileNames) {
      Path filePath = new Path(fileName);

      // Open without caching.
      StoreFile sf = openStoreFile(filePath, false);
      sf.createReader();
      inputStoreFiles.add(sf);

      StoreFile.Reader r = sf.getReader();
      if (r != null) {
        long keyCount = r.getFilterEntries();
        maxKeyCount += keyCount;
        LOG.info("Compacting: " + sf + "; keyCount = " + keyCount
            + "; Bloom Type = " + r.getBloomFilterType().toString()
            + "; Size = " + StringUtils.humanReadableInt(r.length()));
      }
    }
    return maxKeyCount;
  }

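  /** @return low-level HFile readers for the store files opened for merging */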
  public HFile.Reader[] getHFileReaders() {
    HFile.Reader[] readers = new HFile.Reader[inputStoreFiles.size()];
    for (int i = 0; i < inputStoreFiles.size(); ++i)
      readers[i] = inputStoreFiles.get(i).getReader().getHFileReader();
    return readers;
  }

  private StoreFile openStoreFile(Path filePath, boolean blockCache)
      throws IOException {
    // We are passing the ROWCOL Bloom filter type, but StoreFile will still
    // use the Bloom filter type specified in the HFile.
    return new StoreFile(fs, filePath, conf, cacheConf,
        StoreFile.BloomType.ROWCOL, dataBlockEncoder);
  }

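  /**
   * @return the value of the given hexadecimal digit character ('0'-'9',
   *         'a'-'f'), or -1 if the character is not a hex digit
   */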
  public static int charToHex(int c) {
    if ('0' <= c && c <= '9')
      return c - '0';
    if ('a' <= c && c <= 'f')
      return 10 + c - 'a';
    return -1;
  }

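  /**
   * @return the lowercase hexadecimal digit character for a value between 0
   *         and 15, or -1 for values outside that range
   */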
  public static int hexToChar(int h) {
    h &= 0xff;
    if (0 <= h && h <= 9)
      return '0' + h;
    if (10 <= h && h <= 15)
      return 'a' + h - 10;
    return -1;
  }

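  /**
   * Generates a random row key lexicographically between the given first and
   * last keys, byte by byte. At each position the candidate byte is drawn
   * from the range still allowed by the two bounds, narrowing to hex digit
   * characters when both bounds are hex at that position. For example, with
   * first="aa" and last="az" the first byte is always 'a' and the second is
   * drawn from 'a'..'z'. Throws if the result falls outside [first, last].
   */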
  public static byte[] createRandomRow(Random rand, byte[] first,
      byte[] last) {
    int resultLen = Math.max(first.length, last.length);
    int minLen = Math.min(first.length, last.length);
    byte[] result = new byte[resultLen];
    boolean greaterThanFirst = false;
    boolean lessThanLast = false;

    for (int i = 0; i < resultLen; ++i) {
      // Generate random hex characters if both first and last row are hex
      // at this position.
      boolean isHex = i < minLen && charToHex(first[i]) != -1
          && charToHex(last[i]) != -1;

      // If our key is already greater than the first key, we can use
      // arbitrarily low values.
      int low = greaterThanFirst || i >= first.length ? 0 : first[i] & 0xff;

      // If our key is already less than the last key, we can use arbitrarily
      // high values.
      int high = lessThanLast || i >= last.length ? 0xff : last[i] & 0xff;

      // Randomly select the next byte between the lowest and the highest
      // value allowed for this position. Restrict to hex characters if
      // necessary. We are generally biased towards border cases, which is OK
      // for a test.

      int r;
      if (isHex) {
        // Use hex chars.
        if (low < '0')
          low = '0';

        if (high > 'f')
          high = 'f';

        int lowHex = charToHex(low);
        int highHex = charToHex(high);
        r = hexToChar(lowHex + rand.nextInt(highHex - lowHex + 1));
      } else {
        r = low + rand.nextInt(high - low + 1);
      }

      if (r > low)
        greaterThanFirst = true;

      if (r < high)
        lessThanLast = true;

      result[i] = (byte) r;
    }

    if (Bytes.compareTo(result, first) < 0) {
      throw new IllegalStateException("Generated key " +
          Bytes.toStringBinary(result) + " is less than the first key " +
          Bytes.toStringBinary(first));
    }
    if (Bytes.compareTo(result, last) > 0) {
      throw new IllegalStateException("Generated key " +
          Bytes.toStringBinary(result) + " is greater than the last key " +
          Bytes.toStringBinary(last));
    }

    return result;
  }

  private static byte[] createRandomQualifier(Random rand) {
    byte[] q = new byte[10 + rand.nextInt(30)];
    rand.nextBytes(q);
    return q;
  }

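  /**
   * A worker that repeatedly seeks to a random key between the first and last
   * rows of the file and reads a short run of key-values forward from the
   * seek point, until the configured end time is reached.
   */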
  private class RandomReader implements Callable<Boolean> {

    private int readerId;
    private StoreFile.Reader reader;
    private boolean pread;

    public RandomReader(int readerId, StoreFile.Reader reader,
        boolean pread) {
      this.readerId = readerId;
      this.reader = reader;
      this.pread = pread;
    }

    @Override
    public Boolean call() throws Exception {
      Thread.currentThread().setName("reader " + readerId);
      Random rand = new Random();
      StoreFileScanner scanner = reader.getStoreFileScanner(true, pread);

      while (System.currentTimeMillis() < endTime) {
        byte[] row = createRandomRow(rand, firstRow, lastRow);
        KeyValue kvToSeek = new KeyValue(row, family,
            createRandomQualifier(rand));
        if (rand.nextDouble() < 0.0001) {
          LOG.info("kvToSeek=" + kvToSeek);
        }
        boolean seekResult;
        try {
          seekResult = scanner.seek(kvToSeek);
        } catch (IOException ex) {
          throw new IOException("Seek failed for key " + kvToSeek + ", pread="
              + pread, ex);
        }
        numSeeks.incrementAndGet();
        if (!seekResult) {
          error("Seek returned false for row " + Bytes.toStringBinary(row));
          return false;
        }
        // Read between 1 and 10 key-values forward from the seek point.
        int kvsToRead = rand.nextInt(10) + 1;
        for (int i = 0; i < kvsToRead; ++i) {
          KeyValue kv = scanner.next();
          numKV.incrementAndGet();
          if (i == 0 && kv == null) {
            error("scanner.next() returned null at the first iteration for " +
                "row " + Bytes.toStringBinary(row));
            return false;
          }
          if (kv == null)
            break;

          String keyHashStr = MD5Hash.getMD5AsHex(kv.getKey());
          keysRead.add(keyHashStr);
          totalBytes.addAndGet(kv.getLength());
        }
      }

      return true;
    }

    private void error(String msg) {
      LOG.error("error in reader " + readerId + " (pread=" + pread + "): "
          + msg);
    }

  }

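  /**
   * Periodically (about once a second) logs throughput statistics: seeks,
   * key-values, bytes, and HFile block reads per second.
   */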
  private class StatisticsPrinter implements Callable<Boolean> {

    private volatile boolean stopRequested;
    private volatile Thread thread;
    private long totalSeekAndReads, totalPositionalReads;

    /**
     * Run the statistics collector in a separate thread without an executor.
     */
    public void startThread() {
      new Thread() {
        @Override
        public void run() {
          try {
            call();
          } catch (Exception e) {
            LOG.error(e);
          }
        }
      }.start();
    }

    @Override
    public Boolean call() throws Exception {
      LOG.info("Starting statistics printer");
      thread = Thread.currentThread();
      thread.setName(StatisticsPrinter.class.getSimpleName());
      long startTime = System.currentTimeMillis();
      long curTime;
      while ((curTime = System.currentTimeMillis()) < endTime &&
          !stopRequested) {
        long elapsedTime = curTime - startTime;
        printStats(elapsedTime);
        try {
          Thread.sleep(1000 - elapsedTime % 1000);
        } catch (InterruptedException iex) {
          Thread.currentThread().interrupt();
          if (stopRequested)
            break;
        }
      }
      printStats(curTime - startTime);
      LOG.info("Stopping statistics printer");
      return true;
    }

    private void printStats(long elapsedTime) {
      long numSeeksL = numSeeks.get();
      double timeSec = elapsedTime / 1000.0;
      double seekPerSec = numSeeksL / timeSec;
      long kvCount = numKV.get();
      double kvPerSec = kvCount / timeSec;
      long bytes = totalBytes.get();
      double bytesPerSec = bytes / timeSec;

      // The readOps and preadOps counters get reset on access, so we have to
      // accumulate them here. The HRegion metrics publishing thread should
      // not be running in this tool, so no one else should be resetting
      // these metrics.
      totalSeekAndReads += HFile.getReadOps();
      totalPositionalReads += HFile.getPreadOps();
      long totalBlocksRead = totalSeekAndReads + totalPositionalReads;

      double blkReadPerSec = totalBlocksRead / timeSec;

      double seekReadPerSec = totalSeekAndReads / timeSec;
      double preadPerSec = totalPositionalReads / timeSec;

      boolean isRead = workload == Workload.RANDOM_READS;

      StringBuilder sb = new StringBuilder();
      sb.append("Time: " + (long) timeSec + " sec");
      if (isRead)
        sb.append(", seek/sec: " + (long) seekPerSec);
      sb.append(", kv/sec: " + (long) kvPerSec);
      sb.append(", bytes/sec: " + (long) bytesPerSec);
      sb.append(", blk/sec: " + (long) blkReadPerSec);
      sb.append(", total KV: " + numKV);
      sb.append(", total bytes: " + totalBytes);
      sb.append(", total blk: " + totalBlocksRead);

      sb.append(", seekRead/sec: " + (long) seekReadPerSec);
      sb.append(", pread/sec: " + (long) preadPerSec);

      if (isRead)
        sb.append(", unique keys: " + keysRead.size());

      LOG.info(sb.toString());
    }

    public void requestStop() {
      stopRequested = true;
      if (thread != null)
        thread.interrupt();
    }

  }

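  /**
   * Runs the random read benchmark: starts {@link #numReadThreads} reader
   * workers plus a statistics printer, waits for the configured duration, and
   * reports how many workers completed or failed.
   */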
  public boolean runRandomReadWorkload() throws IOException {
    if (inputFileNames.size() != 1) {
      throw new IOException("Need exactly one input file for random reads: " +
          inputFileNames);
    }

    Path inputPath = new Path(inputFileNames.get(0));

    // Make sure we are using caching.
    StoreFile storeFile = openStoreFile(inputPath, true);

    StoreFile.Reader reader = storeFile.createReader();

    LOG.info("First key: " + Bytes.toStringBinary(reader.getFirstKey()));
    LOG.info("Last key: " + Bytes.toStringBinary(reader.getLastKey()));

    KeyValue firstKV = KeyValue.createKeyValueFromKey(reader.getFirstKey());
    firstRow = firstKV.getRow();

    KeyValue lastKV = KeyValue.createKeyValueFromKey(reader.getLastKey());
    lastRow = lastKV.getRow();

    // Assign the shared field used by the reader threads; a local variable
    // here would shadow it and leave the field null.
    family = firstKV.getFamily();
    if (!Bytes.equals(family, lastKV.getFamily())) {
      LOG.error("First and last key have different families: "
          + Bytes.toStringBinary(family) + " and "
          + Bytes.toStringBinary(lastKV.getFamily()));
      return false;
    }

    if (Bytes.equals(firstRow, lastRow)) {
      LOG.error("First and last row are the same, cannot run read workload: " +
          "firstRow=" + Bytes.toStringBinary(firstRow) + ", " +
          "lastRow=" + Bytes.toStringBinary(lastRow));
      return false;
    }

    ExecutorService exec = Executors.newFixedThreadPool(numReadThreads + 1);
    int numCompleted = 0;
    int numFailed = 0;
    try {
      ExecutorCompletionService<Boolean> ecs =
          new ExecutorCompletionService<Boolean>(exec);
      endTime = System.currentTimeMillis() + 1000 * durationSec;
      boolean pread = true;
      for (int i = 0; i < numReadThreads; ++i)
        ecs.submit(new RandomReader(i, reader, pread));
      ecs.submit(new StatisticsPrinter());
      Future<Boolean> result;
      while (true) {
        try {
          result = ecs.poll(endTime + 1000 - System.currentTimeMillis(),
              TimeUnit.MILLISECONDS);
          if (result == null)
            break;
          try {
            if (result.get()) {
              ++numCompleted;
            } else {
              ++numFailed;
            }
          } catch (ExecutionException e) {
            LOG.error("Worker thread failure", e.getCause());
            ++numFailed;
          }
        } catch (InterruptedException ex) {
          LOG.error("Interrupted after " + numCompleted +
              " workers completed");
          Thread.currentThread().interrupt();
          continue;
        }
      }
    } finally {
      storeFile.closeReader(true);
      exec.shutdown();

      BlockCache c = cacheConf.getBlockCache();
      if (c != null) {
        c.shutdown();
      }
    }
    LOG.info("Worker threads completed: " + numCompleted);
    LOG.info("Worker threads failed: " + numFailed);
    return true;
  }

  public boolean run() throws IOException {
    LOG.info("Workload: " + workload);
    switch (workload) {
    case MERGE:
      runMergeWorkload();
      break;
    case RANDOM_READS:
      return runRandomReadWorkload();
    default:
      LOG.error("Unknown workload: " + workload);
      return false;
    }

    return true;
  }

  private static void failure() {
    System.exit(1);
  }

  public static void main(String[] args) {
    HFileReadWriteTest app = new HFileReadWriteTest();
    if (!app.parseOptions(args))
      failure();

    try {
      if (!app.validateConfiguration() ||
          !app.run())
        failure();
    } catch (IOException ex) {
      LOG.error(ex);
      failure();
    }
  }

}