/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Arrays;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;

/**
 * Simple test for {@link KeyValueSortReducer} and {@link HFileOutputFormat}.
 * Sets up and runs a mapreduce job that writes hfile output.
 * Uses an inner mapper class and {@link NMapInputFormat} to emit keys and
 * values like those of {@link PerformanceEvaluation}.  Makes as many splits
 * as there are configured "mapred.map.tasks" map tasks.
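 *
 * <p>For reference, a minimal sketch of wiring {@link HFileOutputFormat}
 * into a bulk-load job, mirroring what {@link #runIncrementalPELoad} below
 * does (the mapper name here is hypothetical):
 * <pre>
 *   Job job = new Job(conf, "bulkload");
 *   job.setMapperClass(MyKeyValueEmittingMapper.class);
 *   HFileOutputFormat.configureIncrementalLoad(job, table);
 *   FileOutputFormat.setOutputPath(job, outputDir);
 *   job.waitForCompletion(true);
 * </pre>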
 */
public class TestHFileOutputFormat {
  private final static int ROWSPERSPLIT = 1024;

  private static final byte[] FAMILY_NAME = PerformanceEvaluation.FAMILY_NAME;
  private static final byte[] TABLE_NAME = Bytes.toBytes("TestTable");

  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static final Log LOG =
      LogFactory.getLog(TestHFileOutputFormat.class);

  /**
   * Simple mapper that makes KeyValue output.
   */
  static class RandomKVGeneratingMapper
      extends Mapper<NullWritable, NullWritable,
                     ImmutableBytesWritable, KeyValue> {

    private int keyLength;
    private static final int KEYLEN_DEFAULT = 10;
    private static final String KEYLEN_CONF = "randomkv.key.length";

    private int valLength;
    private static final int VALLEN_DEFAULT = 10;
    private static final String VALLEN_CONF = "randomkv.val.length";

    @Override
    protected void setup(Context context) throws IOException,
        InterruptedException {
      super.setup(context);

      Configuration conf = context.getConfiguration();
      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
    }

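    /**
     * Emits ROWSPERSPLIT random KeyValues per map task.  The last key byte
     * is overwritten with the task id so that rows generated by different
     * tasks never collide (hence the assert below capping tasks at 127).
     */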
    @Override
    protected void map(
        NullWritable n1, NullWritable n2,
        Context context)
        throws IOException, InterruptedException {

      byte[] keyBytes = new byte[keyLength];
      byte[] valBytes = new byte[valLength];

      int taskId = context.getTaskAttemptID().getTaskID().getId();
      assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

      Random random = new Random();
      for (int i = 0; i < ROWSPERSPLIT; i++) {
        random.nextBytes(keyBytes);
        // Ensure that unique tasks generate unique keys
        keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
        random.nextBytes(valBytes);
        ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

        KeyValue kv = new KeyValue(keyBytes, PerformanceEvaluation.FAMILY_NAME,
            PerformanceEvaluation.QUALIFIER_NAME, valBytes);
        context.write(key, kv);
      }
    }
  }

  @Before
  public void cleanupDir() throws IOException {
    util.cleanupTestDir();
  }

  private void setupRandomGeneratorMapper(Job job) {
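    // NMapInputFormat supplies empty splits, one per map task; each mapper
    // receives a single NullWritable pair and fabricates its own random
    // KeyValues in RandomKVGeneratingMapper.map().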
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomKVGeneratingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
  }

  /**
   * Test that {@link HFileOutputFormat} RecordWriter amends timestamps if
   * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
   */
  @Test
  public void test_LATEST_TIMESTAMP_isReplaced()
      throws IOException, InterruptedException {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
    TaskAttemptContext context = null;
    Path dir =
        HBaseTestingUtility.getTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = new TaskAttemptContext(job.getConfiguration(),
          new TaskAttemptID());
      HFileOutputFormat hof = new HFileOutputFormat();
      writer = hof.getRecordWriter(context);
      final byte[] b = Bytes.toBytes("b");

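      // A KeyValue constructed without an explicit timestamp carries
      // LATEST_TIMESTAMP.  Bulk-loaded hfiles bypass the regionserver write
      // path that would normally stamp the cell, so the RecordWriter is
      // expected to substitute the current time at write (HBASE-2615).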
      // Test 1.  Pass a KV that has a ts of LATEST_TIMESTAMP.  It should be
      // changed by the call to write.  Check that everything else in the kv
      // is unchanged.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(original.getRow(), kv.getRow()));
      assertTrue(original.matchingColumn(kv.getFamily(), kv.getQualifier()));
      // Compare the long values directly; assertNotSame on autoboxed longs
      // only checks reference identity, which almost always passes.
      assertTrue(original.getTimestamp() != kv.getTimestamp());
      assertTrue(kv.getTimestamp() != HConstants.LATEST_TIMESTAMP);

      // Test 2.  Now pass a kv that has an explicit ts.  It should not be
      // changed by the call to write.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /**
   * Run a small MR job that writes PerformanceEvaluation-style data into
   * hfiles.
   */
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = HBaseTestingUtility.getTestDir("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // Lower this value or we OOME in Eclipse.
    conf.setInt("io.sort.mb", 20);
    // Keep the max hfile size small so the writers roll over at 64k and the
    // job produces a few files rather than one big one.
    conf.setLong("hbase.hregion.max.filesize", 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);
    // This partitioner doesn't work well for numeric keys, but we use it
    // anyway just to demonstrate how to configure it.
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte)0);
    Arrays.fill(endKey, (byte)0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    // Set start and end rows for partitioner.
    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
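    // SimpleTotalOrderPartitioner slices the [startKey, endKey) range into
    // evenly spaced partitions, one per reducer, assuming uniformly
    // distributed keys; each reducer then covers a contiguous key range.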
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setNumReduceTasks(4);

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus[] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }

  @Test
  public void testJobConfiguration() throws Exception {
    Job job = new Job();
    HTable table = Mockito.mock(HTable.class);
    byte[][] mockKeys = new byte[][] {
        HConstants.EMPTY_BYTE_ARRAY,
        Bytes.toBytes("aaa"),
        Bytes.toBytes("ggg"),
        Bytes.toBytes("zzz")
    };
    Mockito.doReturn(mockKeys).when(table).getStartKeys();

    HFileOutputFormat.configureIncrementalLoad(job, table);
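    // configureIncrementalLoad should set one reduce task per region, i.e.
    // one per start key returned by the (mocked) table.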
    assertEquals(4, job.getNumReduceTasks());
  }

  private byte[][] generateRandomStartKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    // first region start key is always empty
    ret[0] = HConstants.EMPTY_BYTE_ARRAY;
    for (int i = 1; i < numKeys; i++) {
      ret[i] = PerformanceEvaluation.generateValue(random);
    }
    return ret;
  }

  @Test
  public void testMRIncrementalLoad() throws Exception {
    doIncrementalLoadTest(false);
  }

  @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    doIncrementalLoadTest(true);
  }

  private void doIncrementalLoadTest(
      boolean shouldChangeRegions) throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = HBaseTestingUtility.getTestDir("testLocalMRIncrementalLoad");
    byte[][] startKeys = generateRandomStartKeys(5);

    try {
      util.startMiniCluster();
      HBaseAdmin admin = new HBaseAdmin(conf);
      HTable table = util.createTable(TABLE_NAME, FAMILY_NAME);
      int numRegions = util.createMultiRegions(
          util.getConfiguration(), table, FAMILY_NAME,
          startKeys);
      assertEquals("Should make 5 regions", 5, numRegions);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // Generate the bulk load files
      util.startMiniMapReduceCluster();
      runIncrementalPELoad(conf, table, testDir);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table",
          0, util.countRows(table));

      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getTableName());
        while (util.getMiniHBaseCluster().getMaster().getAssignmentManager().
            isRegionsInTransition()) {
          Threads.sleep(1000);
          LOG.info("Waiting on table to finish disabling");
        }
        byte[][] newStartKeys = generateRandomStartKeys(15);
        util.createMultiRegions(util.getConfiguration(),
            table, FAMILY_NAME, newStartKeys);
        admin.enableTable(table.getTableName());
        while (table.getRegionsInfo().size() != 15 ||
            !admin.isTableAvailable(table.getTableName())) {
          Thread.sleep(1000);
          LOG.info("Waiting for new region assignment to happen");
        }
      }
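
      // Note: when regions were changed above, the hfiles still reflect the
      // original five-region boundaries; LoadIncrementalHFiles must split
      // any file that now straddles a region boundary before loading it.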
      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      // Ensure data shows up
      int expectedRows = conf.getInt("mapred.map.tasks", 1) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));
      String tableDigestBefore = util.checksumRows(table);

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(1000);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME, 30000);
      assertEquals("Data should remain after reopening of regions",
          tableDigestBefore, util.checksumRows(table));
    } finally {
      util.shutdownMiniMapReduceCluster();
      util.shutdownMiniCluster();
    }
  }

  private void runIncrementalPELoad(
      Configuration conf, HTable table, Path outDir)
      throws Exception {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    setupRandomGeneratorMapper(job);
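    // configureIncrementalLoad inspects the table's region start keys,
    // writes a partitions file, and wires up a total-order partitioner plus
    // KeyValueSortReducer so that each reducer produces hfiles for exactly
    // one region (hence the reduce-task assertion below).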
    HFileOutputFormat.configureIncrementalLoad(job, table);
    FileOutputFormat.setOutputPath(job, outDir);

    assertEquals(table.getRegionsInfo().size(), job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }

  public static void main(String[] args) throws Exception {
    new TestHFileOutputFormat().manualTest(args);
  }

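  /**
   * Manual driver.  Pass "newtable tablename" to create a pre-split table,
   * or "incremental tablename" to run the random-KV job against an existing
   * table, leaving hfiles under ./incremental-out.
   */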
  public void manualTest(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    util = new HBaseTestingUtility(conf);
    if ("newtable".equals(args[0])) {
      byte[] tname = Bytes.toBytes(args[1]);
      HTable table = util.createTable(tname, FAMILY_NAME);
      HBaseAdmin admin = new HBaseAdmin(conf);
      admin.disableTable(tname);
      util.createMultiRegions(conf, table, FAMILY_NAME,
          generateRandomStartKeys(5));
      admin.enableTable(tname);
    } else if ("incremental".equals(args[0])) {
      byte[] tname = Bytes.toBytes(args[1]);
      HTable table = new HTable(conf, tname);
      Path outDir = new Path("incremental-out");
      runIncrementalPELoad(conf, table, outDir);
    } else {
      throw new RuntimeException(
          "usage: TestHFileOutputFormat newtable|incremental tablename");
    }
  }
}