1   /**
2    * Copyright 2007 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.mapreduce;
21  
22  import java.io.File;
23  import java.io.IOException;
24  import java.util.Map;
25  import java.util.NavigableMap;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FileUtil;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.hbase.HBaseTestingUtility;
33  import org.apache.hadoop.hbase.LargeTests;
34  import org.apache.hadoop.hbase.client.HTable;
35  import org.apache.hadoop.hbase.client.Result;
36  import org.apache.hadoop.hbase.client.Scan;
37  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
38  import org.apache.hadoop.hbase.util.Bytes;
39  import org.apache.hadoop.io.NullWritable;
40  import org.apache.hadoop.mapreduce.Job;
41  import org.apache.hadoop.mapreduce.Reducer;
42  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
43  import org.junit.After;
44  import org.junit.AfterClass;
45  import org.junit.Before;
46  import org.junit.BeforeClass;
47  import org.junit.Test;
48  import org.junit.experimental.categories.Category;
49  
50  import static org.junit.Assert.assertEquals;
51  import static org.junit.Assert.assertTrue;
52  
53  /**
54   * Tests various scan start and stop row scenarios. This is set in a scan and
55   * tested in a MapReduce job to see if that is handed over and done properly
56   * too.
57   */
58  @Category(LargeTests.class)
59  public class TestTableInputFormatScan {
60  
61    static final Log LOG = LogFactory.getLog(TestTableInputFormatScan.class);
62    static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
63  
64    static final byte[] TABLE_NAME = Bytes.toBytes("scantest");
65    static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
66    static final String KEY_STARTROW = "startRow";
67    static final String KEY_LASTROW = "stpRow";
68  
69    private static HTable table = null;
70  
71    @BeforeClass
72    public static void setUpBeforeClass() throws Exception {
73      // switch TIF to log at DEBUG level
74      TEST_UTIL.enableDebug(TableInputFormat.class);
75      TEST_UTIL.enableDebug(TableInputFormatBase.class);
76      // start mini hbase cluster
77      TEST_UTIL.startMiniCluster(3);
78      // create and fill table
79      table = TEST_UTIL.createTable(TABLE_NAME, INPUT_FAMILY);
80      TEST_UTIL.createMultiRegions(table, INPUT_FAMILY);
81      TEST_UTIL.loadTable(table, INPUT_FAMILY);
82      // start MR cluster
83      TEST_UTIL.startMiniMapReduceCluster();
84    }
85  
86    @AfterClass
87    public static void tearDownAfterClass() throws Exception {
88      TEST_UTIL.shutdownMiniMapReduceCluster();
89      TEST_UTIL.shutdownMiniCluster();
90    }
91  
92    /**
93     * Pass the key and value to reduce.
94     */
95    public static class ScanMapper
96    extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
97  
98      /**
99       * Pass the key and value to reduce.
100      *
101      * @param key  The key, here "aaa", "aab" etc.
102      * @param value  The value is the same as the key.
103      * @param context  The task context.
104      * @throws IOException When reading the rows fails.
105      */
106     @Override
107     public void map(ImmutableBytesWritable key, Result value,
108       Context context)
109     throws IOException, InterruptedException {
110       if (value.size() != 1) {
111         throw new IOException("There should only be one input column");
112       }
113       Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
114         cf = value.getMap();
115       if(!cf.containsKey(INPUT_FAMILY)) {
116         throw new IOException("Wrong input columns. Missing: '" +
117           Bytes.toString(INPUT_FAMILY) + "'.");
118       }
119       String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
120       LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
121         ", value -> " + val);
122       context.write(key, key);
123     }
124 
125   }
126 
127   /**
128    * Checks the last and first key seen against the scanner boundaries.
129    */
130   public static class ScanReducer
131   extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
132                   NullWritable, NullWritable> {
133 
134     private String first = null;
135     private String last = null;
136 
137     protected void reduce(ImmutableBytesWritable key,
138         Iterable<ImmutableBytesWritable> values, Context context)
139     throws IOException ,InterruptedException {
140       int count = 0;
141       for (ImmutableBytesWritable value : values) {
142         String val = Bytes.toStringBinary(value.get());
143         LOG.info("reduce: key[" + count + "] -> " +
144           Bytes.toStringBinary(key.get()) + ", value -> " + val);
145         if (first == null) first = val;
146         last = val;
147         count++;
148       }
149     }
150 
151     protected void cleanup(Context context)
152     throws IOException, InterruptedException {
153       Configuration c = context.getConfiguration();
154       String startRow = c.get(KEY_STARTROW);
155       String lastRow = c.get(KEY_LASTROW);
156       LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
157       LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
158       if (startRow != null && startRow.length() > 0) {
159         assertEquals(startRow, first);
160       }
161       if (lastRow != null && lastRow.length() > 0) {
162         assertEquals(lastRow, last);
163       }
164     }
165 
166   }
167 
168   /**
169    * Tests a MR scan using specific start and stop rows.
170    *
171    * @throws IOException
172    * @throws ClassNotFoundException
173    * @throws InterruptedException
174    */
175   @Test
176   public void testScanEmptyToEmpty()
177   throws IOException, InterruptedException, ClassNotFoundException {
178     testScan(null, null, null);
179   }
180 
181   /**
182    * Tests a MR scan using specific start and stop rows.
183    *
184    * @throws IOException
185    * @throws ClassNotFoundException
186    * @throws InterruptedException
187    */
188   @Test
189   public void testScanEmptyToAPP()
190   throws IOException, InterruptedException, ClassNotFoundException {
191     testScan(null, "app", "apo");
192   }
193 
194   /**
195    * Tests a MR scan using specific start and stop rows.
196    *
197    * @throws IOException
198    * @throws ClassNotFoundException
199    * @throws InterruptedException
200    */
201   @Test
202   public void testScanEmptyToBBA()
203   throws IOException, InterruptedException, ClassNotFoundException {
204     testScan(null, "bba", "baz");
205   }
206 
207   /**
208    * Tests a MR scan using specific start and stop rows.
209    *
210    * @throws IOException
211    * @throws ClassNotFoundException
212    * @throws InterruptedException
213    */
214   @Test
215   public void testScanEmptyToBBB()
216   throws IOException, InterruptedException, ClassNotFoundException {
217     testScan(null, "bbb", "bba");
218   }
219 
220   /**
221    * Tests a MR scan using specific start and stop rows.
222    *
223    * @throws IOException
224    * @throws ClassNotFoundException
225    * @throws InterruptedException
226    */
227   @Test
228   public void testScanEmptyToOPP()
229   throws IOException, InterruptedException, ClassNotFoundException {
230     testScan(null, "opp", "opo");
231   }
232 
233   /**
234    * Tests a MR scan using specific start and stop rows.
235    *
236    * @throws IOException
237    * @throws ClassNotFoundException
238    * @throws InterruptedException
239    */
240   @Test
241   public void testScanOBBToOPP()
242   throws IOException, InterruptedException, ClassNotFoundException {
243     testScan("obb", "opp", "opo");
244   }
245 
246   /**
247    * Tests a MR scan using specific start and stop rows.
248    *
249    * @throws IOException
250    * @throws ClassNotFoundException
251    * @throws InterruptedException
252    */
253   @Test
254   public void testScanOBBToQPP()
255   throws IOException, InterruptedException, ClassNotFoundException {
256     testScan("obb", "qpp", "qpo");
257   }
258 
259   /**
260    * Tests a MR scan using specific start and stop rows.
261    *
262    * @throws IOException
263    * @throws ClassNotFoundException
264    * @throws InterruptedException
265    */
266   @Test
267   public void testScanOPPToEmpty()
268   throws IOException, InterruptedException, ClassNotFoundException {
269     testScan("opp", null, "zzz");
270   }
271 
272   /**
273    * Tests a MR scan using specific start and stop rows.
274    *
275    * @throws IOException
276    * @throws ClassNotFoundException
277    * @throws InterruptedException
278    */
279   @Test
280   public void testScanYYXToEmpty()
281   throws IOException, InterruptedException, ClassNotFoundException {
282     testScan("yyx", null, "zzz");
283   }
284 
285   /**
286    * Tests a MR scan using specific start and stop rows.
287    *
288    * @throws IOException
289    * @throws ClassNotFoundException
290    * @throws InterruptedException
291    */
292   @Test
293   public void testScanYYYToEmpty()
294   throws IOException, InterruptedException, ClassNotFoundException {
295     testScan("yyy", null, "zzz");
296   }
297 
298   /**
299    * Tests a MR scan using specific start and stop rows.
300    *
301    * @throws IOException
302    * @throws ClassNotFoundException
303    * @throws InterruptedException
304    */
305   @Test
306   public void testScanYZYToEmpty()
307   throws IOException, InterruptedException, ClassNotFoundException {
308     testScan("yzy", null, "zzz");
309   }
310 
311   @Test
312   public void testScanFromConfiguration()
313   throws IOException, InterruptedException, ClassNotFoundException {
314     testScanFromConfiguration("bba", "bbd", "bbc");
315   }
316 
317   /**
318    * Tests an MR Scan initialized from properties set in the Configuration.
319    * 
320    * @throws IOException
321    * @throws ClassNotFoundException
322    * @throws InterruptedException
323    */
324   private void testScanFromConfiguration(String start, String stop, String last)
325   throws IOException, InterruptedException, ClassNotFoundException {
326     String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase() : "Empty") +
327       "To" + (stop != null ? stop.toUpperCase() : "Empty");
328     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
329     c.set(TableInputFormat.INPUT_TABLE, Bytes.toString(TABLE_NAME));
330     c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
331     c.set(KEY_STARTROW, start != null ? start : "");
332     c.set(KEY_LASTROW, last != null ? last : "");
333 
334     if (start != null) {
335       c.set(TableInputFormat.SCAN_ROW_START, start);
336     }
337 
338     if (stop != null) {
339       c.set(TableInputFormat.SCAN_ROW_STOP, stop);
340     }
341 
342     Job job = new Job(c, jobName);
343     job.setMapperClass(ScanMapper.class);
344     job.setReducerClass(ScanReducer.class);
345     job.setMapOutputKeyClass(ImmutableBytesWritable.class);
346     job.setMapOutputValueClass(ImmutableBytesWritable.class);
347     job.setInputFormatClass(TableInputFormat.class);
348     job.setNumReduceTasks(1);
349     FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
350     job.waitForCompletion(true);
351     assertTrue(job.isComplete());
352   }
353 
354   /**
355    * Tests a MR scan using specific start and stop rows.
356    *
357    * @throws IOException
358    * @throws ClassNotFoundException
359    * @throws InterruptedException
360    */
361   private void testScan(String start, String stop, String last)
362   throws IOException, InterruptedException, ClassNotFoundException {
363     String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty") +
364       "To" + (stop != null ? stop.toUpperCase() : "Empty");
365     LOG.info("Before map/reduce startup - job " + jobName);
366     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
367     Scan scan = new Scan();
368     scan.addFamily(INPUT_FAMILY);
369     if (start != null) {
370       scan.setStartRow(Bytes.toBytes(start));
371     }
372     c.set(KEY_STARTROW, start != null ? start : "");
373     if (stop != null) {
374       scan.setStopRow(Bytes.toBytes(stop));
375     }
376     c.set(KEY_LASTROW, last != null ? last : "");
377     LOG.info("scan before: " + scan);
378     Job job = new Job(c, jobName);
379     TableMapReduceUtil.initTableMapperJob(
380       Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
381       ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
382     job.setReducerClass(ScanReducer.class);
383     job.setNumReduceTasks(1); // one to get final "first" and "last" key
384     FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
385     LOG.info("Started " + job.getJobName());
386     job.waitForCompletion(true);
387     assertTrue(job.isComplete());
388     LOG.info("After map/reduce completion - job " + jobName);
389   }
390 
391   @org.junit.Rule
392   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
393     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
394 }
395