/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.Deque;
import java.util.List;
import java.util.NavigableMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.TestHRegionServerBulkLoad;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;

import com.google.common.collect.Multimap;

/**
 * Test cases for the atomic load error handling of the bulk load functionality.
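 *
 * <p>For reference, a plain (non-failing) bulk load with the class under test
 * looks roughly like the sketch below; the HFile directory path here is
 * hypothetical:
 *
 * <pre>
 * LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
 * HTable table = new HTable(conf, "mytable");
 * loader.doBulkLoad(new Path("/path/to/hfiles"), table);
 * </pre>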
 */
@Category(LargeTests.class)
public class TestLoadIncrementalHFilesSplitRecovery {
  final static Log LOG = LogFactory.getLog(TestLoadIncrementalHFilesSplitRecovery.class);

  static HBaseTestingUtility util;

  final static int NUM_CFS = 10;
  final static byte[] QUAL = Bytes.toBytes("qual");
  final static int ROWCOUNT = 100;

  private final static byte[][] families = new byte[NUM_CFS][];
  static {
    for (int i = 0; i < NUM_CFS; i++) {
      families[i] = Bytes.toBytes(family(i));
    }
  }

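  // Deterministic generators for row keys, family names, and cell values;
  // zero padding keeps them in lexicographic order.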
  static byte[] rowkey(int i) {
    return Bytes.toBytes(String.format("row_%08d", i));
  }

  static String family(int i) {
    return String.format("family_%04d", i);
  }

  static byte[] value(int i) {
    return Bytes.toBytes(String.format("%010d", i));
  }

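  /**
   * Writes one HFile per column family under {@code dir}; each file contains
   * ROWCOUNT rows whose cells all hold the given value.
   */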
  public static void buildHFiles(FileSystem fs, Path dir, int value)
      throws IOException {
    byte[] val = value(value);
    for (int i = 0; i < NUM_CFS; i++) {
      Path testIn = new Path(dir, family(i));

      TestHRegionServerBulkLoad.createHFile(fs, new Path(testIn, "hfile_" + i),
          Bytes.toBytes(family(i)), QUAL, val, ROWCOUNT);
    }
  }

  /**
   * Creates a table with the given name and the specified number of column
   * families, if the table does not already exist.
   */
  private void setupTable(String table, int cfs) throws IOException {
    try {
      LOG.info("Creating table " + table);
      HTableDescriptor htd = new HTableDescriptor(table);
      for (int i = 0; i < cfs; i++) {
        htd.addFamily(new HColumnDescriptor(family(i)));
      }

      util.getHBaseAdmin().createTable(htd);
    } catch (TableExistsException tee) {
      LOG.info("Table " + table + " already exists");
    }
  }

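  /**
   * Builds a directory of bulk load files (one HFile per column family) for
   * the given table and value, and returns its path.
   */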
  private Path buildBulkFiles(String table, int value) throws Exception {
    Path dir = util.getDataTestDir(table);
    Path bulk1 = new Path(dir, table + value);
    FileSystem fs = util.getTestFileSystem();
    buildHFiles(fs, bulk1, value);
    return bulk1;
  }

  /**
   * Populates the table with known values.
   */
  private void populateTable(String table, int value) throws Exception {
    // create HFiles for different column families
    LoadIncrementalHFiles lih = new LoadIncrementalHFiles(util.getConfiguration());
    Path bulk1 = buildBulkFiles(table, value);
    HTable t = new HTable(util.getConfiguration(), Bytes.toBytes(table));
    lih.doBulkLoad(bulk1, t);
  }

  /**
   * Splits the known table in half.  (The split point is hard coded for this
   * test suite.)
   */
  private void forceSplit(String table) {
    try {
      // Go through the region server directly so the split call is
      // synchronous; this interface isn't otherwise visible to clients.
      HRegionServer hrs = util.getRSForFirstRegionInTable(Bytes
          .toBytes(table));

      for (HRegionInfo hri : hrs.getOnlineRegions()) {
        if (Bytes.equals(hri.getTableName(), Bytes.toBytes(table))) {
          // splitRegion doesn't work if startkey/endkey are null
          hrs.splitRegion(hri, rowkey(ROWCOUNT / 2)); // hard-coded split point
        }
      }

      // verify that the split completed.
      int regions;
      do {
        regions = 0;
        for (HRegionInfo hri : hrs.getOnlineRegions()) {
          if (Bytes.equals(hri.getTableName(), Bytes.toBytes(table))) {
            regions++;
          }
        }
        if (regions != 2) {
          LOG.info("Taking some time to complete split...");
          Thread.sleep(250);
        }
      } while (regions != 2);
    } catch (IOException e) {
      e.printStackTrace();
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
  }

  @BeforeClass
  public static void setupCluster() throws Exception {
    util = new HBaseTestingUtility();
    util.startMiniCluster(1);
  }

  @AfterClass
  public static void teardownCluster() throws Exception {
    util.shutdownMiniCluster();
  }

  /**
   * Checks that all columns have the expected value and that there is the
   * expected number of rows.
   */
  void assertExpectedTable(String table, int count, int value) {
    try {
      assertEquals(1, util.getHBaseAdmin().listTables(table).length);

      HTable t = new HTable(util.getConfiguration(), table);
      Scan s = new Scan();
      ResultScanner sr = t.getScanner(s);
      int i = 0;
      for (Result r : sr) {
        i++;
        for (NavigableMap<byte[], byte[]> nm : r.getNoVersionMap().values()) {
          for (byte[] val : nm.values()) {
            assertTrue(Bytes.equals(val, value(value)));
          }
        }
      }
      assertEquals(count, i);
    } catch (IOException e) {
      fail("Failed due to exception: " + e);
    }
  }

  /**
   * Tests that an exception thrown on the region server side results in an
   * IOException on the LoadIncrementalHFiles client.
   */
  @Test(expected = IOException.class)
  public void testBulkLoadPhaseFailure() throws Exception {
    String table = "bulkLoadPhaseFailure";
    setupTable(table, 10);

    final AtomicInteger attemptedCalls = new AtomicInteger();
    final AtomicInteger failedCalls = new AtomicInteger();
    LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
        util.getConfiguration()) {

      @Override
      protected List<LoadQueueItem> tryAtomicRegionLoad(final HConnection conn,
          byte[] tableName, final byte[] first, Collection<LoadQueueItem> lqis)
      throws IOException {
        int i = attemptedCalls.incrementAndGet();
        if (i == 1) {
          // On the first attempt, swap in a mocked connection whose region
          // server stub fails the bulk load call.
          HConnection errConn = null;
          try {
            errConn = getMockedConnection(util.getConfiguration());
          } catch (Exception e) {
            LOG.fatal("mocking cruft, should never happen", e);
            throw new RuntimeException("mocking cruft, should never happen");
          }
          failedCalls.incrementAndGet();
          return super.tryAtomicRegionLoad(errConn, tableName, first, lqis);
        }

        return super.tryAtomicRegionLoad(conn, tableName, first, lqis);
      }
    };

    // create HFiles for different column families
    Path dir = buildBulkFiles(table, 1);
    HTable t = new HTable(util.getConfiguration(), Bytes.toBytes(table));
    lih.doBulkLoad(dir, t);

    fail("doBulkLoad should have thrown an exception");
  }

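  /**
   * Returns a mocked HConnection that resolves all location lookups to a fixed
   * region and whose region server stub throws IOException from
   * bulkLoadHFiles, simulating a server-side bulk load failure.
   */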
  private HConnection getMockedConnection(final Configuration conf)
  throws IOException {
    HConnection c = Mockito.mock(HConnection.class);
    Mockito.when(c.getConfiguration()).thenReturn(conf);
    Mockito.doNothing().when(c).close();
    // Make it so we return a particular location when asked.
    final HRegionLocation loc = new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO,
        "example.org", 1234);
    Mockito.when(c.getRegionLocation((byte[]) Mockito.any(),
        (byte[]) Mockito.any(), Mockito.anyBoolean())).
      thenReturn(loc);
    Mockito.when(c.locateRegion((byte[]) Mockito.any(), (byte[]) Mockito.any())).
      thenReturn(loc);
    HRegionInterface hri = Mockito.mock(HRegionInterface.class);
    Mockito.when(hri.bulkLoadHFiles(Mockito.anyList(), (byte[]) Mockito.any(),
      Mockito.anyBoolean())).thenThrow(new IOException("injecting bulk load error"));
    Mockito.when(c.getHRegionConnection(Mockito.anyString(), Mockito.anyInt())).
      thenReturn(hri);
    return c;
  }

  /**
   * This test exercises the path where there is a split after initial
   * validation but before the atomic bulk load call. We cannot use presplitting
   * to test this path, so we actually inject a split just before the atomic
   * region load.
   */
  @Test
  public void testSplitWhileBulkLoadPhase() throws Exception {
    final String table = "splitWhileBulkloadPhase";
    setupTable(table, 10);
    populateTable(table, 1);
    assertExpectedTable(table, ROWCOUNT, 1);

    // Now let's cause trouble.  This will occur after checks and cause bulk
    // files to fail when we attempt to atomically import them.  This is
    // recoverable.
    final AtomicInteger attemptedCalls = new AtomicInteger();
    LoadIncrementalHFiles lih2 = new LoadIncrementalHFiles(
        util.getConfiguration()) {

      @Override
      protected void bulkLoadPhase(final HTable htable, final HConnection conn,
          ExecutorService pool, Deque<LoadQueueItem> queue,
          final Multimap<ByteBuffer, LoadQueueItem> regionGroups) throws IOException {
        int i = attemptedCalls.incrementAndGet();
        if (i == 1) {
          // On first attempt force a split.
          forceSplit(table);
        }

        super.bulkLoadPhase(htable, conn, pool, queue, regionGroups);
      }
    };

    // create HFiles for different column families
    HTable t = new HTable(util.getConfiguration(), Bytes.toBytes(table));
    Path bulk = buildBulkFiles(table, 2);
    lih2.doBulkLoad(bulk, t);

    // Check that the data was loaded.  The three expected attempts are:
    // 1) failure because the region needs to be split, 2) load of the split
    // top half, 3) load of the split bottom half.
    assertEquals(3, attemptedCalls.get());
    assertExpectedTable(table, ROWCOUNT, 2);
  }

  /**
   * This test splits a table and attempts to bulk load.  The bulk import files
   * should be split before being atomically imported.
   */
  @Test
  public void testGroupOrSplitPresplit() throws Exception {
    final String table = "groupOrSplitPresplit";
    setupTable(table, 10);
    populateTable(table, 1);
    assertExpectedTable(table, ROWCOUNT, 1);
    forceSplit(table);

    final AtomicInteger countedLqis = new AtomicInteger();
    LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
        util.getConfiguration()) {
      @Override
      protected List<LoadQueueItem> groupOrSplit(
          Multimap<ByteBuffer, LoadQueueItem> regionGroups,
          final LoadQueueItem item, final HTable htable,
          final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
        List<LoadQueueItem> lqis = super.groupOrSplit(regionGroups, item, htable, startEndKeys);
        if (lqis != null) {
          countedLqis.addAndGet(lqis.size());
        }
        return lqis;
      }
    };

    // create HFiles for different column families
    Path bulk = buildBulkFiles(table, 2);
    HTable ht = new HTable(util.getConfiguration(), Bytes.toBytes(table));
    lih.doBulkLoad(bulk, ht);

    assertExpectedTable(table, ROWCOUNT, 2);
    // Each of the 10 per-family HFiles is split in two, yielding 20 queue items.
    assertEquals(20, countedLqis.get());
  }

  /**
   * This simulates a remote exception which should cause LoadIncrementalHFiles
   * to exit with an exception.
   */
  @Test(expected = IOException.class)
  public void testGroupOrSplitFailure() throws Exception {
    String table = "groupOrSplitFailure";
    setupTable(table, 10);

    LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
        util.getConfiguration()) {
      int i = 0;

      @Override
      protected List<LoadQueueItem> groupOrSplit(
          Multimap<ByteBuffer, LoadQueueItem> regionGroups,
          final LoadQueueItem item, final HTable table,
          final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
        i++;

        if (i == 5) {
          // Fail partway through the queue to simulate a remote error.
          throw new IOException("failure");
        }
        return super.groupOrSplit(regionGroups, item, table, startEndKeys);
      }
    };

    // create HFiles for different column families
    Path dir = buildBulkFiles(table, 1);
    HTable t = new HTable(util.getConfiguration(), Bytes.toBytes(table));
    lih.doBulkLoad(dir, t);

    fail("doBulkLoad should have thrown an exception");
  }

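  // Checks for resource leakage (e.g. leaked threads) before and after each test.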
  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}