1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.master;
21
22 import static org.junit.Assert.assertEquals;
23 import static org.junit.Assert.assertFalse;
24 import static org.junit.Assert.assertNotNull;
25 import static org.junit.Assert.assertTrue;
26
27 import java.io.IOException;
28 import java.util.ArrayList;
29 import java.util.List;
30 import java.util.Set;
31 import java.util.TreeSet;
32
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.fs.FileSystem;
37 import org.apache.hadoop.fs.Path;
38 import org.apache.hadoop.hbase.Abortable;
39 import org.apache.hadoop.hbase.ClusterStatus;
40 import org.apache.hadoop.hbase.HBaseConfiguration;
41 import org.apache.hadoop.hbase.HBaseTestingUtility;
42 import org.apache.hadoop.hbase.HColumnDescriptor;
43 import org.apache.hadoop.hbase.HConstants;
44 import org.apache.hadoop.hbase.HRegionInfo;
45 import org.apache.hadoop.hbase.HTableDescriptor;
46 import org.apache.hadoop.hbase.LargeTests;
47 import org.apache.hadoop.hbase.MasterNotRunningException;
48 import org.apache.hadoop.hbase.MiniHBaseCluster;
49 import org.apache.hadoop.hbase.ServerName;
50 import org.apache.hadoop.hbase.executor.EventHandler.EventType;
51 import org.apache.hadoop.hbase.executor.RegionTransitionData;
52 import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
53 import org.apache.hadoop.hbase.regionserver.HRegion;
54 import org.apache.hadoop.hbase.regionserver.HRegionServer;
55 import org.apache.hadoop.hbase.util.Bytes;
56 import org.apache.hadoop.hbase.util.FSTableDescriptors;
57 import org.apache.hadoop.hbase.util.JVMClusterUtil;
58 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
59 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
60 import org.apache.hadoop.hbase.util.Threads;
61 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
62 import org.apache.hadoop.hbase.zookeeper.ZKTable;
63 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
64 import org.junit.Test;
65 import org.junit.experimental.categories.Category;
66
67 @Category(LargeTests.class)
68 public class TestMasterFailover {
69 private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
70
71 @Test (timeout=180000)
72 public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
73 throws Exception {
74 LOG.info("Starting testShouldCheckMasterFailOverWhenMETAIsInOpenedState");
75 final int NUM_MASTERS = 1;
76 final int NUM_RS = 2;
77
78 Configuration conf = HBaseConfiguration.create();
79 conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
80 conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 8000);
81
82 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
83
84 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
85 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
86
87
88 List<RegionServerThread> regionServerThreads =
89 cluster.getRegionServerThreads();
90 int count = -1;
91 HRegion metaRegion = null;
92 for (RegionServerThread regionServerThread : regionServerThreads) {
93 HRegionServer regionServer = regionServerThread.getRegionServer();
94 metaRegion = regionServer.getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
95 count++;
96 regionServer.abort("");
97 if (null != metaRegion) break;
98 }
99 HRegionServer regionServer = cluster.getRegionServer(count);
100
101 TEST_UTIL.shutdownMiniHBaseCluster();
102
103
104 ZooKeeperWatcher zkw =
105 HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
106 metaRegion, regionServer.getServerName());
107
108 LOG.info("Staring cluster for second time");
109 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
110
111
112 log("Waiting for no more RIT");
113 ZKAssign.blockUntilNoRIT(zkw);
114
115 zkw.close();
116
117 TEST_UTIL.shutdownMiniCluster();
118 }
119
120
121
122
123
124
125
126
127
128 @Test (timeout=240000)
129 public void testSimpleMasterFailover() throws Exception {
130
131 final int NUM_MASTERS = 3;
132 final int NUM_RS = 3;
133
134
135 Configuration conf = HBaseConfiguration.create();
136
137
138 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
139 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
140 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
141
142
143 List<MasterThread> masterThreads = cluster.getMasterThreads();
144
145
146 for (MasterThread mt : masterThreads) {
147 assertTrue(mt.isAlive());
148 }
149
150
151 int numActive = 0;
152 int activeIndex = -1;
153 ServerName activeName = null;
154 HMaster active = null;
155 for (int i = 0; i < masterThreads.size(); i++) {
156 if (masterThreads.get(i).getMaster().isActiveMaster()) {
157 numActive++;
158 activeIndex = i;
159 active = masterThreads.get(activeIndex).getMaster();
160 activeName = active.getServerName();
161 }
162 }
163 assertEquals(1, numActive);
164 assertEquals(NUM_MASTERS, masterThreads.size());
165 LOG.info("Active master " + activeName);
166
167
168 assertNotNull(active);
169 ClusterStatus status = active.getClusterStatus();
170 assertTrue(status.getMaster().equals(activeName));
171 assertEquals(2, status.getBackupMastersSize());
172 assertEquals(2, status.getBackupMasters().size());
173
174
175 int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
176 HMaster master = cluster.getMaster(backupIndex);
177 LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
178 cluster.stopMaster(backupIndex, false);
179 cluster.waitOnMaster(backupIndex);
180
181
182 for (int i = 0; i < masterThreads.size(); i++) {
183 if (masterThreads.get(i).getMaster().isActiveMaster()) {
184 assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
185 activeIndex = i;
186 active = masterThreads.get(activeIndex).getMaster();
187 }
188 }
189 assertEquals(1, numActive);
190 assertEquals(2, masterThreads.size());
191 int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
192 LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " regions servers");
193 assertEquals(3, rsCount);
194
195
196 assertNotNull(active);
197 status = active.getClusterStatus();
198 assertTrue(status.getMaster().equals(activeName));
199 assertEquals(1, status.getBackupMastersSize());
200 assertEquals(1, status.getBackupMasters().size());
201
202
203 LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
204 cluster.stopMaster(activeIndex, false);
205 cluster.waitOnMaster(activeIndex);
206
207
208 assertTrue(cluster.waitForActiveAndReadyMaster());
209
210 LOG.debug("\n\nVerifying backup master is now active\n");
211
212 assertEquals(1, masterThreads.size());
213
214
215 active = masterThreads.get(0).getMaster();
216 assertNotNull(active);
217 status = active.getClusterStatus();
218 ServerName mastername = status.getMaster();
219 assertTrue(mastername.equals(active.getServerName()));
220 assertTrue(active.isActiveMaster());
221 assertEquals(0, status.getBackupMastersSize());
222 assertEquals(0, status.getBackupMasters().size());
223 int rss = status.getServersSize();
224 LOG.info("Active master " + mastername.getServerName() + " managing " +
225 rss + " region servers");
226 assertEquals(3, rss);
227
228
229 TEST_UTIL.shutdownMiniCluster();
230 }
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311 @Test (timeout=180000)
312 public void testMasterFailoverWithMockedRIT() throws Exception {
313
314 final int NUM_MASTERS = 1;
315 final int NUM_RS = 3;
316
317
318 Configuration conf = HBaseConfiguration.create();
319
320 conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
321 conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
322 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 3);
323 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 3);
324
325
326 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
327 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
328 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
329 log("Cluster started");
330
331
332 ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
333
334
335 List<MasterThread> masterThreads = cluster.getMasterThreads();
336 assertEquals(1, masterThreads.size());
337
338
339 assertTrue(cluster.waitForActiveAndReadyMaster());
340 HMaster master = masterThreads.get(0).getMaster();
341 assertTrue(master.isActiveMaster());
342 assertTrue(master.isInitialized());
343
344
345 master.balanceSwitch(false);
346
347
348 byte [] FAMILY = Bytes.toBytes("family");
349 byte [][] SPLIT_KEYS = new byte [][] {
350 new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
351 Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
352 Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
353 Bytes.toBytes("iii"), Bytes.toBytes("jjj")
354 };
355
356 byte [] enabledTable = Bytes.toBytes("enabledTable");
357 HTableDescriptor htdEnabled = new HTableDescriptor(enabledTable);
358 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
359
360 FileSystem filesystem = FileSystem.get(conf);
361 Path rootdir = filesystem.makeQualified(
362 new Path(conf.get(HConstants.HBASE_DIR)));
363
364 FSTableDescriptors.createTableDescriptor(filesystem, rootdir, htdEnabled);
365
366 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getName(), null, null);
367 createRegion(hriEnabled, rootdir, conf, htdEnabled);
368
369 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
370 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
371
372 byte [] disabledTable = Bytes.toBytes("disabledTable");
373 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
374 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
375
376 FSTableDescriptors.createTableDescriptor(filesystem, rootdir, htdDisabled);
377 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getName(), null, null);
378 createRegion(hriDisabled, rootdir, conf, htdDisabled);
379 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
380 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
381
382 log("Regions in META have been created");
383
384
385 assertEquals(2, cluster.countServedRegions());
386
387
388 HRegionServer hrs = cluster.getRegionServer(0);
389 ServerName serverName = hrs.getServerName();
390 HRegionInfo closingRegion = enabledRegions.remove(0);
391
392 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
393 enabledAndAssignedRegions.add(enabledRegions.remove(0));
394 enabledAndAssignedRegions.add(enabledRegions.remove(0));
395 enabledAndAssignedRegions.add(closingRegion);
396
397 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
398 disabledAndAssignedRegions.add(disabledRegions.remove(0));
399 disabledAndAssignedRegions.add(disabledRegions.remove(0));
400
401
402 for (HRegionInfo hri : enabledAndAssignedRegions) {
403 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
404 new RegionPlan(hri, null, serverName));
405 master.assignRegion(hri);
406 }
407 for (HRegionInfo hri : disabledAndAssignedRegions) {
408 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
409 new RegionPlan(hri, null, serverName));
410 master.assignRegion(hri);
411 }
412
413
414 log("Waiting for assignment to finish");
415 ZKAssign.blockUntilNoRIT(zkw);
416 log("Assignment completed");
417
418
419 log("Aborting master");
420 cluster.abortMaster(0);
421 cluster.waitOnMaster(0);
422 log("Master has aborted");
423
424
425
426
427
428
429 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
430 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
431
432 log("Beginning to mock scenarios");
433
434
435 ZKTable zktable = new ZKTable(zkw);
436 zktable.setDisabledTable(Bytes.toString(disabledTable));
437
438
439
440
441
442
443 HRegionInfo region = enabledRegions.remove(0);
444 regionsThatShouldBeOnline.add(region);
445 ZKAssign.createNodeOffline(zkw, region, serverName);
446
447
448
449
450 regionsThatShouldBeOnline.add(closingRegion);
451 ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
452
453
454
455
456
457
458 region = enabledRegions.remove(0);
459 regionsThatShouldBeOnline.add(region);
460 int version = ZKAssign.createNodeClosing(zkw, region, serverName);
461 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
462
463
464 region = disabledRegions.remove(0);
465 regionsThatShouldBeOffline.add(region);
466 version = ZKAssign.createNodeClosing(zkw, region, serverName);
467 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
468
469
470
471
472
473
474 region = enabledRegions.remove(0);
475 regionsThatShouldBeOnline.add(region);
476 ZKAssign.createNodeOffline(zkw, region, serverName);
477 ZKAssign.transitionNodeOpening(zkw, region, serverName);
478
479
480
481
482
483
484 region = enabledRegions.remove(0);
485 regionsThatShouldBeOnline.add(region);
486 ZKAssign.createNodeOffline(zkw, region, serverName);
487 hrs.openRegion(region);
488 while (true) {
489 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
490 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
491 break;
492 }
493 Thread.sleep(100);
494 }
495
496
497 region = disabledRegions.remove(0);
498 regionsThatShouldBeOffline.add(region);
499 ZKAssign.createNodeOffline(zkw, region, serverName);
500 hrs.openRegion(region);
501 while (true) {
502 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
503 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
504 break;
505 }
506 Thread.sleep(100);
507 }
508
509
510
511
512
513
514
515
516
517 log("Done mocking data up in ZK");
518
519
520 log("Starting up a new master");
521 master = cluster.startMaster().getMaster();
522 log("Waiting for master to be ready");
523 cluster.waitForActiveAndReadyMaster();
524 log("Master is ready");
525
526
527 log("Waiting for no more RIT");
528 ZKAssign.blockUntilNoRIT(zkw);
529 log("No more RIT in ZK, now doing final test verification");
530
531
532 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
533 for (JVMClusterUtil.RegionServerThread rst :
534 cluster.getRegionServerThreads()) {
535 onlineRegions.addAll(rst.getRegionServer().getOnlineRegions());
536 }
537
538
539 for (HRegionInfo hri : regionsThatShouldBeOnline) {
540 assertTrue(onlineRegions.contains(hri));
541 }
542
543
544 for (HRegionInfo hri : regionsThatShouldBeOffline) {
545 assertFalse(onlineRegions.contains(hri));
546 }
547
548 log("Done with verification, all passed, shutting down cluster");
549
550
551 TEST_UTIL.shutdownMiniCluster();
552 }
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611 @Test (timeout=180000)
612 public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
613
614 final int NUM_MASTERS = 1;
615 final int NUM_RS = 2;
616
617
618 Configuration conf = HBaseConfiguration.create();
619
620 conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
621 conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
622 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
623 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
624
625
626 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
627 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
628 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
629 log("Cluster started");
630
631
632 ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
633 "unittest", new Abortable() {
634
635 @Override
636 public void abort(String why, Throwable e) {
637 LOG.error("Fatal ZK Error: " + why, e);
638 org.junit.Assert.assertFalse("Fatal ZK error", true);
639 }
640
641 @Override
642 public boolean isAborted() {
643 return false;
644 }
645
646 });
647
648
649 List<MasterThread> masterThreads = cluster.getMasterThreads();
650 assertEquals(1, masterThreads.size());
651
652
653 assertTrue(cluster.waitForActiveAndReadyMaster());
654 HMaster master = masterThreads.get(0).getMaster();
655 assertTrue(master.isActiveMaster());
656 assertTrue(master.isInitialized());
657
658
659 master.balanceSwitch(false);
660
661
662 byte [] FAMILY = Bytes.toBytes("family");
663 byte [][] SPLIT_KEYS = new byte [][] {
664 new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
665 Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
666 Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
667 Bytes.toBytes("iii"), Bytes.toBytes("jjj")
668 };
669
670 byte [] enabledTable = Bytes.toBytes("enabledTable");
671 HTableDescriptor htdEnabled = new HTableDescriptor(enabledTable);
672 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
673 FileSystem filesystem = FileSystem.get(conf);
674 Path rootdir = filesystem.makeQualified(
675 new Path(conf.get(HConstants.HBASE_DIR)));
676
677 FSTableDescriptors.createTableDescriptor(filesystem, rootdir, htdEnabled);
678 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getName(),
679 null, null);
680 createRegion(hriEnabled, rootdir, conf, htdEnabled);
681
682 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
683 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
684
685 byte [] disabledTable = Bytes.toBytes("disabledTable");
686 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
687 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
688
689 FSTableDescriptors.createTableDescriptor(filesystem, rootdir, htdDisabled);
690 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getName(), null, null);
691 createRegion(hriDisabled, rootdir, conf, htdDisabled);
692
693 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
694 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
695
696 log("Regions in META have been created");
697
698
699 assertEquals(2, cluster.countServedRegions());
700
701
702 List<RegionServerThread> regionservers =
703 cluster.getRegionServerThreads();
704 HRegionServer hrs = regionservers.get(0).getRegionServer();
705
706
707 RegionServerThread hrsDeadThread = regionservers.get(1);
708 HRegionServer hrsDead = hrsDeadThread.getRegionServer();
709 ServerName deadServerName = hrsDead.getServerName();
710
711
712 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
713 enabledAndAssignedRegions.add(enabledRegions.remove(0));
714 enabledAndAssignedRegions.add(enabledRegions.remove(0));
715 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
716 disabledAndAssignedRegions.add(disabledRegions.remove(0));
717 disabledAndAssignedRegions.add(disabledRegions.remove(0));
718
719
720 for (HRegionInfo hri : enabledAndAssignedRegions) {
721 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
722 new RegionPlan(hri, null, hrs.getServerName()));
723 master.assignRegion(hri);
724 }
725 for (HRegionInfo hri : disabledAndAssignedRegions) {
726 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
727 new RegionPlan(hri, null, hrs.getServerName()));
728 master.assignRegion(hri);
729 }
730
731 assertTrue(" Table must be enabled.", master.getAssignmentManager()
732 .getZKTable().isEnabledTable("enabledTable"));
733
734 List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
735 enabledAndOnDeadRegions.add(enabledRegions.remove(0));
736 enabledAndOnDeadRegions.add(enabledRegions.remove(0));
737 List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
738 disabledAndOnDeadRegions.add(disabledRegions.remove(0));
739 disabledAndOnDeadRegions.add(disabledRegions.remove(0));
740
741
742 for (HRegionInfo hri : enabledAndOnDeadRegions) {
743 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
744 new RegionPlan(hri, null, deadServerName));
745 master.assignRegion(hri);
746 }
747 for (HRegionInfo hri : disabledAndOnDeadRegions) {
748 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
749 new RegionPlan(hri, null, deadServerName));
750 master.assignRegion(hri);
751 }
752
753
754 log("Waiting for assignment to finish");
755 ZKAssign.blockUntilNoRIT(zkw);
756 log("Assignment completed");
757
758
759 log("Aborting master");
760 cluster.abortMaster(0);
761 cluster.waitOnMaster(0);
762 log("Master has aborted");
763
764
765
766
767
768
769 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
770 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
771
772 log("Beginning to mock scenarios");
773
774
775 ZKTable zktable = new ZKTable(zkw);
776 zktable.setDisabledTable(Bytes.toString(disabledTable));
777
778 assertTrue(" The enabled table should be identified on master fail over.",
779 zktable.isEnabledTable("enabledTable"));
780
781
782
783
784
785
786 HRegionInfo region = enabledAndOnDeadRegions.remove(0);
787 regionsThatShouldBeOnline.add(region);
788 ZKAssign.createNodeClosing(zkw, region, deadServerName);
789 LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
790 region + "\n\n");
791
792
793 region = disabledAndOnDeadRegions.remove(0);
794 regionsThatShouldBeOffline.add(region);
795 ZKAssign.createNodeClosing(zkw, region, deadServerName);
796 LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
797 region + "\n\n");
798
799
800
801
802
803
804 region = enabledAndOnDeadRegions.remove(0);
805 regionsThatShouldBeOnline.add(region);
806 int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
807 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
808 LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
809 region + "\n\n");
810
811
812 region = disabledAndOnDeadRegions.remove(0);
813 regionsThatShouldBeOffline.add(region);
814 version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
815 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
816 LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
817 region + "\n\n");
818
819
820
821
822
823
824 region = enabledRegions.remove(0);
825 regionsThatShouldBeOnline.add(region);
826 ZKAssign.createNodeOffline(zkw, region, deadServerName);
827 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
828 LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
829 region + "\n\n");
830
831
832 region = disabledRegions.remove(0);
833 regionsThatShouldBeOffline.add(region);
834 ZKAssign.createNodeOffline(zkw, region, deadServerName);
835 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
836 LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
837 region + "\n\n");
838
839
840
841
842
843
844 region = enabledRegions.remove(0);
845 regionsThatShouldBeOnline.add(region);
846 ZKAssign.createNodeOffline(zkw, region, deadServerName);
847 hrsDead.openRegion(region);
848 while (true) {
849 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
850 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
851 break;
852 }
853 Thread.sleep(100);
854 }
855 LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" +
856 region + "\n\n");
857
858
859 region = disabledRegions.remove(0);
860 regionsThatShouldBeOffline.add(region);
861 ZKAssign.createNodeOffline(zkw, region, deadServerName);
862 hrsDead.openRegion(region);
863 while (true) {
864 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
865 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
866 break;
867 }
868 Thread.sleep(100);
869 }
870 LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" +
871 region + "\n\n");
872
873
874
875
876
877
878 region = enabledRegions.remove(0);
879 regionsThatShouldBeOnline.add(region);
880 ZKAssign.createNodeOffline(zkw, region, deadServerName);
881 hrsDead.openRegion(region);
882 while (true) {
883 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
884 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
885 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName());
886 break;
887 }
888 Thread.sleep(100);
889 }
890 LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
891 + "\n" + region + "\n\n");
892
893
894 region = disabledRegions.remove(0);
895 regionsThatShouldBeOffline.add(region);
896 ZKAssign.createNodeOffline(zkw, region, deadServerName);
897 hrsDead.openRegion(region);
898 while (true) {
899 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
900 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
901 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName());
902 break;
903 }
904 Thread.sleep(100);
905 }
906 LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
907 + "\n" + region + "\n\n");
908
909
910
911
912
913 log("Done mocking data up in ZK");
914
915
916 log("Killing RS " + deadServerName);
917 hrsDead.abort("Killing for unit test");
918 log("RS " + deadServerName + " killed");
919
920
921
922 while (hrsDeadThread.isAlive()) {
923 Threads.sleep(10);
924 }
925 log("Starting up a new master");
926 master = cluster.startMaster().getMaster();
927 log("Waiting for master to be ready");
928 assertTrue(cluster.waitForActiveAndReadyMaster());
929 log("Master is ready");
930
931
932
933
934
935
936
937
938 region = enabledRegions.remove(0);
939 regionsThatShouldBeOnline.add(region);
940 master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
941 new RegionState(region, RegionState.State.PENDING_OPEN, 0, null));
942 ZKAssign.createNodeOffline(zkw, region, master.getServerName());
943
944 region = disabledRegions.remove(0);
945 regionsThatShouldBeOffline.add(region);
946 master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
947 new RegionState(region, RegionState.State.PENDING_OPEN, 0, null));
948 ZKAssign.createNodeOffline(zkw, region, master.getServerName());
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971 log("Waiting for no more RIT");
972 ZKAssign.blockUntilNoRIT(zkw);
973 log("No more RIT in ZK");
974 long now = System.currentTimeMillis();
975 final long maxTime = 120000;
976 boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
977 if (!done) {
978 LOG.info("rit=" + master.assignmentManager.getRegionsInTransition());
979 }
980 long elapsed = System.currentTimeMillis() - now;
981 assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
982 elapsed < maxTime);
983 log("No more RIT in RIT map, doing final test verification");
984
985
986 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
987 for (JVMClusterUtil.RegionServerThread rst :
988 cluster.getRegionServerThreads()) {
989 try {
990 onlineRegions.addAll(rst.getRegionServer().getOnlineRegions());
991 } catch (org.apache.hadoop.hbase.regionserver.RegionServerStoppedException e) {
992 LOG.info("Got RegionServerStoppedException", e);
993 }
994 }
995
996
997 for (HRegionInfo hri : regionsThatShouldBeOnline) {
998 assertTrue("region=" + hri.getRegionNameAsString(), onlineRegions.contains(hri));
999 }
1000
1001
1002 for (HRegionInfo hri : regionsThatShouldBeOffline) {
1003 assertFalse(onlineRegions.contains(hri));
1004 }
1005
1006 log("Done with verification, all passed, shutting down cluster");
1007
1008
1009 TEST_UTIL.shutdownMiniCluster();
1010 }
1011
1012 HRegion createRegion(final HRegionInfo hri, final Path rootdir, final Configuration c,
1013 final HTableDescriptor htd)
1014 throws IOException {
1015 HRegion r = HRegion.createHRegion(hri, rootdir, c, htd);
1016
1017
1018
1019
1020
1021 HRegion.closeHRegion(r);
1022 return r;
1023 }
1024
1025
1026
1027
1028 private void log(String string) {
1029 LOG.info("\n\n" + string + " \n\n");
1030 }
1031
1032 @org.junit.Rule
1033 public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
1034 new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
1035 }
1036