1   /*
2    * Copyright The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import static org.junit.Assert.assertEquals;
23  import static org.junit.Assert.assertTrue;
24  
25  import java.io.IOException;
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.Set;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.hbase.HBaseTestingUtility;
34  import org.apache.hadoop.hbase.HColumnDescriptor;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.HRegionInfo;
37  import org.apache.hadoop.hbase.HTableDescriptor;
38  import org.apache.hadoop.hbase.LargeTests;
39  import org.apache.hadoop.hbase.MiniHBaseCluster;
40  import org.apache.hadoop.hbase.ServerName;
41  import org.apache.hadoop.hbase.client.HBaseAdmin;
42  import org.apache.hadoop.hbase.client.HTable;
43  import org.apache.hadoop.hbase.client.Put;
44  import org.apache.hadoop.hbase.client.ResultScanner;
45  import org.apache.hadoop.hbase.client.Scan;
46  import org.apache.hadoop.hbase.master.HMaster;
47  import org.apache.hadoop.hbase.master.MasterFileSystem;
48  import org.apache.hadoop.hbase.master.ServerManager;
49  import org.apache.hadoop.hbase.master.TestMasterFailover;
50  import org.apache.hadoop.hbase.util.Bytes;
51  import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
52  import org.apache.hadoop.hbase.util.Threads;
53  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
54  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
55  import org.apache.zookeeper.KeeperException;
56  import org.junit.AfterClass;
57  import org.junit.BeforeClass;
58  import org.junit.Test;
59  import org.junit.experimental.categories.Category;
60  
61  @Category(LargeTests.class)
62  public class TestRSKilledWhenMasterInitializing {
63    private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
64  
65    private static final HBaseTestingUtility TESTUTIL = new HBaseTestingUtility();
66    private static final int NUM_MASTERS = 1;
67    private static final int NUM_RS = 4;
68  
69    @BeforeClass
70    public static void setUpBeforeClass() throws Exception {
71      // Set it so that this test runs with my custom master
72      Configuration conf = TESTUTIL.getConfiguration();
73      conf.setClass(HConstants.MASTER_IMPL, TestingMaster.class, HMaster.class);
74      conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 3);
75      conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 4);
76  
77      // Start up the cluster.
78      TESTUTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
79    }
80  
81    @AfterClass
82    public static void tearDownAfterClass() throws Exception {
83      if (!TESTUTIL.getHBaseCluster().getMaster().isInitialized()) {
84        // master is not initialized and is waiting something forever.
85        for (MasterThread mt : TESTUTIL.getHBaseCluster().getLiveMasterThreads()) {
86          mt.interrupt();
87        }
88      }
89      TESTUTIL.shutdownMiniCluster();
90    }
91  
92    /**
93     * An HMaster instance used in this test. If 'TestingMaster.sleep' is set in
94     * the Configuration, then we'll sleep after log is split and we'll also
95     * return a custom RegionServerTracker.
96     */
97    public static class TestingMaster extends HMaster {
98      private boolean logSplit = false;
99  
100     public TestingMaster(Configuration conf) throws IOException,
101         KeeperException, InterruptedException {
102       super(conf);
103     }
104 
105     @Override
106     protected void splitLogAfterStartup(MasterFileSystem mfs) {
107       super.splitLogAfterStartup(mfs);
108       logSplit = true;
109       // If "TestingMaster.sleep" is set, sleep after log split.
110       if (getConfiguration().getBoolean("TestingMaster.sleep", false)) {
111         int duration = getConfiguration().getInt(
112             "TestingMaster.sleep.duration", 0);
113         Threads.sleep(duration);
114       }
115     }
116 
117 
118     public boolean isLogSplitAfterStartup() {
119       return logSplit;
120     }
121   }
122 
123   @Test(timeout = 120000)
124   public void testCorrectnessWhenMasterFailOver() throws Exception {
125     final byte[] TABLENAME = Bytes.toBytes("testCorrectnessWhenMasterFailOver");
126     final byte[] FAMILY = Bytes.toBytes("family");
127     final byte[][] SPLITKEYS = { Bytes.toBytes("b"), Bytes.toBytes("i") };
128 
129     MiniHBaseCluster cluster = TESTUTIL.getHBaseCluster();
130 
131     HTableDescriptor desc = new HTableDescriptor(TABLENAME);
132     desc.addFamily(new HColumnDescriptor(FAMILY));
133     HBaseAdmin hbaseAdmin = TESTUTIL.getHBaseAdmin();
134     hbaseAdmin.createTable(desc, SPLITKEYS);
135 
136     assertTrue(hbaseAdmin.isTableAvailable(TABLENAME));
137 
138     HTable table = new HTable(TESTUTIL.getConfiguration(), TABLENAME);
139     List<Put> puts = new ArrayList<Put>();
140     Put put1 = new Put(Bytes.toBytes("a"));
141     put1.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value"));
142     Put put2 = new Put(Bytes.toBytes("h"));
143     put2.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value"));
144     Put put3 = new Put(Bytes.toBytes("o"));
145     put3.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value"));
146     puts.add(put1);
147     puts.add(put2);
148     puts.add(put3);
149     table.put(puts);
150     ResultScanner resultScanner = table.getScanner(new Scan());
151     int count = 0;
152     while (resultScanner.next() != null) {
153       count++;
154     }
155     resultScanner.close();
156     table.close();
157     assertEquals(3, count);
158 
159     /* Starting test */
160     cluster.getConfiguration().setBoolean("TestingMaster.sleep", true);
161     cluster.getConfiguration().setInt("TestingMaster.sleep.duration", 10000);
162 
163     /* NO.1 .META. region correctness */
164     // First abort master
165     abortMaster(cluster);
166     TestingMaster master = startMasterAndWaitUntilLogSplit(cluster);
167 
168     // Second kill meta server
169     int metaServerNum = cluster.getServerWithMeta();
170     int rootServerNum = cluster.getServerWith(HRegionInfo.ROOT_REGIONINFO
171         .getRegionName());
172     HRegionServer metaRS = cluster.getRegionServer(metaServerNum);
173     LOG.debug("Killing metaRS and carryingRoot = "
174         + (metaServerNum == rootServerNum));
175     metaRS.kill();
176     metaRS.join();
177 
178     /*
179      * Sleep double time of TestingMaster.sleep.duration, so we can ensure that
180      * master has already assigned ROOTandMETA or is blocking on assigning
181      * ROOTandMETA
182      */
183     Thread.sleep(10000 * 2);
184 
185     waitUntilMasterIsInitialized(master);
186 
187     // Third check whether data is correct in meta region
188     assertTrue(hbaseAdmin.isTableAvailable(TABLENAME));
189 
190     /*
191      * NO.2 -ROOT- region correctness . If the .META. server killed in the NO.1
192      * is also carrying -ROOT- region, it is not needed
193      */
194     if (rootServerNum != metaServerNum) {
195       // First abort master
196       abortMaster(cluster);
197       master = startMasterAndWaitUntilLogSplit(cluster);
198 
199       // Second kill meta server
200       HRegionServer rootRS = cluster.getRegionServer(rootServerNum);
201       LOG.debug("Killing rootRS");
202       rootRS.kill();
203       rootRS.join();
204 
205       /*
206        * Sleep double time of TestingMaster.sleep.duration, so we can ensure
207        * that master has already assigned ROOTandMETA or is blocking on
208        * assigning ROOTandMETA
209        */
210       Thread.sleep(10000 * 2);
211       waitUntilMasterIsInitialized(master);
212 
213       // Third check whether data is correct in meta region
214       assertTrue(hbaseAdmin.isTableAvailable(TABLENAME));
215     }
216 
217     /* NO.3 data region correctness */
218     ServerManager serverManager = cluster.getMaster().getServerManager();
219     while (serverManager.areDeadServersInProgress()) {
220       Thread.sleep(100);
221     }
222     // Create a ZKW to use in the test
223     ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTUTIL);
224     ZKAssign.blockUntilNoRIT(zkw);
225 
226     table = new HTable(TESTUTIL.getConfiguration(), TABLENAME);
227     resultScanner = table.getScanner(new Scan());
228     count = 0;
229     while (resultScanner.next() != null) {
230       count++;
231     }
232     resultScanner.close();
233     table.close();
234     assertEquals(3, count);
235   }
236 
237   private void abortMaster(MiniHBaseCluster cluster)
238       throws InterruptedException {
239     for (MasterThread mt : cluster.getLiveMasterThreads()) {
240       if (mt.getMaster().isActiveMaster()) {
241         mt.getMaster().abort("Aborting for tests", new Exception("Trace info"));
242         mt.join();
243         break;
244       }
245     }
246     LOG.debug("Master is aborted");
247   }
248 
249   private TestingMaster startMasterAndWaitUntilLogSplit(MiniHBaseCluster cluster)
250       throws IOException, InterruptedException {
251     TestingMaster master = (TestingMaster) cluster.startMaster().getMaster();
252     while (!master.isLogSplitAfterStartup()) {
253       Thread.sleep(100);
254     }
255     LOG.debug("splitted:" + master.isLogSplitAfterStartup() + ",initialized:"
256         + master.isInitialized());
257     return master;
258   }
259 
260   private void waitUntilMasterIsInitialized(HMaster master)
261       throws InterruptedException {
262     while (!master.isInitialized()) {
263       Thread.sleep(100);
264     }
265     LOG.debug("master isInitialized");
266   }
267   
268   @org.junit.Rule
269   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
270     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
271 
272 }