1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase;
19  
20  import java.io.IOException;
21  import java.util.HashMap;
22  
23  import org.apache.hadoop.classification.InterfaceAudience;
24  import org.apache.hadoop.conf.Configuration;
25  import org.apache.hadoop.hbase.ClusterManager.ServiceType;
26  import org.apache.hadoop.hbase.ipc.HRegionInterface;
27  import org.apache.hadoop.hbase.ipc.HMasterInterface;
28  import org.apache.hadoop.hbase.client.HBaseAdmin;
29  import org.apache.hadoop.hbase.client.HConnection;
30  import org.apache.hadoop.hbase.client.HConnectionManager;
31  import org.apache.hadoop.hbase.util.Threads;
32  
33  import com.google.common.collect.Sets;
34  
35  /**
36   * Manages the interactions with an already deployed distributed cluster (as opposed to
37   * a pseudo-distributed, or mini/local cluster). This is used by integration and system tests.
38   */
39  @InterfaceAudience.Private
40  public class DistributedHBaseCluster extends HBaseCluster {
41  
42    private HBaseAdmin admin;
43  
44    private ClusterManager clusterManager;
45  
46    public DistributedHBaseCluster(Configuration conf, ClusterManager clusterManager)
47        throws IOException {
48      super(conf);
49      this.clusterManager = clusterManager;
50      this.admin = new HBaseAdmin(conf);
51      this.initialClusterStatus = getClusterStatus();
52    }
53  
54    public void setClusterManager(ClusterManager clusterManager) {
55      this.clusterManager = clusterManager;
56    }
57  
58    public ClusterManager getClusterManager() {
59      return clusterManager;
60    }
61  
62    /**
63     * Returns a ClusterStatus for this HBase cluster
64     * @throws IOException
65     */
66    @Override
67    public ClusterStatus getClusterStatus() throws IOException {
68      return admin.getClusterStatus();
69    }
70  
71    @Override
72    public ClusterStatus getInitialClusterStatus() throws IOException {
73      return initialClusterStatus;
74    }
75  
76    @Override
77    public void close() throws IOException {
78      if (this.admin != null) {
79        admin.close();
80      }
81    }
82  
83    @Override
84    public void startRegionServer(String hostname) throws IOException {
85      LOG.info("Starting RS on: " + hostname);
86      clusterManager.start(ServiceType.HBASE_REGIONSERVER, hostname);
87    }
88  
89    @Override
90    public void killRegionServer(ServerName serverName) throws IOException {
91      LOG.info("Aborting RS: " + serverName.getServerName());
92      clusterManager.kill(ServiceType.HBASE_REGIONSERVER, serverName.getHostname());
93    }
94  
95    @Override
96    public void stopRegionServer(ServerName serverName) throws IOException {
97      LOG.info("Stopping RS: " + serverName.getServerName());
98      clusterManager.stop(ServiceType.HBASE_REGIONSERVER, serverName.getHostname());
99    }
100 
101   @Override
102   public void waitForRegionServerToStop(ServerName serverName, long timeout) throws IOException {
103     waitForServiceToStop(ServiceType.HBASE_REGIONSERVER, serverName, timeout);
104   }
105 
106   private void waitForServiceToStop(ServiceType service, ServerName serverName, long timeout)
107     throws IOException {
108     LOG.info("Waiting service:" + service + " to stop: " + serverName.getServerName());
109     long start = System.currentTimeMillis();
110 
111     while ((System.currentTimeMillis() - start) < timeout) {
112       if (!clusterManager.isRunning(service, serverName.getHostname())) {
113         return;
114       }
115       Threads.sleep(1000);
116     }
117     throw new IOException("did timeout waiting for service to stop:" + serverName);
118   }
119 
120   @Override
121   public HMasterInterface getMasterAdmin() throws IOException {
122     HConnection conn = HConnectionManager.getConnection(conf);
123     return conn.getMaster();
124   }
125 
126   @Override
127   public void startMaster(String hostname) throws IOException {
128     LOG.info("Starting Master on: " + hostname);
129     clusterManager.start(ServiceType.HBASE_MASTER, hostname);
130   }
131 
132   @Override
133   public void killMaster(ServerName serverName) throws IOException {
134     LOG.info("Aborting Master: " + serverName.getServerName());
135     clusterManager.kill(ServiceType.HBASE_MASTER, serverName.getHostname());
136   }
137 
138   @Override
139   public void stopMaster(ServerName serverName) throws IOException {
140     LOG.info("Stopping Master: " + serverName.getServerName());
141     clusterManager.stop(ServiceType.HBASE_MASTER, serverName.getHostname());
142   }
143 
144   @Override
145   public void waitForMasterToStop(ServerName serverName, long timeout) throws IOException {
146     waitForServiceToStop(ServiceType.HBASE_MASTER, serverName, timeout);
147   }
148 
149   @Override
150   public boolean waitForActiveAndReadyMaster(long timeout) throws IOException {
151     long start = System.currentTimeMillis();
152     while (System.currentTimeMillis() - start < timeout) {
153       try {
154         getMasterAdmin();
155         return true;
156       } catch (MasterNotRunningException m) {
157         LOG.warn("Master not started yet " + m);
158       } catch (ZooKeeperConnectionException e) {
159         LOG.warn("Failed to connect to ZK " + e);
160       }
161       Threads.sleep(1000);
162     }
163     return false;
164   }
165 
166   @Override
167   public ServerName getServerHoldingRegion(byte[] regionName) throws IOException {
168     HConnection connection = admin.getConnection();
169     HRegionLocation regionLoc = connection.locateRegion(regionName);
170     if (regionLoc == null) {
171       return null;
172     }
173 
174     org.apache.hadoop.hbase.HServerInfo sn
175 		= connection.getHRegionConnection(regionLoc.getHostname(), regionLoc.getPort()).getHServerInfo();
176 
177     return new ServerName(sn.getServerAddress().getHostname(), sn.getServerAddress().getPort(), sn.getStartCode());
178   }
179 
180   @Override
181   public void waitUntilShutDown() {
182     //Simply wait for a few seconds for now (after issuing serverManager.kill
183     throw new RuntimeException("Not implemented yet");
184   }
185 
186   @Override
187   public void shutdown() throws IOException {
188     //not sure we want this
189     throw new RuntimeException("Not implemented yet");
190   }
191 
192   @Override
193   public boolean isDistributedCluster() {
194     return true;
195   }
196 
197   @Override
198   public void restoreClusterStatus(ClusterStatus initial) throws IOException {
199     //TODO: caution: not tested throughly
200     ClusterStatus current = getClusterStatus();
201 
202     //restore masters
203 
204     //check whether current master has changed
205     if (!ServerName.isSameHostnameAndPort(initial.getMaster(), current.getMaster())) {
206       //master has changed, we would like to undo this.
207       //1. Kill the current backups
208       //2. Stop current master
209       //3. Start a master at the initial hostname (if not already running as backup)
210       //4. Start backup masters
211       boolean foundOldMaster = false;
212       for (ServerName currentBackup : current.getBackupMasters()) {
213         if (!ServerName.isSameHostnameAndPort(currentBackup, initial.getMaster())) {
214           stopMaster(currentBackup);
215         } else {
216           foundOldMaster = true;
217         }
218       }
219       stopMaster(current.getMaster());
220       if (foundOldMaster) { //if initial master is not running as a backup
221         startMaster(initial.getMaster().getHostname());
222       }
223       waitForActiveAndReadyMaster(); //wait so that active master takes over
224 
225       //start backup masters
226       for (ServerName backup : initial.getBackupMasters()) {
227         //these are not started in backup mode, but we should already have an active master
228         startMaster(backup.getHostname());
229       }
230     } else {
231       //current master has not changed, match up backup masters
232       HashMap<String, ServerName> initialBackups = new HashMap<String, ServerName>();
233       HashMap<String, ServerName> currentBackups = new HashMap<String, ServerName>();
234 
235       for (ServerName server : initial.getBackupMasters()) {
236         initialBackups.put(server.getHostname(), server);
237       }
238       for (ServerName server : current.getBackupMasters()) {
239         currentBackups.put(server.getHostname(), server);
240       }
241 
242       for (String hostname : Sets.difference(initialBackups.keySet(), currentBackups.keySet())) {
243         startMaster(hostname);
244       }
245 
246       for (String hostname : Sets.difference(currentBackups.keySet(), initialBackups.keySet())) {
247         stopMaster(currentBackups.get(hostname));
248       }
249     }
250 
251     //restore region servers
252     HashMap<String, ServerName> initialServers = new HashMap<String, ServerName>();
253     HashMap<String, ServerName> currentServers = new HashMap<String, ServerName>();
254 
255     for (ServerName server : initial.getServers()) {
256       initialServers.put(server.getHostname(), server);
257     }
258     for (ServerName server : current.getServers()) {
259       currentServers.put(server.getHostname(), server);
260     }
261 
262     for (String hostname : Sets.difference(initialServers.keySet(), currentServers.keySet())) {
263       startRegionServer(hostname);
264     }
265 
266     for (String hostname : Sets.difference(currentServers.keySet(), initialServers.keySet())) {
267       stopRegionServer(currentServers.get(hostname));
268     }
269   }
270 }