View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.master;
21  
22  import java.util.concurrent.atomic.AtomicBoolean;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.hbase.HConstants;
27  import org.apache.hadoop.hbase.Server;
28  import org.apache.hadoop.hbase.ServerName;
29  import org.apache.hadoop.hbase.monitoring.MonitoredTask;
30  import org.apache.hadoop.hbase.util.Bytes;
31  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
32  import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
33  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
34  import org.apache.zookeeper.KeeperException;
35  import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
36  
37  /**
38   * Handles everything on master-side related to master election.
39   *
40   * <p>Listens and responds to ZooKeeper notifications on the master znode,
41   * both <code>nodeCreated</code> and <code>nodeDeleted</code>.
42   *
43   * <p>Contains blocking methods which will hold up backup masters, waiting
44   * for the active master to fail.
45   *
46   * <p>This class is instantiated in the HMaster constructor and the method
47   * #blockUntilBecomingActiveMaster() is called to wait until becoming
48   * the active master of the cluster.
49   */
50  class ActiveMasterManager extends ZooKeeperListener {
51    private static final Log LOG = LogFactory.getLog(ActiveMasterManager.class);
52  
53    final AtomicBoolean clusterHasActiveMaster = new AtomicBoolean(false);
54  
55    private final ServerName sn;
56    private final Server master;
57  
58    /**
59     * @param watcher
60     * @param sn ServerName
61     * @param master In an instance of a Master.
62     */
63    ActiveMasterManager(ZooKeeperWatcher watcher, ServerName sn, Server master) {
64      super(watcher);
65      this.sn = sn;
66      this.master = master;
67    }
68  
69    @Override
70    public void nodeCreated(String path) {
71      if(path.equals(watcher.masterAddressZNode) && !master.isStopped()) {
72        handleMasterNodeChange();
73      }
74    }
75  
76    @Override
77    public void nodeDeleted(String path) {
78      if(path.equals(watcher.masterAddressZNode) && !master.isStopped()) {
79        handleMasterNodeChange();
80      }
81    }
82  
83    /**
84     * Handle a change in the master node.  Doesn't matter whether this was called
85     * from a nodeCreated or nodeDeleted event because there are no guarantees
86     * that the current state of the master node matches the event at the time of
87     * our next ZK request.
88     *
89     * <p>Uses the watchAndCheckExists method which watches the master address node
90     * regardless of whether it exists or not.  If it does exist (there is an
91     * active master), it returns true.  Otherwise it returns false.
92     *
93     * <p>A watcher is set which guarantees that this method will get called again if
94     * there is another change in the master node.
95     */
96    private void handleMasterNodeChange() {
97      // Watch the node and check if it exists.
98      try {
99        synchronized(clusterHasActiveMaster) {
100         if(ZKUtil.watchAndCheckExists(watcher, watcher.masterAddressZNode)) {
101           // A master node exists, there is an active master
102           LOG.debug("A master is now available");
103           clusterHasActiveMaster.set(true);
104         } else {
105           // Node is no longer there, cluster does not have an active master
106           LOG.debug("No master available. Notifying waiting threads");
107           clusterHasActiveMaster.set(false);
108           // Notify any thread waiting to become the active master
109           clusterHasActiveMaster.notifyAll();
110         }
111       }
112     } catch (KeeperException ke) {
113       master.abort("Received an unexpected KeeperException, aborting", ke);
114     }
115   }
116 
117   /**
118    * Block until becoming the active master.
119    *
120    * Method blocks until there is not another active master and our attempt
121    * to become the new active master is successful.
122    *
123    * This also makes sure that we are watching the master znode so will be
124    * notified if another master dies.
125    * @param startupStatus
126    * @return True if no issue becoming active master else false if another
127    * master was running or if some other problem (zookeeper, stop flag has been
128    * set on this Master)
129    */
130   boolean blockUntilBecomingActiveMaster(MonitoredTask startupStatus,
131     ClusterStatusTracker clusterStatusTracker) {
132     while (true) {
133       startupStatus.setStatus("Trying to register in ZK as active master");
134       // Try to become the active master, watch if there is another master.
135       // Write out our ServerName as versioned bytes.
136       try {
137         String backupZNode = ZKUtil.joinZNode(
138           this.watcher.backupMasterAddressesZNode, this.sn.toString());
139         if (ZKUtil.createEphemeralNodeAndWatch(this.watcher,
140           this.watcher.masterAddressZNode, this.sn.getVersionedBytes())) {
141           // If we were a backup master before, delete our ZNode from the backup
142           // master directory since we are the active now
143           LOG.info("Deleting ZNode for " + backupZNode +
144             " from backup master directory");
145           ZKUtil.deleteNodeFailSilent(this.watcher, backupZNode);
146 
147           // We are the master, return
148           startupStatus.setStatus("Successfully registered as active master.");
149           this.clusterHasActiveMaster.set(true);
150           LOG.info("Master=" + this.sn);
151           return true;
152         }
153 
154         // There is another active master running elsewhere or this is a restart
155         // and the master ephemeral node has not expired yet.
156         this.clusterHasActiveMaster.set(true);
157 
158         /*
159          * Add a ZNode for ourselves in the backup master directory since we are
160          * not the active master.
161          *
162          * If we become the active master later, ActiveMasterManager will delete
163          * this node explicitly.  If we crash before then, ZooKeeper will delete
164          * this node for us since it is ephemeral.
165          */
166         LOG.info("Adding ZNode for " + backupZNode +
167           " in backup master directory");
168         ZKUtil.createEphemeralNodeAndWatch(this.watcher, backupZNode,
169           this.sn.getVersionedBytes());
170 
171         String msg;
172         byte [] bytes =
173           ZKUtil.getDataAndWatch(this.watcher, this.watcher.masterAddressZNode);
174         if (bytes == null) {
175           msg = ("A master was detected, but went down before its address " +
176             "could be read.  Attempting to become the next active master");
177         } else {
178           ServerName currentMaster = ServerName.parseVersionedServerName(bytes);
179           if (ServerName.isSameHostnameAndPort(currentMaster, this.sn)) {
180             msg = ("Current master has this master's address, " +
181               currentMaster + "; master was restarted? Deleting node.");
182             // Hurry along the expiration of the znode.
183             ZKUtil.deleteNode(this.watcher, this.watcher.masterAddressZNode);
184           } else {
185             msg = "Another master is the active master, " + currentMaster +
186               "; waiting to become the next active master";
187           }
188         }
189         LOG.info(msg);
190         startupStatus.setStatus(msg);
191       } catch (KeeperException ke) {
192         master.abort("Received an unexpected KeeperException, aborting", ke);
193         return false;
194       }
195       synchronized (this.clusterHasActiveMaster) {
196         while (this.clusterHasActiveMaster.get() && !this.master.isStopped()) {
197           try {
198             this.clusterHasActiveMaster.wait();
199           } catch (InterruptedException e) {
200             // We expect to be interrupted when a master dies, will fall out if so
201             LOG.debug("Interrupted waiting for master to die", e);
202           }
203         }
204         if (!clusterStatusTracker.isClusterUp()) {
205           this.master.stop("Cluster went down before this master became active");
206         }
207         if (this.master.isStopped()) {
208           return false;
209         }
210         // Try to become active master again now that there is no active master
211       }
212     }
213   }
214 
215   /**
216    * @return True if cluster has an active master.
217    */
218   public boolean isActiveMaster() {
219     try {
220       if (ZKUtil.checkExists(watcher, watcher.masterAddressZNode) >= 0) {
221         return true;
222       }
223     } 
224     catch (KeeperException ke) {
225       LOG.info("Received an unexpected KeeperException when checking " +
226           "isActiveMaster : "+ ke);
227     }
228     return false;
229   }
230 
231   public void stop() {
232     try {
233       // If our address is in ZK, delete it on our way out
234       byte [] bytes =
235         ZKUtil.getDataAndWatch(watcher, watcher.masterAddressZNode);
236       // TODO: redo this to make it atomic (only added for tests)
237       ServerName master = bytes == null ? null : ServerName.parseVersionedServerName(bytes);
238       if (master != null &&  master.equals(this.sn)) {
239         ZKUtil.deleteNode(watcher, watcher.masterAddressZNode);
240       }
241     } catch (KeeperException e) {
242       LOG.error(this.watcher.prefix("Error deleting our own master address node"), e);
243     }
244   }
245 }